mac80211: add fq performace improvements

Improves performance under load

Signed-off-by: Felix Fietkau <nbd@nbd.name>
This commit is contained in:
Felix Fietkau 2020-11-25 19:51:31 +01:00
parent 6d103beb15
commit 268210cec8
4 changed files with 554 additions and 1 deletions

View File

@ -0,0 +1,95 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 25 Nov 2020 18:03:46 +0100
Subject: [PATCH] net/fq_impl: bulk-free packets from a flow on overmemory
This is similar to what sch_fq_codel does. It also amortizes the worst
case cost of a follow-up patch that changes the selection of the biggest
flow for dropping packets
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/include/net/fq_impl.h
+++ b/include/net/fq_impl.h
@@ -11,17 +11,25 @@
/* functions that are embedded into includer */
+
+static void
+__fq_adjust_removal(struct fq *fq, struct fq_flow *flow, unsigned int packets,
+ unsigned int bytes, unsigned int truesize)
+{
+ struct fq_tin *tin = flow->tin;
+
+ tin->backlog_bytes -= bytes;
+ tin->backlog_packets -= packets;
+ flow->backlog -= bytes;
+ fq->backlog -= packets;
+ fq->memory_usage -= truesize;
+}
+
static void fq_adjust_removal(struct fq *fq,
struct fq_flow *flow,
struct sk_buff *skb)
{
- struct fq_tin *tin = flow->tin;
-
- tin->backlog_bytes -= skb->len;
- tin->backlog_packets--;
- flow->backlog -= skb->len;
- fq->backlog--;
- fq->memory_usage -= skb->truesize;
+ __fq_adjust_removal(fq, flow, 1, skb->len, skb->truesize);
}
static void fq_rejigger_backlog(struct fq *fq, struct fq_flow *flow)
@@ -59,6 +67,34 @@ static struct sk_buff *fq_flow_dequeue(s
return skb;
}
+static int fq_flow_drop(struct fq *fq, struct fq_flow *flow,
+ fq_skb_free_t free_func)
+{
+ unsigned int packets = 0, bytes = 0, truesize = 0;
+ struct fq_tin *tin = flow->tin;
+ struct sk_buff *skb;
+ int pending;
+
+ lockdep_assert_held(&fq->lock);
+
+ pending = min_t(int, 32, skb_queue_len(&flow->queue) / 2);
+ do {
+ skb = __skb_dequeue(&flow->queue);
+ if (!skb)
+ break;
+
+ packets++;
+ bytes += skb->len;
+ truesize += skb->truesize;
+ free_func(fq, tin, flow, skb);
+ } while (packets < pending);
+
+ __fq_adjust_removal(fq, flow, packets, bytes, truesize);
+ fq_rejigger_backlog(fq, flow);
+
+ return packets;
+}
+
static struct sk_buff *fq_tin_dequeue(struct fq *fq,
struct fq_tin *tin,
fq_tin_dequeue_t dequeue_func)
@@ -190,12 +226,9 @@ static void fq_tin_enqueue(struct fq *fq
if (!flow)
return;
- skb = fq_flow_dequeue(fq, flow);
- if (!skb)
+ if (!fq_flow_drop(fq, flow, free_func))
return;
- free_func(fq, flow->tin, flow, skb);
-
flow->tin->overlimit++;
fq->overlimit++;
if (oom) {

View File

@ -0,0 +1,144 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 25 Nov 2020 18:09:10 +0100
Subject: [PATCH] net/fq_impl: drop get_default_func, move default flow to
fq_tin
Simplifies the code and prepares for a rework of scanning for flows on
overmemory drop.
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/include/net/fq.h
+++ b/include/net/fq.h
@@ -47,6 +47,7 @@ struct fq_flow {
struct fq_tin {
struct list_head new_flows;
struct list_head old_flows;
+ struct fq_flow default_flow;
u32 backlog_bytes;
u32 backlog_packets;
u32 overlimit;
--- a/include/net/fq_impl.h
+++ b/include/net/fq_impl.h
@@ -151,8 +151,7 @@ static u32 fq_flow_idx(struct fq *fq, st
static struct fq_flow *fq_flow_classify(struct fq *fq,
struct fq_tin *tin, u32 idx,
- struct sk_buff *skb,
- fq_flow_get_default_t get_default_func)
+ struct sk_buff *skb)
{
struct fq_flow *flow;
@@ -160,7 +159,7 @@ static struct fq_flow *fq_flow_classify(
flow = &fq->flows[idx];
if (flow->tin && flow->tin != tin) {
- flow = get_default_func(fq, tin, idx, skb);
+ flow = &tin->default_flow;
tin->collisions++;
fq->collisions++;
}
@@ -192,15 +191,14 @@ static void fq_recalc_backlog(struct fq
static void fq_tin_enqueue(struct fq *fq,
struct fq_tin *tin, u32 idx,
struct sk_buff *skb,
- fq_skb_free_t free_func,
- fq_flow_get_default_t get_default_func)
+ fq_skb_free_t free_func)
{
struct fq_flow *flow;
bool oom;
lockdep_assert_held(&fq->lock);
- flow = fq_flow_classify(fq, tin, idx, skb, get_default_func);
+ flow = fq_flow_classify(fq, tin, idx, skb);
flow->tin = tin;
flow->backlog += skb->len;
@@ -331,6 +329,7 @@ static void fq_tin_init(struct fq_tin *t
{
INIT_LIST_HEAD(&tin->new_flows);
INIT_LIST_HEAD(&tin->old_flows);
+ fq_flow_init(&tin->default_flow);
}
static int fq_init(struct fq *fq, int flows_cnt)
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -855,7 +855,6 @@ enum txq_info_flags {
*/
struct txq_info {
struct fq_tin tin;
- struct fq_flow def_flow;
struct codel_vars def_cvars;
struct codel_stats cstats;
struct sk_buff_head frags;
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1322,7 +1322,7 @@ static struct sk_buff *codel_dequeue_fun
fq = &local->fq;
if (cvars == &txqi->def_cvars)
- flow = &txqi->def_flow;
+ flow = &txqi->tin.default_flow;
else
flow = &fq->flows[cvars - local->cvars];
@@ -1365,7 +1365,7 @@ static struct sk_buff *fq_tin_dequeue_fu
cparams = &local->cparams;
}
- if (flow == &txqi->def_flow)
+ if (flow == &tin->default_flow)
cvars = &txqi->def_cvars;
else
cvars = &local->cvars[flow - fq->flows];
@@ -1392,17 +1392,6 @@ static void fq_skb_free_func(struct fq *
ieee80211_free_txskb(&local->hw, skb);
}
-static struct fq_flow *fq_flow_get_default_func(struct fq *fq,
- struct fq_tin *tin,
- int idx,
- struct sk_buff *skb)
-{
- struct txq_info *txqi;
-
- txqi = container_of(tin, struct txq_info, tin);
- return &txqi->def_flow;
-}
-
static void ieee80211_txq_enqueue(struct ieee80211_local *local,
struct txq_info *txqi,
struct sk_buff *skb)
@@ -1415,8 +1404,7 @@ static void ieee80211_txq_enqueue(struct
spin_lock_bh(&fq->lock);
fq_tin_enqueue(fq, tin, flow_idx, skb,
- fq_skb_free_func,
- fq_flow_get_default_func);
+ fq_skb_free_func);
spin_unlock_bh(&fq->lock);
}
@@ -1459,7 +1447,6 @@ void ieee80211_txq_init(struct ieee80211
struct txq_info *txqi, int tid)
{
fq_tin_init(&txqi->tin);
- fq_flow_init(&txqi->def_flow);
codel_vars_init(&txqi->def_cvars);
codel_stats_init(&txqi->cstats);
__skb_queue_head_init(&txqi->frags);
@@ -3310,8 +3297,7 @@ static bool ieee80211_amsdu_aggregate(st
*/
tin = &txqi->tin;
- flow = fq_flow_classify(fq, tin, flow_idx, skb,
- fq_flow_get_default_func);
+ flow = fq_flow_classify(fq, tin, flow_idx, skb);
head = skb_peek_tail(&flow->queue);
if (!head || skb_is_gso(head))
goto out;

View File

@ -0,0 +1,314 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 25 Nov 2020 18:10:34 +0100
Subject: [PATCH] net/fq_impl: do not maintain a backlog-sorted list of
flows
A sorted flow list is only needed to drop packets in the biggest flow when
hitting the overmemory condition.
By scanning flows only when needed, we can avoid paying the cost of
maintaining the list under normal conditions
In order to avoid scanning lots of empty flows and touching too many cold
cache lines, a bitmap of flows with backlog is maintained
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
--- a/include/net/fq.h
+++ b/include/net/fq.h
@@ -19,8 +19,6 @@ struct fq_tin;
* @flowchain: can be linked to fq_tin's new_flows or old_flows. Used for DRR++
* (deficit round robin) based round robin queuing similar to the one
* found in net/sched/sch_fq_codel.c
- * @backlogchain: can be linked to other fq_flow and fq. Used to keep track of
- * fat flows and efficient head-dropping if packet limit is reached
* @queue: sk_buff queue to hold packets
* @backlog: number of bytes pending in the queue. The number of packets can be
* found in @queue.qlen
@@ -29,7 +27,6 @@ struct fq_tin;
struct fq_flow {
struct fq_tin *tin;
struct list_head flowchain;
- struct list_head backlogchain;
struct sk_buff_head queue;
u32 backlog;
int deficit;
@@ -47,6 +44,7 @@ struct fq_flow {
struct fq_tin {
struct list_head new_flows;
struct list_head old_flows;
+ struct list_head tin_list;
struct fq_flow default_flow;
u32 backlog_bytes;
u32 backlog_packets;
@@ -60,14 +58,14 @@ struct fq_tin {
/**
* struct fq - main container for fair queuing purposes
*
- * @backlogs: linked to fq_flows. Used to maintain fat flows for efficient
- * head-dropping when @backlog reaches @limit
* @limit: max number of packets that can be queued across all flows
* @backlog: number of packets queued across all flows
*/
struct fq {
struct fq_flow *flows;
- struct list_head backlogs;
+ unsigned long *flows_bitmap;
+
+ struct list_head tin_backlog;
spinlock_t lock;
u32 flows_cnt;
u32 limit;
--- a/include/net/fq_impl.h
+++ b/include/net/fq_impl.h
@@ -17,12 +17,24 @@ __fq_adjust_removal(struct fq *fq, struc
unsigned int bytes, unsigned int truesize)
{
struct fq_tin *tin = flow->tin;
+ int idx;
tin->backlog_bytes -= bytes;
tin->backlog_packets -= packets;
flow->backlog -= bytes;
fq->backlog -= packets;
fq->memory_usage -= truesize;
+
+ if (flow->backlog)
+ return;
+
+ if (flow == &tin->default_flow) {
+ list_del_init(&tin->tin_list);
+ return;
+ }
+
+ idx = flow - fq->flows;
+ __clear_bit(idx, fq->flows_bitmap);
}
static void fq_adjust_removal(struct fq *fq,
@@ -32,24 +44,6 @@ static void fq_adjust_removal(struct fq
__fq_adjust_removal(fq, flow, 1, skb->len, skb->truesize);
}
-static void fq_rejigger_backlog(struct fq *fq, struct fq_flow *flow)
-{
- struct fq_flow *i;
-
- if (flow->backlog == 0) {
- list_del_init(&flow->backlogchain);
- } else {
- i = flow;
-
- list_for_each_entry_continue(i, &fq->backlogs, backlogchain)
- if (i->backlog < flow->backlog)
- break;
-
- list_move_tail(&flow->backlogchain,
- &i->backlogchain);
- }
-}
-
static struct sk_buff *fq_flow_dequeue(struct fq *fq,
struct fq_flow *flow)
{
@@ -62,7 +56,6 @@ static struct sk_buff *fq_flow_dequeue(s
return NULL;
fq_adjust_removal(fq, flow, skb);
- fq_rejigger_backlog(fq, flow);
return skb;
}
@@ -90,7 +83,6 @@ static int fq_flow_drop(struct fq *fq, s
} while (packets < pending);
__fq_adjust_removal(fq, flow, packets, bytes, truesize);
- fq_rejigger_backlog(fq, flow);
return packets;
}
@@ -170,22 +162,36 @@ static struct fq_flow *fq_flow_classify(
return flow;
}
-static void fq_recalc_backlog(struct fq *fq,
- struct fq_tin *tin,
- struct fq_flow *flow)
+static struct fq_flow *fq_find_fattest_flow(struct fq *fq)
{
- struct fq_flow *i;
+ struct fq_tin *tin;
+ struct fq_flow *flow = NULL;
+ u32 len = 0;
+ int i;
- if (list_empty(&flow->backlogchain))
- list_add_tail(&flow->backlogchain, &fq->backlogs);
+ for_each_set_bit(i, fq->flows_bitmap, fq->flows_cnt) {
+ struct fq_flow *cur = &fq->flows[i];
+ unsigned int cur_len;
- i = flow;
- list_for_each_entry_continue_reverse(i, &fq->backlogs,
- backlogchain)
- if (i->backlog > flow->backlog)
- break;
+ cur_len = cur->backlog;
+ if (cur_len <= len)
+ continue;
- list_move(&flow->backlogchain, &i->backlogchain);
+ flow = cur;
+ len = cur_len;
+ }
+
+ list_for_each_entry(tin, &fq->tin_backlog, tin_list) {
+ unsigned int cur_len = tin->default_flow.backlog;
+
+ if (cur_len <= len)
+ continue;
+
+ flow = &tin->default_flow;
+ len = cur_len;
+ }
+
+ return flow;
}
static void fq_tin_enqueue(struct fq *fq,
@@ -200,6 +206,13 @@ static void fq_tin_enqueue(struct fq *fq
flow = fq_flow_classify(fq, tin, idx, skb);
+ if (!flow->backlog) {
+ if (flow != &tin->default_flow)
+ __set_bit(idx, fq->flows_bitmap);
+ else if (list_empty(&tin->tin_list))
+ list_add(&tin->tin_list, &fq->tin_backlog);
+ }
+
flow->tin = tin;
flow->backlog += skb->len;
tin->backlog_bytes += skb->len;
@@ -207,8 +220,6 @@ static void fq_tin_enqueue(struct fq *fq
fq->memory_usage += skb->truesize;
fq->backlog++;
- fq_recalc_backlog(fq, tin, flow);
-
if (list_empty(&flow->flowchain)) {
flow->deficit = fq->quantum;
list_add_tail(&flow->flowchain,
@@ -218,9 +229,7 @@ static void fq_tin_enqueue(struct fq *fq
__skb_queue_tail(&flow->queue, skb);
oom = (fq->memory_usage > fq->memory_limit);
while (fq->backlog > fq->limit || oom) {
- flow = list_first_entry_or_null(&fq->backlogs,
- struct fq_flow,
- backlogchain);
+ flow = fq_find_fattest_flow(fq);
if (!flow)
return;
@@ -255,8 +264,6 @@ static void fq_flow_filter(struct fq *fq
fq_adjust_removal(fq, flow, skb);
free_func(fq, tin, flow, skb);
}
-
- fq_rejigger_backlog(fq, flow);
}
static void fq_tin_filter(struct fq *fq,
@@ -279,16 +286,18 @@ static void fq_flow_reset(struct fq *fq,
struct fq_flow *flow,
fq_skb_free_t free_func)
{
+ struct fq_tin *tin = flow->tin;
struct sk_buff *skb;
while ((skb = fq_flow_dequeue(fq, flow)))
- free_func(fq, flow->tin, flow, skb);
+ free_func(fq, tin, flow, skb);
- if (!list_empty(&flow->flowchain))
+ if (!list_empty(&flow->flowchain)) {
list_del_init(&flow->flowchain);
-
- if (!list_empty(&flow->backlogchain))
- list_del_init(&flow->backlogchain);
+ if (list_empty(&tin->new_flows) &&
+ list_empty(&tin->old_flows))
+ list_del_init(&tin->tin_list);
+ }
flow->tin = NULL;
@@ -314,6 +323,7 @@ static void fq_tin_reset(struct fq *fq,
fq_flow_reset(fq, flow, free_func);
}
+ WARN_ON_ONCE(!list_empty(&tin->tin_list));
WARN_ON_ONCE(tin->backlog_bytes);
WARN_ON_ONCE(tin->backlog_packets);
}
@@ -321,7 +331,6 @@ static void fq_tin_reset(struct fq *fq,
static void fq_flow_init(struct fq_flow *flow)
{
INIT_LIST_HEAD(&flow->flowchain);
- INIT_LIST_HEAD(&flow->backlogchain);
__skb_queue_head_init(&flow->queue);
}
@@ -329,6 +338,7 @@ static void fq_tin_init(struct fq_tin *t
{
INIT_LIST_HEAD(&tin->new_flows);
INIT_LIST_HEAD(&tin->old_flows);
+ INIT_LIST_HEAD(&tin->tin_list);
fq_flow_init(&tin->default_flow);
}
@@ -337,8 +347,8 @@ static int fq_init(struct fq *fq, int fl
int i;
memset(fq, 0, sizeof(fq[0]));
- INIT_LIST_HEAD(&fq->backlogs);
spin_lock_init(&fq->lock);
+ INIT_LIST_HEAD(&fq->tin_backlog);
fq->flows_cnt = max_t(u32, flows_cnt, 1);
fq->quantum = 300;
fq->limit = 8192;
@@ -348,6 +358,14 @@ static int fq_init(struct fq *fq, int fl
if (!fq->flows)
return -ENOMEM;
+ fq->flows_bitmap = kcalloc(BITS_TO_LONGS(fq->flows_cnt), sizeof(long),
+ GFP_KERNEL);
+ if (!fq->flows_bitmap) {
+ kvfree(fq->flows);
+ fq->flows = NULL;
+ return -ENOMEM;
+ }
+
for (i = 0; i < fq->flows_cnt; i++)
fq_flow_init(&fq->flows[i]);
@@ -364,6 +382,9 @@ static void fq_reset(struct fq *fq,
kvfree(fq->flows);
fq->flows = NULL;
+
+ kfree(fq->flows_bitmap);
+ fq->flows_bitmap = NULL;
}
#endif
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3364,8 +3364,6 @@ out_recalc:
if (head->len != orig_len) {
flow->backlog += head->len - orig_len;
tin->backlog_bytes += head->len - orig_len;
-
- fq_recalc_backlog(fq, tin, flow);
}
out:
spin_unlock_bh(&fq->lock);

View File

@ -87,7 +87,7 @@
CFG80211_TESTMODE_CMD(ieee80211_testmode_cmd)
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1401,6 +1401,7 @@ struct ieee80211_local {
@@ -1400,6 +1400,7 @@ struct ieee80211_local {
int dynamic_ps_forced_timeout;
int user_power_level; /* in dBm, for all interfaces */