--- zzzz-none-000/linux-4.4.271/net/sched/sch_fq_codel.c 2021-06-03 06:22:09.000000000 +0000 +++ hawkeye-5590-750/linux-4.4.271/net/sched/sch_fq_codel.c 2023-04-19 10:22:30.000000000 +0000 @@ -57,6 +57,7 @@ u32 flows_cnt; /* number of flows */ siphash_key_t perturbation; /* hash perturbation */ u32 quantum; /* psched_mtu(qdisc_dev(sch)); */ + u32 drop_batch_size; struct codel_params cparams; struct codel_stats cstats; u32 drop_overlimit; @@ -133,17 +134,20 @@ skb->next = NULL; } -static unsigned int fq_codel_drop(struct Qdisc *sch) +static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets) { struct fq_codel_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; unsigned int maxbacklog = 0, idx = 0, i, len; struct fq_codel_flow *flow; + unsigned int threshold; - /* Queue is full! Find the fat flow and drop packet from it. + /* Queue is full! Find the fat flow and drop packet(s) from it. * This might sound expensive, but with 1024 flows, we scan * 4KB of memory, and we dont need to handle a complex tree * in fast path (packet queue/enqueue) with many cache misses. + * In stress mode, we'll try to drop 64 packets from the flow, + * amortizing this linear lookup to one cache line per drop. */ for (i = 0; i < q->flows_cnt; i++) { if (q->backlogs[i] > maxbacklog) { @@ -151,15 +155,24 @@ idx = i; } } + + /* Our goal is to drop half of this fat flow backlog */ + threshold = maxbacklog >> 1; + flow = &q->flows[idx]; - skb = dequeue_head(flow); - len = qdisc_pkt_len(skb); + len = 0; + i = 0; + do { + skb = dequeue_head(flow); + len += qdisc_pkt_len(skb); + kfree_skb(skb); + } while (++i < max_packets && len < threshold); + + flow->dropped += i; q->backlogs[idx] -= len; - sch->q.qlen--; - qdisc_qstats_drop(sch); - qdisc_qstats_backlog_dec(sch, skb); - kfree_skb(skb); - flow->dropped++; + sch->qstats.drops += i; + sch->qstats.backlog -= len; + sch->q.qlen -= i; return idx; } @@ -168,14 +181,14 @@ unsigned int prev_backlog; prev_backlog = sch->qstats.backlog; - fq_codel_drop(sch); + fq_codel_drop(sch, 1U); return prev_backlog - sch->qstats.backlog; } static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct fq_codel_sched_data *q = qdisc_priv(sch); - unsigned int idx, prev_backlog; + unsigned int idx, prev_backlog, prev_qlen; struct fq_codel_flow *flow; int uninitialized_var(ret); @@ -198,22 +211,27 @@ list_add_tail(&flow->flowchain, &q->new_flows); q->new_flow_count++; flow->deficit = q->quantum; - flow->dropped = 0; } if (++sch->q.qlen <= sch->limit) return NET_XMIT_SUCCESS; prev_backlog = sch->qstats.backlog; - q->drop_overlimit++; - /* Return Congestion Notification only if we dropped a packet - * from this flow. + prev_qlen = sch->q.qlen; + + /* fq_codel_drop() is quite expensive, as it performs a linear search + * in q->backlogs[] to find a fat flow. + * So instead of dropping a single packet, drop half of its backlog + * with a 64 packets limit to not add a too big cpu spike here. */ - if (fq_codel_drop(sch) == idx) - return NET_XMIT_CN; + ret = fq_codel_drop(sch, q->drop_batch_size); + + q->drop_overlimit += prev_qlen - sch->q.qlen; - /* As we dropped a packet, better let upper stack know this */ - qdisc_tree_reduce_backlog(sch, 1, prev_backlog - sch->qstats.backlog); - return NET_XMIT_SUCCESS; + /* As we dropped packet(s), better let upper stack know this */ + qdisc_tree_reduce_backlog(sch, prev_qlen - sch->q.qlen, + prev_backlog - sch->qstats.backlog); + + return ret == idx ? NET_XMIT_CN : NET_XMIT_SUCCESS; } /* This is the specific function called from codel_dequeue() @@ -323,6 +341,7 @@ [TCA_FQ_CODEL_FLOWS] = { .type = NLA_U32 }, [TCA_FQ_CODEL_QUANTUM] = { .type = NLA_U32 }, [TCA_FQ_CODEL_CE_THRESHOLD] = { .type = NLA_U32 }, + [TCA_FQ_CODEL_DROP_BATCH_SIZE] = { .type = NLA_U32 }, }; static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt) @@ -374,6 +393,9 @@ if (tb[TCA_FQ_CODEL_QUANTUM]) q->quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM])); + if (tb[TCA_FQ_CODEL_DROP_BATCH_SIZE]) + q->drop_batch_size = min(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE])); + while (sch->q.qlen > sch->limit) { struct sk_buff *skb = fq_codel_dequeue(sch); @@ -417,8 +439,9 @@ struct fq_codel_sched_data *q = qdisc_priv(sch); int i; - sch->limit = 10*1024; + sch->limit = 1024; q->flows_cnt = 1024; + q->drop_batch_size = 64; q->quantum = psched_mtu(qdisc_dev(sch)); get_random_bytes(&q->perturbation, sizeof(q->perturbation)); INIT_LIST_HEAD(&q->new_flows); @@ -476,6 +499,8 @@ q->cparams.ecn) || nla_put_u32(skb, TCA_FQ_CODEL_QUANTUM, q->quantum) || + nla_put_u32(skb, TCA_FQ_CODEL_DROP_BATCH_SIZE, + q->drop_batch_size) || nla_put_u32(skb, TCA_FQ_CODEL_FLOWS, q->flows_cnt)) goto nla_put_failure; @@ -629,7 +654,7 @@ .walk = fq_codel_walk, }; -static struct Qdisc_ops fq_codel_qdisc_ops __read_mostly = { +struct Qdisc_ops fq_codel_qdisc_ops __read_mostly = { .cl_ops = &fq_codel_class_ops, .id = "fq_codel", .priv_size = sizeof(struct fq_codel_sched_data), @@ -645,6 +670,7 @@ .dump_stats = fq_codel_dump_stats, .owner = THIS_MODULE, }; +EXPORT_SYMBOL(fq_codel_qdisc_ops); static int __init fq_codel_module_init(void) {