--- zzzz-none-000/linux-3.10.107/net/sched/sch_generic.c	2017-06-27 09:49:32.000000000 +0000
+++ scorpion-7490-727/linux-3.10.107/net/sched/sch_generic.c	2021-02-04 17:41:59.000000000 +0000
@@ -25,10 +25,15 @@
 #include <linux/rcupdate.h>
 #include <linux/list.h>
 #include <linux/slab.h>
+#include <linux/if_vlan.h>
 #include <net/sch_generic.h>
 #include <net/pkt_sched.h>
 #include <net/dst.h>
 
+/* Qdisc to use by default */
+const struct Qdisc_ops *default_qdisc_ops = &fq_codel_qdisc_ops;
+EXPORT_SYMBOL(default_qdisc_ops);
+
 /* Main transmission queue. */
 
 /* Modifications to data participating in scheduling must be protected with
@@ -42,7 +47,6 @@
 
 static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 {
-	skb_dst_force(skb);
 	q->gso_skb = skb;
 	q->qstats.requeues++;
 	q->q.qlen++;	/* it's still part of the queue */
@@ -51,24 +55,56 @@
 	return 0;
 }
 
-static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
+static void try_bulk_dequeue_skb(struct Qdisc *q,
+				 struct sk_buff *skb,
+				 const struct netdev_queue *txq,
+				 int *packets)
+{
+	int bytelimit = qdisc_avail_bulklimit(txq) - skb->len;
+
+	while (bytelimit > 0) {
+		struct sk_buff *nskb = q->dequeue(q);
+
+		if (!nskb)
+			break;
+
+		bytelimit -= nskb->len; /* covers GSO len */
+		skb->next = nskb;
+		skb = nskb;
+		(*packets)++; /* GSO counts as one pkt */
+	}
+	skb->next = NULL;
+}
+
+/* Note that dequeue_skb can possibly return a SKB list (via skb->next).
+ * A requeued skb (via q->gso_skb) can also be a SKB list.
+ */
+static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
+				   int *packets)
 {
 	struct sk_buff *skb = q->gso_skb;
 	const struct netdev_queue *txq = q->dev_queue;
 
+	*packets = 1;
+	*validate = true;
 	if (unlikely(skb)) {
 		/* check the reason of requeuing without tx lock first */
-		txq = netdev_get_tx_queue(txq->dev, skb_get_queue_mapping(skb));
+		txq = skb_get_tx_queue(txq->dev, skb);
 		if (!netif_xmit_frozen_or_stopped(txq)) {
 			q->gso_skb = NULL;
 			q->q.qlen--;
 		} else
 			skb = NULL;
+		/* skb in gso_skb were already validated */
+		*validate = false;
 	} else {
-		if (!(q->flags & TCQ_F_ONETXQUEUE) || !netif_xmit_frozen_or_stopped(txq))
+		if (!(q->flags & TCQ_F_ONETXQUEUE) ||
+		    !netif_xmit_frozen_or_stopped(txq)) {
 			skb = q->dequeue(q);
+			if (skb && qdisc_may_bulk(q))
+				try_bulk_dequeue_skb(q, skb, txq, packets);
+		}
 	}
-
 	return skb;
 }
 
@@ -85,7 +121,7 @@
 		 * detect it by checking xmit owner and drop the packet when
 		 * deadloop is detected. Return OK to try the next skb.
 		 */
-		kfree_skb(skb);
+		kfree_skb_list(skb);
 		net_warn_ratelimited("Dead loop on netdevice %s, fix it urgently!\n",
 				     dev_queue->dev->name);
 		ret = qdisc_qlen(q);
@@ -102,9 +138,9 @@
 }
 
 /*
- * Transmit one skb, and handle the return status as required. Holding the
- * __QDISC_STATE_RUNNING bit guarantees that only one CPU can execute this
- * function.
+ * Transmit possibly several skbs, and handle the return status as
+ * required. Holding the __QDISC___STATE_RUNNING bit guarantees that
+ * only one CPU can execute this function.
  *
  * Returns to the caller:
  *				0  - queue is empty or throttled.
@@ -112,19 +148,27 @@
  */
 int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 		    struct net_device *dev, struct netdev_queue *txq,
-		    spinlock_t *root_lock)
+		    spinlock_t *root_lock, bool validate)
 {
 	int ret = NETDEV_TX_BUSY;
 
 	/* And release qdisc */
 	spin_unlock(root_lock);
 
-	HARD_TX_LOCK(dev, txq, smp_processor_id());
-	if (!netif_xmit_frozen_or_stopped(txq))
-		ret = dev_hard_start_xmit(skb, dev, txq);
-
-	HARD_TX_UNLOCK(dev, txq);
+	/* Note that we validate skb (GSO, checksum, ...) outside of locks */
+	if (validate)
+		skb = validate_xmit_skb_list(skb, dev);
+
+	if (likely(skb)) {
+		HARD_TX_LOCK(dev, txq, smp_processor_id());
+		if (!netif_xmit_frozen_or_stopped(txq))
+			skb = dev_hard_start_xmit(skb, dev, txq, &ret);
+		HARD_TX_UNLOCK(dev, txq);
+	} else {
+		spin_lock(root_lock);
+		return qdisc_qlen(q);
+	}
 
 	spin_lock(root_lock);
 
 	if (dev_xmit_complete(ret)) {
@@ -151,7 +195,7 @@
 /*
  * NOTE: Called under qdisc_lock(q) with locally disabled BH.
  *
- * __QDISC_STATE_RUNNING guarantees only one CPU can process
+ * __QDISC___STATE_RUNNING guarantees only one CPU can process
  * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
  * this queue.
  *
@@ -167,36 +211,39 @@
  *				>0 - queue is not empty.
  *
  */
-static inline int qdisc_restart(struct Qdisc *q)
+static inline int qdisc_restart(struct Qdisc *q, int *packets)
 {
 	struct netdev_queue *txq;
 	struct net_device *dev;
 	spinlock_t *root_lock;
 	struct sk_buff *skb;
+	bool validate;
 
 	/* Dequeue packet */
-	skb = dequeue_skb(q);
+	skb = dequeue_skb(q, &validate, packets);
 	if (unlikely(!skb))
 		return 0;
-	WARN_ON_ONCE(skb_dst_is_noref(skb));
+
 	root_lock = qdisc_lock(q);
 	dev = qdisc_dev(q);
-	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+	txq = skb_get_tx_queue(dev, skb);
 
-	return sch_direct_xmit(skb, q, dev, txq, root_lock);
+	return sch_direct_xmit(skb, q, dev, txq, root_lock, validate);
 }
 
 void __qdisc_run(struct Qdisc *q)
 {
 	int quota = weight_p;
+	int packets;
 
-	while (qdisc_restart(q)) {
+	while (qdisc_restart(q, &packets)) {
 		/*
 		 * Ordered by possible occurrence: Postpone processing if
 		 * 1. we've exceeded packet quota
 		 * 2. another process needs the CPU;
 		 */
-		if (--quota <= 0 || need_resched()) {
+		quota -= packets;
+		if (quota <= 0 || need_resched()) {
 			__netif_schedule(q);
 			break;
 		}
@@ -207,15 +254,19 @@
 
 unsigned long dev_trans_start(struct net_device *dev)
 {
-	unsigned long val, res = dev->trans_start;
+	unsigned long val, res;
 	unsigned int i;
 
+	if (is_vlan_dev(dev))
+		dev = vlan_dev_real_dev(dev);
+	res = dev->trans_start;
 	for (i = 0; i < dev->num_tx_queues; i++) {
 		val = netdev_get_tx_queue(dev, i)->trans_start;
 		if (val && time_after(val, res))
 			res = val;
 	}
 	dev->trans_start = res;
+
 	return res;
 }
 EXPORT_SYMBOL(dev_trans_start);
@@ -301,6 +352,7 @@
 	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
 		if (dev->reg_state == NETREG_UNINITIALIZED)
 			return;
+		atomic_inc(&dev->carrier_changes);
 		linkwatch_fire_event(dev);
 		if (netif_running(dev))
 			__netdev_watchdog_up(dev);
@@ -319,6 +371,7 @@
 	if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
 		if (dev->reg_state == NETREG_UNINITIALIZED)
 			return;
+		atomic_inc(&dev->carrier_changes);
 		linkwatch_fire_event(dev);
 	}
 }
@@ -329,13 +382,13 @@
    cheaper.
  */
 
-static int noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc)
+static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc)
 {
 	kfree_skb(skb);
 	return NET_XMIT_CN;
 }
 
-static struct sk_buff *noop_dequeue(struct Qdisc * qdisc)
+static struct sk_buff *noop_dequeue(struct Qdisc *qdisc)
 {
 	return NULL;
 }
@@ -366,171 +419,29 @@
 };
 EXPORT_SYMBOL(noop_qdisc);
 
-static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
+static int noqueue_init(struct Qdisc *qdisc, struct nlattr *opt)
+{
+	/* register_qdisc() assigns a default of noop_enqueue if unset,
+	 * but __dev_queue_xmit() treats noqueue only as such
	 * if this is NULL - so clear it here.
+	 */
+	qdisc->enqueue = NULL;
+	return 0;
+}
+
+struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
 	.id		=	"noqueue",
 	.priv_size	=	0,
+	.init		=	noqueue_init,
 	.enqueue	=	noop_enqueue,
 	.dequeue	=	noop_dequeue,
 	.peek		=	noop_dequeue,
 	.owner		=	THIS_MODULE,
 };
 
-static struct Qdisc noqueue_qdisc;
-static struct netdev_queue noqueue_netdev_queue = {
-	.qdisc		=	&noqueue_qdisc,
-	.qdisc_sleeping	=	&noqueue_qdisc,
-};
-
-static struct Qdisc noqueue_qdisc = {
-	.enqueue	=	NULL,
-	.dequeue	=	noop_dequeue,
-	.flags		=	TCQ_F_BUILTIN,
-	.ops		=	&noqueue_qdisc_ops,
-	.list		=	LIST_HEAD_INIT(noqueue_qdisc.list),
-	.q.lock		=	__SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
-	.dev_queue	=	&noqueue_netdev_queue,
-	.busylock	=	__SPIN_LOCK_UNLOCKED(noqueue_qdisc.busylock),
-};
-
-
-static const u8 prio2band[TC_PRIO_MAX + 1] = {
-	1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1
-};
-
-/* 3-band FIFO queue: old style, but should be a bit faster than
-   generic prio+fifo combination.
- */
-
-#define PFIFO_FAST_BANDS 3
-
-/*
- * Private data for a pfifo_fast scheduler containing:
- *	- queues for the three band
- *	- bitmap indicating which of the bands contain skbs
- */
-struct pfifo_fast_priv {
-	u32 bitmap;
-	struct sk_buff_head q[PFIFO_FAST_BANDS];
-};
-
-/*
- * Convert a bitmap to the first band number where an skb is queued, where:
- *	bitmap=0 means there are no skbs on any band.
- *	bitmap=1 means there is an skb on band 0.
- *	bitmap=7 means there are skbs on all 3 bands, etc.
- */
-static const int bitmap2band[] = {-1, 0, 1, 0, 2, 0, 1, 0};
-
-static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv,
-					     int band)
-{
-	return priv->q + band;
-}
-
-static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc)
-{
-	if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) {
-		int band = prio2band[skb->priority & TC_PRIO_MAX];
-		struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
-		struct sk_buff_head *list = band2list(priv, band);
-
-		priv->bitmap |= (1 << band);
-		qdisc->q.qlen++;
-		return __qdisc_enqueue_tail(skb, qdisc, list);
-	}
-
-	return qdisc_drop(skb, qdisc);
-}
-
-static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
-{
-	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
-	int band = bitmap2band[priv->bitmap];
-
-	if (likely(band >= 0)) {
-		struct sk_buff_head *list = band2list(priv, band);
-		struct sk_buff *skb = __qdisc_dequeue_head(qdisc, list);
-
-		qdisc->q.qlen--;
-		if (skb_queue_empty(list))
-			priv->bitmap &= ~(1 << band);
-
-		return skb;
-	}
-
-	return NULL;
-}
-
-static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc)
-{
-	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
-	int band = bitmap2band[priv->bitmap];
-
-	if (band >= 0) {
-		struct sk_buff_head *list = band2list(priv, band);
-
-		return skb_peek(list);
-	}
-
-	return NULL;
-}
-
-static void pfifo_fast_reset(struct Qdisc *qdisc)
-{
-	int prio;
-	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
-
-	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
-		__qdisc_reset_queue(qdisc, band2list(priv, prio));
-
-	priv->bitmap = 0;
-	qdisc->qstats.backlog = 0;
-	qdisc->q.qlen = 0;
-}
-
-static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
-{
-	struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };
-
-	memcpy(&opt.priomap, prio2band, TC_PRIO_MAX + 1);
-	if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
-		goto nla_put_failure;
-	return skb->len;
-
-nla_put_failure:
-	return -1;
-}
-
-static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
-{
-	int prio;
-	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
-
-	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
-		skb_queue_head_init(band2list(priv, prio));
-
-	/* Can by-pass the queue discipline */
-	qdisc->flags |= TCQ_F_CAN_BYPASS;
-	return 0;
-}
-
-struct Qdisc_ops pfifo_fast_ops __read_mostly = {
-	.id		=	"pfifo_fast",
-	.priv_size	=	sizeof(struct pfifo_fast_priv),
-	.enqueue	=	pfifo_fast_enqueue,
-	.dequeue	=	pfifo_fast_dequeue,
-	.peek		=	pfifo_fast_peek,
-	.init		=	pfifo_fast_init,
-	.reset		=	pfifo_fast_reset,
-	.dump		=	pfifo_fast_dump,
-	.owner		=	THIS_MODULE,
-};
-EXPORT_SYMBOL(pfifo_fast_ops);
-
 static struct lock_class_key qdisc_tx_busylock;
 
 struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
-			  struct Qdisc_ops *ops)
+			  const struct Qdisc_ops *ops)
 {
 	void *p;
 	struct Qdisc *sch;
@@ -574,10 +485,14 @@
 }
 
 struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
-				struct Qdisc_ops *ops, unsigned int parentid)
+				const struct Qdisc_ops *ops,
+				unsigned int parentid)
 {
 	struct Qdisc *sch;
 
+	if (!try_module_get(ops->owner))
+		goto errout;
+
 	sch = qdisc_alloc(dev_queue, ops);
 	if (IS_ERR(sch))
 		goto errout;
@@ -602,7 +517,7 @@
 		ops->reset(qdisc);
 
 	if (qdisc->gso_skb) {
-		kfree_skb(qdisc->gso_skb);
+		kfree_skb_list(qdisc->gso_skb);
 		qdisc->gso_skb = NULL;
 		qdisc->q.qlen = 0;
 	}
@@ -613,6 +528,11 @@
 {
 	struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head);
 
+	if (qdisc_is_percpu_stats(qdisc)) {
+		free_percpu(qdisc->cpu_bstats);
+		free_percpu(qdisc->cpu_qstats);
+	}
+
 	kfree((char *) qdisc - qdisc->padded);
 }
 
@@ -638,7 +558,7 @@
 	module_put(ops->owner);
 	dev_put(qdisc_dev(qdisc));
 
-	kfree_skb(qdisc->gso_skb);
+	kfree_skb_list(qdisc->gso_skb);
 	/*
 	 * gen_estimator est_timer() might access qdisc->q.lock,
 	 * wait a RCU grace period before freeing qdisc.
@@ -677,18 +597,19 @@
 				     struct netdev_queue *dev_queue,
 				     void *_unused)
 {
-	struct Qdisc *qdisc = &noqueue_qdisc;
+	struct Qdisc *qdisc;
+	const struct Qdisc_ops *ops = &fq_codel_qdisc_ops;
 
-	if (dev->tx_queue_len) {
-		qdisc = qdisc_create_dflt(dev_queue,
-					  &pfifo_fast_ops, TC_H_ROOT);
-		if (!qdisc) {
-			netdev_info(dev, "activation failed\n");
-			return;
-		}
-		if (!netif_is_multiqueue(dev))
-			qdisc->flags |= TCQ_F_ONETXQUEUE;
+	if (dev->priv_flags & IFF_NO_QUEUE)
+		ops = &noqueue_qdisc_ops;
+
+	qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT);
+	if (!qdisc) {
+		netdev_info(dev, "activation failed\n");
+		return;
 	}
+	if (!netif_is_multiqueue(dev))
+		qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
 	dev_queue->qdisc_sleeping = qdisc;
 }
 
@@ -699,15 +620,16 @@
 
 	txq = netdev_get_tx_queue(dev, 0);
 
-	if (!netif_is_multiqueue(dev) || dev->tx_queue_len == 0) {
+	if (!netif_is_multiqueue(dev) ||
+	    dev->priv_flags & IFF_NO_QUEUE) {
 		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
 		dev->qdisc = txq->qdisc_sleeping;
 		atomic_inc(&dev->qdisc->refcnt);
 	} else {
 		qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT);
 		if (qdisc) {
-			qdisc->ops->attach(qdisc);
 			dev->qdisc = qdisc;
+			qdisc->ops->attach(qdisc);
 		}
 	}
 }
@@ -723,7 +645,7 @@
 	clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state);
 
 	rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
-	if (need_watchdog_p && new_qdisc != &noqueue_qdisc) {
+	if (need_watchdog_p) {
 		dev_queue->trans_start = 0;
 		*need_watchdog_p = 1;
 	}
@@ -734,9 +656,8 @@
 	int need_watchdog;
 
 	/* No queueing discipline is attached to device;
-	   create default one i.e. pfifo_fast for devices,
-	   which need queueing and noqueue_qdisc for
-	   virtual interfaces
+	 * create default one for devices, which need queueing
+	 * and noqueue_qdisc for virtual interfaces
 	 */
 
 	if (dev->qdisc == &noop_qdisc)
@@ -765,7 +686,7 @@
 	struct Qdisc *qdisc_default = _qdisc_default;
 	struct Qdisc *qdisc;
 
-	qdisc = dev_queue->qdisc;
+	qdisc = rtnl_dereference(dev_queue->qdisc);
 	if (qdisc) {
 		spin_lock_bh(qdisc_lock(qdisc));
 
@@ -818,7 +739,7 @@
 	struct net_device *dev;
 	bool sync_needed = false;
 
-	list_for_each_entry(dev, head, unreg_list) {
+	list_for_each_entry(dev, head, close_list) {
 		netdev_for_each_tx_queue(dev, dev_deactivate_queue,
 					 &noop_qdisc);
 		if (dev_ingress_queue(dev))
@@ -837,7 +758,7 @@
 	synchronize_net();
 
 	/* Wait for outstanding qdisc_run calls. */
-	list_for_each_entry(dev, head, unreg_list)
+	list_for_each_entry(dev, head, close_list)
 		while (some_qdisc_is_busy(dev))
 			yield();
 }
@@ -846,7 +767,7 @@
 {
 	LIST_HEAD(single);
 
-	list_add(&dev->unreg_list, &single);
+	list_add(&dev->close_list, &single);
 	dev_deactivate_many(&single);
 	list_del(&single);
 }
@@ -858,7 +779,7 @@
 {
 	struct Qdisc *qdisc = _qdisc;
 
-	dev_queue->qdisc = qdisc;
+	rcu_assign_pointer(dev_queue->qdisc, qdisc);
 	dev_queue->qdisc_sleeping = qdisc;
 }
 
@@ -897,42 +818,40 @@
 
 	WARN_ON(timer_pending(&dev->watchdog_timer));
 }
+EXPORT_SYMBOL(dev_shutdown);
 
 void psched_ratecfg_precompute(struct psched_ratecfg *r,
-			       const struct tc_ratespec *conf)
+			       const struct tc_ratespec *conf,
+			       u64 rate64)
 {
-	u64 factor;
-	u64 mult;
-	int shift;
-
 	memset(r, 0, sizeof(*r));
 	r->overhead = conf->overhead;
-	r->rate_bps = (u64)conf->rate << 3;
+	r->rate_bytes_ps = max_t(u64, conf->rate, rate64);
 	r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK);
 	r->mult = 1;
 	/*
-	 * Calibrate mult, shift so that token counting is accurate
-	 * for smallest packet size (64 bytes).  Token (time in ns) is
-	 * computed as (bytes * 8) * NSEC_PER_SEC / rate_bps.  It will
-	 * work as long as the smallest packet transfer time can be
-	 * accurately represented in nanosec.
+	 * The deal here is to replace a divide by a reciprocal one
+	 * in fast path (a reciprocal divide is a multiply and a shift)
+	 *
+	 * Normal formula would be :
+	 *  time_in_ns = (NSEC_PER_SEC * len) / rate_bps
+	 *
+	 * We compute mult/shift to use instead :
+	 *  time_in_ns = (len * mult) >> shift;
+	 *
+	 * We try to get the highest possible mult value for accuracy,
+	 * but have to make sure no overflows will ever happen.
	 */
-	if (r->rate_bps > 0) {
-		/*
-		 * Higher shift gives better accuracy.  Find the largest
-		 * shift such that mult fits in 32 bits.
-		 */
-		for (shift = 0; shift < 16; shift++) {
-			r->shift = shift;
-			factor = 8LLU * NSEC_PER_SEC * (1 << r->shift);
-			mult = div64_u64(factor, r->rate_bps);
-			if (mult > UINT_MAX)
+	if (r->rate_bytes_ps > 0) {
+		u64 factor = NSEC_PER_SEC;
+
+		for (;;) {
+			r->mult = div64_u64(factor, r->rate_bytes_ps);
+			if (r->mult & (1U << 31) || factor & (1ULL << 63))
 				break;
+			factor <<= 1;
+			r->shift++;
 		}
-
-		r->shift = shift - 1;
-		factor = 8LLU * NSEC_PER_SEC * (1 << r->shift);
-		r->mult = div64_u64(factor, r->rate_bps);
 	}
 }
 EXPORT_SYMBOL(psched_ratecfg_precompute);
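
Annotations (appended after the diff; not part of the patch itself).

The rewritten psched_ratecfg_precompute() above replaces the per-packet divide
time_in_ns = NSEC_PER_SEC * len / rate with a reciprocal multiply and shift,
as its comment explains. The stand-alone user-space sketch below reproduces
that mult/shift search and checks it against the exact divide; "struct
ratecfg" and precompute() are simplified stand-ins for the kernel types, not
the kernel API.

/*
 * Build: cc -o ratecfg_demo ratecfg_demo.c
 */
#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL

struct ratecfg {
	uint32_t mult;
	uint8_t shift;
};

static void precompute(struct ratecfg *r, uint64_t rate_bytes_ps)
{
	uint64_t factor = NSEC_PER_SEC;

	r->mult = 1;
	r->shift = 0;
	if (!rate_bytes_ps)
		return;
	for (;;) {
		r->mult = (uint32_t)(factor / rate_bytes_ps);
		/* stop once mult uses bit 31 or factor would overflow */
		if ((r->mult & (1U << 31)) || (factor & (1ULL << 63)))
			break;
		factor <<= 1;
		r->shift++;
	}
}

int main(void)
{
	struct ratecfg r;
	uint64_t rate = 125000000;	/* 1 Gbit/s in bytes per second */
	uint64_t len = 1514;		/* full-size Ethernet frame */

	precompute(&r, rate);
	printf("mult=%u shift=%u\n", r.mult, r.shift);
	printf("exact divide:      %llu ns\n",
	       (unsigned long long)(NSEC_PER_SEC * len / rate));
	printf("reciprocal divide: %llu ns\n",
	       (unsigned long long)(((uint64_t)len * r.mult) >> r.shift));
	return 0;
}

For the 1 Gbit/s rate this prints mult=2147483648 shift=28, and both formulas
give 12112 ns for a 1514-byte frame.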
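
The bulk dequeue added in dequeue_skb()/try_bulk_dequeue_skb() spends a byte
budget to chain several packets per queue-lock acquisition; in the kernel the
budget comes from BQL via qdisc_avail_bulklimit(). A user-space sketch of the
budget logic (illustrative only; a fixed budget and a toy "struct pkt" stand
in for BQL state and sk_buff):

#include <stdio.h>
#include <stddef.h>

struct pkt {
	int len;
	struct pkt *next;
};

static struct pkt backlog[] = {
	{ 1514, NULL }, { 1514, NULL }, { 1514, NULL }, { 60, NULL },
};
static size_t head;

static struct pkt *dequeue(void)
{
	if (head >= sizeof(backlog) / sizeof(backlog[0]))
		return NULL;
	return &backlog[head++];
}

/* Mirrors the kernel loop: one packet is always sent, follow-up
 * packets are chained via ->next while the budget stays positive.
 */
static struct pkt *bulk_dequeue(int bytelimit, int *packets)
{
	struct pkt *first = dequeue();
	struct pkt *tail = first;

	*packets = 0;
	if (!first)
		return NULL;
	*packets = 1;
	bytelimit -= first->len;	/* first packet is already "paid" */
	while (bytelimit > 0) {
		struct pkt *n = dequeue();

		if (!n)
			break;
		bytelimit -= n->len;
		tail->next = n;
		tail = n;
		(*packets)++;
	}
	tail->next = NULL;
	return first;
}

int main(void)
{
	int packets;
	struct pkt *p = bulk_dequeue(3000, &packets);

	/* 3000-byte budget: still positive after the first 1514-byte
	 * packet, so a second one is chained; that drives the budget
	 * negative, so the loop stops there.
	 */
	printf("chained %d packets\n", packets);
	for (; p; p = p->next)
		printf("len=%d\n", p->len);
	return 0;
}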
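
The comment in noqueue_init() documents the contract the change relies on:
the transmit path treats a NULL ->enqueue as "no queue attached, hand the
packet straight to the driver", which is why the init function clears the
noop_enqueue default that register_qdisc() would otherwise install. A toy
illustration of that dispatch convention (simplified stand-in types, not the
kernel structures):

#include <stdio.h>
#include <stddef.h>

struct toy_qdisc {
	int (*enqueue)(int skb_id);	/* NULL selects the noqueue path */
};

static int fifo_enqueue(int skb_id)
{
	printf("skb %d enqueued for scheduling\n", skb_id);
	return 0;
}

static void toy_xmit(struct toy_qdisc *q, int skb_id)
{
	if (q->enqueue)
		q->enqueue(skb_id);
	else
		printf("skb %d passed directly to the driver\n", skb_id);
}

int main(void)
{
	struct toy_qdisc fifo = { .enqueue = fifo_enqueue };
	struct toy_qdisc noqueue = { .enqueue = NULL };

	toy_xmit(&fifo, 1);
	toy_xmit(&noqueue, 2);
	return 0;
}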