--- zzzz-none-000/linux-3.10.107/net/sched/sch_api.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/net/sched/sch_api.c 2021-02-04 17:41:59.000000000 +0000 @@ -135,7 +135,7 @@ static struct Qdisc_ops *qdisc_base; -/* Register/uregister queueing discipline */ +/* Register/unregister queueing discipline */ int register_qdisc(struct Qdisc_ops *qops) { @@ -200,8 +200,61 @@ } EXPORT_SYMBOL(unregister_qdisc); +/* Get default qdisc if not otherwise specified */ +void qdisc_get_default(char *name, size_t len) +{ + read_lock(&qdisc_mod_lock); + strlcpy(name, default_qdisc_ops->id, len); + read_unlock(&qdisc_mod_lock); +} + +static struct Qdisc_ops *qdisc_lookup_default(const char *name) +{ + struct Qdisc_ops *q = NULL; + + for (q = qdisc_base; q; q = q->next) { + if (!strcmp(name, q->id)) { + if (!try_module_get(q->owner)) + q = NULL; + break; + } + } + + return q; +} + +/* Set new default qdisc to use */ +int qdisc_set_default(const char *name) +{ + const struct Qdisc_ops *ops; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + write_lock(&qdisc_mod_lock); + ops = qdisc_lookup_default(name); + if (!ops) { + /* Not found, drop lock and try to load module */ + write_unlock(&qdisc_mod_lock); + request_module("sch_%s", name); + write_lock(&qdisc_mod_lock); + + ops = qdisc_lookup_default(name); + } + + if (ops) { + /* Set new default */ + module_put(default_qdisc_ops->owner); + default_qdisc_ops = ops; + } + write_unlock(&qdisc_mod_lock); + + return ops ? 0 : -ENOENT; +} + /* We know handle. Find qdisc among all qdisc's attached to device - (root qdisc, all its children, children of children etc.) + * (root qdisc, all its children, children of children etc.) + * Note: caller either uses rtnl or rcu_read_lock() */ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) @@ -212,23 +265,31 @@ root->handle == handle) return root; - list_for_each_entry(q, &root->list, list) { + list_for_each_entry_rcu(q, &root->list, list) { if (q->handle == handle) return q; } return NULL; } -static void qdisc_list_add(struct Qdisc *q) +void qdisc_list_add(struct Qdisc *q) { - if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) - list_add_tail(&q->list, &qdisc_dev(q)->qdisc->list); + if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { + struct Qdisc *root = qdisc_dev(q)->qdisc; + + WARN_ON_ONCE(root == &noop_qdisc); + ASSERT_RTNL(); + list_add_tail_rcu(&q->list, &root->list); + } } +EXPORT_SYMBOL(qdisc_list_add); void qdisc_list_del(struct Qdisc *q) { - if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) - list_del(&q->list); + if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { + ASSERT_RTNL(); + list_del_rcu(&q->list); + } } EXPORT_SYMBOL(qdisc_list_del); @@ -247,6 +308,7 @@ out: return q; } +EXPORT_SYMBOL(qdisc_lookup); static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) { @@ -267,7 +329,7 @@ /* Find queueing discipline by name */ -static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind) +struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind) { struct Qdisc_ops *q = NULL; @@ -284,6 +346,7 @@ } return q; } +EXPORT_SYMBOL(qdisc_lookup_ops); /* The linklayer setting were not transferred from iproute2, in older * versions, and the rate tables lookup systems have been dropped in @@ -506,7 +569,7 @@ } EXPORT_SYMBOL(__qdisc_calculate_pkt_len); -void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc) +void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc) { if (!(qdisc->flags & TCQ_F_WARN_NONWC)) { pr_warn("%s: %s qdisc %X: is non-work-conserving?\n", @@ -521,31 +584,34 @@ struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, timer); + rcu_read_lock(); qdisc_unthrottled(wd->qdisc); __netif_schedule(qdisc_root(wd->qdisc)); + rcu_read_unlock(); return HRTIMER_NORESTART; } void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc) { - hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); wd->timer.function = qdisc_watchdog; wd->qdisc = qdisc; } EXPORT_SYMBOL(qdisc_watchdog_init); -void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires) +void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires, bool throttle) { if (test_bit(__QDISC_STATE_DEACTIVATED, &qdisc_root_sleeping(wd->qdisc)->state)) return; - qdisc_throttled(wd->qdisc); + if (throttle) + qdisc_throttled(wd->qdisc); hrtimer_start(&wd->timer, ns_to_ktime(expires), - HRTIMER_MODE_ABS); + HRTIMER_MODE_ABS_PINNED); } EXPORT_SYMBOL(qdisc_watchdog_schedule_ns); @@ -680,22 +746,29 @@ return 0; } -void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) +void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n, + unsigned int len) { const struct Qdisc_class_ops *cops; unsigned long cl; u32 parentid; + int drops; - if (n == 0) + if (n == 0 && len == 0) return; + drops = max_t(int, n, 0); + rcu_read_lock(); while ((parentid = sch->parent)) { if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS)) - return; + break; + if (sch->flags & TCQ_F_NOPARENT) + break; + /* TODO: perform the search on a per txq basis */ sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid)); if (sch == NULL) { - WARN_ON(parentid != TC_H_ROOT); - return; + WARN_ON_ONCE(parentid != TC_H_ROOT); + break; } cops = sch->ops->cl_ops; if (cops->qlen_notify) { @@ -704,9 +777,12 @@ cops->put(sch, cl); } sch->q.qlen -= n; + sch->qstats.backlog -= len; + __qdisc_qstats_drop(sch, drops); } + rcu_read_unlock(); } -EXPORT_SYMBOL(qdisc_tree_decrease_qlen); +EXPORT_SYMBOL(qdisc_tree_reduce_backlog); static void notify_and_destroy(struct net *net, struct sk_buff *skb, struct nlmsghdr *n, u32 clid, @@ -884,6 +960,17 @@ sch->handle = handle; if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) { + if (qdisc_is_percpu_stats(sch)) { + sch->cpu_bstats = + netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu); + if (!sch->cpu_bstats) + goto err_out4; + + sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue); + if (!sch->cpu_qstats) + goto err_out4; + } + if (tca[TCA_STAB]) { stab = qdisc_get_stab(tca[TCA_STAB]); if (IS_ERR(stab)) { @@ -906,8 +993,11 @@ else root_lock = qdisc_lock(sch); - err = gen_new_estimator(&sch->bstats, &sch->rate_est, - root_lock, tca[TCA_RATE]); + err = gen_new_estimator(&sch->bstats, + sch->cpu_bstats, + &sch->rate_est, + root_lock, + tca[TCA_RATE]); if (err) goto err_out4; } @@ -926,6 +1016,8 @@ return NULL; err_out4: + free_percpu(sch->cpu_bstats); + free_percpu(sch->cpu_qstats); /* * Any broken qdiscs that would require a ops->reset() here? * The qdisc was never in action so it shouldn't be necessary. @@ -964,9 +1056,11 @@ because change can't be undone. */ if (sch->flags & TCQ_F_MQROOT) goto out; - gen_replace_estimator(&sch->bstats, &sch->rate_est, - qdisc_root_sleeping_lock(sch), - tca[TCA_RATE]); + gen_replace_estimator(&sch->bstats, + sch->cpu_bstats, + &sch->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); } out: return 0; @@ -1026,7 +1120,8 @@ struct Qdisc *p = NULL; int err; - if ((n->nlmsg_type != RTM_GETQDISC) && !netlink_capable(skb, CAP_NET_ADMIN)) + if ((n->nlmsg_type != RTM_GETQDISC) && + !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); @@ -1093,7 +1188,7 @@ struct Qdisc *q, *p; int err; - if (!netlink_capable(skb, CAP_NET_ADMIN)) + if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; replay: @@ -1240,12 +1335,16 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, u32 portid, u32 seq, u16 flags, int event) { + struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL; + struct gnet_stats_queue __percpu *cpu_qstats = NULL; struct tcmsg *tcm; struct nlmsghdr *nlh; unsigned char *b = skb_tail_pointer(skb); struct gnet_dump d; struct qdisc_size_table *stab; + __u32 qlen; + cond_resched(); nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); if (!nlh) goto out_nlmsg_trim; @@ -1261,7 +1360,7 @@ goto nla_put_failure; if (q->ops->dump && q->ops->dump(q, skb) < 0) goto nla_put_failure; - q->qstats.qlen = q->q.qlen; + qlen = q->q.qlen; stab = rtnl_dereference(q->stab); if (stab && qdisc_dump_stab(skb, stab) < 0) @@ -1274,9 +1373,14 @@ if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0) goto nla_put_failure; - if (gnet_stats_copy_basic(&d, &q->bstats) < 0 || + if (qdisc_is_percpu_stats(q)) { + cpu_bstats = q->cpu_bstats; + cpu_qstats = q->cpu_qstats; + } + + if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats) < 0 || gnet_stats_copy_rate_est(&d, &q->bstats, &q->rate_est) < 0 || - gnet_stats_copy_queue(&d, &q->qstats) < 0) + gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0) goto nla_put_failure; if (gnet_stats_finish_copy(&d) < 0) @@ -1377,9 +1481,9 @@ s_idx = cb->args[0]; s_q_idx = q_idx = cb->args[1]; - rcu_read_lock(); idx = 0; - for_each_netdev_rcu(net, dev) { + ASSERT_RTNL(); + for_each_netdev(net, dev) { struct netdev_queue *dev_queue; if (idx < s_idx) @@ -1402,8 +1506,6 @@ } done: - rcu_read_unlock(); - cb->args[0] = idx; cb->args[1] = q_idx; @@ -1433,7 +1535,8 @@ u32 qid; int err; - if ((n->nlmsg_type != RTM_GETTCLASS) && !netlink_capable(skb, CAP_NET_ADMIN)) + if ((n->nlmsg_type != RTM_GETTCLASS) && + !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); @@ -1560,6 +1663,7 @@ struct gnet_dump d; const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops; + cond_resched(); nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); if (!nlh) goto out_nlmsg_trim; @@ -1715,74 +1819,67 @@ * to this qdisc, (optionally) tests for protocol and asks * specific classifiers. */ -int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res) +int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res, bool compat_mode) { - __be16 protocol = skb->protocol; - int err; + __be16 protocol = tc_skb_protocol(skb); +#ifdef CONFIG_NET_CLS_ACT + const struct tcf_proto *old_tp = tp; + int limit = 0; + +reclassify: +#endif + for (; tp; tp = rcu_dereference_bh(tp->next)) { + int err; - for (; tp; tp = tp->next) { if (tp->protocol != protocol && tp->protocol != htons(ETH_P_ALL)) continue; - err = tp->classify(skb, tp, res); - if (err >= 0) { + err = tp->classify(skb, tp, res); #ifdef CONFIG_NET_CLS_ACT - if (err != TC_ACT_RECLASSIFY && skb->tc_verd) - skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0); + if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) + goto reset; #endif + if (err >= 0) return err; - } } - return -1; -} -EXPORT_SYMBOL(tc_classify_compat); - -int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res) -{ - int err = 0; -#ifdef CONFIG_NET_CLS_ACT - const struct tcf_proto *otp = tp; -reclassify: -#endif - err = tc_classify_compat(skb, tp, res); + return -1; #ifdef CONFIG_NET_CLS_ACT - if (err == TC_ACT_RECLASSIFY) { - u32 verd = G_TC_VERD(skb->tc_verd); - tp = otp; - - if (verd++ >= MAX_REC_LOOP) { - net_notice_ratelimited("%s: packet reclassify loop rule prio %u protocol %02x\n", - tp->q->ops->id, - tp->prio & 0xffff, - ntohs(tp->protocol)); - return TC_ACT_SHOT; - } - skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd); - goto reclassify; +reset: + if (unlikely(limit++ >= MAX_REC_LOOP)) { + net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n", + tp->q->ops->id, tp->prio & 0xffff, + ntohs(tp->protocol)); + return TC_ACT_SHOT; } + + tp = old_tp; + protocol = tc_skb_protocol(skb); + goto reclassify; #endif - return err; } EXPORT_SYMBOL(tc_classify); -void tcf_destroy(struct tcf_proto *tp) +bool tcf_destroy(struct tcf_proto *tp, bool force) { - tp->ops->destroy(tp); - module_put(tp->ops->owner); - kfree(tp); + if (tp->ops->destroy(tp, force)) { + module_put(tp->ops->owner); + kfree_rcu(tp, rcu); + return true; + } + + return false; } -void tcf_destroy_chain(struct tcf_proto **fl) +void tcf_destroy_chain(struct tcf_proto __rcu **fl) { struct tcf_proto *tp; - while ((tp = *fl) != NULL) { - *fl = tp->next; - tcf_destroy(tp); + while ((tp = rtnl_dereference(*fl)) != NULL) { + RCU_INIT_POINTER(*fl, tp->next); + tcf_destroy(tp, true); } } EXPORT_SYMBOL(tcf_destroy_chain); @@ -1790,13 +1887,10 @@ #ifdef CONFIG_PROC_FS static int psched_show(struct seq_file *seq, void *v) { - struct timespec ts; - - hrtimer_get_res(CLOCK_MONOTONIC, &ts); seq_printf(seq, "%08x %08x %08x %08x\n", (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1), 1000000, - (u32)NSEC_PER_SEC/(u32)ktime_to_ns(timespec_to_ktime(ts))); + (u32)NSEC_PER_SEC / hrtimer_resolution); return 0; } @@ -1856,10 +1950,12 @@ return err; } + register_qdisc(&fq_codel_qdisc_ops); register_qdisc(&pfifo_qdisc_ops); register_qdisc(&bfifo_qdisc_ops); register_qdisc(&pfifo_head_drop_qdisc_ops); register_qdisc(&mq_qdisc_ops); + register_qdisc(&noqueue_qdisc_ops); rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL); rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL);