/* * net/sched/sch_prio.c Simple 3-band priority "scheduler". * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * * Authors: Alexey Kuznetsov, * Fixes: 19990609: J Hadi Salim : * Init -- EINVAL when opt undefined */ #include #include #include #include #include #include #include #include #include #include struct prio_sched_data { int bands; struct tcf_proto __rcu *filter_list; u8 prio2band[TC_PRIO_MAX+1]; struct Qdisc *queues[TCQ_PRIO_BANDS]; }; static struct Qdisc * prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) { struct prio_sched_data *q = qdisc_priv(sch); u32 band = skb->priority; struct tcf_result res; struct tcf_proto *fl; int err; *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; if (TC_H_MAJ(skb->priority) != sch->handle) { fl = rcu_dereference_bh(q->filter_list); err = tc_classify(skb, fl, &res, false); #ifdef CONFIG_NET_CLS_ACT switch (err) { case TC_ACT_STOLEN: case TC_ACT_QUEUED: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; case TC_ACT_SHOT: return NULL; } #endif if (!fl || err < 0) { if (TC_H_MAJ(band)) band = 0; return q->queues[q->prio2band[band & TC_PRIO_MAX]]; } band = res.classid; } band = TC_H_MIN(band) - 1; if (band >= q->bands) return q->queues[q->prio2band[0]]; return q->queues[band]; } static int prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct Qdisc *qdisc; int ret; qdisc = prio_classify(skb, sch, &ret); #ifdef CONFIG_NET_CLS_ACT if (qdisc == NULL) { if (ret & __NET_XMIT_BYPASS) qdisc_qstats_drop(sch); kfree_skb(skb); return ret; } #endif ret = qdisc_enqueue(skb, qdisc, to_free); if (ret == NET_XMIT_SUCCESS) { qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; } if (net_xmit_drop_count(ret)) qdisc_qstats_drop(sch); return ret; } static struct sk_buff *prio_peek(struct Qdisc *sch) { struct prio_sched_data *q = qdisc_priv(sch); int prio; for (prio = 0; prio < q->bands; prio++) { struct Qdisc *qdisc = q->queues[prio]; struct sk_buff *skb = qdisc->ops->peek(qdisc); if (skb) return skb; } return NULL; } static struct sk_buff *prio_dequeue(struct Qdisc *sch) { struct prio_sched_data *q = qdisc_priv(sch); int prio; for (prio = 0; prio < q->bands; prio++) { struct Qdisc *qdisc = q->queues[prio]; struct sk_buff *skb = qdisc_dequeue_peeked(qdisc); if (skb) { qdisc_bstats_update(sch, skb); qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; return skb; } } return NULL; } static void prio_reset(struct Qdisc *sch) { int prio; struct prio_sched_data *q = qdisc_priv(sch); for (prio = 0; prio < q->bands; prio++) qdisc_reset(q->queues[prio]); sch->qstats.backlog = 0; sch->q.qlen = 0; } static int prio_offload(struct Qdisc *sch, bool enable) { struct prio_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); struct tc_prio_qopt_offload opt = { .handle = sch->handle, .parent = sch->parent, }; struct tc_to_netdev tc = { .type = TC_SETUP_QDISC_PRIO, { .sch_prio = &opt } }; if (!(dev->features & NETIF_F_HW_TC) || !dev->netdev_ops->ndo_setup_tc) return -EOPNOTSUPP; if (enable) { opt.command = TC_PRIO_REPLACE; opt.replace_params.bands = q->bands; memcpy(&opt.replace_params.priomap, q->prio2band, TC_PRIO_MAX + 1); opt.replace_params.qstats = &sch->qstats; } else { opt.command = TC_PRIO_DESTROY; } return dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc); } static void prio_destroy(struct Qdisc *sch) { int prio; struct prio_sched_data *q = qdisc_priv(sch); tcf_destroy_chain(&q->filter_list); prio_offload(sch, false); for (prio = 0; prio < q->bands; prio++) qdisc_destroy(q->queues[prio]); } static int prio_tune(struct Qdisc *sch, struct nlattr *opt) { struct prio_sched_data *q = qdisc_priv(sch); struct Qdisc *queues[TCQ_PRIO_BANDS]; int oldbands = q->bands, i; struct tc_prio_qopt *qopt; if (nla_len(opt) < sizeof(*qopt)) return -EINVAL; qopt = nla_data(opt); if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2) return -EINVAL; for (i = 0; i <= TC_PRIO_MAX; i++) { if (qopt->priomap[i] >= qopt->bands) return -EINVAL; } /* Before commit, make sure we can allocate all new qdiscs */ for (i = oldbands; i < qopt->bands; i++) { queues[i] = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, TC_H_MAKE(sch->handle, i + 1)); if (!queues[i]) { while (i > oldbands) qdisc_destroy(queues[--i]); return -ENOMEM; } } sch_tree_lock(sch); q->bands = qopt->bands; memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); for (i = q->bands; i < oldbands; i++) { struct Qdisc *child = q->queues[i]; qdisc_tree_reduce_backlog(child, child->q.qlen, child->qstats.backlog); qdisc_destroy(child); } for (i = oldbands; i < q->bands; i++) q->queues[i] = queues[i]; sch_tree_unlock(sch); prio_offload(sch, true); return 0; } static int prio_init(struct Qdisc *sch, struct nlattr *opt) { if (!opt) return -EINVAL; return prio_tune(sch, opt); } static int prio_dump_offload(struct Qdisc *sch) { struct net_device *dev = qdisc_dev(sch); struct tc_prio_qopt_offload hw_stats = { .command = TC_PRIO_STATS, .handle = sch->handle, .parent = sch->parent, { .stats = { .bstats = &sch->bstats, .qstats = &sch->qstats, }, }, }; struct tc_to_netdev tc = { .type = TC_SETUP_QDISC_PRIO, { .sch_prio = &hw_stats } }; int err; sch->flags &= ~TCQ_F_OFFLOADED; if (!(dev->features & NETIF_F_HW_TC) || !dev->netdev_ops->ndo_setup_tc) return 0; err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc); if (err == -EOPNOTSUPP) return 0; if (!err) sch->flags |= TCQ_F_OFFLOADED; return err; } static int prio_dump(struct Qdisc *sch, struct sk_buff *skb) { struct prio_sched_data *q = qdisc_priv(sch); unsigned char *b = skb_tail_pointer(skb); struct tc_prio_qopt opt; int err; opt.bands = q->bands; memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX + 1); err = prio_dump_offload(sch); if (err) goto nla_put_failure; if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt)) goto nla_put_failure; return skb->len; nla_put_failure: nlmsg_trim(skb, b); return -1; } static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, struct Qdisc **old) { struct prio_sched_data *q = qdisc_priv(sch); struct tc_prio_qopt_offload graft_offload; struct net_device *dev = qdisc_dev(sch); unsigned long band = arg - 1; bool any_qdisc_is_offloaded; int err; struct tc_to_netdev tc = { .type = TC_SETUP_QDISC_PRIO, { .sch_prio = &graft_offload } }; if (!new) { new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, TC_H_MAKE(sch->handle, arg)); if (!new) new = &noop_qdisc; else qdisc_hash_add(new); } *old = qdisc_replace(sch, new, &q->queues[band]); if (!(dev->features & NETIF_F_HW_TC) || !dev->netdev_ops->ndo_setup_tc) return -EOPNOTSUPP; graft_offload.handle = sch->handle; graft_offload.parent = sch->parent; graft_offload.graft_params.band = band; graft_offload.graft_params.child_handle = new->handle; graft_offload.command = TC_PRIO_GRAFT; err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc); /* Don't report error if the graft is part of destroy operation. */ if (err && new != &noop_qdisc) { /* Don't report error if the parent, the old child and the new * one are not offloaded. */ any_qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED; any_qdisc_is_offloaded |= new->flags &TCQ_F_OFFLOADED; if (*old) any_qdisc_is_offloaded |= (*old)->flags & TCQ_F_OFFLOADED; if (any_qdisc_is_offloaded) pr_err("Offloading graft operation failed."); } return 0; } static struct Qdisc * prio_leaf(struct Qdisc *sch, unsigned long arg) { struct prio_sched_data *q = qdisc_priv(sch); unsigned long band = arg - 1; return q->queues[band]; } static unsigned long prio_get(struct Qdisc *sch, u32 classid) { struct prio_sched_data *q = qdisc_priv(sch); unsigned long band = TC_H_MIN(classid); if (band - 1 >= q->bands) return 0; return band; } static unsigned long prio_bind(struct Qdisc *sch, unsigned long parent, u32 classid) { return prio_get(sch, classid); } static void prio_put(struct Qdisc *q, unsigned long cl) { } static int prio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, struct tcmsg *tcm) { struct prio_sched_data *q = qdisc_priv(sch); tcm->tcm_handle |= TC_H_MIN(cl); tcm->tcm_info = q->queues[cl-1]->handle; return 0; } static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct gnet_dump *d) { struct prio_sched_data *q = qdisc_priv(sch); struct Qdisc *cl_q; cl_q = q->queues[cl - 1]; if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, cl_q->cpu_bstats, &cl_q->bstats) < 0 || gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0) return -1; return 0; } static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct prio_sched_data *q = qdisc_priv(sch); int prio; if (arg->stop) return; for (prio = 0; prio < q->bands; prio++) { if (arg->count < arg->skip) { arg->count++; continue; } if (arg->fn(sch, prio + 1, arg) < 0) { arg->stop = 1; break; } arg->count++; } } static struct tcf_proto __rcu **prio_find_tcf(struct Qdisc *sch, unsigned long cl) { struct prio_sched_data *q = qdisc_priv(sch); if (cl) return NULL; return &q->filter_list; } static const struct Qdisc_class_ops prio_class_ops = { .graft = prio_graft, .leaf = prio_leaf, .get = prio_get, .put = prio_put, .walk = prio_walk, .tcf_chain = prio_find_tcf, .bind_tcf = prio_bind, .unbind_tcf = prio_put, .dump = prio_dump_class, .dump_stats = prio_dump_class_stats, }; static struct Qdisc_ops prio_qdisc_ops __read_mostly = { .next = NULL, .cl_ops = &prio_class_ops, .id = "prio", .priv_size = sizeof(struct prio_sched_data), .enqueue = prio_enqueue, .dequeue = prio_dequeue, .peek = prio_peek, .init = prio_init, .reset = prio_reset, .destroy = prio_destroy, .change = prio_tune, .dump = prio_dump, .owner = THIS_MODULE, }; static int __init prio_module_init(void) { return register_qdisc(&prio_qdisc_ops); } static void __exit prio_module_exit(void) { unregister_qdisc(&prio_qdisc_ops); } module_init(prio_module_init) module_exit(prio_module_exit) MODULE_LICENSE("GPL");