/*
 * Copyright (c) 2019 AVM GmbH.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

/* likely required kernel headers (inferred from the symbols used below) */
#include <linux/module.h>
#include <linux/version.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/sch_generic.h>

#include "avm_qos.h"

#define CONFIG_NET_SCH_HW_BYPASS 1

static struct Qdisc_class_ops hw_sched_tbf_class_ops;
static struct Qdisc_ops tbf_ops;

#define SCH_HW_MAX_CLASSES 32

#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0)
#define QDISC_HAS_DROP 1
#endif

struct hw_sched_priv {
	struct gnet_stats_basic_packed class_stats[SCH_HW_MAX_CLASSES];
	struct Qdisc *leaf;
	struct Qdisc_ops *orig_ops;
	bool hw_disable;
	uint32_t latency_bytes;
	struct avm_qos_priv *hw_priv;
};

/* The hw_sched private area lives behind the wrapped qdisc's own private
 * data (priv_size is enlarged accordingly in hook_qdisc_ops()).
 */
static struct hw_sched_priv *qdisc_priv_priv(const struct Qdisc *qdisc)
{
	uint32_t orig_priv_size = qdisc->ops->priv_size -
				  sizeof(struct hw_sched_priv);

	return (struct hw_sched_priv *)(qdisc_priv((struct Qdisc *)qdisc) +
					orig_priv_size);
}

static int hw_sched_graft(struct Qdisc *sch, unsigned long arg,
			  struct Qdisc *new, struct Qdisc **old)
{
	struct hw_sched_priv *priv = qdisc_priv_priv(sch);
	int rv;

	rv = priv->orig_ops->cl_ops->graft(sch, arg, new, old);
	if (rv)
		return rv;

	priv->leaf = priv->orig_ops->cl_ops->leaf(sch, arg);
	return 0;
}

#if IS_ENABLED(CONFIG_NET_SCH_HW_BYPASS)
static struct sk_buff *hw_sched_peek(struct Qdisc *sch)
{
	struct hw_sched_priv *priv;

	priv = qdisc_priv_priv(sch);

	if (priv->hw_disable)
		return priv->orig_ops->peek(sch);

	return qdisc_peek_head(sch);
}

#if defined(QDISC_HAS_DROP)
static unsigned int hw_sched_drop(struct Qdisc *sch)
{
	struct hw_sched_priv *priv;

	priv = qdisc_priv_priv(sch);

	if (priv->hw_disable)
		return priv->orig_ops->drop(sch);

	return qdisc_queue_drop(sch);
}
#endif

static void hw_sched_reset(struct Qdisc *sch)
{
	struct hw_sched_priv *priv;

	priv = qdisc_priv_priv(sch);

	if (priv->hw_disable) {
		priv->orig_ops->reset(sch);
		return;
	}

	qdisc_reset_queue(sch);
}

static struct sk_buff *hw_sched_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct hw_sched_priv *priv;
	u32 classid;

	priv = qdisc_priv_priv(sch);

	if (priv->hw_disable)
		return priv->orig_ops->dequeue(sch);

	if (likely(priv->leaf)) {
		skb = priv->leaf->dequeue(priv->leaf);
	} else {
		pr_debug("no leaf found, dequeue from self\n");
		skb = qdisc_dequeue_head(sch);
	}

	if (unlikely(!skb))
		return NULL;

	sch->q.qlen--;
	qdisc_bstats_update(sch, skb);

	classid = TC_H_MIN(skb->priority);
	if (likely(classid && classid < SCH_HW_MAX_CLASSES))
		bstats_update(&priv->class_stats[classid], skb);

	return skb;
}

static int hw_sched_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			    struct sk_buff **free)
{
	int rv;
	struct hw_sched_priv *priv;

	priv = qdisc_priv_priv(sch);

	if (priv->hw_disable)
		return priv->orig_ops->enqueue(skb, sch, free);

	if (likely(priv->leaf)) {
		rv = qdisc_enqueue(skb, priv->leaf, free);
	} else {
		pr_debug("no leaf found, enqueue to self\n");
		rv = qdisc_enqueue_tail(skb, sch);
	}

	if (rv != NET_XMIT_SUCCESS) {
		if (net_xmit_drop_count(rv))
			sch->qstats.drops++;
		return rv;
	}

	sch->q.qlen++;
	return rv;
}
#endif

static void hw_sched_tbf_destroy(struct Qdisc *sch)
{
	struct hw_sched_priv *priv = qdisc_priv_priv(sch);

	if (priv->hw_disable == false) {
		if (sch->parent == TC_H_ROOT) {
			avm_qos_reset_port_shaper(sch->dev_queue->dev);
		} else {
			avm_qos_reset_prio_shaper(sch->dev_queue->dev,
						  TC_H_MIN(sch->parent));
		}
		avm_qos_free(priv->hw_priv);
	}

	tbf_ops.destroy(sch);
}

static __u32 hw_sched_get_root_qlen(struct Qdisc *q)
{
	struct Qdisc *root;
	struct hw_sched_priv *priv;

	root = q->dev_queue->dev->qdisc;
	priv = qdisc_priv_priv(root);

	return priv->latency_bytes;
}

static __u32 hw_sched_get_class_handle(struct Qdisc *q, unsigned long arg)
{
	struct tcmsg *tcm;
	struct sk_buff *skb;
	uint32_t handle;

	handle = 0;

	if (q->ops->cl_ops->dump == NULL)
		return handle;

	/* fake a nl message to dump class information to */
	skb = alloc_skb(sizeof(*tcm), GFP_KERNEL);
	if (!skb)
		return 0;

	tcm = (void *)skb_put(skb, sizeof(*tcm));
	memset(tcm, 0, sizeof(*tcm));

	q->ops->cl_ops->dump(q, arg, skb, tcm);
	handle = tcm->tcm_handle;

	dev_kfree_skb(skb);
	return handle;
}

static bool hw_sched_qdisc_is_compatible(const struct Qdisc *q)
{
	return (!strcmp(q->ops->id, "llq") || !strcmp(q->ops->id, "tbf"));
}

static int hw_sched_propagate_walker_fn(struct Qdisc *q, unsigned long cl,
					struct qdisc_walker *w)
{
	uint32_t handle;

	handle = hw_sched_get_class_handle(q, cl);
	pr_debug("%d: class handle: %08x\n", w->count, handle);

	avm_qos_set_queue_len(q->dev_queue->dev, handle,
			      hw_sched_get_root_qlen(q));
	return 1;
}

static int hw_sched_tbf_propagate(struct Qdisc *sch, struct nlattr *opt)
{
	struct Qdisc *q;
	struct qdisc_walker w;
	int bucket;

	BUG_ON(sch->parent != TC_H_ROOT);

	w.fn = hw_sched_propagate_walker_fn;

	pr_debug("start propagation\n");
	hash_for_each_rcu (qdisc_dev(sch)->qdisc_hash, bucket, q, hash) {
		if (hw_sched_qdisc_is_compatible(q)) {
			pr_debug("start walking %08x\n", q->handle);
			w.stop = w.skip = w.count = 0;
			q->ops->cl_ops->walk(q, &w);
		} else {
			pr_debug("skip walking %08x\n", q->handle);
		}
	}

	return 0;
}

static int hw_sched_tbf_set(struct Qdisc *sch, struct nlattr *opt, bool init)
{
	int err;
	struct nlattr *tb[TCA_TBF_PTAB + 1];
	struct tc_tbf_qopt *qopt;
	struct qdisc_rate_table *rtab;
	struct qdisc_rate_table *ptab;
	int size, peak_size, n;
	struct hw_sched_priv *priv;

	ptab = NULL;
	priv = qdisc_priv_priv(sch);

	err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, NULL);
	if (err < 0)
		return err;

	err = -EINVAL;
	if (tb[TCA_TBF_PARMS] == NULL)
		return err;

	qopt = nla_data(tb[TCA_TBF_PARMS]);
	rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]);
	if (rtab == NULL)
		goto done;

	if (qopt->peakrate.rate) {
		if (qopt->peakrate.rate > qopt->rate.rate)
			ptab = qdisc_get_rtab(&qopt->peakrate,
					      tb[TCA_TBF_PTAB]);
		if (ptab == NULL)
			goto done;
	}

	for (n = 0; n < 256; n++)
		if (rtab->data[n] > qopt->buffer)
			break;
	size = (n << qopt->rate.cell_log) - 1;

	if (ptab) {
		for (n = 0; n < 256; n++)
			if (ptab->data[n] > qopt->mtu)
				break;
		peak_size = (n << qopt->peakrate.cell_log) - 1;
	}

	/* do not tolerate 100ms of latency or above */
	if (qopt->limit / (rtab->rate.rate / 1000) >= 100) {
		priv->latency_bytes = (rtab->rate.rate / 1000) * 5;
		pr_info("sch_hw: adjust excessive latency to ~5ms (%u Bytes)\n",
			priv->latency_bytes);
	} else {
		priv->latency_bytes = qopt->limit;
	}

	if (sch->parent == TC_H_ROOT) {
		struct qdisc_size_table *stab = rcu_dereference_bh(sch->stab);

		err = avm_qos_set_port_shaper(sch->dev_queue->dev,
					      ptab ? ptab->rate.rate :
						     rtab->rate.rate,
					      rtab->rate.rate,
					      ptab ? peak_size : size, size,
					      stab ? stab->szopts.overhead : 0,
					      init, priv->hw_priv);
		hw_sched_tbf_propagate(sch, opt);
	} else {
		err = avm_qos_set_prio_shaper(sch->dev_queue->dev,
					      TC_H_MIN(sch->parent),
					      ptab ? ptab->rate.rate : 0,
					      rtab->rate.rate,
					      ptab ? peak_size : 0, size,
					      init, priv->hw_priv);
	}

done:
	if (rtab)
		qdisc_put_rtab(rtab);
	if (ptab)
		qdisc_put_rtab(ptab);

	return err;
}

static int hw_sched_tbf_change(struct Qdisc *sch, struct nlattr *opt)
{
	int err;

	err = tbf_ops.change(sch, opt);
	if (err < 0)
		return err;

	if (qdisc_priv_priv(sch)->hw_disable)
		return err;

	pr_debug("changing tbf filter\n");
	return hw_sched_tbf_set(sch, opt, false);
}

static int hw_sched_tbf_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct hw_sched_priv *priv;
	int err;

	err = tbf_ops.init(sch, opt);
	if (err < 0)
		return err;

	memcpy(&hw_sched_tbf_class_ops, tbf_ops.cl_ops,
	       sizeof(hw_sched_tbf_class_ops));
	if (IS_ENABLED(CONFIG_NET_SCH_HW_BYPASS)) {
		hw_sched_tbf_class_ops.graft = hw_sched_graft;
	}

	priv = qdisc_priv_priv(sch);
	memset(priv, 0, sizeof(*priv));
	priv->orig_ops = &tbf_ops;

	if (!avm_qos_netdev_supported(sch->dev_queue->dev))
		goto no_hw;

	priv->hw_priv = avm_qos_alloc();
	if (!priv->hw_priv)
		goto no_hw;

	pr_debug("init tbf filter\n");
	return hw_sched_tbf_set(sch, opt, true);

no_hw:
	priv->hw_disable = true;
	return err;
}

#if IS_ENABLED(CONFIG_AVM_PA_SCH_LLQ)
static struct Qdisc_class_ops hw_sched_llq_class_ops;
static struct Qdisc_ops llq_ops;

static u32 _hw_sched_llq_get_classid(struct Qdisc *sch, unsigned long cl)
{
	static struct sk_buff skb_dummy = { 0 };
	struct tcmsg tcm;

	sch->ops->cl_ops->dump(sch, cl, &skb_dummy, &tcm);
	BUG_ON(TC_H_MAJ(sch->handle) != TC_H_MAJ(tcm.tcm_handle));

	return tcm.tcm_handle;
}

static int hw_sched_llq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
					 struct gnet_dump *d)
{
	u32 classid;
	struct avm_qos_stats avm_stats;
	struct gnet_stats_basic_packed gnet_stats;
	struct hw_sched_priv *priv;

	priv = qdisc_priv_priv(sch);

	if (priv->hw_disable) {
		return priv->orig_ops->cl_ops->dump_stats(sch, cl, d);
	}

	classid = _hw_sched_llq_get_classid(sch, cl);
	BUG_ON(TC_H_MIN(classid) >= SCH_HW_MAX_CLASSES);

	if (!avm_qos_get_prio_stats(sch->dev_queue->dev, classid,
				    &avm_stats)) {
		gnet_stats.bytes = avm_stats.valid_bytes ? avm_stats.bytes : 0;
		gnet_stats.packets = avm_stats.valid_packets ?
					     avm_stats.packets : 0;
	} else {
		/* SW counter fallback */
		gnet_stats = priv->class_stats[TC_H_MIN(classid)];
	}

	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL,
				  &gnet_stats) < 0) {
		/* error handling seems broken, so just throw a bug here */
		BUG();
	} else {
		return 0;
	}
}

static int hw_sched_llq_change_class(struct Qdisc *sch, u32 classid,
				     u32 parentid, struct nlattr **tca,
				     unsigned long *arg)
{
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_LLQ_MAX + 1];
	struct tc_llq_copt *params;
	int err = -EINVAL;
	struct hw_sched_priv *priv;

	priv = qdisc_priv_priv(sch);

	/* call original SW-implementation */
	err = llq_ops.cl_ops->change(sch, classid, parentid, tca, arg);
	if (priv->hw_disable || err)
		return err;

	BUG_ON(TC_H_MIN(classid) >= SCH_HW_MAX_CLASSES);

	if (opt == NULL || nla_parse_nested(tb, TCA_LLQ_MAX, opt, NULL))
		return -EINVAL;

	if (!tb[TCA_LLQ_OPTIONS])
		return -EINVAL;

	params = nla_data(tb[TCA_LLQ_OPTIONS]);

	/* TODO identify non-effective weights properly */
	avm_qos_add_hw_queue(sch->dev_queue->dev, classid, params->priority,
			     params->weight != 1 ? params->weight : 0);
	{
		uint32_t qlen = hw_sched_get_root_qlen(sch);

		if (qlen) {
			avm_qos_set_queue_len(sch->dev_queue->dev, classid,
					      qlen);
		}
	}

	return err;
}

static void hw_sched_llq_destroy(struct Qdisc *sch)
{
	struct net_device *netdev;

	if (qdisc_priv_priv(sch)->hw_disable == false) {
		netdev = sch->dev_queue->dev;
		avm_qos_flush_hw_queues(netdev);
	}

	llq_ops.destroy(sch);
}

static int hw_sched_llq_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct hw_sched_priv *priv;
	bool hw_disable;
	struct tc_llq_qopt *qopt = nla_data(opt);

	pr_debug("new qdisc maxq=%d minq=%d defaultclass=%u\n", qopt->maxq,
		 qopt->minq, qopt->defaultclass);

	hw_disable = !avm_qos_netdev_supported(sch->dev_queue->dev);

	memcpy(&hw_sched_llq_class_ops, llq_ops.cl_ops,
	       sizeof(hw_sched_llq_class_ops));

	/* don't tamper with class or platform stuff if hw_disable */
	if (hw_disable == false) {
		avm_qos_set_default_queue(sch->dev_queue->dev,
					  qopt->defaultclass);
	}

	hw_sched_llq_class_ops.change = hw_sched_llq_change_class;
	if (IS_ENABLED(CONFIG_NET_SCH_HW_BYPASS)) {
		/* fall back to generic stats */
		hw_sched_llq_class_ops.dump_stats =
			hw_sched_llq_dump_class_stats;
		hw_sched_llq_class_ops.graft = hw_sched_graft;
	}

	priv = qdisc_priv_priv(sch);
	memset(priv, 0, sizeof(*priv));
	priv->orig_ops = &llq_ops;
	priv->hw_disable = hw_disable;

	BUG_ON(llq_ops.init == NULL);
	llq_ops.init(sch, opt);

	return 0;
}

static struct Qdisc_ops hw_sched_llq_ops = {
	.cl_ops = &hw_sched_llq_class_ops,
	.id = "llq", /* mimic llq in hardware */
	.init = hw_sched_llq_init,
	.destroy = hw_sched_llq_destroy,
#if IS_ENABLED(CONFIG_NET_SCH_HW_BYPASS)
	/* fold original discipline to save performance */
	.enqueue = hw_sched_enqueue,
	.dequeue = hw_sched_dequeue,
	.peek = hw_sched_peek,
#if defined(QDISC_HAS_DROP)
	.drop = hw_sched_drop,
#endif
	.reset = hw_sched_reset,
#endif
	.priv_size = sizeof(struct hw_sched_priv),
};
#endif // CONFIG_AVM_PA_SCH_LLQ

static struct Qdisc_ops hw_sched_tbf_ops = {
	.cl_ops = &hw_sched_tbf_class_ops,
	.id = "tbf", /* mimic tbf in hardware */
	.destroy = hw_sched_tbf_destroy,
	.change = hw_sched_tbf_change,
	.init = hw_sched_tbf_init,
#if IS_ENABLED(CONFIG_NET_SCH_HW_BYPASS)
	/* fold original discipline to save performance */
	.enqueue = hw_sched_enqueue,
	.dequeue = hw_sched_dequeue,
	.peek = hw_sched_peek,
#if defined(QDISC_HAS_DROP)
	.drop = hw_sched_drop,
#endif
	.reset = hw_sched_reset,
#endif
	.priv_size = sizeof(struct hw_sched_priv),
};

int hook_qdisc_ops(struct Qdisc_ops *sw_ops, const struct Qdisc_ops *hw_ops)
{
	struct nlattr *kind;
	struct Qdisc_ops *ops;

	pr_debug("hooking qdisc ops for sch_%s... ", hw_ops->id);
", hw_ops->id); kind = kmalloc(strlen(hw_ops->id) + 1 + NLA_HDRLEN, GFP_KERNEL); if(!kind) return -ENOMEM; kind->nla_len = strlen(hw_ops->id) + 1 + NLA_HDRLEN; kind->nla_type = NLA_NUL_STRING; strcpy(nla_data(kind), hw_ops->id); ops = qdisc_lookup_ops(kind); #ifdef CONFIG_MODULES if(!ops) request_module("sch_%s", hw_ops->id); ops = qdisc_lookup_ops(kind); #endif kfree(kind); if(!ops) { pr_debug("not found\n"); return -ENOENT; } memcpy(sw_ops, ops, sizeof(*ops)); ops->priv_size += hw_ops->priv_size; #define QDISC_OP_CHECK_AND_REPLACE(name) \ do { \ if(hw_ops->name) ops->name = hw_ops->name; \ } while(0) QDISC_OP_CHECK_AND_REPLACE(cl_ops); QDISC_OP_CHECK_AND_REPLACE(enqueue); QDISC_OP_CHECK_AND_REPLACE(dequeue); QDISC_OP_CHECK_AND_REPLACE(peek); #if defined(QDISC_HAS_DROP) QDISC_OP_CHECK_AND_REPLACE(drop); #endif QDISC_OP_CHECK_AND_REPLACE(init); QDISC_OP_CHECK_AND_REPLACE(reset); QDISC_OP_CHECK_AND_REPLACE(destroy); QDISC_OP_CHECK_AND_REPLACE(change); QDISC_OP_CHECK_AND_REPLACE(attach); QDISC_OP_CHECK_AND_REPLACE(dump); QDISC_OP_CHECK_AND_REPLACE(dump_stats); #undef QDISC_OP_CHECK_AND_REPLACE pr_debug("done\n"); return 0; } static int __init hw_sched_module_init(void) { int rv = 0; #if IS_ENABLED(CONFIG_AVM_PA_SCH_LLQ) rv = hook_qdisc_ops(&llq_ops, &hw_sched_llq_ops); #endif rv |= hook_qdisc_ops(&tbf_ops, &hw_sched_tbf_ops); return rv; } static void __exit hw_sched_module_exit(void) { /* TODO */ BUG(); } module_init(hw_sched_module_init) module_exit(hw_sched_module_exit)