/*
 * Copyright (c) 2019 AVM GmbH <info@avm.de>.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/kconfig.h>
#include <linux/version.h>
#include <linux/kmod.h>
#include <linux/hashtable.h>
#include <net/pkt_sched.h>
#include <linux/version.h>

#include "avm_qos.h"

#define CONFIG_NET_SCH_HW_BYPASS 1

static struct Qdisc_class_ops hw_sched_tbf_class_ops;
static struct Qdisc_ops tbf_ops;
#define SCH_HW_MAX_CLASSES 32

#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0)
#define QDISC_HAS_DROP 1
#endif

struct hw_sched_priv {
	struct gnet_stats_basic_packed class_stats[SCH_HW_MAX_CLASSES];
	struct Qdisc *leaf;
	struct Qdisc_ops *orig_ops;
	bool hw_disable;
	uint32_t latency_bytes;
	struct avm_qos_priv *hw_priv;
};

static struct hw_sched_priv *qdisc_priv_priv(const struct Qdisc *qdisc)
{
	uint32_t orig_priv_size =
	 qdisc->ops->priv_size - sizeof(struct hw_sched_priv);
	return (struct hw_sched_priv *)(qdisc_priv((struct Qdisc *)qdisc) +
	                                orig_priv_size);
}

static int hw_sched_graft(struct Qdisc *sch,
                          unsigned long arg,
                          struct Qdisc *new,
                          struct Qdisc **old)
{
	struct hw_sched_priv *priv = qdisc_priv_priv(sch);
	int rv;

	rv = priv->orig_ops->cl_ops->graft(sch, arg, new, old);
	if(rv) return rv;

	priv->leaf = priv->orig_ops->cl_ops->leaf(sch, arg);

	return 0;
}

#if IS_ENABLED(CONFIG_NET_SCH_HW_BYPASS)
static struct sk_buff *hw_sched_peek(struct Qdisc *sch)
{
	struct hw_sched_priv *priv;
	priv = qdisc_priv_priv(sch);
	if(priv->hw_disable) return priv->orig_ops->peek(sch);
	return qdisc_peek_head(sch);
}

#if defined(QDISC_HAS_DROP)
static unsigned int hw_sched_drop(struct Qdisc *sch)
{
	struct hw_sched_priv *priv;
	priv = qdisc_priv_priv(sch);
	if(priv->hw_disable) return priv->orig_ops->drop(sch);
	return qdisc_queue_drop(sch);
}
#endif

static void hw_sched_reset(struct Qdisc *sch)
{
	struct hw_sched_priv *priv;
	priv = qdisc_priv_priv(sch);
	if(priv->hw_disable) return priv->orig_ops->reset(sch);
	return qdisc_reset_queue(sch);
}

static struct sk_buff *hw_sched_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct hw_sched_priv *priv;
	u32 classid;
	priv = qdisc_priv_priv(sch);

	if(priv->hw_disable) return priv->orig_ops->dequeue(sch);

	if(likely(priv->leaf)) {
		skb = priv->leaf->dequeue(priv->leaf);
	} else {
		pr_debug("no leaf found, dequeue from self\n");
		skb = qdisc_dequeue_head(sch);
	}
	if(unlikely(!skb)) return NULL;

	sch->q.qlen--;
	qdisc_bstats_update(sch, skb);
	classid = TC_H_MIN(skb->priority);
	if(likely(classid && classid < SCH_HW_MAX_CLASSES)) {
		bstats_update(&priv->class_stats[TC_H_MIN(skb->priority)], skb);
	}

	return skb;
}

static int hw_sched_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **free)
{
	int rv;
	struct hw_sched_priv *priv;
	priv = qdisc_priv_priv(sch);

	if(priv->hw_disable) return priv->orig_ops->enqueue(skb, sch, free);

	if(likely(priv->leaf)) {
		rv = qdisc_enqueue(skb, priv->leaf, free);
	} else {
		pr_debug("no leaf found, enqueue to self\n");
		rv = qdisc_enqueue_tail(skb, sch);
	}
	if(rv != NET_XMIT_SUCCESS) {
		if(net_xmit_drop_count(rv)) sch->qstats.drops++;
		return rv;
	}

	sch->q.qlen++;
	return rv;
}
#endif


static void hw_sched_tbf_destroy(struct Qdisc *sch)
{
	struct hw_sched_priv *priv = qdisc_priv_priv(sch);

	if(priv->hw_disable == false) {
		if(sch->parent == TC_H_ROOT) {
			avm_qos_reset_port_shaper(sch->dev_queue->dev);
		} else {
			avm_qos_reset_prio_shaper(sch->dev_queue->dev,
			                          TC_H_MIN(sch->parent));
		}

		avm_qos_free(priv->hw_priv);
	}
	tbf_ops.destroy(sch);
}

static __u32 hw_sched_get_root_qlen(struct Qdisc *q)
{
	struct Qdisc *root;
	struct hw_sched_priv *priv;

	root = q->dev_queue->dev->qdisc;
	priv = qdisc_priv_priv(root);

	return priv->latency_bytes;
}

static __u32 hw_sched_get_class_handle(struct Qdisc *q, unsigned long arg)
{
	struct tcmsg *tcm;
	struct sk_buff *skb;
	uint32_t handle;

	handle = 0;
	if(q->ops->cl_ops->dump == NULL) return handle;

	/* fake a nl message to dump class information to */
	skb = alloc_skb(sizeof(*tcm), GFP_KERNEL);
	if(!skb) return 0;
	tcm = (void *)skb_put(skb, sizeof(*tcm));
	memset(tcm, 0, sizeof(*tcm));

	q->ops->cl_ops->dump(q, arg, skb, tcm);
	handle = tcm->tcm_handle;

	dev_kfree_skb(skb);

	return handle;
}

static bool hw_sched_qdisc_is_compatible(const struct Qdisc *q)
{
	return (!strcmp(q->ops->id, "llq") || !strcmp(q->ops->id, "tbf"));
}

static int hw_sched_propagate_walker_fn(struct Qdisc *q,
                                        unsigned long cl,
                                        struct qdisc_walker *w)
{
	uint32_t handle;
	handle = hw_sched_get_class_handle(q, cl);
	pr_debug("%d: class handle: %08x\n", w->count, handle);
	avm_qos_set_queue_len(q->dev_queue->dev, handle, hw_sched_get_root_qlen(q));

	return 1;
}

static int hw_sched_tbf_propagate(struct Qdisc *sch, struct nlattr *opt)
{
	struct Qdisc *q;
	struct qdisc_walker w;
	int bucket;

	BUG_ON(sch->parent != TC_H_ROOT);

	w.fn = hw_sched_propagate_walker_fn;

	pr_debug("start propagation\n");

	hash_for_each_rcu (qdisc_dev(sch)->qdisc_hash, bucket, q, hash) {
		if (hw_sched_qdisc_is_compatible(q)) {
			pr_debug("start walking %08x\n", q->handle);
			w.stop = w.skip = w.count = 0;
			q->ops->cl_ops->walk(q, &w);
		} else {
			pr_debug("skip walking %08x\n", q->handle);
		}
	}

	return 0;
}

static int hw_sched_tbf_set(struct Qdisc *sch, struct nlattr *opt, bool init)
{
	int err;
	struct nlattr *tb[TCA_TBF_PTAB + 1];
	struct tc_tbf_qopt *qopt;
	struct qdisc_rate_table *rtab;
	struct qdisc_rate_table *ptab;
	int size, peak_size, n;
	struct hw_sched_priv *priv;

	ptab = NULL;

	priv = qdisc_priv_priv(sch);

	err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, NULL);
	if(err < 0) return err;

	err = -EINVAL;
	if(tb[TCA_TBF_PARMS] == NULL) return err;

	qopt = nla_data(tb[TCA_TBF_PARMS]);
	rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]);
	if(rtab == NULL) goto done;

	if(qopt->peakrate.rate) {
		if(qopt->peakrate.rate > qopt->rate.rate)
			ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]);
		if(ptab == NULL) goto done;
	}

	for(n = 0; n < 256; n++)
		if(rtab->data[n] > qopt->buffer) break;
	size = (n << qopt->rate.cell_log) - 1;
	if(ptab) {
		for(n = 0; n < 256; n++)
			if(ptab->data[n] > qopt->mtu) break;
		peak_size = (n << qopt->peakrate.cell_log) - 1;
	}

	/* do not tolerate 100ms of latency or above */
	if(qopt->limit / (rtab->rate.rate / 1000) >= 100) {
		priv->latency_bytes = (rtab->rate.rate / 1000) * 5;
		pr_info("sch_hw: adjust excessive latency to ~5ms (%u Bytes)\n",
		        priv->latency_bytes);
	} else {
		priv->latency_bytes = qopt->limit;
	}

	if(sch->parent == TC_H_ROOT) {
		struct qdisc_size_table *stab = rcu_dereference_bh(sch->stab);
		err = avm_qos_set_port_shaper(sch->dev_queue->dev,
		                              ptab ? ptab->rate.rate : rtab->rate.rate,
		                              rtab->rate.rate,
		                              ptab ? peak_size : size,
		                              size,
		                              stab ? stab->szopts.overhead : 0,
		                              init,
		                              priv->hw_priv);

		hw_sched_tbf_propagate(sch, opt);
	} else {
		err = avm_qos_set_prio_shaper(sch->dev_queue->dev,
		                              TC_H_MIN(sch->parent),
		                              ptab ? ptab->rate.rate : 0,
		                              rtab->rate.rate,
		                              ptab ? peak_size : 0,
		                              size,
		                              init,
		                              priv->hw_priv);
	}

done:
	if(rtab) qdisc_put_rtab(rtab);
	if(ptab) qdisc_put_rtab(ptab);

	return err;
}

static int hw_sched_tbf_change(struct Qdisc *sch, struct nlattr *opt)
{
	int err;
	err = tbf_ops.change(sch, opt);
	if(err < 0) return err;

	if(qdisc_priv_priv(sch)->hw_disable) return err;

	pr_debug("changing tbf filter\n");
	return hw_sched_tbf_set(sch, opt, false);
}

static int hw_sched_tbf_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct hw_sched_priv *priv;
	int err;
	err = tbf_ops.init(sch, opt);
	if(err < 0) return err;

	memcpy(&hw_sched_tbf_class_ops,
	       tbf_ops.cl_ops,
	       sizeof(hw_sched_tbf_class_ops));

	if(IS_ENABLED(CONFIG_NET_SCH_HW_BYPASS)) {
		hw_sched_tbf_class_ops.graft = hw_sched_graft;
	}

	priv = qdisc_priv_priv(sch);
	memset(priv, 0, sizeof(*priv));
	priv->orig_ops = &tbf_ops;

	if(!avm_qos_netdev_supported(sch->dev_queue->dev))
		goto no_hw;

	priv->hw_priv = avm_qos_alloc();
	if (!priv->hw_priv)
		goto no_hw;

	pr_debug("init tbf filter\n");

	return hw_sched_tbf_set(sch, opt, true);
no_hw:
	priv->hw_disable = true;
	return err;
}

#if IS_ENABLED(CONFIG_AVM_PA_SCH_LLQ)
static struct Qdisc_class_ops hw_sched_llq_class_ops;
static struct Qdisc_ops llq_ops;

static u32 _hw_sched_llq_get_classid(struct Qdisc *sch, unsigned long cl)
{
	static struct sk_buff skb_dummy = { 0 };
	struct tcmsg tcm;

	sch->ops->cl_ops->dump(sch, cl, &skb_dummy, &tcm);
	BUG_ON(TC_H_MAJ(sch->handle) != TC_H_MAJ(tcm.tcm_handle));

	return tcm.tcm_handle;
}

static int hw_sched_llq_dump_class_stats(struct Qdisc *sch,
                                         unsigned long cl,
                                         struct gnet_dump *d)
{
	u32 classid;
	struct avm_qos_stats avm_stats;
	struct gnet_stats_basic_packed gnet_stats;
	struct hw_sched_priv *priv;

	priv = qdisc_priv_priv(sch);

	if (priv->hw_disable) {
		return priv->orig_ops->cl_ops->dump_stats(sch, cl, d);
	}

	classid = _hw_sched_llq_get_classid(sch, cl);
	BUG_ON(TC_H_MIN(classid) >= SCH_HW_MAX_CLASSES);

	if(!avm_qos_get_prio_stats(sch->dev_queue->dev, classid, &avm_stats)) {
		gnet_stats.bytes = avm_stats.valid_bytes ? avm_stats.bytes : 0;
		gnet_stats.packets = avm_stats.valid_packets ? avm_stats.packets : 0;
	} else {
		/* SW counter fallback */
		gnet_stats = priv->class_stats[TC_H_MIN(classid)];
	}

	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL,
				  &gnet_stats) < 0) {
		/* error handling seems broken, so just throw a bug here */
		BUG();
	} else {
		return 0;
	}
}

static int hw_sched_llq_change_class(struct Qdisc *sch,
                                     u32 classid,
                                     u32 parentid,
                                     struct nlattr **tca,
                                     unsigned long *arg)
{
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_LLQ_MAX + 1];
	struct tc_llq_copt *params;
	int err = -EINVAL;
	struct hw_sched_priv *priv;

	priv = qdisc_priv_priv(sch);

	/* call original SW-implementation*/
	err = llq_ops.cl_ops->change(sch, classid, parentid, tca, arg);
	if(priv->hw_disable || err) return err;

	BUG_ON(TC_H_MIN(classid) >= SCH_HW_MAX_CLASSES);

	if(opt == NULL || nla_parse_nested(tb, TCA_LLQ_MAX, opt, NULL))
		return -EINVAL;

	if(!tb[TCA_LLQ_OPTIONS]) return -EINVAL;

	params = nla_data(tb[TCA_LLQ_OPTIONS]);

	/* TODO identify non-effective weights properly */
	avm_qos_add_hw_queue(sch->dev_queue->dev,
	                     classid,
	                     params->priority,
	                     params->weight != 1 ? params->weight : 0);
	{
		uint32_t qlen = hw_sched_get_root_qlen(sch);
		if(qlen) {
			avm_qos_set_queue_len(sch->dev_queue->dev, classid, qlen);
		}
	}


	return err;
}

static void hw_sched_llq_destroy(struct Qdisc *sch)
{
	struct net_device *netdev;

	if(qdisc_priv_priv(sch)->hw_disable == false) {
		netdev = sch->dev_queue->dev;
		avm_qos_flush_hw_queues(netdev);
	}

	llq_ops.destroy(sch);
}

static int hw_sched_llq_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct hw_sched_priv *priv;
	bool hw_disable;
	struct tc_llq_qopt *qopt = nla_data(opt);
	pr_debug("new qdisc maxq=%d minq=%d defaultclass=%u\n",
	         qopt->maxq,
	         qopt->minq,
	         qopt->defaultclass);

	hw_disable = !avm_qos_netdev_supported(sch->dev_queue->dev);


	memcpy(&hw_sched_llq_class_ops,
	       llq_ops.cl_ops,
	       sizeof(hw_sched_llq_class_ops));

	/* don't tamper with class or platform stuff if hw_disable */
	if(hw_disable == false) {
		avm_qos_set_default_queue(sch->dev_queue->dev, qopt->defaultclass);
	}

	hw_sched_llq_class_ops.change = hw_sched_llq_change_class;

	if(IS_ENABLED(CONFIG_NET_SCH_HW_BYPASS)) {
		/* fall back to generic stats */
		hw_sched_llq_class_ops.dump_stats = hw_sched_llq_dump_class_stats;
		hw_sched_llq_class_ops.graft = hw_sched_graft;
	}

	priv = qdisc_priv_priv(sch);
	memset(priv, 0, sizeof(*priv));
	priv->orig_ops = &llq_ops;
	priv->hw_disable = hw_disable;

	BUG_ON(llq_ops.init == NULL);
	llq_ops.init(sch, opt);

	return 0;
}

static struct Qdisc_ops hw_sched_llq_ops = {
	.cl_ops = &hw_sched_llq_class_ops,
	.id = "llq", /* mimic llq in hardware */
	.init = hw_sched_llq_init,
	.destroy = hw_sched_llq_destroy,

#if IS_ENABLED(CONFIG_NET_SCH_HW_BYPASS)
	/* fold original discipline to save performance */
	.enqueue = hw_sched_enqueue,
	.dequeue = hw_sched_dequeue,
	.peek = hw_sched_peek,
#if defined(QDISC_HAS_DROP)
	.drop = hw_sched_drop,
#endif
	.reset = hw_sched_reset,
#endif
	.priv_size = sizeof(struct hw_sched_priv),
};
#endif // CONFIG_AVM_PA_SCH_LLQ

static struct Qdisc_ops hw_sched_tbf_ops = {
	.cl_ops = &hw_sched_tbf_class_ops,
	.id = "tbf", /* mimic tbf in hardware */
	.destroy = hw_sched_tbf_destroy,
	.change = hw_sched_tbf_change,
	.init = hw_sched_tbf_init,

#if IS_ENABLED(CONFIG_NET_SCH_HW_BYPASS)
	/* fold original discipline to save performance */
	.enqueue = hw_sched_enqueue,
	.dequeue = hw_sched_dequeue,
	.peek = hw_sched_peek,
#if defined(QDISC_HAS_DROP)
	.drop = hw_sched_drop,
#endif
	.reset = hw_sched_reset,
#endif
	.priv_size = sizeof(struct hw_sched_priv),
};

int hook_qdisc_ops(struct Qdisc_ops *sw_ops, const struct Qdisc_ops *hw_ops)
{
	struct nlattr *kind;
	struct Qdisc_ops *ops;

	pr_debug("hooking qdisc ops for sch_%s... ", hw_ops->id);
	kind = kmalloc(strlen(hw_ops->id) + 1 + NLA_HDRLEN, GFP_KERNEL);
	if(!kind) return -ENOMEM;
	kind->nla_len = strlen(hw_ops->id) + 1 + NLA_HDRLEN;
	kind->nla_type = NLA_NUL_STRING;
	strcpy(nla_data(kind), hw_ops->id);
	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_MODULES
	if(!ops) request_module("sch_%s", hw_ops->id);
	ops = qdisc_lookup_ops(kind);
#endif
	kfree(kind);
	if(!ops) {
		pr_debug("not found\n");
		return -ENOENT;
	}

	memcpy(sw_ops, ops, sizeof(*ops));

	ops->priv_size += hw_ops->priv_size;
#define QDISC_OP_CHECK_AND_REPLACE(name)           \
	do {                                           \
		if(hw_ops->name) ops->name = hw_ops->name; \
	} while(0)

	QDISC_OP_CHECK_AND_REPLACE(cl_ops);
	QDISC_OP_CHECK_AND_REPLACE(enqueue);
	QDISC_OP_CHECK_AND_REPLACE(dequeue);
	QDISC_OP_CHECK_AND_REPLACE(peek);
#if defined(QDISC_HAS_DROP)
	QDISC_OP_CHECK_AND_REPLACE(drop);
#endif
	QDISC_OP_CHECK_AND_REPLACE(init);
	QDISC_OP_CHECK_AND_REPLACE(reset);
	QDISC_OP_CHECK_AND_REPLACE(destroy);
	QDISC_OP_CHECK_AND_REPLACE(change);
	QDISC_OP_CHECK_AND_REPLACE(attach);
	QDISC_OP_CHECK_AND_REPLACE(dump);
	QDISC_OP_CHECK_AND_REPLACE(dump_stats);

#undef QDISC_OP_CHECK_AND_REPLACE
	pr_debug("done\n");

	return 0;
}

static int __init hw_sched_module_init(void)
{
	int rv = 0;
#if IS_ENABLED(CONFIG_AVM_PA_SCH_LLQ)
	rv = hook_qdisc_ops(&llq_ops, &hw_sched_llq_ops);
#endif
	rv |= hook_qdisc_ops(&tbf_ops, &hw_sched_tbf_ops);

	return rv;
}

static void __exit hw_sched_module_exit(void)
{
	/* TODO */
	BUG();
}

module_init(hw_sched_module_init)
module_exit(hw_sched_module_exit)