/* SPDX-License-Identifier: (BSD-2-Clause OR GPL-2.0-or-later)
 *
 * vim:set noexpandtab shiftwidth=8 softtabstop=8 fileencoding=utf-8:
 *
 * Layer 2 network upstream driver
 */
/* ------------------------------------------------------------------------ */

#define pr_fmt(fmt) "net_upstream: " fmt

#include <linux/module.h>
#include <linux/version.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/if_vlan.h>
#include <linux/skbuff.h>
#include <linux/rculist.h>
#include <linux/spinlock.h>
#include <linux/seq_file.h>
#include <linux/uaccess.h>
#include <linux/fs.h>		// struct file_operations
#include <linux/debugfs.h>	// struct file_operations
#ifdef CONFIG_AVM_PA
#include <linux/avm_pa.h>
#endif
#include <linux/nsproxy.h>
#include <linux/cchardevice.h>	/* AVM cchardevice helper; exact header path assumed */

#include "net_upstream_ioctl.h"

/* ------------------------------------------------------------------------ */

static struct us_glob {
	struct cchardevice *chardev;
	struct dentry *dbgfs;
	struct list_head open_devs;
} glob;

/*
 * Per-device rx/tx actions live in RCU lists: the hot rx/tx paths traverse
 * them under rcu_read_lock() only, writers (ioctl) serialize on
 * actions_lock and free entries via kfree_rcu().
 */
struct rx_action {
	struct rcu_head rcu;
	struct list_head list;
	enum {
		RX_ACT_MAC_PASSTHRU,
		MAX_RX_ACTION,
	} type;
};

struct rx_mac_passthru {
	struct rx_action rxact;
	struct net_us_mac_passthru conf __aligned(2);
};

struct tx_action {
	struct rcu_head rcu;
	struct list_head list;
	enum {
		TX_ACT_VLAN_PRIO,
		MAX_TX_ACTION,
	} type;
};

struct tx_vlan_prio_map {
	struct tx_action txact;
	struct net_us_vlan_prio_map conf;
};

struct upstream_dev {
	struct list_head list;
	struct net_device *ndev;
	int master_ifindex;
	struct net_device __rcu *master;
	struct notifier_block notify;
	spinlock_t actions_lock;
	struct list_head rx_actions;
	struct list_head tx_actions;
	int minor;
};

#define us_netdev_priv(ndev) ((struct upstream_dev **) (netdev_priv(ndev)))

static inline bool is_passthru(struct sk_buff *skb,
			       struct rx_mac_passthru *rx_mac)
{
	u8 *src = eth_hdr(skb)->h_source;
	u8 *dst = eth_hdr(skb)->h_dest;

	if (rx_mac->conf.check_dest)
		return ether_addr_equal(rx_mac->conf.mac, dst);
	else
		return ether_addr_equal(rx_mac->conf.mac, src);
}

static int apply_rx_actions(struct sk_buff *skb, struct upstream_dev *dev)
{
	int ret = 0;
	struct rx_action *rxact;

	rcu_read_lock();
	if (list_empty(&dev->rx_actions))
		goto unlock;

	list_for_each_entry_rcu(rxact, &dev->rx_actions, list) {
		switch (rxact->type) {
		case RX_ACT_MAC_PASSTHRU:
			if (!ret)
				ret = is_passthru(skb, (struct rx_mac_passthru *) rxact);
			break;
		case MAX_RX_ACTION:
			BUG();
		}
	}

unlock:
	rcu_read_unlock();
	return ret;
}

/*
 * rx_handler on the master device: packets matching a passthru action stay
 * on the master (RX_HANDLER_PASS), everything else is retargeted to the
 * upstream device and re-run through the rx path (RX_HANDLER_ANOTHER).
 */
static rx_handler_result_t us_rx(struct sk_buff **pskb)
{
	struct sk_buff *skb = *pskb;
	struct upstream_dev *dev = rcu_dereference(skb->dev->rx_handler_data);
	int ret;

	ret = apply_rx_actions(skb, dev);
	if (ret == 1)
		return RX_HANDLER_PASS;

	if (dev->ndev->flags & IFF_UP) {
		skb->dev = dev->ndev;
		skb->dev->stats.rx_packets++;
		skb->dev->stats.rx_bytes += skb->len + ETH_HLEN;
		return RX_HANDLER_ANOTHER;
	}

	skb->dev->stats.rx_dropped++;
	kfree_skb(skb);
	return RX_HANDLER_CONSUMED;
}

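/*
 * Direct transmit on the master device, bypassing the master's qdisc: the
 * packet is validated and segmented here, then handed straight to the
 * driver under HARD_TX_LOCK.
 */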
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0) || defined(AVM_DEV_QUEUE_XMIT_NIT_EXPORT)
/* Based on dev_direct_xmit() from mainline dev.c, not found in 4.4 yet */
static int us_dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
{
	struct net_device *dev = skb->dev;
	struct netdev_queue *txq;
	int ret = NETDEV_TX_BUSY;
	bool __maybe_unused again = false;

	if (unlikely(!netif_running(dev) || !netif_carrier_ok(dev)))
		goto drop;

#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 16, 0)
	skb = validate_xmit_skb_list(skb, dev, &again);
#else
	skb = validate_xmit_skb_list(skb, dev);
#endif
	if (unlikely(!skb))
		goto drop;

	skb_set_queue_mapping(skb, queue_id);
	txq = skb_get_tx_queue(dev, skb);

#ifdef CONFIG_AVM_PA
	avm_pa_dev_snoop_transmit(AVM_PA_DEVINFO(skb->dev), skb);
#endif

	local_bh_disable();

	HARD_TX_LOCK(dev, txq, smp_processor_id());
	if (!netif_xmit_frozen_or_drv_stopped(txq))
		ret = netdev_start_xmit(skb, dev, txq, false);
	HARD_TX_UNLOCK(dev, txq);

	local_bh_enable();
	return ret;

drop:
	atomic_long_inc(&dev->tx_dropped);
	kfree_skb_list(skb);
	return NET_XMIT_DROP;
}
#endif

/*
 * Rewrite the VLAN PCP bits of tagged frames whose VID falls into the
 * configured [vlan_start, vlan_end] range, using skb->priority (masked to
 * 0..7) as index into the configured priomap.
 */
static void apply_vlan_prio(struct sk_buff *skb, struct net_device *ndev,
			    struct tx_vlan_prio_map *vlanact)
{
	u16 vlan_id, vlan_prio;

	if (!skb_vlan_tagged(skb))
		return;

	if (skb_vlan_tag_present(skb))
		vlan_id = skb_vlan_tag_get_id(skb);
	else
		vlan_id = ntohs(vlan_eth_hdr(skb)->h_vlan_TCI) & VLAN_VID_MASK;

	if (vlan_id >= vlanact->conf.vlan_start &&
	    vlan_id <= vlanact->conf.vlan_end) {
		vlan_prio = vlanact->conf.priomap[skb->priority & 0x7];
		vlan_id |= (vlan_prio << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK;

		if (skb_vlan_tag_present(skb))
			__vlan_hwaccel_put_tag(skb, skb->vlan_proto, vlan_id);
		else
			vlan_eth_hdr(skb)->h_vlan_TCI = htons(vlan_id);
	}
}

static void apply_tx_actions(struct sk_buff *skb, struct net_device *ndev)
{
	struct upstream_dev *dev = *us_netdev_priv(ndev);
	struct tx_action *txact;

	rcu_read_lock();
	if (list_empty(&dev->tx_actions))
		goto unlock;

	list_for_each_entry_rcu(txact, &dev->tx_actions, list) {
		switch (txact->type) {
		case TX_ACT_VLAN_PRIO:
			apply_vlan_prio(skb, ndev, (struct tx_vlan_prio_map *) txact);
			break;
		case MAX_TX_ACTION:
			BUG();
		}
	}

unlock:
	rcu_read_unlock();
}

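/*
 * tx path: apply the configured tx actions, retarget the skb to the master
 * device and transmit directly, bypassing the master's qdisc. The device
 * advertises NETIF_F_LLTX, so no additional tx lock is taken here.
 */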
static netdev_tx_t us_net_start_xmit(struct sk_buff *skb,
				     struct net_device *ndev)
{
	struct upstream_dev *dev = *us_netdev_priv(ndev);
	struct net_device *master = rcu_dereference_bh(dev->master);

	if (!master) {
		net_err_ratelimited("%s: no master, dropping packets\n",
				    ndev->name);
		kfree_skb_list(skb);
		ndev->stats.tx_dropped++;
		return NETDEV_TX_OK;
	}

	/* TODO: Future actions might want to filter/drop packets */
	apply_tx_actions(skb, ndev);

	skb->dev = master;
	ndev->stats.tx_packets++;
	ndev->stats.tx_bytes += skb->len;

#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0) || defined(AVM_DEV_QUEUE_XMIT_NIT_EXPORT)
	/* TODO: avoid this in fast path. */
	dev_queue_xmit_nit(skb, skb->dev);

	/* TODO: pick some non-default queue, but based on what criteria?
	 *
	 * skb_get_queue_mapping() does not work here, it is based on the
	 * queues of the upstream device, which mean nothing for the master
	 * device.
	 */
	return us_dev_direct_xmit(skb, 0);
#else
	pr_warn_once("%s: dev_queue_xmit_nit() missing, calling dev_queue_xmit()!\n",
		     __func__);
	return dev_queue_xmit(skb);
#endif
}

static int us_net_open(struct net_device *ndev)
{
	netif_tx_start_all_queues(ndev);
	return 0;
}

static int us_net_stop(struct net_device *ndev)
{
	netif_tx_stop_all_queues(ndev);
	return 0;
}

#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0)
/* Report the master as lower link, e.g. for "ip link" output. */
static int us_net_get_iflink(const struct net_device *ndev)
{
	struct upstream_dev *dev = *us_netdev_priv(ndev);
	struct net_device *master;
	int ifindex;

	rcu_read_lock();
	master = rcu_dereference(dev->master);
	ifindex = master ? master->ifindex : 0;
	rcu_read_unlock();

	return ifindex;
}
#endif

static const struct net_device_ops us_ops = {
	.ndo_open = &us_net_open,
	.ndo_stop = &us_net_stop,
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0)
	.ndo_get_iflink = &us_net_get_iflink,
#endif
	.ndo_start_xmit = &us_net_start_xmit,
	.ndo_set_mac_address = eth_mac_addr,
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0)
	.ndo_features_check = passthru_features_check,
#endif
};

/* Advertise many features, tx path does validate_xmit_skb_list() */
#define NET_US_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \
			 NETIF_F_RXCSUM | NETIF_F_HIGHDMA | \
			 NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \
			 NETIF_F_ALL_FCOE | NETIF_F_SCTP_CRC)

#define VLAN_FEATURES (NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | \
		       NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_STAG_RX)

#define IGN_FEATURES (NETIF_F_RXHASH | NETIF_F_NTUPLE)

static void us_setup(struct net_device *ndev)
{
	ether_setup(ndev);
	eth_hw_addr_random(ndev);

	ndev->netdev_ops = &us_ops;
	ndev->hw_features = NET_US_FEATURES | VLAN_FEATURES;
	ndev->vlan_features = NET_US_FEATURES;
	ndev->features = ndev->features | NETIF_F_LLTX;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
	ndev->max_mtu = ETH_MAX_MTU;
#endif
}

static int register_upstream_dev(struct upstream_dev *dev)
{
	struct net_device *ndev;
	char name[IFNAMSIZ];
	int ret;

	snprintf(name, sizeof(name), "net_upstream%d", dev->minor);

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
	ndev = alloc_netdev(sizeof(struct upstream_dev *), name,
			    NET_NAME_UNKNOWN, &us_setup);
#else
	ndev = alloc_netdev(sizeof(struct upstream_dev *), name, &us_setup);
#endif
	if (!ndev)
		return -ENOMEM;

	dev->ndev = ndev;
	*us_netdev_priv(ndev) = dev;

	ret = register_netdev(ndev);
	if (ret) {
		free_netdev(ndev);
		dev->ndev = NULL;
		return ret;
	}

	return 0;
}

static void unregister_upstream_dev(struct upstream_dev *dev)
{
	struct tx_action *txact, *txtmp;
	struct rx_action *rxact, *rxtmp;

	unregister_netdev(dev->ndev);
	free_netdev(dev->ndev);
	dev->ndev = NULL;
	dev->minor = -1;

	/* free rx and tx actions */
	list_for_each_entry_safe(txact, txtmp, &dev->tx_actions, list) {
		list_del(&txact->list);
		kfree(txact);
	}
	list_for_each_entry_safe(rxact, rxtmp, &dev->rx_actions, list) {
		list_del(&rxact->list);
		kfree(rxact);
	}
}

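/*
 * chardev lifecycle: each open() of the net_us character device creates
 * one net_upstreamN netdevice; ioctls on the open fd bind it to a master
 * and configure rx/tx actions; the final release() tears everything down.
 */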
static int us_open(struct inode *inp, struct file *fp)
{
	struct upstream_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);
	int ret;

	if (!dev)
		return -ENOMEM;

	spin_lock_init(&dev->actions_lock);
	INIT_LIST_HEAD(&dev->rx_actions);
	INIT_LIST_HEAD(&dev->tx_actions);
	dev->minor = iminor(inp);

	ret = register_upstream_dev(dev);
	if (ret < 0) {
		kfree(dev);
		return ret;
	}

	list_add(&dev->list, &glob.open_devs);
	fp->private_data = dev;
	return 0;
}

static int us_release(struct inode *inp, struct file *fp)
{
	struct upstream_dev *dev = (struct upstream_dev *) fp->private_data;

	list_del(&dev->list);
	fp->private_data = NULL;

	if (dev->master_ifindex) {
		/* Order is important, we will get final NETDEV_DOWN/_UNREGISTER
		 * events.
		 */
		unregister_netdevice_notifier(&dev->notify);
		dev->master_ifindex = 0;
	}

#ifdef CONFIG_AVM_PA
	avm_pa_dev_unregister_sync(AVM_PA_DEVINFO(dev->ndev));
#endif

	unregister_upstream_dev(dev);
	kfree(dev);
	return 0;
}

#ifdef CONFIG_AVM_PA
static void net_us_pa_xmit(void *arg, struct sk_buff *skb)
{
	int rc;

	skb->dev = (struct net_device *) arg;
	rc = dev_queue_xmit(skb);
	if (!dev_xmit_complete(rc) && net_ratelimit()) {
		pr_err("%s(%s): xmit failure: %d\n", __func__, skb->dev->name,
		       rc);
	}
}

static int net_us_pa_register(struct net_device *dev,
			      struct net_device *master)
{
	struct avm_pa_pid_cfg cfg = { 0 };
	int ingress_pid_handle;

	snprintf(cfg.name, sizeof(cfg.name), "%s", dev->name);
	cfg.tx_func = net_us_pa_xmit;
	cfg.tx_arg = dev;
	cfg.framing = avm_pa_framing_dev;

	/* This prevents avm_pa pid change detection when packets are
	 * observed on the master device first.
	 */
	ingress_pid_handle = AVM_PA_DEVINFO(master)->pid_handle;

	return avm_pa_dev_pidhandle_register_with_ingress(AVM_PA_DEVINFO(dev),
							  0, &cfg,
							  ingress_pid_handle);
}
#endif

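/*
 * Tracks the master device by ifindex: NETDEV_REGISTER grabs a reference,
 * checks the master's hw_features, publishes dev->master and installs the
 * rx handler; NETDEV_UNREGISTER undoes all of that; NETDEV_UP/_DOWN are
 * mirrored as carrier state of the upstream device.
 */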
static int net_us_notifier_event(struct notifier_block *notifier,
				 unsigned long event, void *ptr)
{
	struct upstream_dev *dev = container_of(notifier, struct upstream_dev,
						notify);
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 11, 0)
	struct net_device *edev = (struct net_device *) ptr;
#else
	struct net_device *edev = netdev_notifier_info_to_dev((const struct netdev_notifier_info *) ptr);
#endif
	netdev_features_t feature_diff;

	/* If dev moves to a different net namespace it's not our business.
	 * We operate in init_net exclusively for the moment.
	 *
	 * Background: On FB5590, the fiber interface is also used to upload
	 * the firmware image to the front-end (PRX). When this happens, the
	 * interface is moved to a different namespace, thus removed from our
	 * view.
	 */
	if (!net_eq(dev_net(dev->ndev), dev_net(edev)))
		return NOTIFY_DONE;

	if (dev->master_ifindex != edev->ifindex)
		return NOTIFY_DONE;

	switch (event) {
	case NETDEV_REGISTER:
		dev_hold(edev);

		/* We're just a proxy device. We must advertise most features
		 * (minus IGN_FEATURES) of the master device. Otherwise we
		 * lose HW capabilities.
		 */
		feature_diff = dev->ndev->hw_features ^ edev->hw_features;
		feature_diff &= edev->hw_features & ~IGN_FEATURES;
		if (feature_diff)
			pr_alert("BUG: missing hw_features: %pNF\n",
				 &feature_diff);

		rcu_assign_pointer(dev->master, edev);
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0)
		dev->ndev->iflink = edev->ifindex;
#endif
		if (netdev_rx_handler_register(edev, us_rx, dev)) {
			pr_err("Unable to register rx handler, already taken\n");
			dev_put(edev);
			rcu_assign_pointer(dev->master, NULL);
			break;
		}
#ifdef CONFIG_AVM_PA
		net_us_pa_register(dev->ndev, edev);
#endif
		break;
	case NETDEV_UNREGISTER:
		BUG_ON(edev != rtnl_dereference(dev->master));
#ifdef CONFIG_AVM_PA
		avm_pa_dev_unregister_sync(AVM_PA_DEVINFO(dev->ndev));
#endif
		netdev_rx_handler_unregister(edev);
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0)
		dev->ndev->iflink = 0;
#endif
		rcu_assign_pointer(dev->master, NULL);
		dev_put(edev);
		break;
	case NETDEV_UP:
		netif_carrier_on(dev->ndev);
		break;
	case NETDEV_DOWN:
		netif_carrier_off(dev->ndev);
		break;
	}

	return NOTIFY_DONE;
}

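/*
 * ioctl interface on the open chardev fd. A minimal userspace sketch
 * (device node path and peer_mac are assumptions, depending on how the
 * chardev minors are exposed):
 *
 *	int fd = open("/dev/net_us0", O_RDWR);	// creates net_upstream<minor>
 *	ioctl(fd, NET_US_IOC_SET_MASTER, if_nametoindex("eth0"));
 *
 *	struct net_us_mac_passthru pt = { .check_dest = 1 };
 *	memcpy(pt.mac, peer_mac, ETH_ALEN);
 *	ioctl(fd, NET_US_IOC_RXACT_ADD_MAC_PASSTHRU, &pt);
 *
 * Closing the fd unbinds the master and destroys the netdevice.
 */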
static long us_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
{
	struct upstream_dev *dev = (struct upstream_dev *) fp->private_data;
	long rc = -ENOSYS;
	struct rx_mac_passthru *m;
	struct tx_vlan_prio_map *v;
	struct rx_action *rxact, *rxtmp;
	struct tx_action *txact, *txtmp;
	struct net_device *ndev;
	struct net *net = current->nsproxy->net_ns;
	void __user *argp = (void __user *) arg;

	switch (cmd) {
	case NET_US_IOC_GET_IFINDEX:
		rc = put_user(dev->ndev->ifindex, (int32_t __user *) arg);
		break;
	case NET_US_IOC_CLR_MASTER:
		if (!dev->master_ifindex) {
			rc = -EIO;
			break;
		}

		/* Order is important, we will get final NETDEV_DOWN/_UNREGISTER
		 * events.
		 */
		unregister_netdevice_notifier(&dev->notify);
		dev->master_ifindex = 0;
		rc = 0;
		break;
	case NET_US_IOC_SET_MASTER:
		if (dev->master_ifindex) {
			rc = -EIO;
			break;
		}
		if (arg == 0 || arg > INT_MAX) {
			rc = -EINVAL;
			break;
		}

		/* Verify that the device is valid in the thread's namespace.
		 * Due to the notifier mechanism it must be in the same
		 * namespace as the net_upstream interface as well.
		 */
		ndev = dev_get_by_index(net, (int) arg);
		if (!ndev) {
			rc = -ENODEV;
			break;
		}

		/* Order is important, we will get initial NETDEV_UP/_REGISTER
		 * events.
		 */
		dev->master_ifindex = arg;
		dev->notify.notifier_call = net_us_notifier_event;
		register_netdevice_notifier(&dev->notify);

		dev_put(ndev);
		rc = 0;
		break;
	case NET_US_IOC_RXACT_CLEAR_ALL:
		spin_lock(&dev->actions_lock);
		list_for_each_entry_safe(rxact, rxtmp, &dev->rx_actions, list) {
			list_del_rcu(&rxact->list);
			kfree_rcu(rxact, rcu);
		}
		spin_unlock(&dev->actions_lock);
		rc = 0;
		break;
	case NET_US_IOC_RXACT_ADD_MAC_PASSTHRU:
		m = kmalloc(sizeof(*m), GFP_KERNEL);
		if (!m) {
			rc = -ENOMEM;
			break;
		}
		if (copy_from_user(&m->conf, argp, sizeof(m->conf))) {
			rc = -EFAULT;
			kfree(m);
			break;
		}
		m->rxact.type = RX_ACT_MAC_PASSTHRU;

		spin_lock(&dev->actions_lock);
		list_add_tail_rcu(&m->rxact.list, &dev->rx_actions);
		spin_unlock(&dev->actions_lock);
		rc = 0;
		break;
	case NET_US_IOC_TXACT_CLEAR_ALL:
		spin_lock(&dev->actions_lock);
		list_for_each_entry_safe(txact, txtmp, &dev->tx_actions, list) {
			list_del_rcu(&txact->list);
			kfree_rcu(txact, rcu);
		}
		spin_unlock(&dev->actions_lock);
		rc = 0;
		break;
	case NET_US_IOC_TXACT_ADD_VLANPRIO_MAP:
		v = kmalloc(sizeof(*v), GFP_KERNEL);
		if (!v) {
			rc = -ENOMEM;
			break;
		}
		if (copy_from_user(&v->conf, argp, sizeof(v->conf))) {
			rc = -EFAULT;
			kfree(v);
			break;
		}
		v->txact.type = TX_ACT_VLAN_PRIO;

		spin_lock(&dev->actions_lock);
		list_add_tail_rcu(&v->txact.list, &dev->tx_actions);
		spin_unlock(&dev->actions_lock);
		rc = 0;
		break;
	}

	return rc;
}

static const struct file_operations us_fops = {
	.open = us_open,
	.release = us_release,
	.unlocked_ioctl = us_ioctl,
};

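/*
 * debugfs dump helpers: each upstream is printed as "<dev>: ..." lines,
 * its actions as "<dev>:<action-index>: ..." lines (see the "upstreams"
 * file created in us_init_module()).
 */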
"dest" : "src", m->conf.mac); break; default: break; } i++; } } static void show_tx_actions(struct seq_file *f, struct upstream_dev *dev, int dev_nr) { struct tx_action *txact; int i; const char *action2str[] = { [TX_ACT_VLAN_PRIO] = "vlanprio", [MAX_TX_ACTION] = "" }; if (list_empty(&dev->tx_actions)) { seq_printf(f, "\n"); return; } i = 0; seq_printf(f, "%d: tx_actions\n", dev_nr); list_for_each_entry(txact, &dev->tx_actions, list) { struct tx_vlan_prio_map *v; seq_printf(f, "%d:%02d: %s\n", dev_nr, i, action2str[txact->type]); switch (txact->type) { case TX_ACT_VLAN_PRIO: v = (struct tx_vlan_prio_map *) txact; seq_printf(f, "%d:%02d: vlan range [%d, %d]\n", dev_nr, i, v->conf.vlan_start, v->conf.vlan_end); seq_printf(f, "%d:%02d: %d:%d\n", dev_nr, i, 0, v->conf.priomap[0]); seq_printf(f, "%d:%02d: %d:%d\n", dev_nr, i, 1, v->conf.priomap[1]); seq_printf(f, "%d:%02d: %d:%d\n", dev_nr, i, 2, v->conf.priomap[2]); seq_printf(f, "%d:%02d: %d:%d\n", dev_nr, i, 3, v->conf.priomap[3]); seq_printf(f, "%d:%02d: %d:%d\n", dev_nr, i, 4, v->conf.priomap[4]); seq_printf(f, "%d:%02d: %d:%d\n", dev_nr, i, 5, v->conf.priomap[5]); seq_printf(f, "%d:%02d: %d:%d\n", dev_nr, i, 6, v->conf.priomap[6]); seq_printf(f, "%d:%02d: %d:%d\n", dev_nr, i, 7, v->conf.priomap[7]); break; default: break; } i++; } } static int net_us_upstreams_show(struct seq_file *f, void *data) { struct upstream_dev *dev; struct net_device *ndev; int i = 0; list_for_each_entry(dev, &glob.open_devs, list) { ndev = dev->ndev; seq_printf(f, "%d: %s\n", i, ndev->name); seq_printf(f, "%d: master %s\n", i, dev->master ? dev->master->name : ""); show_rx_actions(f, dev, i); show_tx_actions(f, dev, i); i++; seq_printf(f, "\n"); } return 0; } static int net_us_upstreams_open(struct inode *inode, struct file *file) { return single_open(file, net_us_upstreams_show, NULL); } static const struct file_operations upstreams_fops = { .open = net_us_upstreams_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, }; /* module load */ static int __init us_init_module(void) { INIT_LIST_HEAD(&glob.open_devs); glob.chardev = cchardevice_register("net_us", 1, THIS_MODULE, &us_fops); glob.dbgfs = debugfs_create_dir("net_upstream", NULL); debugfs_create_file("upstreams", 0444, glob.dbgfs, NULL, &upstreams_fops); pr_info("AVM net_upstream driver: init done\n"); return 0; } /* module unload */ static void __exit us_exit_module(void) { cchardevice_unregister(glob.chardev); pr_info("AVM net_upstream driver: exit done\n"); } MODULE_AUTHOR("AVM GmbH"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("AVM net_upstream kernel driver"); module_init(us_init_module); module_exit(us_exit_module);