--- zzzz-none-000/linux-4.4.271/drivers/net/bonding/bond_main.c 2021-06-03 06:22:09.000000000 +0000 +++ dakota-7530ac-750/linux-4.4.271/drivers/net/bonding/bond_main.c 2023-01-11 09:25:42.000000000 +0000 @@ -1,4 +1,6 @@ /* + * Copyright (c) 2016, The Linux Foundation. All rights reserved. + * * originally based on the dummy device. * * Copyright 1999, Thomas Davis, tadavis@lbl.gov. @@ -79,8 +81,11 @@ #include #include #include +#include #include #include +#include "bond_genl.h" + #include "bonding_priv.h" @@ -134,10 +139,7 @@ MODULE_PARM_DESC(use_carrier, "Use netif_carrier_ok (vs MII ioctls) in miimon; " "0 for off, 1 for on (default)"); module_param(mode, charp, 0); -MODULE_PARM_DESC(mode, "Mode of operation; 0 for balance-rr, " - "1 for active-backup, 2 for balance-xor, " - "3 for broadcast, 4 for 802.3ad, 5 for balance-tlb, " - "6 for balance-alb"); +MODULE_PARM_DESC(mode, "Mode of operation; 0 for balance-rr, 1 for active-backup, 2 for balance-xor, 3 for broadcast, 4 for 802.3ad, 5 for balance-tlb, 6 for balance-alb, 7 for l2da"); module_param(primary, charp, 0); MODULE_PARM_DESC(primary, "Primary network device to use"); module_param(primary_reselect, charp, 0); @@ -206,6 +208,7 @@ static int bond_mode = BOND_MODE_ROUNDROBIN; static int xmit_hashtype = BOND_XMIT_POLICY_LAYER2; static int lacp_fast; +static unsigned long bond_id_mask = 0xFFFFFFF0; /*-------------------------- Forward declarations ---------------------------*/ @@ -230,14 +233,29 @@ [BOND_MODE_8023AD] = "IEEE 802.3ad Dynamic link aggregation", [BOND_MODE_TLB] = "transmit load balancing", [BOND_MODE_ALB] = "adaptive load balancing", + [BOND_MODE_L2DA] = "layer 2 destination address map", }; - if (mode < BOND_MODE_ROUNDROBIN || mode > BOND_MODE_ALB) + if (mode < BOND_MODE_ROUNDROBIN || mode > BOND_MODE_L2DA) return "unknown"; return names[mode]; } +int bond_get_id(struct net_device *bond_dev) +{ + struct bonding *bond; + + if (!((bond_dev->priv_flags & IFF_BONDING) && + (bond_dev->flags & 
IFF_MASTER))) + return -EINVAL; + + bond = netdev_priv(bond_dev); + + return bond->id; +} +EXPORT_SYMBOL(bond_get_id); + /*---------------------------------- VLAN -----------------------------------*/ /** @@ -278,8 +296,7 @@ * worse, and if it works for regular VLAN usage it will work here too. */ -/** - * bond_vlan_rx_add_vid - Propagates adding an id to slaves +/* bond_vlan_rx_add_vid - Propagates adding an id to slaves * @bond_dev: bonding net device that got called * @vid: vlan id being added */ @@ -838,6 +855,21 @@ if (BOND_MODE(bond) == BOND_MODE_8023AD) bond_3ad_handle_link_change(new_active, BOND_LINK_UP); + if (bond->params.mode == BOND_MODE_XOR) { + struct bond_cb *lag_cb_main; + + rcu_read_lock(); + lag_cb_main = rcu_dereference(bond_cb); + if (lag_cb_main && + lag_cb_main->bond_cb_link_up) { + struct net_device *dev; + + dev = new_active->dev; + lag_cb_main->bond_cb_link_up(dev); + } + rcu_read_unlock(); + } + if (bond_is_lb(bond)) bond_alb_handle_link_change(bond, new_active, BOND_LINK_UP); } else { @@ -1187,6 +1219,12 @@ return RX_HANDLER_EXACT; } + if (bond_is_l2da(bond) && + !bond_l2da_handle_rx_frame(bond, slave, skb)) { + consume_skb(skb); + return RX_HANDLER_CONSUMED; + } + skb->dev = bond->dev; if (BOND_MODE(bond) == BOND_MODE_ALB && @@ -1328,8 +1366,9 @@ const struct net_device_ops *slave_ops = slave_dev->netdev_ops; struct slave *new_slave = NULL, *prev_slave; struct sockaddr addr; + struct bond_cb *lag_cb_main; int link_reporting; - int res = 0, i; + int res = 0, i, mac_stolen = 0, same_addr; if (!bond->params.use_carrier && slave_dev->ethtool_ops->get_link == NULL && @@ -1444,8 +1483,10 @@ * address to be the same as the slave's. 
*/ if (!bond_has_slaves(bond) && - bond->dev->addr_assign_type == NET_ADDR_RANDOM) + bond->dev->addr_assign_type == NET_ADDR_RANDOM) { bond_set_dev_addr(bond->dev, slave_dev); + mac_stolen = 1; + } new_slave = bond_alloc_slave(bond, slave_dev); if (!new_slave) { @@ -1471,17 +1512,24 @@ * set it to the master's address */ ether_addr_copy(new_slave->perm_hwaddr, slave_dev->dev_addr); + same_addr = ether_addr_equal(bond_dev->dev_addr, slave_dev->dev_addr); - if (!bond->params.fail_over_mac || - BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { - /* Set slave to master's mac address. The application already + if ((!bond->params.fail_over_mac || + BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) && + /* In l2da mode, skip for first slave and skip if + * slave's address is already same as bond's address. + */ + !(bond_is_l2da(bond) && + (mac_stolen || same_addr || bond->l2da_info.multimac))) { + /* Set slave to master's mac address. The application already * set the master's mac address to that of the first slave */ memcpy(addr.sa_data, bond_dev->dev_addr, bond_dev->addr_len); addr.sa_family = slave_dev->type; res = dev_set_mac_address(slave_dev, &addr); if (res) { - netdev_dbg(bond_dev, "Error %d calling set_mac_address\n", res); + netdev_dbg(bond_dev, + "Error %d calling set_mac_address\n", res); goto err_restore_mtu; } } @@ -1615,6 +1663,10 @@ bond_set_active_slave(new_slave); bond_set_slave_inactive_flags(new_slave, BOND_SLAVE_NOTIFY_NOW); break; + case BOND_MODE_L2DA: + bond_set_slave_active_flags(new_slave, BOND_SLAVE_NOTIFY_NOW); + bond_l2da_bind_slave(bond, new_slave); + break; default: netdev_dbg(bond_dev, "This slave is always active in trunk mode\n"); @@ -1711,6 +1763,12 @@ if (bond_mode_uses_xmit_hash(bond)) bond_update_slave_arr(bond, NULL); + rcu_read_lock(); + lag_cb_main = rcu_dereference(bond_cb); + if (lag_cb_main && lag_cb_main->bond_cb_enslave) + lag_cb_main->bond_cb_enslave(slave_dev); + + rcu_read_unlock(); netdev_info(bond_dev, "Enslaving %s as %s interface 
with %s link\n", slave_dev->name, bond_is_active_slave(new_slave) ? "an active" : "a backup", @@ -1782,6 +1840,13 @@ } } + rcu_read_lock(); + lag_cb_main = rcu_dereference(bond_cb); + if (lag_cb_main && lag_cb_main->bond_cb_enslave) + lag_cb_main->bond_cb_enslave(slave_dev); + + rcu_read_unlock(); + return res; } @@ -1803,6 +1868,7 @@ struct bonding *bond = netdev_priv(bond_dev); struct slave *slave, *oldcurrent; struct sockaddr addr; + struct bond_cb *lag_cb_main; int old_flags = bond_dev->flags; netdev_features_t old_features = bond_dev->features; @@ -1825,6 +1891,13 @@ return -EINVAL; } + rcu_read_lock(); + lag_cb_main = rcu_dereference(bond_cb); + if (lag_cb_main && lag_cb_main->bond_cb_release) + lag_cb_main->bond_cb_release(slave_dev); + + rcu_read_unlock(); + bond_sysfs_slave_del(slave); /* recompute stats just before removing the slave */ @@ -1872,7 +1945,8 @@ * but before a new active slave is selected. */ bond_alb_deinit_slave(bond, slave); - } + } else if (bond_is_l2da(bond)) + bond_l2da_unbind_slave(bond, slave); if (all) { RCU_INIT_POINTER(bond->curr_active_slave, NULL); @@ -2106,6 +2180,8 @@ { struct list_head *iter; struct slave *slave, *primary; + struct net_device *slave_dev = NULL; + struct bond_cb *lag_cb_main; bond_for_each_slave(bond, slave, iter) { switch (slave->new_link) { @@ -2150,9 +2226,16 @@ bond_alb_handle_link_change(bond, slave, BOND_LINK_UP); + if ((bond->params.mode == BOND_MODE_XOR) && + (!slave_dev)) + slave_dev = slave->dev; + if (BOND_MODE(bond) == BOND_MODE_XOR) bond_update_slave_arr(bond, NULL); + if (bond_is_l2da(bond)) + bond_l2da_handle_link_change(bond, slave); + if (!bond->curr_active_slave || slave == primary) goto do_failover; @@ -2180,6 +2263,9 @@ bond_alb_handle_link_change(bond, slave, BOND_LINK_DOWN); + if (bond_is_l2da(bond)) + bond_l2da_handle_link_change(bond, slave); + if (BOND_MODE(bond) == BOND_MODE_XOR) bond_update_slave_arr(bond, NULL); @@ -2203,6 +2289,15 @@ } bond_set_carrier(bond); + + rcu_read_lock(); + 
lag_cb_main = rcu_dereference(bond_cb);
+
+	if (slave_dev && lag_cb_main && lag_cb_main->bond_cb_link_up)
+		lag_cb_main->bond_cb_link_up(slave_dev);
+
+	rcu_read_unlock();
+
 }
 
 /* bond_mii_monitor
@@ -3136,11 +3231,64 @@
 
 	ep = skb_header_pointer(skb, 0, sizeof(hdr_tmp), &hdr_tmp);
 	if (ep)
-		return ep->h_dest[5] ^ ep->h_source[5] ^ ep->h_proto;
+		return ep->h_dest[5] ^ ep->h_source[5];
 	return 0;
 }
 
+/* bond_flow_dissect_without_skb - fill flow keys from raw addresses
+ * @bond: bond whose xmit policy decides whether L4 data is recorded
+ * @src_mac: source L2 address (not read by this helper)
+ * @dst_mac: destination L2 address (not read by this helper)
+ * @psrc: source L3 address; IPv4 or IPv6 as selected by @protocol
+ * @pdst: destination L3 address; IPv4 or IPv6 as selected by @protocol
+ * @protocol: EtherType, compared against htons(ETH_P_IP) / htons(ETH_P_IPV6)
+ * @layer4hdr: optional L4 data stored in the ports key for layer3+4 policy
+ * @fk: flow keys to fill in
+ *
+ * Counterpart of bond_flow_dissect() for callers that have no skb.
+ *
+ * Returns: true if @fk was filled in; false if an address is missing or
+ * @protocol is neither IPv4 nor IPv6.
+ */
+static bool bond_flow_dissect_without_skb(struct bonding *bond,
+					  u8 *src_mac, u8 *dst_mac,
+					  void *psrc, void *pdst,
+					  u16 protocol, __be16 *layer4hdr,
+					  struct flow_keys *fk)
+{
+	fk->ports.ports = 0;
+
+	if (!psrc || !pdst)
+		return false;
+
+	if (protocol == htons(ETH_P_IP)) {
+		/* V4 addresses and address type. The buffers are untyped, so
+		 * copy bytewise as the V6 branch does instead of casting.
+		 */
+		memcpy(&fk->addrs.v4addrs.src, psrc,
+		       sizeof(fk->addrs.v4addrs.src));
+		memcpy(&fk->addrs.v4addrs.dst, pdst,
+		       sizeof(fk->addrs.v4addrs.dst));
+		fk->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+	} else if (protocol == htons(ETH_P_IPV6)) {
+		/* V6 addresses and address type */
+		memcpy(&fk->addrs.v6addrs.src, psrc, sizeof(struct in6_addr));
+		memcpy(&fk->addrs.v6addrs.dst, pdst, sizeof(struct in6_addr));
+		fk->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+	} else {
+		return false;
+	}
+
+	/* NOTE(review): *layer4hdr is a single __be16; if the ports key is
+	 * meant to carry both ports, confirm what callers pass here.
+	 */
+	if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && layer4hdr)
+		fk->ports.ports = *layer4hdr;
+
+	return true;
+}
+
 /* Extract the appropriate headers based on bond's xmit policy */
 static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
 			      struct flow_keys *fk)
 {
@@ -3810,6 +3939,179 @@
 	return NETDEV_TX_OK;
 }
 
+/* bond_xmit_hash_without_skb - Applies load balancing algorithm for a packet,
+ * to calculate hash for a given set of L2/L3 addresses. Does not
+ * calculate egress interface.
+ */ +uint32_t bond_xmit_hash_without_skb(u8 *src_mac, u8 *dst_mac, + void *psrc, void *pdst, u16 protocol, + struct net_device *bond_dev, + __be16 *layer4hdr) +{ + struct bonding *bond = netdev_priv(bond_dev); + struct flow_keys flow; + u32 hash = 0; + + if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 || + !bond_flow_dissect_without_skb(bond, src_mac, dst_mac, psrc, + pdst, protocol, layer4hdr, &flow)) + return (dst_mac[5] ^ src_mac[5]); + + if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23) + hash = dst_mac[5] ^ src_mac[5]; + else if (layer4hdr) + hash = (__force u32)flow.ports.ports; + + hash ^= (__force u32)flow_get_u32_dst(&flow) ^ + (__force u32)flow_get_u32_src(&flow); + hash ^= (hash >> 16); + hash ^= (hash >> 8); + + return hash; +} + +/* bond_xor_get_tx_dev - Calculate egress interface for a given packet for a LAG + * that is configured in balance-xor mode + * @skb: pointer to skb to be egressed + * @src_mac: pointer to source L2 address + * @dst_mac: pointer to destination L2 address + * @src: pointer to source L3 address in network order + * @dst: pointer to destination L3 address in network order + * @protocol: L3 protocol + * @bond_dev: pointer to bond master device + * + * If @skb is NULL, bond_xmit_hash_without_skb is used to calculate hash using + * L2/L3 addresses. 
+ * + * Returns: Either valid slave device, or NULL otherwise + */ +static struct net_device *bond_xor_get_tx_dev(struct sk_buff *skb, + u8 *src_mac, u8 *dst_mac, + void *src, void *dst, + u16 protocol, + struct net_device *bond_dev, + __be16 *layer4hdr) +{ + struct bonding *bond = netdev_priv(bond_dev); + int slave_cnt = ACCESS_ONCE(bond->slave_cnt); + int slave_id = 0, i = 0; + u32 hash; + struct list_head *iter; + struct slave *slave; + + if (slave_cnt == 0) { + pr_debug("%s: Error: No slave is attached to the interface\n", + bond_dev->name); + return NULL; + } + + if (skb) { + hash = bond_xmit_hash(bond, skb); + slave_id = hash % slave_cnt; + } else { + if (bond->params.xmit_policy != BOND_XMIT_POLICY_LAYER23 && + bond->params.xmit_policy != BOND_XMIT_POLICY_LAYER2 && + bond->params.xmit_policy != BOND_XMIT_POLICY_LAYER34) { + pr_debug("%s: Error: Unsupported hash policy for balance-XOR fast path\n", + bond_dev->name); + return NULL; + } + + hash = bond_xmit_hash_without_skb(src_mac, dst_mac, src, + dst, protocol, bond_dev, + layer4hdr); + slave_id = hash % slave_cnt; + } + + i = slave_id; + + /* Here we start from the slave with slave_id */ + bond_for_each_slave_rcu(bond, slave, iter) { + if (--i < 0) { + if (bond_slave_can_tx(slave)) + return slave->dev; + } + } + + /* Here we start from the first slave up to slave_id */ + i = slave_id; + bond_for_each_slave_rcu(bond, slave, iter) { + if (--i < 0) + break; + if (bond_slave_can_tx(slave)) + return slave->dev; + } + + return NULL; +} + +/* bond_get_tx_dev - Calculate egress interface for a given packet. 
+ * Supports 802.3AD, balance-xor and l2da modes.
+ *
+ * @skb: pointer to skb to be egressed, if valid
+ * @src_mac: pointer to source L2 address
+ * @dst_mac: pointer to destination L2 address
+ * @src: pointer to source L3 address in network order
+ * @dst: pointer to destination L3 address in network order
+ * @protocol: L3 protocol id from L2 header
+ * @bond_dev: pointer to bond master device
+ * @layer4hdr: optional pointer to L4 data for layer3+4 hashing, may be NULL
+ *
+ * Returns: slave device, or NULL for unsupported modes or a NULL @bond_dev
+ */
+struct net_device *bond_get_tx_dev(struct sk_buff *skb, uint8_t *src_mac,
+				   u8 *dst_mac, void *src,
+				   void *dst, u16 protocol,
+				   struct net_device *bond_dev,
+				   __be16 *layer4hdr)
+{
+	struct bonding *bond;
+
+	/* netdev_priv() only offsets the pointer, so guard the device itself */
+	if (!bond_dev)
+		return NULL;
+
+	bond = netdev_priv(bond_dev);
+	switch (bond->params.mode) {
+	case BOND_MODE_XOR:
+		return bond_xor_get_tx_dev(skb, src_mac, dst_mac, src, dst,
+					   protocol, bond_dev, layer4hdr);
+	case BOND_MODE_8023AD:
+		return bond_3ad_get_tx_dev(skb, src_mac, dst_mac, src, dst,
+					   protocol, bond_dev, layer4hdr);
+	case BOND_MODE_L2DA:
+		return bond_l2da_get_tx_dev(dst_mac, bond_dev);
+	default:
+		return NULL;
+	}
+}
+EXPORT_SYMBOL(bond_get_tx_dev);
+
+/* bond_xmit_xor - transmit path for balance-xor mode
+ *
+ * Picks the egress slave via bond_xor_get_tx_dev(), which hashes the skb
+ * and falls over to the next slave that can transmit. With no usable
+ * slave the frame is dropped through bond_tx_drop().
+ * Always returns NETDEV_TX_OK.
+ */
+static int bond_xmit_xor(struct sk_buff *skb, struct net_device *dev)
+{
+	struct bonding *bond = netdev_priv(dev);
+	struct net_device *outdev;
+
+	/* skb is given, so the explicit L2/L3/L4 arguments stay unused */
+	outdev = bond_xor_get_tx_dev(skb, NULL, NULL, NULL,
+				     NULL, 0, dev, NULL);
+	if (likely(outdev)) {
+		bond_dev_queue_xmit(bond, skb, outdev);
+	} else {
+		/* no suitable interface, frame not sent */
+		bond_tx_drop(dev, skb);
+	}
+
+	return NETDEV_TX_OK;
+}
+
 /* Use this to update slave_array when (a) it's not appropriate to update
  * slave_array right away (note that update_slave_array() may sleep)
  * and / or (b) RTNL is not held.
@@ -3937,19 +4239,18 @@
 static int bond_3ad_xor_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct bonding *bond = netdev_priv(dev);
-	struct slave *slave;
-	struct bond_up_slave *slaves;
-	unsigned int count;
+	struct net_device *outdev;
 
-	slaves = rcu_dereference(bond->slave_arr);
-	count = slaves ? ACCESS_ONCE(slaves->count) : 0;
-	if (likely(count)) {
-		slave = slaves->arr[bond_xmit_hash(bond, skb) % count];
-		bond_dev_queue_xmit(bond, skb, slave->dev);
-	} else {
-		bond_tx_drop(dev, skb);
-	}
+	/* Only the skb is supplied; the address arguments are left empty */
+	outdev = bond_3ad_get_tx_dev(skb, NULL, NULL, NULL,
+				     NULL, 0, dev, NULL);
+	if (likely(outdev)) {
+		bond_dev_queue_xmit(bond, skb, outdev);
+	} else {
+		/* no usable slave: release the frame via bond_tx_drop() */
+		bond_tx_drop(dev, skb);
+	}
 
 	return NETDEV_TX_OK;
 }
 
@@ -4045,8 +4349,9 @@
 		return bond_xmit_roundrobin(skb, dev);
 	case BOND_MODE_ACTIVEBACKUP:
 		return bond_xmit_activebackup(skb, dev);
-	case BOND_MODE_8023AD:
 	case BOND_MODE_XOR:
+		return bond_xmit_xor(skb, dev);
+	case BOND_MODE_8023AD:
 		return bond_3ad_xor_xmit(skb, dev);
 	case BOND_MODE_BROADCAST:
 		return bond_xmit_broadcast(skb, dev);
@@ -4054,6 +4359,8 @@
 		return bond_alb_xmit(skb, dev);
 	case BOND_MODE_TLB:
 		return bond_tlb_xmit(skb, dev);
+	case BOND_MODE_L2DA:
+		return bond_l2da_xmit(skb, dev);
 	default:
 		/* Should never happen, mode already checked */
 		netdev_err(dev, "Unknown bonding mode %d\n", BOND_MODE(bond));
@@ -4186,6 +4493,10 @@
 	struct bonding *bond = netdev_priv(bond_dev);
 	if (bond->wq)
 		destroy_workqueue(bond->wq);
+
+	if (bond->id != (~0U))
+		clear_bit(bond->id, &bond_id_mask);
+
 	free_netdev(bond_dev);
 }
 
@@ -4263,10 +4574,30 @@
 
 	list_del(&bond->bond_list);
 	bond_debug_unregister(bond);
+
+	if (bond_is_l2da(bond))
+		bond_l2da_deinitialize(bond);
 }
 
 /*------------------------- Module initialization ---------------------------*/
 
+/**
+ * Notify ECM about the change in bond slave
+ */
+void bond_notify_l2da(uint8_t *slave_mac_addr)
+{
+	struct bond_cb *bond_cb_ref;
+
+	rcu_read_lock();
+	bond_cb_ref = rcu_dereference(bond_cb);
+	if 
(bond_cb_ref && bond_cb_ref->bond_cb_delete_by_mac) {
+		bond_cb_ref->bond_cb_delete_by_mac(slave_mac_addr);
+		pr_info("Deleted fast path rules with mac-id: %pM\n",
+			slave_mac_addr);
+	}
+	rcu_read_unlock();
+}
+
 static int bond_check_params(struct bond_params *params)
 {
 	int arp_validate_value, fail_over_mac_value, primary_reselect_value, i;
@@ -4395,6 +4726,19 @@
 		all_slaves_active = 0;
 	}
 
+	if (bond_mode == BOND_MODE_L2DA) {
+		if (!all_slaves_active) {
+			pr_warn("Warning: all_slaves_active must be set, otherwise bonding will not be able to route packets that are essential for l2da operation\n");
+			pr_warn("Forcing all_slaves_active to 1\n");
+			all_slaves_active = 1;
+		}
+		if (!miimon) {
+			pr_warn("Warning: miimon must be specified, otherwise bonding will not detect link failure which is essential for l2da operation\n");
+			pr_warn("Forcing miimon to 100msec\n");
+			miimon = 100;
+		}
+	}
+
 	if (resend_igmp < 0 || resend_igmp > 255) {
 		pr_warn("Warning: resend_igmp (%d) should be between 0 and 255, resetting to %d\n",
 			resend_igmp, BOND_DEFAULT_RESEND_IGMP);
@@ -4664,9 +5008,19 @@
 {
 	struct bonding *bond = netdev_priv(bond_dev);
 	struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id);
+	int ret;
 
 	netdev_dbg(bond_dev, "Begin bond_init\n");
 
+	if (bond_is_l2da(bond)) {
+		ret = bond_l2da_initialize(bond);
+		if (ret) {
+			pr_err("%s: l2da mode cannot be initialized\n",
+			       bond->dev->name);
+			return ret;
+		}
+	}
+
 	bond->wq = create_singlethread_workqueue(bond_dev->name);
 	if (!bond->wq)
 		return -ENOMEM;
@@ -4733,6 +5087,23 @@
 	rtnl_unlock();
-	if (res < 0)
+	if (res < 0) {
+		/* No id has been allocated yet. Mark it unused so that
+		 * bond_destructor() leaves bond_id_mask alone, and return
+		 * right away: the destructor frees the netdev, so it must
+		 * not be touched afterwards.
+		 */
+		bond = netdev_priv(bond_dev);
+		bond->id = ~0U;
 		bond_destructor(bond_dev);
+		return res;
+	}
+
+	bond = netdev_priv(bond_dev);
+	bond->id = ~0U;
+	if (bond_id_mask != (~0UL)) {
+		bond->id = (u32)ffz(bond_id_mask);
+		set_bit(bond->id, &bond_id_mask);
+	}
+
 	return res;
 }
 
@@ -4793,6 +5155,10 @@
 	if (res)
 		goto err_link;
 
+	res = bond_genl_initialize();
+	if (res)
+		goto err_genl;
+
 	bond_create_debugfs();
 
 	for (i = 0; i < max_bonds; i++) {
@@ -4806,6 +5172,8 @@
 	return res;
 err:
bond_destroy_debugfs(); + bond_genl_deinitialize(); +err_genl: bond_netlink_fini(); err_link: unregister_pernet_subsys(&bond_net_ops); @@ -4819,6 +5187,7 @@ bond_destroy_debugfs(); + bond_genl_deinitialize(); bond_netlink_fini(); unregister_pernet_subsys(&bond_net_ops);