--- zzzz-none-000/linux-5.4.213/net/openvswitch/datapath.c	2022-09-15 10:04:56.000000000 +0000
+++ miami-7690-761/linux-5.4.213/net/openvswitch/datapath.c	2024-05-29 11:20:02.000000000 +0000
@@ -47,6 +47,8 @@
 #include "vport-netdev.h"
 
 unsigned int ovs_net_id __read_mostly;
+static struct ovs_accel_callback __rcu *ovs_accel_cb;
+static struct srcu_struct ovs_accel_cb_sp_rcu;
 
 static struct genl_family dp_packet_genl_family;
 static struct genl_family dp_flow_genl_family;
@@ -212,6 +214,124 @@
 	ovs_vport_del(p);
 }
 
+/* Notify datapath add event to acceleration callback */
+static void ovs_dp_add_notify(struct datapath *dp, struct vport *vp)
+{
+	struct ovs_accel_callback *ovs_cb;
+	int idx = srcu_read_lock(&ovs_accel_cb_sp_rcu);
+	ovs_cb = rcu_dereference(ovs_accel_cb);
+	if (ovs_cb && ovs_cb->ovs_accel_dp_add)
+		ovs_cb->ovs_accel_dp_add((void *)dp, vp->dev);
+	srcu_read_unlock(&ovs_accel_cb_sp_rcu, idx);
+}
+
+/* Notify datapath delete event to acceleration callback */
+static void ovs_dp_del_notify(struct datapath *dp, struct vport *vp)
+{
+	struct ovs_accel_callback *ovs_cb;
+
+	int idx = srcu_read_lock(&ovs_accel_cb_sp_rcu);
+	ovs_cb = rcu_dereference(ovs_accel_cb);
+	if (ovs_cb && ovs_cb->ovs_accel_dp_del)
+		ovs_cb->ovs_accel_dp_del((void *)dp, vp->dev);
+	srcu_read_unlock(&ovs_accel_cb_sp_rcu, idx);
+}
+
+/* Notify datapath port add event to acceleration callback */
+static void ovs_dp_port_add_notify(struct datapath *dp, struct vport *vp,
+				   struct nlattr **a)
+{
+	struct ovs_accel_callback *ovs_cb;
+	const char *master = NULL;
+	int idx;
+
+	if (a[OVS_VPORT_ATTR_MASTER])
+		master = nla_data(a[OVS_VPORT_ATTR_MASTER]);
+
+	idx = srcu_read_lock(&ovs_accel_cb_sp_rcu);
+	ovs_cb = rcu_dereference(ovs_accel_cb);
+	if (ovs_cb && ovs_cb->ovs_accel_dp_port_add)
+		ovs_cb->ovs_accel_dp_port_add((void *)dp, (void *)vp,
+					      vp->port_no, vp->ops->type,
+					      master, vp->dev);
+	srcu_read_unlock(&ovs_accel_cb_sp_rcu, idx);
+}
+
+/* Notify datapath port delete event to acceleration callback */
+void ovs_dp_port_del_notify(struct datapath *dp, struct vport *vp)
+{
+	struct ovs_accel_callback *ovs_cb;
+
+	int idx = srcu_read_lock(&ovs_accel_cb_sp_rcu);
+	ovs_cb = rcu_dereference(ovs_accel_cb);
+	if (ovs_cb && ovs_cb->ovs_accel_dp_port_del)
+		ovs_cb->ovs_accel_dp_port_del((void *)dp, (void *)vp, vp->dev);
+	srcu_read_unlock(&ovs_accel_cb_sp_rcu, idx);
+}
+
+/* Notify datapath flow add event to acceleration callback */
+static void ovs_dp_flow_add_notify(struct datapath *dp, struct sw_flow *sf)
+{
+	struct ovs_accel_callback *ovs_cb;
+
+	int idx = srcu_read_lock(&ovs_accel_cb_sp_rcu);
+	ovs_cb = rcu_dereference(ovs_accel_cb);
+	if (ovs_cb && ovs_cb->ovs_accel_dp_flow_add)
+		ovs_cb->ovs_accel_dp_flow_add((void *)dp, sf);
+	srcu_read_unlock(&ovs_accel_cb_sp_rcu, idx);
+}
+
+/* Notify datapath flow delete event to acceleration callback */
+static void ovs_dp_flow_del_notify(struct datapath *dp, struct sw_flow *sf)
+{
+	struct ovs_accel_callback *ovs_cb;
+
+	int idx = srcu_read_lock(&ovs_accel_cb_sp_rcu);
+	ovs_cb = rcu_dereference(ovs_accel_cb);
+	if (ovs_cb && ovs_cb->ovs_accel_dp_flow_del)
+		ovs_cb->ovs_accel_dp_flow_del((void *)dp, sf);
+	srcu_read_unlock(&ovs_accel_cb_sp_rcu, idx);
+}
+
+/* Notify datapath flow table flush event to acceleration callback */
+static void ovs_dp_flow_tbl_flush_notify(struct datapath *dp)
+{
+	struct ovs_accel_callback *ovs_cb;
+
+	int idx = srcu_read_lock(&ovs_accel_cb_sp_rcu);
+	ovs_cb = rcu_dereference(ovs_accel_cb);
+	if (ovs_cb && ovs_cb->ovs_accel_dp_flow_tbl_flush)
+		ovs_cb->ovs_accel_dp_flow_tbl_flush((void *)dp);
+	srcu_read_unlock(&ovs_accel_cb_sp_rcu, idx);
+}
+
+/* Notify datapath flow set/change event to acceleration callback */
+static void ovs_dp_flow_set_notify(struct datapath *dp, struct sw_flow *sf,
+				   struct sw_flow_actions *new_sfa)
+{
+	struct ovs_accel_callback *ovs_cb;
+
+	int idx = srcu_read_lock(&ovs_accel_cb_sp_rcu);
+	ovs_cb = rcu_dereference(ovs_accel_cb);
+	if (ovs_cb && ovs_cb->ovs_accel_dp_flow_set)
+		ovs_cb->ovs_accel_dp_flow_set((void *)dp, sf, new_sfa);
+	srcu_read_unlock(&ovs_accel_cb_sp_rcu, idx);
+}
+
+/* Forward datapath packet to acceleration callback
+ * Must be called with rcu_read_lock.
+ */
+static void ovs_dp_pkt_process_notify(struct datapath *dp, struct sk_buff *skb,
+				      struct sw_flow_key *key, struct sw_flow *sf,
+				      struct sw_flow_actions *sfa)
+{
+	struct ovs_accel_callback *ovs_cb;
+
+	ovs_cb = rcu_dereference(ovs_accel_cb);
+	if (ovs_cb && ovs_cb->ovs_accel_dp_pkt_process)
+		ovs_cb->ovs_accel_dp_pkt_process((void *)dp, skb, key, sf, sfa);
+}
+
 /* Must be called with rcu_read_lock. */
 void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
 {
@@ -226,6 +346,8 @@
 
 	stats = this_cpu_ptr(dp->stats_percpu);
 
+	ovs_dp_pkt_process_notify(dp, skb, key, NULL, NULL);
+
 	/* Look up flow. */
 	flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit);
 	if (unlikely(!flow)) {
@@ -246,6 +368,7 @@
 
 	ovs_flow_stats_update(flow, key->tp.flags, skb);
 	sf_acts = rcu_dereference(flow->sf_acts);
+	ovs_dp_pkt_process_notify(dp, skb, key, flow, sf_acts);
 	error = ovs_execute_actions(dp, skb, sf_acts, key);
 	if (unlikely(error))
 		net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n",
@@ -984,6 +1107,7 @@
 			goto err_unlock_ovs;
 		}
 
+		ovs_dp_flow_add_notify(dp, new_flow);
 		if (unlikely(reply)) {
 			error = ovs_flow_cmd_fill_info(new_flow,
 						       ovs_header->dp_ifindex,
@@ -1191,6 +1315,7 @@
 	if (likely(acts)) {
 		old_acts = ovsl_dereference(flow->sf_acts);
 		rcu_assign_pointer(flow->sf_acts, acts);
+		ovs_dp_flow_set_notify(dp, flow, old_acts);
 
 		if (unlikely(reply)) {
 			error = ovs_flow_cmd_fill_info(flow,
@@ -1326,6 +1451,7 @@
 	}
 
 	if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
+		ovs_dp_flow_tbl_flush_notify(dp);
 		err = ovs_flow_tbl_flush(&dp->table);
 		goto unlock;
 	}
@@ -1340,6 +1466,7 @@
 	}
 
 	ovs_flow_tbl_remove(&dp->table, flow);
+	ovs_dp_flow_del_notify(dp, flow);
 	ovs_unlock();
 
 	reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
@@ -1677,6 +1804,7 @@
 
 	ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
 	list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
+	ovs_dp_add_notify(dp, vport);
 
 	ovs_unlock();
 
@@ -1713,6 +1841,7 @@
 			ovs_dp_detach_port(vport);
 	}
 
+	ovs_dp_del_notify(dp, ovs_vport_ovsl(dp, OVSP_LOCAL));
 	list_del_rcu(&dp->list_node);
 
 	/* OVSP_LOCAL is datapath internal port. We need to make sure that
@@ -2099,6 +2228,7 @@
 		goto exit_unlock_free;
 	}
 
+	ovs_dp_port_add_notify(dp, vport, a);
 	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
 				      info->snd_portid, info->snd_seq, 0,
 				      OVS_VPORT_CMD_NEW, GFP_KERNEL);
@@ -2151,7 +2281,6 @@
 		goto exit_unlock_free;
 	}
 
-
 	if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
 		struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];
 
@@ -2200,6 +2329,7 @@
 		goto exit_unlock_free;
 	}
 
+	ovs_dp_port_del_notify(vport->dp, vport);
 	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
 				      info->snd_portid, info->snd_seq, 0,
 				      OVS_VPORT_CMD_DEL, GFP_KERNEL);
@@ -2230,6 +2360,273 @@
 	return err;
 }
 
+/* Register OVS datapath accelerator */
+int ovs_register_accelerator(struct ovs_accel_callback *oac)
+{
+	ovs_lock();
+
+	if (unlikely(rcu_access_pointer(ovs_accel_cb))) {
+		ovs_unlock();
+		return -EEXIST;
+	}
+
+	rcu_assign_pointer(ovs_accel_cb, oac);
+	ovs_unlock();
+	synchronize_srcu(&ovs_accel_cb_sp_rcu);
+	return 0;
+}
+EXPORT_SYMBOL(ovs_register_accelerator);
+
+/* Unregister OVS datapath accelerator */
+void ovs_unregister_accelerator(struct ovs_accel_callback *oac)
+{
+	ovs_lock();
+	rcu_assign_pointer(ovs_accel_cb, NULL);
+	ovs_unlock();
+	synchronize_srcu(&ovs_accel_cb_sp_rcu);
+}
+EXPORT_SYMBOL(ovs_unregister_accelerator);
+
+/* Find datapath flow rule using the key */
+struct sw_flow *ovs_accel_flow_find(void *dp_inst, struct sw_flow_key *key)
+{
+	struct datapath *dp = dp_inst;
+	struct sw_flow *flow;
+
+	rcu_read_lock();
+	flow = ovs_flow_tbl_lookup(&dp->table, key);
+	rcu_read_unlock();
+
+	return flow;
+}
+EXPORT_SYMBOL(ovs_accel_flow_find);
+
+/* Find datapath flow rule using MAC addresses */
+struct sw_flow *ovs_accel_flow_find_by_mac(void *dp_inst,
+					   struct net_device *indev,
+					   struct net_device *outdev,
+					   uint8_t *smac, uint8_t *dmac, uint16_t type)
+{
+	struct datapath *dp = dp_inst;
+	struct table_instance *ti;
+	struct sw_flow *flow = NULL;
+	struct sw_flow_actions *sf_acts;
+	const struct nlattr *a;
+	struct vport *vport;
+	bool flow_found = false;
+	int rem;
+	int i;
+
+	rcu_read_lock();
+	ti = rcu_dereference(dp->table.ti);
+
+	for (i = 0; i < ti->n_buckets; i++) {
+		struct hlist_head *head = &ti->buckets[i];
+		struct hlist_node *n;
+
+		if (unlikely(!head))
+			continue;
+
+		hlist_for_each_entry_safe(flow, n, head,
+					  flow_table.node[ti->node_ver]) {
+			if ((flow->key.eth.type == type) &&
+			    ether_addr_equal(flow->key.eth.src, smac) &&
+			    ether_addr_equal(flow->key.eth.dst, dmac)) {
+				flow_found = true;
+				goto found;
+			}
+		}
+	}
+found:
+	if (!flow_found) {
+		rcu_read_unlock();
+		return NULL;
+	}
+
+	/*
+	 * Flow is found, check if ingress port matches indev
+	 */
+	if (!indev) {
+		goto check_outdev;
+	}
+
+	vport = ovs_vport_ovsl_rcu(dp, flow->key.phy.in_port);
+	if (!vport || (indev != vport->dev)) {
+		rcu_read_unlock();
+		return NULL;
+	}
+
+check_outdev:
+	/*
+	 * if outdev is NULL, then the API is called
+	 * to find the flow only
+	 */
+	if (!outdev) {
+		rcu_read_unlock();
+		return flow;
+	}
+
+	/*
+	 * Flow is found, check if output action is outdev
+	 */
+	flow_found = false;
+	sf_acts = rcu_dereference(flow->sf_acts);
+	for (a = sf_acts->actions, rem = sf_acts->actions_len; rem > 0;
+	     a = nla_next(a, &rem)) {
+		int port_no;
+
+		if (nla_type(a) != OVS_ACTION_ATTR_OUTPUT)
+			continue;
+
+		port_no = nla_get_u32(a);
+		vport = ovs_vport_ovsl_rcu(dp, port_no);
+
+		if (vport && (outdev == vport->dev)) {
+			flow_found = true;
+		}
+	}
+
+	if (!flow_found)
+		flow = NULL;
+
+	rcu_read_unlock();
+	return flow;
+}
+EXPORT_SYMBOL(ovs_accel_flow_find_by_mac);
+
+/* Update flow rule statistics */
+int ovs_accel_flow_stats_update(void *dp_inst, void *out_vport,
+				struct sw_flow_key *key, int pkts, int bytes)
+{
+	struct datapath *dp = dp_inst;
+	struct sw_flow_stats *stats;
+	struct sw_flow *flow;
+	struct dp_stats_percpu *dp_stats;
+	int node = numa_node_id();
+	u64 *stats_counter;
+	u32 n_mask_hit;
+
+	rcu_read_lock();
+	flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit);
+	if (!flow) {
+		rcu_read_unlock();
+		return -EINVAL;
+	}
+
+	/* Update node specific statistics, if memory is not allocated
+	 * for this node then update in 0 node
+	 */
+	stats = rcu_dereference(flow->stats[node]);
+	if (unlikely(!stats))
+		stats = rcu_dereference(flow->stats[0]);
+
+	rcu_read_unlock();
+
+	spin_lock_bh(&stats->lock);
+	stats->used = jiffies;
+	stats->packet_count += pkts;
+	stats->byte_count += bytes;
+
+	/* Update datapath statistics, only hit count should be updated here,
+	 * miss count is taken care of by the datapath.
+	 * n_mask_hit and stats_counter are updated per packet, whereas
+	 * stats_counter will match the number of packets processed in datapath
+	 * n_mask_hit is updated number of packets times the total masks that
+	 * are processed. Datapath flows are now accelerated and this API is
+	 * called to update flow statistics, datapath statistics should use
+	 * number of packets.
+	 */
+	dp_stats = this_cpu_ptr(dp->stats_percpu);
+	stats_counter = &dp_stats->n_hit;
+
+	u64_stats_update_begin(&dp_stats->syncp);
+	(*stats_counter) += pkts;
+	dp_stats->n_mask_hit += n_mask_hit * pkts;
+	u64_stats_update_end(&dp_stats->syncp);
+
+	spin_unlock_bh(&stats->lock);
+	return 0;
+}
+EXPORT_SYMBOL(ovs_accel_flow_stats_update);
+
+/* Find netdev using vport number */
+struct net_device *ovs_accel_dev_find(void *dp_inst, int vport_no)
+{
+	struct datapath *dp = dp_inst;
+	struct net_device *dev;
+	struct vport *vport;
+
+	rcu_read_lock();
+
+	vport = ovs_vport_rcu(dp, vport_no);
+	if (!vport) {
+		rcu_read_unlock();
+		return NULL;
+	}
+
+	dev = vport->dev;
+	rcu_read_unlock();
+	return dev;
+}
+EXPORT_SYMBOL(ovs_accel_dev_find);
+
+/* Find egress interface using key and skb */
+struct net_device *ovs_accel_egress_dev_find(void *dp_inst,
+					     struct sw_flow_key *key,
+					     struct sk_buff *skb)
+{
+	struct datapath *dp = dp_inst;
+	struct sw_flow *flow;
+	struct sw_flow_actions *sf_acts;
+	struct net_device *dev;
+	const struct nlattr *a;
+	int rem;
+	int egress_cnt = 0;
+
+	rcu_read_lock();
+	flow = ovs_accel_flow_find(dp_inst, key);
+	if (unlikely(!flow))
+		goto done;
+
+	sf_acts = rcu_dereference(flow->sf_acts);
+	for (a = sf_acts->actions, rem = sf_acts->actions_len; rem > 0;
+	     a = nla_next(a, &rem)) {
+		struct vport *vport;
+		int port_no;
+
+		switch (nla_type(a)) {
+		case OVS_ACTION_ATTR_OUTPUT:
+			port_no = nla_get_u32(a);
+			vport = ovs_vport_ovsl_rcu(dp, port_no);
+
+			/*
+			 * Avoid offloading flows with internal port as egress port
+			 */
+			if (!vport || (vport->ops->type == OVS_VPORT_TYPE_INTERNAL)) {
+				goto done;
+			}
+
+			dev = vport->dev;
+			egress_cnt++;
+		}
+	}
+
+	/*
+	 * Return dev only if flow is not a broadcast i.e.
+	 * there is only one egress interface for the flow
+	 */
+	if (egress_cnt == 1) {
+		dev_hold(dev);
+		rcu_read_unlock();
+		return dev;
+	}
+
+done:
+	rcu_read_unlock();
+	return NULL;
+}
+EXPORT_SYMBOL(ovs_accel_egress_dev_find);
+
 static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
 {
 	struct nlattr **a = info->attrs;
@@ -2468,6 +2865,8 @@
 
 	pr_info("Open vSwitch switching datapath\n");
 
+	init_srcu_struct(&ovs_accel_cb_sp_rcu);
+
 	err = action_fifos_init();
 	if (err)
 		goto error;
@@ -2517,6 +2916,7 @@
 error_action_fifos_exit:
 	action_fifos_exit();
 error:
+	cleanup_srcu_struct(&ovs_accel_cb_sp_rcu);
 	return err;
 }
 
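A minimal sketch of how an external acceleration module might consume the hooks above, assuming the struct ovs_accel_callback definition and the exported helper prototypes come from the OVS headers touched by the same patch series (not shown in this file). The handler signatures below are inferred from the call sites in the ovs_dp_*_notify functions, and every my_accel_* name is hypothetical.

/*
 * Sketch of an acceleration client for the OVS datapath hooks above.
 * Assumptions: struct ovs_accel_callback and the ovs_register_accelerator()/
 * ovs_unregister_accelerator() prototypes are provided by the patched OVS
 * headers; handler signatures are inferred from the notify call sites and
 * may differ from the real header.
 */
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>

#include "datapath.h"
#include "flow.h"

/* Called for each packet seen in ovs_dp_process_packet(); sf/sfa are NULL
 * on the pre-lookup call and valid on the post-lookup call.
 */
static void my_accel_dp_pkt_process(void *dp, struct sk_buff *skb,
				    struct sw_flow_key *key,
				    struct sw_flow *sf,
				    struct sw_flow_actions *sfa)
{
	/* Push or refresh the corresponding entry in the offload engine here. */
}

/* Called when a flow is removed from the datapath flow table. */
static void my_accel_dp_flow_del(void *dp, struct sw_flow *sf)
{
	/* Tear down the offloaded entry for this flow here. */
}

static struct ovs_accel_callback my_accel_cb = {
	.ovs_accel_dp_pkt_process = my_accel_dp_pkt_process,
	.ovs_accel_dp_flow_del    = my_accel_dp_flow_del,
	/* Remaining hooks may stay NULL; each notifier checks its pointer. */
};

static int __init my_accel_init(void)
{
	/* Only one accelerator may be registered at a time (-EEXIST otherwise). */
	return ovs_register_accelerator(&my_accel_cb);
}

static void __exit my_accel_exit(void)
{
	ovs_unregister_accelerator(&my_accel_cb);
}

module_init(my_accel_init);
module_exit(my_accel_exit);
MODULE_LICENSE("GPL");

Offloaded flows would then report their counters back through ovs_accel_flow_stats_update(), which also folds the packet count into the datapath's per-CPU hit statistics. Note that ovs_accel_egress_dev_find() takes a reference on the single egress device of a unicast flow via dev_hold(), so a caller is expected to release it with dev_put() when finished.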