#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP)
/* NOTE(review): the header names of the #include directives below are
 * missing from the source as received; restore them before building.
 */
#include
#include
#include
#include

#if IS_ENABLED(CONFIG_IPV6)
#include
#include
#endif

/* Codes carried by struct mptcp_addr_event */
enum {
	MPTCP_EVENT_ADD = 1,
	MPTCP_EVENT_DEL,
	MPTCP_EVENT_MOD,
};

/* Delay, in milliseconds, before subflow_retry_work is run */
#define MPTCP_SUBFLOW_RETRY_DELAY	1000

/* Max number of local or remote addresses we can store.
 * When changing, see the bitfield below in fullmesh_rem4/6.
 */
#define MPTCP_MAX_ADDR	8

/* One remote IPv4 address. Bit n of bitfield/retry_bitfield refers to the
 * local address stored at index n of mptcp_loc_addr.locaddr4.
 */
struct fullmesh_rem4 {
	u8		rem4_id;
	u8		bitfield;
	u8		retry_bitfield;
	__be16		port;
	struct in_addr	addr;
};

/* One remote IPv6 address. Bit n of bitfield/retry_bitfield refers to the
 * local address stored at index n of mptcp_loc_addr.locaddr6.
 */
struct fullmesh_rem6 {
	u8		rem6_id;
	u8		bitfield;
	u8		retry_bitfield;
	__be16		port;
	struct in6_addr	addr;
};

/* Per-namespace list of local addresses. Readers access it through RCU;
 * the address worker publishes modified copies.
 */
struct mptcp_loc_addr {
	struct mptcp_loc4 locaddr4[MPTCP_MAX_ADDR];
	u8 loc4_bits;		/* Used slots of locaddr4 */
	u8 next_v4_index;	/* Where to start looking for a free slot */

	struct mptcp_loc6 locaddr6[MPTCP_MAX_ADDR];
	u8 loc6_bits;		/* Used slots of locaddr6 */
	u8 next_v6_index;	/* Where to start looking for a free slot */
	struct rcu_head rcu;
};

/* A queued local-address event (one of MPTCP_EVENT_*) */
struct mptcp_addr_event {
	struct list_head list;
	unsigned short	family;
	u8	code:7,
		low_prio:1;
	int	if_idx;
	union inet_addr addr;
};

/* Per-connection state of this path manager, stored in mpcb->mptcp_pm */
struct fullmesh_priv {
	/* Worker struct for subflow establishment */
	struct work_struct subflow_work;
	/* Delayed worker, when the routing-tables are not yet ready. */
	struct delayed_work subflow_retry_work;

	/* Remote addresses */
	struct fullmesh_rem4 remaddr4[MPTCP_MAX_ADDR];
	struct fullmesh_rem6 remaddr6[MPTCP_MAX_ADDR];

	struct mptcp_cb *mpcb;

	u16 remove_addrs; /* Addresses to remove */
	u8 announced_addrs_v4; /* IPv4 Addresses we did announce */
	u8 announced_addrs_v6; /* IPv6 Addresses we did announce */

	u8	add_addr; /* Are we sending an add_addr? */

	u8 rem4_bits;
	u8 rem6_bits;

	/* Have we established the additional subflows for primary pair? */
	u8 first_pair:1;
};

/* Per-network-namespace state of this path manager */
struct mptcp_fm_ns {
	struct mptcp_loc_addr __rcu *local;
	spinlock_t local_lock; /* Protecting the above pointer */
	struct list_head events;
	struct delayed_work address_worker;

	struct net *net;
};

static int num_subflows __read_mostly = 1;
module_param(num_subflows, int, 0644);
MODULE_PARM_DESC(num_subflows, "choose the number of subflows per pair of IP addresses of MPTCP connection");

static int create_on_err __read_mostly;
module_param(create_on_err, int, 0644);
MODULE_PARM_DESC(create_on_err, "recreate the subflow upon a timeout");

static struct mptcp_pm_ops full_mesh __read_mostly;

static void full_mesh_create_subflows(struct sock *meta_sk);

/* Return the fullmesh state attached to the network namespace @net */
static struct mptcp_fm_ns *fm_get_ns(const struct net *net)
{
	return (struct mptcp_fm_ns *)net->mptcp.path_managers[MPTCP_PM_FULLMESH];
}

/* Return the fullmesh private area embedded in @mpcb */
static struct fullmesh_priv *fullmesh_get_priv(const struct mptcp_cb *mpcb)
{
	return (struct fullmesh_priv *)&mpcb->mptcp_pm[0];
}

/* Find the first free (zero) bit in @bitfield, searching from bit @base
 * upwards and wrapping around to bit 0 when nothing is free at or above
 * @base. Returns the bit index, or -1 when all eight bits are set.
 */
static int __mptcp_find_free_index(u8 bitfield, u8 base)
{
	int i;

	/* There are anyways no free bits... */
	if (bitfield == 0xff)
		goto exit;

	i = ffs(~(bitfield >> base)) - 1;
	if (i < 0)
		goto exit;

	/* No free bits when starting at base, try from 0 on */
	if (i + base >= sizeof(bitfield) * 8)
		return __mptcp_find_free_index(bitfield, 0);

	return i + base;
exit:
	return -1;
}

/* Find the lowest free (zero) bit in @bitfield, or -1 if there is none */
static int mptcp_find_free_index(u8 bitfield)
{
	return __mptcp_find_free_index(bitfield, 0);
}

/* Record the remote IPv4 address @addr/@port under address-id @id.
 *
 * An entry that already carries @id is updated in place; otherwise the
 * address is stored in the first free slot. Nothing is stored when all
 * MPTCP_MAX_ADDR slots are in use. mpcb->list_rcvd is set whenever the
 * list has been modified.
 */
static void mptcp_addv4_raddr(struct mptcp_cb *mpcb,
			      const struct in_addr *addr,
			      __be16 port, u8 id)
{
	int i;
	struct fullmesh_rem4 *rem4;
	struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb);

	mptcp_for_each_bit_set(fmp->rem4_bits, i) {
		rem4 = &fmp->remaddr4[i];

		/* Address is already in the list --- continue */
		if (rem4->rem4_id == id &&
		    rem4->addr.s_addr == addr->s_addr && rem4->port == port)
			return;

		/* This may be the case, when the peer is behind a NAT. He is
		 * trying to JOIN, thus sending the JOIN with a certain ID.
		 * However the src_addr of the IP-packet has been changed. We
		 * update the addr in the list, because this is the address as
		 * OUR BOX sees it.
		 *
		 * Matching on the id alone (as the IPv6 variant does) also
		 * covers a changed port, which would otherwise end up as a
		 * second entry with the same id.
		 */
		if (rem4->rem4_id == id) {
			/* update the address */
			mptcp_debug("%s: updating old addr:%pI4 to addr %pI4 with id:%d\n",
				    __func__, &rem4->addr.s_addr,
				    &addr->s_addr, id);
			rem4->addr.s_addr = addr->s_addr;
			rem4->port = port;
			mpcb->list_rcvd = 1;
			return;
		}
	}

	i = mptcp_find_free_index(fmp->rem4_bits);
	/* Do we have already the maximum number of local/remote addresses? */
	if (i < 0) {
		mptcp_debug("%s: At max num of remote addresses: %d --- not adding address: %pI4\n",
			    __func__, MPTCP_MAX_ADDR, &addr->s_addr);
		return;
	}

	rem4 = &fmp->remaddr4[i];

	/* Address is not known yet, store it */
	rem4->addr.s_addr = addr->s_addr;
	rem4->port = port;
	rem4->bitfield = 0;
	rem4->retry_bitfield = 0;
	rem4->rem4_id = id;
	mpcb->list_rcvd = 1;
	fmp->rem4_bits |= (1 << i);
}

/* Record the remote IPv6 address @addr/@port under address-id @id.
 *
 * An entry that already carries @id is updated in place; otherwise the
 * address is stored in the first free slot. Nothing is stored when all
 * MPTCP_MAX_ADDR slots are in use. mpcb->list_rcvd is set whenever the
 * list has been modified.
 */
static void mptcp_addv6_raddr(struct mptcp_cb *mpcb,
			      const struct in6_addr *addr,
			      __be16 port, u8 id)
{
	int i;
	struct fullmesh_rem6 *rem6;
	struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb);

	mptcp_for_each_bit_set(fmp->rem6_bits, i) {
		rem6 = &fmp->remaddr6[i];

		/* Address is already in the list --- continue */
		if (rem6->rem6_id == id &&
		    ipv6_addr_equal(&rem6->addr, addr) && rem6->port == port)
			return;

		/* This may be the case, when the peer is behind a NAT. He is
		 * trying to JOIN, thus sending the JOIN with a certain ID.
		 * However the src_addr of the IP-packet has been changed. We
		 * update the addr in the list, because this is the address as
		 * OUR BOX sees it.
		 */
		if (rem6->rem6_id == id) {
			/* update the address */
			mptcp_debug("%s: updating old addr: %pI6 to addr %pI6 with id:%d\n",
				    __func__, &rem6->addr, addr, id);
			rem6->addr = *addr;
			rem6->port = port;
			mpcb->list_rcvd = 1;
			return;
		}
	}

	i = mptcp_find_free_index(fmp->rem6_bits);
	/* Do we have already the maximum number of local/remote addresses? */
	if (i < 0) {
		mptcp_debug("%s: At max num of remote addresses: %d --- not adding address: %pI6\n",
			    __func__, MPTCP_MAX_ADDR, addr);
		return;
	}

	rem6 = &fmp->remaddr6[i];

	/* Address is not known yet, store it */
	rem6->addr = *addr;
	rem6->port = port;
	rem6->bitfield = 0;
	rem6->retry_bitfield = 0;
	rem6->rem6_id = id;
	mpcb->list_rcvd = 1;
	fmp->rem6_bits |= (1 << i);
}

/* Drop the first remote IPv4 address stored under address-id @id */
static void mptcp_v4_rem_raddress(struct mptcp_cb *mpcb, u8 id)
{
	int i;
	struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb);

	mptcp_for_each_bit_set(fmp->rem4_bits, i) {
		if (fmp->remaddr4[i].rem4_id == id) {
			/* remove address from bitfield */
			fmp->rem4_bits &= ~(1 << i);

			break;
		}
	}
}

/* Drop the first remote IPv6 address stored under address-id @id */
static void mptcp_v6_rem_raddress(const struct mptcp_cb *mpcb, u8 id)
{
	int i;
	struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb);

	mptcp_for_each_bit_set(fmp->rem6_bits, i) {
		if (fmp->remaddr6[i].rem6_id == id) {
			/* remove address from bitfield */
			fmp->rem6_bits &= ~(1 << i);

			break;
		}
	}
}

/* Sets the bitfield of the remote-address field: marks the local address
 * at @index as handled for the first stored remote IPv4 address equal to
 * @addr.
 */
static void mptcp_v4_set_init_addr_bit(const struct mptcp_cb *mpcb,
				       const struct in_addr *addr, u8 index)
{
	int i;
	struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb);

	mptcp_for_each_bit_set(fmp->rem4_bits, i) {
		if (fmp->remaddr4[i].addr.s_addr == addr->s_addr) {
			fmp->remaddr4[i].bitfield |= (1 << index);
			return;
		}
	}
}

/* Sets the bitfield of the remote-address field: marks the local address
 * at @index as handled for the first stored remote IPv6 address equal to
 * @addr.
 */
static void mptcp_v6_set_init_addr_bit(struct mptcp_cb *mpcb,
				       const struct in6_addr *addr, u8 index)
{
	int i;
	struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb);

	mptcp_for_each_bit_set(fmp->rem6_bits, i) {
		if (ipv6_addr_equal(&fmp->remaddr6[i].addr, addr)) {
			fmp->remaddr6[i].bitfield |= (1 << index);
			return;
		}
	}
}

/* Family dispatcher for mptcp_v4/v6_set_init_addr_bit(); any @family
 * other than AF_INET is treated as IPv6.
 */
static void mptcp_set_init_addr_bit(struct mptcp_cb *mpcb,
				    const union inet_addr *addr,
				    sa_family_t family, u8 id)
{
	if (family == AF_INET)
		mptcp_v4_set_init_addr_bit(mpcb, &addr->in, id);
	else
		mptcp_v6_set_init_addr_bit(mpcb, &addr->in6, id);
}

/* Open the num_subflows - 1 additional IPv4 subflows for the pair
 * @loc/@rem.
 */
static void mptcp_v4_subflows(struct sock *meta_sk,
			      const struct mptcp_loc4 *loc,
			      struct mptcp_rem4 *rem)
{
	int i;

	for (i = 1; i < num_subflows; i++)
		mptcp_init4_subsockets(meta_sk, loc, rem);
}

#if IS_ENABLED(CONFIG_IPV6)
/* Open the num_subflows - 1 additional IPv6 subflows for the pair
 * @loc/@rem.
 */
static void mptcp_v6_subflows(struct sock *meta_sk,
			      const struct mptcp_loc6 *loc,
			      struct mptcp_rem6 *rem)
{
	int i;

	for (i = 1; i < num_subflows; i++)
		mptcp_init6_subsockets(meta_sk, loc, rem);
}
#endif

/* Delayed work: retry the subflows flagged in the retry_bitfield of each
 * remote address.
 *
 * One address pair is handled per pass; the locks are dropped between
 * passes (goto next_subflow). The work item owns one reference on the
 * meta-socket and one on the mpcb, both released before returning.
 */
static void retry_subflow_worker(struct work_struct *work)
{
	struct delayed_work *delayed_work = container_of(work,
							 struct delayed_work,
							 work);
	struct fullmesh_priv *fmp = container_of(delayed_work,
						 struct fullmesh_priv,
						 subflow_retry_work);
	struct mptcp_cb *mpcb = fmp->mpcb;
	struct sock *meta_sk = mpcb->meta_sk;
	struct mptcp_loc_addr *mptcp_local;
	struct mptcp_fm_ns *fm_ns = fm_get_ns(sock_net(meta_sk));
	int iter = 0, i;

	/* We need a local (stable) copy of the address-list. Really, it is not
	 * such a big deal, if the address-list is not 100% up-to-date.
	 */
	rcu_read_lock_bh();
	mptcp_local = rcu_dereference_bh(fm_ns->local);
	mptcp_local = kmemdup(mptcp_local, sizeof(*mptcp_local), GFP_ATOMIC);
	rcu_read_unlock_bh();

	if (!mptcp_local) {
		/* No locks are held yet, but the references taken when this
		 * work was queued still have to be dropped.
		 */
		mptcp_mpcb_put(mpcb);
		sock_put(meta_sk);
		return;
	}

next_subflow:
	if (iter) {
		release_sock(meta_sk);
		mutex_unlock(&mpcb->mpcb_mutex);

		cond_resched();
	}
	mutex_lock(&mpcb->mpcb_mutex);
	lock_sock_nested(meta_sk, SINGLE_DEPTH_NESTING);

	if (!mptcp(tcp_sk(meta_sk)))
		goto exit;

	iter++;

	if (sock_flag(meta_sk, SOCK_DEAD))
		goto exit;

	mptcp_for_each_bit_set(fmp->rem4_bits, i) {
		struct fullmesh_rem4 *rem = &fmp->remaddr4[i];

		/* Do we need to retry establishing a subflow ? */
		if (rem->retry_bitfield) {
			/* Lowest set bit of retry_bitfield */
			int loc_idx = mptcp_find_free_index(~rem->retry_bitfield);
			struct mptcp_rem4 rem4;

			rem->bitfield |= (1 << loc_idx);
			rem->retry_bitfield &= ~(1 << loc_idx);

			rem4.addr = rem->addr;
			rem4.port = rem->port;
			rem4.rem4_id = rem->rem4_id;

			mptcp_init4_subsockets(meta_sk,
					       &mptcp_local->locaddr4[loc_idx],
					       &rem4);
			mptcp_v4_subflows(meta_sk,
					  &mptcp_local->locaddr4[loc_idx],
					  &rem4);
			goto next_subflow;
		}
	}

#if IS_ENABLED(CONFIG_IPV6)
	mptcp_for_each_bit_set(fmp->rem6_bits, i) {
		struct fullmesh_rem6 *rem = &fmp->remaddr6[i];

		/* Do we need to retry establishing a subflow ? */
		if (rem->retry_bitfield) {
			/* Lowest set bit of retry_bitfield */
			int loc_idx = mptcp_find_free_index(~rem->retry_bitfield);
			struct mptcp_rem6 rem6;

			rem->bitfield |= (1 << loc_idx);
			rem->retry_bitfield &= ~(1 << loc_idx);

			rem6.addr = rem->addr;
			rem6.port = rem->port;
			rem6.rem6_id = rem->rem6_id;

			mptcp_init6_subsockets(meta_sk,
					       &mptcp_local->locaddr6[loc_idx],
					       &rem6);
			mptcp_v6_subflows(meta_sk,
					  &mptcp_local->locaddr6[loc_idx],
					  &rem6);
			goto next_subflow;
		}
	}
#endif

exit:
	kfree(mptcp_local);
	release_sock(meta_sk);
	mutex_unlock(&mpcb->mpcb_mutex);
	mptcp_mpcb_put(mpcb);
	sock_put(meta_sk);
}

/**
 * Create all new subflows, by doing calls to mptcp_initX_subsockets
 *
 * This function uses a goto next_subflow, to allow releasing the lock between
 * new subflows and giving other processes a chance to do some work on the
 * socket and potentially finishing the communication.
 *
 * The work item owns one reference on the meta-socket and one on the mpcb
 * (taken in full_mesh_create_subflows()); both are released before
 * returning.
 **/
static void create_subflow_worker(struct work_struct *work)
{
	struct fullmesh_priv *fmp = container_of(work, struct fullmesh_priv,
						 subflow_work);
	struct mptcp_cb *mpcb = fmp->mpcb;
	struct sock *meta_sk = mpcb->meta_sk;
	struct mptcp_loc_addr *mptcp_local;
	const struct mptcp_fm_ns *fm_ns = fm_get_ns(sock_net(meta_sk));
	int iter = 0, retry = 0;
	int i;

	/* We need a local (stable) copy of the address-list. Really, it is not
	 * such a big deal, if the address-list is not 100% up-to-date.
	 */
	rcu_read_lock_bh();
	mptcp_local = rcu_dereference_bh(fm_ns->local);
	mptcp_local = kmemdup(mptcp_local, sizeof(*mptcp_local), GFP_ATOMIC);
	rcu_read_unlock_bh();

	if (!mptcp_local) {
		/* No locks are held yet, but the references taken when this
		 * work was queued still have to be dropped.
		 */
		mptcp_mpcb_put(mpcb);
		sock_put(meta_sk);
		return;
	}

next_subflow:
	if (iter) {
		release_sock(meta_sk);
		mutex_unlock(&mpcb->mpcb_mutex);

		cond_resched();
	}
	mutex_lock(&mpcb->mpcb_mutex);
	lock_sock_nested(meta_sk, SINGLE_DEPTH_NESTING);

	if (sock_flag(meta_sk, SOCK_DEAD) || !mptcp(tcp_sk(meta_sk)))
		goto exit;

	if (mpcb->master_sk &&
	    !tcp_sk(mpcb->master_sk)->mptcp->fully_established)
		goto exit;

	/* Create the additional subflows for the first pair */
	if (fmp->first_pair == 0 && mpcb->master_sk) {
		struct mptcp_loc4 loc;
		struct mptcp_rem4 rem;

		loc.addr.s_addr = inet_sk(meta_sk)->inet_saddr;
		loc.loc4_id = 0;
		loc.low_prio = 0;
		loc.if_idx = mpcb->master_sk->sk_bound_dev_if;

		rem.addr.s_addr = inet_sk(meta_sk)->inet_daddr;
		rem.port = inet_sk(meta_sk)->inet_dport;
		rem.rem4_id = 0; /* Default 0 */

		mptcp_v4_subflows(meta_sk, &loc, &rem);

		fmp->first_pair = 1;
	}
	iter++;

	mptcp_for_each_bit_set(fmp->rem4_bits, i) {
		struct fullmesh_rem4 *rem;
		u8 remaining_bits;

		rem = &fmp->remaddr4[i];
		remaining_bits = ~(rem->bitfield) & mptcp_local->loc4_bits;

		/* Are there still combinations to handle? */
		if (remaining_bits) {
			/* Lowest set bit of remaining_bits */
			int loc_idx = mptcp_find_free_index(~remaining_bits);
			struct mptcp_rem4 rem4;

			rem->bitfield |= (1 << loc_idx);

			rem4.addr = rem->addr;
			rem4.port = rem->port;
			rem4.rem4_id = rem->rem4_id;

			/* If a route is not yet available then retry once */
			if (mptcp_init4_subsockets(meta_sk,
						   &mptcp_local->locaddr4[loc_idx],
						   &rem4) == -ENETUNREACH)
				retry = rem->retry_bitfield |= (1 << loc_idx);
			else
				mptcp_v4_subflows(meta_sk,
						  &mptcp_local->locaddr4[loc_idx],
						  &rem4);
			goto next_subflow;
		}
	}

#if IS_ENABLED(CONFIG_IPV6)
	if (fmp->first_pair == 0 && mpcb->master_sk) {
		struct mptcp_loc6 loc;
		struct mptcp_rem6 rem;

		loc.addr = inet6_sk(meta_sk)->saddr;
		loc.loc6_id = 0;
		loc.low_prio = 0;
		loc.if_idx = mpcb->master_sk->sk_bound_dev_if;

		rem.addr = meta_sk->sk_v6_daddr;
		rem.port = inet_sk(meta_sk)->inet_dport;
		rem.rem6_id = 0; /* Default 0 */

		mptcp_v6_subflows(meta_sk, &loc, &rem);

		fmp->first_pair = 1;
	}
	mptcp_for_each_bit_set(fmp->rem6_bits, i) {
		struct fullmesh_rem6 *rem;
		u8 remaining_bits;

		rem = &fmp->remaddr6[i];
		remaining_bits = ~(rem->bitfield) & mptcp_local->loc6_bits;

		/* Are there still combinations to handle? */
		if (remaining_bits) {
			/* Lowest set bit of remaining_bits */
			int loc_idx = mptcp_find_free_index(~remaining_bits);
			struct mptcp_rem6 rem6;

			rem->bitfield |= (1 << loc_idx);

			rem6.addr = rem->addr;
			rem6.port = rem->port;
			rem6.rem6_id = rem->rem6_id;

			/* If a route is not yet available then retry once */
			if (mptcp_init6_subsockets(meta_sk,
						   &mptcp_local->locaddr6[loc_idx],
						   &rem6) == -ENETUNREACH)
				retry = rem->retry_bitfield |= (1 << loc_idx);
			else
				mptcp_v6_subflows(meta_sk,
						  &mptcp_local->locaddr6[loc_idx],
						  &rem6);
			goto next_subflow;
		}
	}
#endif

	if (retry && !delayed_work_pending(&fmp->subflow_retry_work)) {
		/* References for the retry work; it drops them itself */
		sock_hold(meta_sk);
		refcount_inc(&mpcb->mpcb_refcnt);
		queue_delayed_work(mptcp_wq, &fmp->subflow_retry_work,
				   msecs_to_jiffies(MPTCP_SUBFLOW_RETRY_DELAY));
	}

exit:
	kfree(mptcp_local);
	release_sock(meta_sk);
	mutex_unlock(&mpcb->mpcb_mutex);
	mptcp_mpcb_put(mpcb);
	sock_put(meta_sk);
}

/* Flag the local address-id @addr_id for removal-announcement and, if an
 * ack-socket can be selected, send an ACK on it.
 */
static void announce_remove_addr(u8 addr_id, struct sock *meta_sk)
{
	struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
	struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb);
	struct sock *sk = mptcp_select_ack_sock(meta_sk);

	fmp->remove_addrs |= (1 << addr_id);
	mpcb->addr_signal = 1;

	if (sk)
		tcp_send_ack(sk);
}

/* Clear, in the per-connection bitfields, every bit whose local address
 * is no longer present in @mptcp_local.
 */
static void update_addr_bitfields(struct sock *meta_sk,
				  const struct mptcp_loc_addr *mptcp_local)
{
	struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
	struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb);
	int i;

	/* The bits in announced_addrs_* always match with loc*_bits. So, a
	 * simple & operation unsets the correct bits, because these go from
	 * announced to non-announced
	 */
	fmp->announced_addrs_v4 &= mptcp_local->loc4_bits;

	mptcp_for_each_bit_set(fmp->rem4_bits, i) {
		fmp->remaddr4[i].bitfield &= mptcp_local->loc4_bits;
		fmp->remaddr4[i].retry_bitfield &= mptcp_local->loc4_bits;
	}

	fmp->announced_addrs_v6 &= mptcp_local->loc6_bits;

	mptcp_for_each_bit_set(fmp->rem6_bits, i) {
		fmp->remaddr6[i].bitfield &= mptcp_local->loc6_bits;
		fmp->remaddr6[i].retry_bitfield &= mptcp_local->loc6_bits;
	}
}

/* Look up @addr of @family in @mptcp_local.
 *
 * When @if_idx is non-zero the entry must also belong to that interface.
 * Returns the index of the first match, or -1 if there is none.
 */
static int mptcp_find_address(const struct mptcp_loc_addr *mptcp_local,
			      sa_family_t family, const union inet_addr *addr,
			      int if_idx)
{
	int i;
	u8 loc_bits;
	bool found = false;

	if (family == AF_INET)
		loc_bits = mptcp_local->loc4_bits;
	else
		loc_bits = mptcp_local->loc6_bits;

	mptcp_for_each_bit_set(loc_bits, i) {
		if (family == AF_INET &&
		    (!if_idx || mptcp_local->locaddr4[i].if_idx == if_idx) &&
		    mptcp_local->locaddr4[i].addr.s_addr == addr->in.s_addr) {
			found = true;
			break;
		}
		if (family == AF_INET6 &&
		    (!if_idx || mptcp_local->locaddr6[i].if_idx == if_idx) &&
		    ipv6_addr_equal(&mptcp_local->locaddr6[i].addr,
				    &addr->in6)) {
			found = true;
			break;
		}
	}

	if (!found)
		return -1;

	return i;
}

/* Like mptcp_find_address() but without comparing the address itself:
 * returns the index of the first local address of @family (on interface
 * @if_idx when it is non-zero), or -1 if there is none. Used for
 * transparent sockets.
 */
static int mptcp_find_address_transp(const struct mptcp_loc_addr *mptcp_local,
				     sa_family_t family, int if_idx)
{
	bool found = false;
	u8 loc_bits;
	int i;

	if (family == AF_INET)
		loc_bits = mptcp_local->loc4_bits;
	else
		loc_bits = mptcp_local->loc6_bits;

	mptcp_for_each_bit_set(loc_bits, i) {
		if (family == AF_INET &&
		    (!if_idx || mptcp_local->locaddr4[i].if_idx == if_idx)) {
			found = true;
			break;
		}
		if (family == AF_INET6 &&
		    (!if_idx || mptcp_local->locaddr6[i].if_idx == if_idx)) {
			found = true;
			break;
		}
	}

	if (!found)
		return -1;

	return i;
}

/* Delayed work: drain the namespace's event list.
 *
 * Each event is first applied to the RCU-published local address list
 * (under local_lock, by publishing a modified copy) and then, if that
 * changed anything, to every fullmesh meta-socket of the namespace found
 * in tk_hashtable.
 */
static void mptcp_address_worker(struct work_struct *work)
{
	const struct delayed_work *delayed_work = container_of(work,
							       struct delayed_work,
							       work);
	struct mptcp_fm_ns *fm_ns = container_of(delayed_work,
						 struct mptcp_fm_ns,
						 address_worker);
	struct net *net = fm_ns->net;
	struct mptcp_addr_event *event = NULL;
	struct mptcp_loc_addr *mptcp_local, *old;
	int i, id = -1; /* id is used in the socket-code on a delete-event */
	bool success; /* Used to indicate if we succeeded handling the event */

next_event:
	success = false;
	/* Frees the event handled in the previous round (NULL at first) */
	kfree(event);

	/* First, let's dequeue an event from our event-list */
	rcu_read_lock_bh();
	spin_lock(&fm_ns->local_lock);

	event = list_first_entry_or_null(&fm_ns->events,
					 struct mptcp_addr_event, list);
	if (!event) {
		spin_unlock(&fm_ns->local_lock);
		rcu_read_unlock_bh();
		return;
	}

	list_del(&event->list);

	mptcp_local = rcu_dereference_bh(fm_ns->local);

	if (event->code == MPTCP_EVENT_DEL) {
		id = mptcp_find_address(mptcp_local, event->family,
					&event->addr, event->if_idx);

		/* Not in the list - so we don't care */
		if (id < 0) {
			mptcp_debug("%s could not find id\n", __func__);
			goto duno;
		}

		old = mptcp_local;
		mptcp_local = kmemdup(mptcp_local, sizeof(*mptcp_local),
				      GFP_ATOMIC);
		if (!mptcp_local)
			goto duno;

		if (event->family == AF_INET)
			mptcp_local->loc4_bits &= ~(1 << id);
		else
			mptcp_local->loc6_bits &= ~(1 << id);

		rcu_assign_pointer(fm_ns->local, mptcp_local);
		kfree_rcu(old, rcu);
	} else {
		/* Block-local slot index; the function-level i is only used
		 * for the hash-table walk further down.
		 */
		int i = mptcp_find_address(mptcp_local, event->family,
					   &event->addr, event->if_idx);
		int j = i;

		if (j < 0) {
			/* Not in the list, so we have to find an empty slot */
			if (event->family == AF_INET)
				i = __mptcp_find_free_index(mptcp_local->loc4_bits,
							    mptcp_local->next_v4_index);
			if (event->family == AF_INET6)
				i = __mptcp_find_free_index(mptcp_local->loc6_bits,
							    mptcp_local->next_v6_index);

			if (i < 0) {
				mptcp_debug("%s no more space\n", __func__);
				goto duno;
			}

			/* It might have been a MOD-event. */
			event->code = MPTCP_EVENT_ADD;
		} else {
			/* Let's check if anything changes */
			if (event->family == AF_INET &&
			    event->low_prio == mptcp_local->locaddr4[i].low_prio)
				goto duno;

			if (event->family == AF_INET6 &&
			    event->low_prio == mptcp_local->locaddr6[i].low_prio)
				goto duno;
		}

		old = mptcp_local;
		mptcp_local = kmemdup(mptcp_local, sizeof(*mptcp_local),
				      GFP_ATOMIC);
		if (!mptcp_local)
			goto duno;

		if (event->family == AF_INET) {
			mptcp_local->locaddr4[i].addr.s_addr = event->addr.in.s_addr;
			mptcp_local->locaddr4[i].loc4_id = i + 1;
			mptcp_local->locaddr4[i].low_prio = event->low_prio;
			mptcp_local->locaddr4[i].if_idx = event->if_idx;

			mptcp_debug("%s updated IP %pI4 on ifidx %u prio %u id %u\n",
				    __func__, &event->addr.in.s_addr,
				    event->if_idx, event->low_prio, i + 1);
		} else {
			mptcp_local->locaddr6[i].addr = event->addr.in6;
			mptcp_local->locaddr6[i].loc6_id = i + MPTCP_MAX_ADDR;
			mptcp_local->locaddr6[i].low_prio = event->low_prio;
			mptcp_local->locaddr6[i].if_idx = event->if_idx;

			mptcp_debug("%s updated IP %pI6 on ifidx %u prio %u id %u\n",
				    __func__, &event->addr.in6,
				    event->if_idx, event->low_prio,
				    i + MPTCP_MAX_ADDR);
		}

		if (j < 0) {
			if (event->family == AF_INET) {
				mptcp_local->loc4_bits |= (1 << i);
				mptcp_local->next_v4_index = i + 1;
			} else {
				mptcp_local->loc6_bits |= (1 << i);
				mptcp_local->next_v6_index = i + 1;
			}
		}

		rcu_assign_pointer(fm_ns->local, mptcp_local);
		kfree_rcu(old, rcu);
	}
	success = true;

duno:
	spin_unlock(&fm_ns->local_lock);
	rcu_read_unlock_bh();

	if (!success)
		goto next_event;

	/* Now we iterate over the MPTCP-sockets and apply the event. */
	for (i = 0; i < MPTCP_HASH_SIZE; i++) {
		const struct hlist_nulls_node *node;
		struct tcp_sock *meta_tp;

		rcu_read_lock_bh();
		hlist_nulls_for_each_entry_rcu(meta_tp, node, &tk_hashtable[i],
					       tk_table) {
			struct sock *meta_sk = (struct sock *)meta_tp, *sk;
			bool meta_v4 = meta_sk->sk_family == AF_INET;
			struct mptcp_cb *mpcb;

			if (sock_net(meta_sk) != net)
				continue;

			if (meta_v4) {
				/* skip IPv6 events if meta is IPv4 */
				if (event->family == AF_INET6)
					continue;
			} else if (event->family == AF_INET &&
				   meta_sk->sk_ipv6only) {
				/* skip IPv4 events if IPV6_V6ONLY is set */
				continue;
			}

			if (unlikely(!refcount_inc_not_zero(&meta_sk->sk_refcnt)))
				continue;

			bh_lock_sock(meta_sk);

			mpcb = meta_tp->mpcb;
			if (!mpcb)
				goto next;

			if (!mptcp(meta_tp) || !is_meta_sk(meta_sk) ||
			    mptcp_in_infinite_mapping_weak(mpcb))
				goto next;

			/* May be that the pm has changed in-between */
			if (mpcb->pm_ops != &full_mesh)
				goto next;

			if (sock_owned_by_user(meta_sk)) {
				/* Defer; the hold taken here is only needed
				 * when we are the ones setting the flag.
				 */
				if (!test_and_set_bit(MPTCP_PATH_MANAGER_DEFERRED,
						      &meta_sk->sk_tsq_flags))
					sock_hold(meta_sk);

				goto next;
			}

			if (event->code == MPTCP_EVENT_ADD) {
				struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb);

				fmp->add_addr++;
				mpcb->addr_signal = 1;

				sk = mptcp_select_ack_sock(meta_sk);
				if (sk)
					tcp_send_ack(sk);

				full_mesh_create_subflows(meta_sk);
			}

			if (event->code == MPTCP_EVENT_DEL) {
				struct mptcp_tcp_sock *mptcp;
				struct mptcp_loc_addr *mptcp_local;
				struct hlist_node *tmp;
				bool found = false;

				mptcp_local = rcu_dereference_bh(fm_ns->local);

				/* In any case, we need to update our bitfields */
				if (id >= 0)
					update_addr_bitfields(meta_sk, mptcp_local);

				/* Look for the socket and remove him */
				mptcp_for_each_sub_safe(mpcb, mptcp, tmp) {
					struct sock *sk = mptcp_to_sock(mptcp);

					if ((event->family == AF_INET6 &&
					     (sk->sk_family == AF_INET ||
					      mptcp_v6_is_v4_mapped(sk))) ||
					    (event->family == AF_INET &&
					     (sk->sk_family == AF_INET6 &&
					      !mptcp_v6_is_v4_mapped(sk))))
						continue;

					if (event->family == AF_INET &&
					    (sk->sk_family == AF_INET ||
					     mptcp_v6_is_v4_mapped(sk)) &&
					     inet_sk(sk)->inet_saddr != event->addr.in.s_addr)
						continue;

					if (event->family == AF_INET6 &&
					    sk->sk_family == AF_INET6 &&
					    !ipv6_addr_equal(&inet6_sk(sk)->saddr, &event->addr.in6))
						continue;

					/* Reinject, so that pf = 1 and so we
					 * won't select this one as the
					 * ack-sock.
					 */
					mptcp_reinject_data(sk, 0);

					/* We announce the removal of this id */
					announce_remove_addr(tcp_sk(sk)->mptcp->loc_id,
							     meta_sk);

					mptcp_sub_force_close(sk);
					found = true;
				}

				if (found)
					goto next;

				/* The id may have been given by the event,
				 * matching on a local address. And it may not
				 * have matched on one of the above sockets,
				 * because the client never created a subflow.
				 * So, we have to finally remove it here.
				 */
				if (id >= 0) {
					u8 loc_id = id + (event->family == AF_INET ? 1 : MPTCP_MAX_ADDR);

					announce_remove_addr(loc_id, meta_sk);
				}
			}

			if (event->code == MPTCP_EVENT_MOD) {
				struct mptcp_tcp_sock *mptcp;

				mptcp_for_each_sub(mpcb, mptcp) {
					struct sock *sk = mptcp_to_sock(mptcp);
					struct tcp_sock *tp = tcp_sk(sk);

					if (event->family == AF_INET &&
					    (sk->sk_family == AF_INET ||
					     mptcp_v6_is_v4_mapped(sk)) &&
					     inet_sk(sk)->inet_saddr == event->addr.in.s_addr) {
						if (event->low_prio != tp->mptcp->low_prio) {
							tp->mptcp->send_mp_prio = 1;
							tp->mptcp->low_prio = event->low_prio;

							tcp_send_ack(sk);
						}
					}

					/* Only the subflows bound to the
					 * modified address change priority,
					 * exactly as in the IPv4 case above.
					 */
					if (event->family == AF_INET6 &&
					    sk->sk_family == AF_INET6 &&
					    ipv6_addr_equal(&inet6_sk(sk)->saddr, &event->addr.in6)) {
						if (event->low_prio != tp->mptcp->low_prio) {
							tp->mptcp->send_mp_prio = 1;
							tp->mptcp->low_prio = event->low_prio;

							tcp_send_ack(sk);
						}
					}
				}
			}
next:
			bh_unlock_sock(meta_sk);
			sock_put(meta_sk);
		}
		rcu_read_unlock_bh();
	}
	goto next_event;
}

/* Return the queued event that has the same family, interface index and
 * address as @event, or NULL if there is none.
 */
static struct mptcp_addr_event *lookup_similar_event(const struct net *net,
						     const struct mptcp_addr_event *event)
{
	struct mptcp_addr_event *eventq;
	struct mptcp_fm_ns *fm_ns = fm_get_ns(net);

	list_for_each_entry(eventq, &fm_ns->events, list) {
		if (eventq->family != event->family)
			continue;
		if (eventq->if_idx != event->if_idx)
			continue;
		if (event->family == AF_INET) {
			if (eventq->addr.in.s_addr == event->addr.in.s_addr)
				return eventq;
		} else {
			if (ipv6_addr_equal(&eventq->addr.in6, &event->addr.in6))
				return eventq;
		}
	}
	return NULL;
}

/* Queue a copy of @event for the address worker.
 *
 * A pending event for the same address is merged: a DEL replaces it, an
 * ADD or MOD only updates its code and priority. The worker is scheduled
 * (500 ms) when it is not already pending.
 *
 * We already hold the net-namespace MPTCP-lock
 */
static void add_pm_event(struct net *net, const struct mptcp_addr_event *event)
{
	struct mptcp_addr_event *eventq = lookup_similar_event(net, event);
	struct mptcp_fm_ns *fm_ns = fm_get_ns(net);

	if (eventq) {
		switch (event->code) {
		case MPTCP_EVENT_DEL:
			mptcp_debug("%s del old_code %u\n", __func__, eventq->code);
			list_del(&eventq->list);
			kfree(eventq);
			break;
		case MPTCP_EVENT_ADD:
			mptcp_debug("%s add old_code %u\n", __func__, eventq->code);
			eventq->low_prio = event->low_prio;
			eventq->code = MPTCP_EVENT_ADD;
			return;
		case MPTCP_EVENT_MOD:
			mptcp_debug("%s mod old_code %u\n", __func__, eventq->code);
			eventq->low_prio = event->low_prio;
			eventq->code = MPTCP_EVENT_MOD;
			return;
		}
	}

	/* OK, we have to add the new address to the wait queue */
	eventq = kmemdup(event, sizeof(struct mptcp_addr_event), GFP_ATOMIC);
	if (!eventq)
		return;

	list_add_tail(&eventq->list, &fm_ns->events);

	/* Create work-queue */
	if (!delayed_work_pending(&fm_ns->address_worker))
		queue_delayed_work(mptcp_wq, &fm_ns->address_worker,
				   msecs_to_jiffies(500));
}

/* Translate a notifier @event on the IPv4 address @ifa into an
 * MPTCP_EVENT_* and queue it. Loopback addresses and addresses with a
 * scope above RT_SCOPE_LINK are ignored.
 */
static void addr4_event_handler(const struct in_ifaddr *ifa, unsigned long event,
				struct net *net)
{
	const struct net_device *netdev = ifa->ifa_dev->dev;
	struct mptcp_fm_ns *fm_ns = fm_get_ns(net);
	struct mptcp_addr_event mpevent;

	if (ifa->ifa_scope > RT_SCOPE_LINK ||
	    ipv4_is_loopback(ifa->ifa_local))
		return;

	spin_lock_bh(&fm_ns->local_lock);

	mpevent.family = AF_INET;
	mpevent.addr.in.s_addr = ifa->ifa_local;
	mpevent.low_prio = (netdev->flags & IFF_MPBACKUP) ? 1 : 0;
	mpevent.if_idx = netdev->ifindex;

	/* The callers only pass NETDEV_UP, NETDEV_DOWN or NETDEV_CHANGE,
	 * so one of the branches below always sets the code.
	 */
	if (event == NETDEV_DOWN || !netif_running(netdev) ||
	    (netdev->flags & IFF_NOMULTIPATH) || !(netdev->flags & IFF_UP))
		mpevent.code = MPTCP_EVENT_DEL;
	else if (event == NETDEV_UP)
		mpevent.code = MPTCP_EVENT_ADD;
	else if (event == NETDEV_CHANGE)
		mpevent.code = MPTCP_EVENT_MOD;

	mptcp_debug("%s created event for %pI4, code %u prio %u idx %u\n", __func__,
		    &ifa->ifa_local, mpevent.code, mpevent.low_prio, mpevent.if_idx);
	add_pm_event(net, &mpevent);

	spin_unlock_bh(&fm_ns->local_lock);
}

/* React on IPv4-addr add/rem-events */
static int mptcp_pm_inetaddr_event(struct notifier_block *this,
				   unsigned long event, void *ptr)
{
	const struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
	struct net *net = dev_net(ifa->ifa_dev->dev);

	if (!(event == NETDEV_UP || event == NETDEV_DOWN ||
	      event == NETDEV_CHANGE))
		return NOTIFY_DONE;

	addr4_event_handler(ifa, event, net);

	return NOTIFY_DONE;
}

static struct notifier_block mptcp_pm_inetaddr_notifier = {
		.notifier_call = mptcp_pm_inetaddr_event,
};

#if IS_ENABLED(CONFIG_IPV6)

static int inet6_addr_event(struct notifier_block *this,
			    unsigned long event, void *ptr);

/* Translate a notifier @event on the IPv6 address @ifa into an
 * MPTCP_EVENT_* and queue it. Unspecified, loopback and link-local
 * addresses, and addresses with a scope above RT_SCOPE_LINK, are ignored.
 */
static void addr6_event_handler(const struct inet6_ifaddr *ifa, unsigned long event,
				struct net *net)
{
	const struct net_device *netdev = ifa->idev->dev;
	int addr_type = ipv6_addr_type(&ifa->addr);
	struct mptcp_fm_ns *fm_ns = fm_get_ns(net);
	struct mptcp_addr_event mpevent;

	if (ifa->scope > RT_SCOPE_LINK ||
	    addr_type == IPV6_ADDR_ANY ||
	    (addr_type & IPV6_ADDR_LOOPBACK) ||
	    (addr_type & IPV6_ADDR_LINKLOCAL))
		return;

	spin_lock_bh(&fm_ns->local_lock);

	mpevent.family = AF_INET6;
	mpevent.addr.in6 = ifa->addr;
	mpevent.low_prio = (netdev->flags & IFF_MPBACKUP) ? 1 : 0;
	mpevent.if_idx = netdev->ifindex;

	/* The callers only pass NETDEV_UP, NETDEV_DOWN or NETDEV_CHANGE,
	 * so one of the branches below always sets the code.
	 */
	if (event == NETDEV_DOWN || !netif_running(netdev) ||
	    (netdev->flags & IFF_NOMULTIPATH) || !(netdev->flags & IFF_UP))
		mpevent.code = MPTCP_EVENT_DEL;
	else if (event == NETDEV_UP)
		mpevent.code = MPTCP_EVENT_ADD;
	else if (event == NETDEV_CHANGE)
		mpevent.code = MPTCP_EVENT_MOD;

	mptcp_debug("%s created event for %pI6, code %u prio %u idx %u\n", __func__,
		    &ifa->addr, mpevent.code, mpevent.low_prio, mpevent.if_idx);
	add_pm_event(net, &mpevent);

	spin_unlock_bh(&fm_ns->local_lock);
}

/* React on IPv6-addr add/rem-events */
static int inet6_addr_event(struct notifier_block *this, unsigned long event,
			    void *ptr)
{
	struct inet6_ifaddr *ifa6 = (struct inet6_ifaddr *)ptr;
	struct net *net = dev_net(ifa6->idev->dev);

	if (!(event == NETDEV_UP || event == NETDEV_DOWN ||
	      event == NETDEV_CHANGE))
		return NOTIFY_DONE;

	addr6_event_handler(ifa6, event, net);

	return NOTIFY_DONE;
}

static struct notifier_block inet6_addr_notifier = {
		.notifier_call = inet6_addr_event,
};

#endif

/* React on ifup/down-events: replay the device event for every IPv4 and
 * IPv6 address configured on the device.
 */
static int netdev_event(struct notifier_block *this, unsigned long event,
			void *ptr)
{
	const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct in_device *in_dev;
#if IS_ENABLED(CONFIG_IPV6)
	struct inet6_dev *in6_dev;
#endif

	if (!(event == NETDEV_UP || event == NETDEV_DOWN ||
	      event == NETDEV_CHANGE))
		return NOTIFY_DONE;

	rcu_read_lock();
	in_dev = __in_dev_get_rtnl(dev);

	if (in_dev) {
		for_ifa(in_dev) {
			mptcp_pm_inetaddr_event(NULL, event, ifa);
		} endfor_ifa(in_dev);
	}

#if IS_ENABLED(CONFIG_IPV6)
	in6_dev = __in6_dev_get(dev);

	if (in6_dev) {
		struct inet6_ifaddr *ifa6;

		list_for_each_entry(ifa6, &in6_dev->addr_list, if_list)
			inet6_addr_event(NULL, event, ifa6);
	}
#endif

	rcu_read_unlock();
	return NOTIFY_DONE;
}

static struct notifier_block mptcp_pm_netdev_notifier = {
		.notifier_call = netdev_event,
};

/* Family dispatcher for mptcp_addv4/v6_raddr(); any @family other than
 * AF_INET is treated as IPv6.
 */
static void full_mesh_add_raddr(struct mptcp_cb *mpcb,
				const union inet_addr *addr,
				sa_family_t family, __be16 port, u8 id)
{
	if (family == AF_INET)
		mptcp_addv4_raddr(mpcb, &addr->in, port, id);
	else
		mptcp_addv6_raddr(mpcb, &addr->in6, port, id);
}

/* Set up the fullmesh state of a new connection.
 *
 * Looks up the initial subflow's source address among the local
 * addresses (falling back to the default path manager when it is not
 * found), records the peer address under id 0, initialises both work
 * items and counts one add_addr per other local address to announce.
 */
static void full_mesh_new_session(const struct sock *meta_sk)
{
	struct mptcp_loc_addr *mptcp_local;
	struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
	struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb);
	const struct mptcp_fm_ns *fm_ns = fm_get_ns(sock_net(meta_sk));
	struct tcp_sock *master_tp = tcp_sk(mpcb->master_sk);
	int i, index, if_idx = 0;
	union inet_addr saddr, daddr;
	sa_family_t family = AF_INET;
	bool meta_v4 = meta_sk->sk_family == AF_INET;

	/* Init local variables necessary for the rest */
	if (meta_sk->sk_family == AF_INET || mptcp_v6_is_v4_mapped(meta_sk)) {
		saddr.ip = inet_sk(meta_sk)->inet_saddr;
		daddr.ip = inet_sk(meta_sk)->inet_daddr;
		if_idx = mpcb->master_sk->sk_bound_dev_if;
		family = AF_INET;
#if IS_ENABLED(CONFIG_IPV6)
	} else {
		saddr.in6 = inet6_sk(meta_sk)->saddr;
		daddr.in6 = meta_sk->sk_v6_daddr;
		if_idx = mpcb->master_sk->sk_bound_dev_if;
		family = AF_INET6;
#endif
	}

	if (inet_sk(meta_sk)->transparent)
		if_idx = inet_sk(meta_sk)->rx_dst_ifindex;

	rcu_read_lock_bh();
	mptcp_local = rcu_dereference_bh(fm_ns->local);

	if (inet_sk(meta_sk)->transparent)
		index = mptcp_find_address_transp(mptcp_local, family, if_idx);
	else
		index = mptcp_find_address(mptcp_local, family, &saddr, if_idx);
	if (index < 0)
		goto fallback;

	if (family == AF_INET)
		master_tp->mptcp->low_prio = mptcp_local->locaddr4[index].low_prio;
	else
		master_tp->mptcp->low_prio = mptcp_local->locaddr6[index].low_prio;
	master_tp->mptcp->send_mp_prio = master_tp->mptcp->low_prio;

	full_mesh_add_raddr(mpcb, &daddr, family, 0, 0);
	mptcp_set_init_addr_bit(mpcb, &daddr, family, index);

	/* Initialize workqueue-struct */
	INIT_WORK(&fmp->subflow_work, create_subflow_worker);
	INIT_DELAYED_WORK(&fmp->subflow_retry_work, retry_subflow_worker);
	fmp->mpcb = mpcb;

	if (!meta_v4 && meta_sk->sk_ipv6only)
		goto skip_ipv4;

	/* Look for the address among the local addresses */
	mptcp_for_each_bit_set(mptcp_local->loc4_bits, i) {
		__be32 ifa_address = mptcp_local->locaddr4[i].addr.s_addr;

		/* We do not need to announce the initial subflow's address again */
		if (family == AF_INET &&
		    (!if_idx || mptcp_local->locaddr4[i].if_idx == if_idx) &&
		    saddr.ip == ifa_address)
			continue;

		fmp->add_addr++;
		mpcb->addr_signal = 1;
	}

skip_ipv4:
#if IS_ENABLED(CONFIG_IPV6)
	/* skip IPv6 addresses if meta-socket is IPv4 */
	if (meta_v4)
		goto skip_ipv6;

	mptcp_for_each_bit_set(mptcp_local->loc6_bits, i) {
		const struct in6_addr *ifa6 = &mptcp_local->locaddr6[i].addr;

		/* We do not need to announce the initial subflow's address again */
		if (family == AF_INET6 &&
		    (!if_idx || mptcp_local->locaddr6[i].if_idx == if_idx) &&
		    ipv6_addr_equal(&saddr.in6, ifa6))
			continue;

		fmp->add_addr++;
		mpcb->addr_signal = 1;
	}

skip_ipv6:
#endif

	rcu_read_unlock_bh();

	if (family == AF_INET)
		fmp->announced_addrs_v4 |= (1 << index);
	else
		fmp->announced_addrs_v6 |= (1 << index);

	for (i = fmp->add_addr; i && fmp->add_addr; i--)
		tcp_send_ack(mpcb->master_sk);

	if (master_tp->mptcp->send_mp_prio)
		tcp_send_ack(mpcb->master_sk);

	return;

fallback:
	rcu_read_unlock_bh();
	mptcp_fallback_default(mpcb);
}

/* Schedule create_subflow_worker() for @meta_sk unless the connection is
 * in infinite mapping, server-side, dead, or its master subflow is not
 * yet fully established. The references taken here are released by the
 * worker.
 */
static void full_mesh_create_subflows(struct sock *meta_sk)
{
	struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
	struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb);

	if (mptcp_in_infinite_mapping_weak(mpcb) ||
	    mpcb->server_side || sock_flag(meta_sk, SOCK_DEAD))
		return;

	if (mpcb->master_sk &&
	    !tcp_sk(mpcb->master_sk)->mptcp->fully_established)
		return;

	if (!work_pending(&fmp->subflow_work)) {
		sock_hold(meta_sk);
		refcount_inc(&mpcb->mpcb_refcnt);
		queue_work(mptcp_wq, &fmp->subflow_work);
	}
}

/* Called upon release_sock, if the socket was owned by the user during
 * a path-management event.
 *
 * Re-synchronises the connection with the current local address list:
 * priority changes and new addresses first, then removal of subflows
 * whose source address has disappeared.
 */
static void full_mesh_release_sock(struct sock *meta_sk)
{
	struct mptcp_loc_addr *mptcp_local;
	struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
	struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb);
	const struct mptcp_fm_ns *fm_ns = fm_get_ns(sock_net(meta_sk));
	bool meta_v4 = meta_sk->sk_family == AF_INET;
	struct mptcp_tcp_sock *mptcp;
	struct hlist_node *tmp;
	int i;

	rcu_read_lock_bh();
	mptcp_local = rcu_dereference_bh(fm_ns->local);

	if (!meta_v4 && meta_sk->sk_ipv6only)
		goto skip_ipv4;

	/* First, detect modifications or additions */
	mptcp_for_each_bit_set(mptcp_local->loc4_bits, i) {
		struct in_addr ifa = mptcp_local->locaddr4[i].addr;
		bool found = false;

		mptcp_for_each_sub(mpcb, mptcp) {
			struct sock *sk = mptcp_to_sock(mptcp);
			struct tcp_sock *tp = tcp_sk(sk);

			if (sk->sk_family == AF_INET6 &&
			    !mptcp_v6_is_v4_mapped(sk))
				continue;

			if (inet_sk(sk)->inet_saddr != ifa.s_addr)
				continue;

			found = true;

			if (mptcp_local->locaddr4[i].low_prio != tp->mptcp->low_prio) {
				tp->mptcp->send_mp_prio = 1;
				tp->mptcp->low_prio = mptcp_local->locaddr4[i].low_prio;

				tcp_send_ack(sk);
			}
		}

		if (!found) {
			struct sock *sk;

			fmp->add_addr++;
			mpcb->addr_signal = 1;

			sk = mptcp_select_ack_sock(meta_sk);
			if (sk)
				tcp_send_ack(sk);
			full_mesh_create_subflows(meta_sk);
		}
	}

skip_ipv4:
#if IS_ENABLED(CONFIG_IPV6)
	/* skip IPv6 addresses if meta-socket is IPv4 */
	if (meta_v4)
		goto removal;

	mptcp_for_each_bit_set(mptcp_local->loc6_bits, i) {
		struct in6_addr ifa = mptcp_local->locaddr6[i].addr;
		bool found = false;

		mptcp_for_each_sub(mpcb, mptcp) {
			struct sock *sk = mptcp_to_sock(mptcp);
			struct tcp_sock *tp = tcp_sk(sk);

			if (sk->sk_family == AF_INET ||
			    mptcp_v6_is_v4_mapped(sk))
				continue;

			if (!ipv6_addr_equal(&inet6_sk(sk)->saddr, &ifa))
				continue;

			found = true;

			if (mptcp_local->locaddr6[i].low_prio != tp->mptcp->low_prio) {
				tp->mptcp->send_mp_prio = 1;
				tp->mptcp->low_prio = mptcp_local->locaddr6[i].low_prio;

				tcp_send_ack(sk);
			}
		}

		if (!found) {
			struct sock *sk;

			fmp->add_addr++;
			mpcb->addr_signal = 1;

			sk = mptcp_select_ack_sock(meta_sk);
			if (sk)
				tcp_send_ack(sk);
			full_mesh_create_subflows(meta_sk);
		}
	}

removal:
#endif

	/* Now, detect address-removals */
	mptcp_for_each_sub_safe(mpcb, mptcp, tmp) {
		struct sock *sk = mptcp_to_sock(mptcp);
		bool shall_remove = true;

		if (sk->sk_family == AF_INET || mptcp_v6_is_v4_mapped(sk)) {
			mptcp_for_each_bit_set(mptcp_local->loc4_bits, i) {
				if (inet_sk(sk)->inet_saddr == mptcp_local->locaddr4[i].addr.s_addr) {
					shall_remove = false;
					break;
				}
			}
		} else {
			mptcp_for_each_bit_set(mptcp_local->loc6_bits, i) {
				if (ipv6_addr_equal(&inet6_sk(sk)->saddr, &mptcp_local->locaddr6[i].addr)) {
					shall_remove = false;
					break;
				}
			}
		}

		if (shall_remove) {
			/* Reinject, so that pf = 1 and so we
			 * won't select this one as the
			 * ack-sock.
			 */
			mptcp_reinject_data(sk, 0);

			announce_remove_addr(tcp_sk(sk)->mptcp->loc_id,
					     meta_sk);

			mptcp_sub_force_close(sk);
		}
	}

	/* Just call it optimistically. It actually cannot do any harm */
	update_addr_bitfields(meta_sk, mptcp_local);

	rcu_read_unlock_bh();
}

/* Look up the local address-id of @addr.
 *
 * Returns the id and stores the address's backup flag in @low_prio, or
 * returns -1 (leaving @low_prio untouched) when the address is not in
 * the namespace's local address list.
 */
static int full_mesh_get_local_id(const struct sock *meta_sk,
				  sa_family_t family, union inet_addr *addr,
				  bool *low_prio)
{
	struct mptcp_loc_addr *mptcp_local;
	const struct mptcp_fm_ns *fm_ns = fm_get_ns(sock_net(meta_sk));
	int index, id = -1;

	/* Handle the backup-flows */
	rcu_read_lock_bh();
	mptcp_local = rcu_dereference_bh(fm_ns->local);

	index = mptcp_find_address(mptcp_local, family, addr, 0);

	if (index != -1) {
		if (family == AF_INET) {
			id = mptcp_local->locaddr4[index].loc4_id;
			*low_prio = mptcp_local->locaddr4[index].low_prio;
		} else {
			id = mptcp_local->locaddr6[index].loc6_id;
			*low_prio = mptcp_local->locaddr6[index].low_prio;
		}
	}

	rcu_read_unlock_bh();

	return id;
}

static void full_mesh_addr_signal(struct sock *sk, unsigned *size,
				  struct tcp_out_options *opts,
				  struct sk_buff *skb)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct mptcp_cb *mpcb = tp->mpcb;
	struct sock *meta_sk = mpcb->meta_sk;
	struct fullmesh_priv *fmp = fullmesh_get_priv(mpcb);
	struct mptcp_loc_addr
*mptcp_local; struct mptcp_fm_ns *fm_ns = fm_get_ns(sock_net(sk)); int remove_addr_len; u8 unannouncedv4 = 0, unannouncedv6 = 0; bool meta_v4 = meta_sk->sk_family == AF_INET; mpcb->addr_signal = 0; if (likely(!fmp->add_addr)) goto remove_addr; rcu_read_lock_bh(); mptcp_local = rcu_dereference(fm_ns->local); if (!meta_v4 && meta_sk->sk_ipv6only) goto skip_ipv4; /* IPv4 */ unannouncedv4 = (~fmp->announced_addrs_v4) & mptcp_local->loc4_bits; if (unannouncedv4 && ((mpcb->mptcp_ver == MPTCP_VERSION_0 && MAX_TCP_OPTION_SPACE - *size >= MPTCP_SUB_LEN_ADD_ADDR4_ALIGN) || (mpcb->mptcp_ver >= MPTCP_VERSION_1 && MAX_TCP_OPTION_SPACE - *size >= MPTCP_SUB_LEN_ADD_ADDR4_ALIGN_VER1))) { int ind = mptcp_find_free_index(~unannouncedv4); opts->options |= OPTION_MPTCP; opts->mptcp_options |= OPTION_ADD_ADDR; opts->add_addr4.addr_id = mptcp_local->locaddr4[ind].loc4_id; opts->add_addr4.addr = mptcp_local->locaddr4[ind].addr; opts->add_addr_v4 = 1; if (mpcb->mptcp_ver >= MPTCP_VERSION_1) { u8 mptcp_hash_mac[20]; u8 no_key[8]; *(u64 *)no_key = 0; mptcp_hmac_sha1((u8 *)&mpcb->mptcp_loc_key, (u8 *)no_key, (u32 *)mptcp_hash_mac, 2, 1, (u8 *)&mptcp_local->locaddr4[ind].loc4_id, 4, (u8 *)&opts->add_addr4.addr.s_addr); opts->add_addr4.trunc_mac = *(u64 *)mptcp_hash_mac; } if (skb) { fmp->announced_addrs_v4 |= (1 << ind); fmp->add_addr--; } if (mpcb->mptcp_ver < MPTCP_VERSION_1) *size += MPTCP_SUB_LEN_ADD_ADDR4_ALIGN; if (mpcb->mptcp_ver >= MPTCP_VERSION_1) *size += MPTCP_SUB_LEN_ADD_ADDR4_ALIGN_VER1; goto skip_ipv6; } if (meta_v4) goto skip_ipv6; skip_ipv4: /* IPv6 */ unannouncedv6 = (~fmp->announced_addrs_v6) & mptcp_local->loc6_bits; if (unannouncedv6 && ((mpcb->mptcp_ver == MPTCP_VERSION_0 && MAX_TCP_OPTION_SPACE - *size >= MPTCP_SUB_LEN_ADD_ADDR6_ALIGN) || (mpcb->mptcp_ver >= MPTCP_VERSION_1 && MAX_TCP_OPTION_SPACE - *size >= MPTCP_SUB_LEN_ADD_ADDR6_ALIGN_VER1))) { int ind = mptcp_find_free_index(~unannouncedv6); opts->options |= OPTION_MPTCP; opts->mptcp_options |= OPTION_ADD_ADDR; 
opts->add_addr6.addr_id = mptcp_local->locaddr6[ind].loc6_id; opts->add_addr6.addr = mptcp_local->locaddr6[ind].addr; opts->add_addr_v6 = 1; if (mpcb->mptcp_ver >= MPTCP_VERSION_1) { u8 mptcp_hash_mac[20]; u8 no_key[8]; *(u64 *)no_key = 0; mptcp_hmac_sha1((u8 *)&mpcb->mptcp_loc_key, (u8 *)no_key, (u32 *)mptcp_hash_mac, 2, 1, (u8 *)&mptcp_local->locaddr6[ind].loc6_id, 16, (u8 *)&opts->add_addr6.addr.s6_addr); opts->add_addr6.trunc_mac = *(u64 *)mptcp_hash_mac; } if (skb) { fmp->announced_addrs_v6 |= (1 << ind); fmp->add_addr--; } if (mpcb->mptcp_ver < MPTCP_VERSION_1) *size += MPTCP_SUB_LEN_ADD_ADDR6_ALIGN; if (mpcb->mptcp_ver >= MPTCP_VERSION_1) *size += MPTCP_SUB_LEN_ADD_ADDR6_ALIGN_VER1; } skip_ipv6: rcu_read_unlock_bh(); if (!unannouncedv4 && !unannouncedv6 && skb) fmp->add_addr--; remove_addr: if (likely(!fmp->remove_addrs)) goto exit; remove_addr_len = mptcp_sub_len_remove_addr_align(fmp->remove_addrs); if (MAX_TCP_OPTION_SPACE - *size < remove_addr_len) goto exit; opts->options |= OPTION_MPTCP; opts->mptcp_options |= OPTION_REMOVE_ADDR; opts->remove_addrs = fmp->remove_addrs; *size += remove_addr_len; if (skb) fmp->remove_addrs = 0; exit: mpcb->addr_signal = !!(fmp->add_addr || fmp->remove_addrs); } static void full_mesh_rem_raddr(struct mptcp_cb *mpcb, u8 rem_id) { mptcp_v4_rem_raddress(mpcb, rem_id); mptcp_v6_rem_raddress(mpcb, rem_id); } static void full_mesh_delete_subflow(struct sock *sk) { struct fullmesh_priv *fmp = fullmesh_get_priv(tcp_sk(sk)->mpcb); struct mptcp_fm_ns *fm_ns = fm_get_ns(sock_net(sk)); struct sock *meta_sk = mptcp_meta_sk(sk); struct mptcp_loc_addr *mptcp_local; int index, i; if (!create_on_err) return; if (!mptcp_can_new_subflow(meta_sk)) return; rcu_read_lock_bh(); mptcp_local = rcu_dereference_bh(fm_ns->local); if (sk->sk_family == AF_INET || mptcp_v6_is_v4_mapped(sk)) { union inet_addr saddr; saddr.ip = inet_sk(sk)->inet_saddr; index = mptcp_find_address(mptcp_local, AF_INET, &saddr, sk->sk_bound_dev_if); if (index < 0) goto out; 
mptcp_for_each_bit_set(fmp->rem4_bits, i) { struct fullmesh_rem4 *rem4 = &fmp->remaddr4[i]; if (rem4->addr.s_addr != sk->sk_daddr) continue; if (rem4->port && rem4->port != inet_sk(sk)->inet_dport) continue; rem4->bitfield &= ~(1 << index); } #if IS_ENABLED(CONFIG_IPV6) } else { union inet_addr saddr; saddr.in6 = inet6_sk(sk)->saddr; index = mptcp_find_address(mptcp_local, AF_INET6, &saddr, sk->sk_bound_dev_if); if (index < 0) goto out; mptcp_for_each_bit_set(fmp->rem6_bits, i) { struct fullmesh_rem6 *rem6 = &fmp->remaddr6[i]; if (!ipv6_addr_equal(&rem6->addr, &sk->sk_v6_daddr)) continue; if (rem6->port && rem6->port != inet_sk(sk)->inet_dport) continue; rem6->bitfield &= ~(1 << index); } #endif } out: rcu_read_unlock_bh(); /* re-schedule the creation of failed subflows */ if (tcp_sk(sk)->mptcp->sk_err == ETIMEDOUT || sk->sk_err == ETIMEDOUT) full_mesh_create_subflows(meta_sk); } /* Output /proc/net/mptcp_fullmesh */ static int mptcp_fm_seq_show(struct seq_file *seq, void *v) { const struct net *net = seq->private; struct mptcp_loc_addr *mptcp_local; const struct mptcp_fm_ns *fm_ns = fm_get_ns(net); int i; seq_printf(seq, "Index, Address-ID, Backup, IP-address, if-idx\n"); rcu_read_lock_bh(); mptcp_local = rcu_dereference(fm_ns->local); seq_printf(seq, "IPv4, next v4-index: %u\n", mptcp_local->next_v4_index); mptcp_for_each_bit_set(mptcp_local->loc4_bits, i) { struct mptcp_loc4 *loc4 = &mptcp_local->locaddr4[i]; seq_printf(seq, "%u, %u, %u, %pI4 %u\n", i, loc4->loc4_id, loc4->low_prio, &loc4->addr, loc4->if_idx); } seq_printf(seq, "IPv6, next v6-index: %u\n", mptcp_local->next_v6_index); mptcp_for_each_bit_set(mptcp_local->loc6_bits, i) { struct mptcp_loc6 *loc6 = &mptcp_local->locaddr6[i]; seq_printf(seq, "%u, %u, %u, %pI6 %u\n", i, loc6->loc6_id, loc6->low_prio, &loc6->addr, loc6->if_idx); } rcu_read_unlock_bh(); return 0; } static int mptcp_fm_init_net(struct net *net) { struct mptcp_loc_addr *mptcp_local; struct mptcp_fm_ns *fm_ns; int err = 0; fm_ns = 
kzalloc(sizeof(*fm_ns), GFP_KERNEL); if (!fm_ns) return -ENOBUFS; mptcp_local = kzalloc(sizeof(*mptcp_local), GFP_KERNEL); if (!mptcp_local) { err = -ENOBUFS; goto err_mptcp_local; } if (!proc_create_net_single("mptcp_fullmesh", S_IRUGO, net->proc_net, mptcp_fm_seq_show, NULL)) { err = -ENOMEM; goto err_seq_fops; } mptcp_local->next_v4_index = 1; rcu_assign_pointer(fm_ns->local, mptcp_local); INIT_DELAYED_WORK(&fm_ns->address_worker, mptcp_address_worker); INIT_LIST_HEAD(&fm_ns->events); spin_lock_init(&fm_ns->local_lock); fm_ns->net = net; net->mptcp.path_managers[MPTCP_PM_FULLMESH] = fm_ns; return 0; err_seq_fops: kfree(mptcp_local); err_mptcp_local: kfree(fm_ns); return err; } static void mptcp_fm_exit_net(struct net *net) { struct mptcp_addr_event *eventq, *tmp; struct mptcp_fm_ns *fm_ns; struct mptcp_loc_addr *mptcp_local; fm_ns = fm_get_ns(net); cancel_delayed_work_sync(&fm_ns->address_worker); rcu_read_lock_bh(); mptcp_local = rcu_dereference_bh(fm_ns->local); kfree_rcu(mptcp_local, rcu); spin_lock(&fm_ns->local_lock); list_for_each_entry_safe(eventq, tmp, &fm_ns->events, list) { list_del(&eventq->list); kfree(eventq); } spin_unlock(&fm_ns->local_lock); rcu_read_unlock_bh(); remove_proc_entry("mptcp_fullmesh", net->proc_net); kfree(fm_ns); } static struct pernet_operations full_mesh_net_ops = { .init = mptcp_fm_init_net, .exit = mptcp_fm_exit_net, }; static struct mptcp_pm_ops full_mesh __read_mostly = { .new_session = full_mesh_new_session, .release_sock = full_mesh_release_sock, .fully_established = full_mesh_create_subflows, .new_remote_address = full_mesh_create_subflows, .get_local_id = full_mesh_get_local_id, .addr_signal = full_mesh_addr_signal, .add_raddr = full_mesh_add_raddr, .rem_raddr = full_mesh_rem_raddr, .delete_subflow = full_mesh_delete_subflow, .name = "fullmesh", .owner = THIS_MODULE, }; /* General initialization of MPTCP_PM */ static int __init full_mesh_register(void) { int ret; BUILD_BUG_ON(sizeof(struct fullmesh_priv) > MPTCP_PM_SIZE); 
ret = register_pernet_subsys(&full_mesh_net_ops); if (ret) goto out; ret = register_inetaddr_notifier(&mptcp_pm_inetaddr_notifier); if (ret) goto err_reg_inetaddr; ret = register_netdevice_notifier(&mptcp_pm_netdev_notifier); if (ret) goto err_reg_netdev; #if IS_ENABLED(CONFIG_IPV6) ret = register_inet6addr_notifier(&inet6_addr_notifier); if (ret) goto err_reg_inet6addr; #endif ret = mptcp_register_path_manager(&full_mesh); if (ret) goto err_reg_pm; out: return ret; err_reg_pm: #if IS_ENABLED(CONFIG_IPV6) unregister_inet6addr_notifier(&inet6_addr_notifier); err_reg_inet6addr: #endif unregister_netdevice_notifier(&mptcp_pm_netdev_notifier); err_reg_netdev: unregister_inetaddr_notifier(&mptcp_pm_inetaddr_notifier); err_reg_inetaddr: unregister_pernet_subsys(&full_mesh_net_ops); goto out; } static void full_mesh_unregister(void) { #if IS_ENABLED(CONFIG_IPV6) unregister_inet6addr_notifier(&inet6_addr_notifier); #endif unregister_netdevice_notifier(&mptcp_pm_netdev_notifier); unregister_inetaddr_notifier(&mptcp_pm_inetaddr_notifier); unregister_pernet_subsys(&full_mesh_net_ops); mptcp_unregister_path_manager(&full_mesh); } module_init(full_mesh_register); module_exit(full_mesh_unregister); MODULE_AUTHOR("Christoph Paasch"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Full-Mesh MPTCP"); MODULE_VERSION("0.88"); #endif