MAP-E FMR (forwarding mapping rule) support, offload statistics and skb_iif
handling for net/ipv6/ip6_tunnel.c (linux-5.4.213).

Review notes for this revision of the patch:
 * __ip6_tnl_rcv() no longer releases the RCU read lock on the spoof-check
   drop path; the lock is taken and released by the caller.
 * ip4ip6_fmr_calc() checks that the headers it reads are fully inside the
   packet data and avoids shifts by the full width of the operand.
 * FMRs received via netlink are validated; malformed rules are skipped.
 * IFLA_IPTUN_DRAFT03 gets a policy entry so its length is checked.

--- zzzz-none-000/linux-5.4.213/net/ipv6/ip6_tunnel.c	2022-09-15 10:04:56.000000000 +0000
+++ alder-5690pro-762/linux-5.4.213/net/ipv6/ip6_tunnel.c	2024-08-14 09:02:13.000000000 +0000
@@ -11,6 +11,9 @@
  *      linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c
  *
  *      RFC 2473
+ *
+ * Changes:
+ * Steven Barth : MAP-E FMR support
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -67,9 +70,9 @@
 module_param(log_ecn_error, bool, 0644);
 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
 
-static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
+static u32 HASH(const struct in6_addr *addr)
 {
-	u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
+	u32 hash = ipv6_addr_hash(addr);
 
 	return hash_32(hash, IP6_TUNNEL_HASH_SIZE_SHIFT);
 }
@@ -119,6 +122,24 @@
 	return &dev->stats;
 }
 
+/*
+ * Update offload stats
+ */
+void ip6_update_offload_stats(struct net_device *dev, void *ptr)
+{
+	struct pcpu_sw_netstats *tstats = per_cpu_ptr(dev->tstats, 0);
+	const struct pcpu_sw_netstats *offload_stats =
+					(struct pcpu_sw_netstats *)ptr;
+
+	u64_stats_update_begin(&tstats->syncp);
+	tstats->tx_packets += offload_stats->tx_packets;
+	tstats->tx_bytes += offload_stats->tx_bytes;
+	tstats->rx_packets += offload_stats->rx_packets;
+	tstats->rx_bytes += offload_stats->rx_bytes;
+	u64_stats_update_end(&tstats->syncp);
+}
+EXPORT_SYMBOL(ip6_update_offload_stats);
+
 /**
  * ip6_tnl_lookup - fetch tunnel matching the end-point addresses
  *   @remote: the address of the tunnel exit-point
@@ -136,20 +157,29 @@
 static struct ip6_tnl *
 ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local)
 {
-	unsigned int hash = HASH(remote, local);
+	unsigned int hash = HASH(local);
 	struct ip6_tnl *t;
 	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
 	struct in6_addr any;
+	struct __ip6_tnl_fmr *fmr;
 
 	for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
-		if (ipv6_addr_equal(local, &t->parms.laddr) &&
-		    ipv6_addr_equal(remote, &t->parms.raddr) &&
-		    (t->dev->flags & IFF_UP))
+		if (!ipv6_addr_equal(local, &t->parms.laddr) ||
+		    !(t->dev->flags & IFF_UP))
+			continue;
+
+		if (ipv6_addr_equal(remote, &t->parms.raddr))
 			return t;
+
+		for (fmr = t->parms.fmrs; fmr; fmr = fmr->next) {
+			if (ipv6_prefix_equal(remote, &fmr->ip6_prefix,
+					fmr->ip6_prefix_len))
+				return t;
+		}
 	}
 
 	memset(&any, 0, sizeof(any));
-	hash = HASH(&any, local);
+	hash = HASH(local);
 	for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
 		if (ipv6_addr_equal(local, &t->parms.laddr) &&
 		    ipv6_addr_any(&t->parms.raddr) &&
@@ -157,7 +187,7 @@
 			return t;
 	}
 
-	hash = HASH(remote, &any);
+	hash = HASH(&any);
 	for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
 		if (ipv6_addr_equal(remote, &t->parms.raddr) &&
 		    ipv6_addr_any(&t->parms.laddr) &&
@@ -197,7 +227,7 @@
 
 	if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
 		prio = 1;
-		h = HASH(remote, local);
+		h = HASH(local);
 	}
 	return &ip6n->tnls[prio][h];
 }
@@ -377,6 +407,12 @@
 	struct net *net = t->net;
 	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
 
+	while (t->parms.fmrs) {
+		struct __ip6_tnl_fmr *next = t->parms.fmrs->next;
+		kfree(t->parms.fmrs);
+		t->parms.fmrs = next;
+	}
+
 	if (dev == ip6n->fb_tnl_dev)
 		RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
 	else
@@ -766,6 +802,138 @@
 }
 EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl);
 
+/**
+ * ip4ip6_fmr_calc - calculate target / source IPv6-address based on FMR
+ *   @dest: destination IPv6 address buffer
+ *   @iph: IPv4 header of the encapsulated packet
+ *   @end: first byte past the readable packet data
+ *   @fmr: MAP FMR
+ *   @xmit: Calculate for xmit or rcv
+ *   @draft03: use the draft-03 address layout instead of the final one
+ *
+ *   @dest is left untouched if the packet is too short or carries no
+ *   port / ICMP identifier although the rule needs a PSID.
+ **/
+static void ip4ip6_fmr_calc(struct in6_addr *dest,
+		const struct iphdr *iph, const uint8_t *end,
+		const struct __ip6_tnl_fmr *fmr, bool xmit, bool draft03)
+{
+	int psidlen = fmr->ea_len - (32 - fmr->ip4_prefix_len);
+	u8 *portp = NULL;
+	bool use_dest_addr;
+	const struct iphdr *dsth = iph;
+
+	/* the fixed IPv4 header must be readable before it is inspected */
+	if ((const u8 *)&iph[1] > end)
+		return;
+
+	/* find significant IP header */
+	if (iph->protocol == IPPROTO_ICMP) {
+		struct icmphdr *ih = (struct icmphdr*)(((u8*)dsth) + dsth->ihl * 4);
+		if (ih && ((u8*)&ih[1]) <= end && (
+			ih->type == ICMP_DEST_UNREACH ||
+			ih->type == ICMP_SOURCE_QUENCH ||
+			ih->type == ICMP_TIME_EXCEEDED ||
+			ih->type == ICMP_PARAMETERPROB ||
+			ih->type == ICMP_REDIRECT))
+				dsth = (const struct iphdr*)&ih[1];
+	}
+
+	/* in xmit-path use dest port by default and source port only if
+		this is an ICMP reply to something else; vice versa in rcv-path */
+	use_dest_addr = (xmit && dsth == iph) || (!xmit && dsth != iph);
+
+	/* get dst port */
+	if (((u8 *)&dsth[1]) <= end && (
+		dsth->protocol == IPPROTO_UDP ||
+		dsth->protocol == IPPROTO_TCP ||
+		dsth->protocol == IPPROTO_SCTP ||
+		dsth->protocol == IPPROTO_DCCP)) {
+			/* for UDP, TCP, SCTP and DCCP source and dest port
+			follow IPv4 header directly */
+			portp = ((u8*)dsth) + dsth->ihl * 4;
+
+			if (use_dest_addr)
+				portp += sizeof(u16);
+	} else if (iph->protocol == IPPROTO_ICMP) {
+		struct icmphdr *ih = (struct icmphdr*)(((u8*)dsth) + dsth->ihl * 4);
+
+		/* use icmp identifier as port; the whole ICMP header must be readable */
+		if (((u8 *)&ih[1]) <= end && (
+		    (use_dest_addr && (
+		    ih->type == ICMP_ECHOREPLY ||
+			ih->type == ICMP_TIMESTAMPREPLY ||
+			ih->type == ICMP_INFO_REPLY ||
+			ih->type == ICMP_ADDRESSREPLY)) ||
+			(!use_dest_addr && (
+			ih->type == ICMP_ECHO ||
+			ih->type == ICMP_TIMESTAMP ||
+			ih->type == ICMP_INFO_REQUEST ||
+			ih->type == ICMP_ADDRESS)
+			)))
+				portp = (u8*)&ih->un.echo.id;
+	}
+
+	if ((portp && &portp[2] <= end) || psidlen == 0) {
+		int frombyte = fmr->ip6_prefix_len / 8;
+		int fromrem = fmr->ip6_prefix_len % 8;
+		int bytes = sizeof(struct in6_addr) - frombyte;
+		const u32 *addr = (use_dest_addr) ? &dsth->daddr : &dsth->saddr;
+		/* a /32 IPv4 prefix contributes no address bits; shifting a
+		 * u64 by 64 would be undefined */
+		u64 eabits = fmr->ip4_prefix_len < 32 ?
+			((u64)ntohl(*addr)) << (32 + fmr->ip4_prefix_len) : 0;
+		u64 t = 0;
+
+		/* extract PSID from port and add it to eabits */
+		u16 psidbits = 0;
+		if (psidlen > 0) {
+			psidbits = ((u16)portp[0]) << 8 | ((u16)portp[1]);
+			psidbits >>= 16 - psidlen - fmr->offset;
+			psidbits = (u16)(psidbits << (16 - psidlen));
+			eabits |= ((u64)psidbits) << (48 - (fmr->ea_len - psidlen));
+		}
+
+		/* rewrite destination address */
+		*dest = fmr->ip6_prefix;
+		memcpy(&dest->s6_addr[10], addr, sizeof(*addr));
+		/* psidbits is 0 unless psidlen > 0; skip the oversized shift */
+		dest->s6_addr16[7] = htons(psidlen > 0 ?
+					   psidbits >> (16 - psidlen) : 0);
+
+		if (bytes > sizeof(u64))
+			bytes = sizeof(u64);
+
+		/* insert eabits */
+		memcpy(&t, &dest->s6_addr[frombyte], bytes);
+		t = be64_to_cpu(t);
+		if (fmr->ea_len)	/* an empty mask would shift by up to 64 */
+			t &= ~(((((u64)1) << fmr->ea_len) - 1)
+				<< (64 - fmr->ea_len - fromrem));
+		t = cpu_to_be64(t | (eabits >> fromrem));
+		memcpy(&dest->s6_addr[frombyte], &t, bytes);
+		if (draft03) {
+			/**
+			 * Draft03 IPv6 address format
+			 * +--+---+---+---+---+---+---+---+---+
+			 * |PL|   8  16  24  32  40  48  56   |
+			 * +--+---+---+---+---+---+---+---+---+
+			 * |64| u | IPv4 address  |PSID   |0  |
+			 * +--+---+---+---+---+---+---+---+---+
+			 * Final specification IPv6 address format
+			 * +--+---+---+---+---+---+---+---+---+
+			 * |PL|   8  16  24  32  40  48  56   |
+			 * +--+---+---+---+---+---+---+---+---+
+			 * |64| 0 |   IPv4 address    |PSID   |
+			 * +--+---+---+---+---+---+---+---+---+
+			 * We need to move the last six bytes one byte forward
+			 */
+			memmove(&dest->s6_addr[9], &dest->s6_addr[10], 6);
+			dest->s6_addr[15] = 0;
+		}
+	}
+}
+
 static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
 			 const struct tnl_ptk_info *tpi,
 			 struct metadata_dst *tun_dst,
@@ -818,6 +986,27 @@
 	skb_reset_network_header(skb);
 	memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
 
+	if (tpi->proto == htons(ETH_P_IP) && tunnel->parms.fmrs &&
+		!ipv6_addr_equal(&ipv6h->saddr, &tunnel->parms.raddr)) {
+			/* Packet didn't come from BR, so lookup FMR */
+			struct __ip6_tnl_fmr *fmr;
+			struct in6_addr expected = tunnel->parms.raddr;
+			for (fmr = tunnel->parms.fmrs; fmr; fmr = fmr->next)
+				if (ipv6_prefix_equal(&ipv6h->saddr,
+					&fmr->ip6_prefix, fmr->ip6_prefix_len))
+						break;
+
+			/* Check that IPv6 matches IPv4 source to prevent spoofing */
+			if (fmr)
+				ip4ip6_fmr_calc(&expected, ip_hdr(skb),
+						skb_tail_pointer(skb), fmr, false,
+						tunnel->parms.draft03);
+
+			/* the RCU read lock belongs to the caller: just drop */
+			if (!ipv6_addr_equal(&ipv6h->saddr, &expected))
+				goto drop;
+	}
+
 	__skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
 
 	err = dscp_ecn_decapsulate(tunnel, ipv6h, skb);
@@ -844,6 +1033,9 @@
 	if (tun_dst)
 		skb_dst_set(skb, (struct dst_entry *)tun_dst);
 
+	/* Reset the skb_iif to Tunnels interface index */
+	skb->skb_iif = tunnel->dev->ifindex;
+
 	gro_cells_receive(&tunnel->gro_cells, skb);
 	return 0;
 
@@ -920,7 +1112,6 @@
 	rcu_read_unlock();
 
 	return ret;
-
 drop:
 	rcu_read_unlock();
 	kfree_skb(skb);
@@ -958,6 +1149,7 @@
 	opt->ops.opt_nflen = 8;
 }
 
+
 /**
  * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
  *   @t: the outgoing tunnel device
@@ -1222,6 +1414,9 @@
 	ipv6h->nexthdr = proto;
 	ipv6h->saddr = fl6->saddr;
 	ipv6h->daddr = fl6->daddr;
+
+	/* Reset the skb_iif to Tunnels interface index */
+	skb->skb_iif = dev->ifindex;
 	ip6tunnel_xmit(NULL, skb, dev);
 	return 0;
 tx_err_link_failure:
@@ -1244,6 +1439,7 @@
 	__u32 mtu;
 	u8 tproto;
 	int err;
+	struct __ip6_tnl_fmr *fmr;
 
 	iph = ip_hdr(skb);
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
@@ -1287,6 +1483,20 @@
 	fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
 	dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph));
 
+	/* try to find matching FMR */
+	for (fmr = t->parms.fmrs; fmr; fmr = fmr->next) {
+		unsigned mshift = 32 - fmr->ip4_prefix_len;
+		/* compare as u64: a /0 prefix shifts by the full 32 bits */
+		if ((u64)ntohl(fmr->ip4_prefix.s_addr) >> mshift ==
+				(u64)ntohl(ip_hdr(skb)->daddr) >> mshift)
+			break;
+	}
+
+	/* change dstaddr according to FMR */
+	if (fmr)
+		ip4ip6_fmr_calc(&fl6.daddr, ip_hdr(skb), skb_tail_pointer(skb), fmr,
+				true, t->parms.draft03);
+
 	if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
 		return -1;
 
@@ -1375,6 +1585,7 @@
 	fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
 	dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h));
 
+
 	if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
 		return -1;
 
@@ -1504,6 +1715,14 @@
 	t->parms.link = p->link;
 	t->parms.proto = p->proto;
 	t->parms.fwmark = p->fwmark;
+
+	while (t->parms.fmrs) {
+		struct __ip6_tnl_fmr *next = t->parms.fmrs->next;
+		kfree(t->parms.fmrs);
+		t->parms.fmrs = next;
+	}
+	t->parms.fmrs = p->fmrs;
+
 	dst_cache_reset(&t->dst_cache);
 	ip6_tnl_link_config(t);
 	return 0;
@@ -1542,6 +1761,7 @@
 	p->flowinfo = u->flowinfo;
 	p->link = u->link;
 	p->proto = u->proto;
+	p->fmrs = NULL;
 	memcpy(p->name, u->name, sizeof(u->name));
 }
 
@@ -1783,6 +2003,7 @@
 	.ndo_start_xmit = ip6_tnl_start_xmit,
 	.ndo_do_ioctl	= ip6_tnl_ioctl,
 	.ndo_change_mtu = ip6_tnl_change_mtu,
+	.ndo_get_stats64 = ip_tunnel_get_stats64,
 	.ndo_get_stats	= ip6_get_stats,
 	.ndo_get_iflink = ip6_tnl_get_iflink,
 };
@@ -1926,6 +2147,15 @@
 	return 0;
 }
 
+static const struct nla_policy ip6_tnl_fmr_policy[IFLA_IPTUN_FMR_MAX + 1] = {
+	[IFLA_IPTUN_FMR_IP6_PREFIX] = { .len = sizeof(struct in6_addr) },
+	[IFLA_IPTUN_FMR_IP4_PREFIX] = { .len = sizeof(struct in_addr) },
+	[IFLA_IPTUN_FMR_IP6_PREFIX_LEN] = { .type = NLA_U8 },
+	[IFLA_IPTUN_FMR_IP4_PREFIX_LEN] = { .type = NLA_U8 },
+	[IFLA_IPTUN_FMR_EA_LEN] = { .type = NLA_U8 },
+	[IFLA_IPTUN_FMR_OFFSET] = { .type = NLA_U8 }
+};
+
 static void ip6_tnl_netlink_parms(struct nlattr *data[],
 				  struct __ip6_tnl_parm *parms)
 {
@@ -1963,6 +2193,62 @@
 
 	if (data[IFLA_IPTUN_FWMARK])
 		parms->fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]);
+
+	if (data[IFLA_IPTUN_DRAFT03])
+		parms->draft03 = nla_get_u8(data[IFLA_IPTUN_DRAFT03]);
+
+	if (data[IFLA_IPTUN_FMRS]) {
+		int rem;
+		struct nlattr *fmr;
+		nla_for_each_nested(fmr, data[IFLA_IPTUN_FMRS], rem) {
+			struct nlattr *fmrd[IFLA_IPTUN_FMR_MAX + 1], *c;
+			struct __ip6_tnl_fmr *nfmr;
+
+			/* skip rules whose attributes fail to parse */
+			if (nla_parse_nested_deprecated(fmrd, IFLA_IPTUN_FMR_MAX,
+					fmr, ip6_tnl_fmr_policy, NULL))
+				continue;
+
+			if (!(nfmr = kzalloc(sizeof(*nfmr), GFP_KERNEL)))
+				continue;
+
+			nfmr->offset = 6;
+
+			if ((c = fmrd[IFLA_IPTUN_FMR_IP6_PREFIX]))
+				nla_memcpy(&nfmr->ip6_prefix, fmrd[IFLA_IPTUN_FMR_IP6_PREFIX],
+					sizeof(nfmr->ip6_prefix));
+
+			if ((c = fmrd[IFLA_IPTUN_FMR_IP4_PREFIX]))
+				nla_memcpy(&nfmr->ip4_prefix, fmrd[IFLA_IPTUN_FMR_IP4_PREFIX],
+					sizeof(nfmr->ip4_prefix));
+
+			if ((c = fmrd[IFLA_IPTUN_FMR_IP6_PREFIX_LEN]))
+				nfmr->ip6_prefix_len = nla_get_u8(c);
+
+			if ((c = fmrd[IFLA_IPTUN_FMR_IP4_PREFIX_LEN]))
+				nfmr->ip4_prefix_len = nla_get_u8(c);
+
+			if ((c = fmrd[IFLA_IPTUN_FMR_EA_LEN]))
+				nfmr->ea_len = nla_get_u8(c);
+
+			if ((c = fmrd[IFLA_IPTUN_FMR_OFFSET]))
+				nfmr->offset = nla_get_u8(c);
+
+			/* ip4ip6_fmr_calc() shifts and copies by these lengths;
+			 * reject values that would take it out of bounds
+			 */
+			if (nfmr->ip6_prefix_len > 128 ||
+			    nfmr->ip4_prefix_len > 32 ||
+			    nfmr->ea_len + nfmr->ip4_prefix_len +
+			    nfmr->offset > 48) {
+				kfree(nfmr);
+				continue;
+			}
+
+			nfmr->next = parms->fmrs;
+			parms->fmrs = nfmr;
+		}
+	}
 }
 
 static bool ip6_tnl_netlink_encap_parms(struct nlattr *data[],
@@ -2078,6 +2364,12 @@
 
 static size_t ip6_tnl_get_size(const struct net_device *dev)
 {
+	const struct ip6_tnl *t = netdev_priv(dev);
+	struct __ip6_tnl_fmr *c;
+	int fmrs = 0;
+	for (c = t->parms.fmrs; c; c = c->next)
+		++fmrs;
+
 	return
 		/* IFLA_IPTUN_LINK */
 		nla_total_size(4) +
@@ -2107,6 +2399,24 @@
 		nla_total_size(0) +
 		/* IFLA_IPTUN_FWMARK */
 		nla_total_size(4) +
+		/* IFLA_IPTUN_FMRS */
+		nla_total_size(0) +
+		(
+			/* nest */
+			nla_total_size(0) +
+			/* IFLA_IPTUN_FMR_IP6_PREFIX */
+			nla_total_size(sizeof(struct in6_addr)) +
+			/* IFLA_IPTUN_FMR_IP4_PREFIX */
+			nla_total_size(sizeof(struct in_addr)) +
+			/* IFLA_IPTUN_FMR_EA_LEN */
+			nla_total_size(1) +
+			/* IFLA_IPTUN_FMR_IP6_PREFIX_LEN */
+			nla_total_size(1) +
+			/* IFLA_IPTUN_FMR_IP4_PREFIX_LEN */
+			nla_total_size(1) +
+			/* IFLA_IPTUN_FMR_OFFSET */
+			nla_total_size(1)
+		) * fmrs +
 		0;
 }
 
@@ -2114,6 +2424,9 @@
 {
 	struct ip6_tnl *tunnel = netdev_priv(dev);
 	struct __ip6_tnl_parm *parm = &tunnel->parms;
+	struct __ip6_tnl_fmr *c;
+	int fmrcnt = 0;
+	struct nlattr *fmrs;
 
 	if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
 	    nla_put_in6_addr(skb, IFLA_IPTUN_LOCAL, &parm->laddr) ||
@@ -2123,9 +2436,27 @@
 	    nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) ||
 	    nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) ||
 	    nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto) ||
-	    nla_put_u32(skb, IFLA_IPTUN_FWMARK, parm->fwmark))
+	    nla_put_u32(skb, IFLA_IPTUN_FWMARK, parm->fwmark) ||
+	    !(fmrs = nla_nest_start(skb, IFLA_IPTUN_FMRS)))
 		goto nla_put_failure;
 
+	for (c = parm->fmrs; c; c = c->next) {
+		struct nlattr *fmr = nla_nest_start(skb, ++fmrcnt);
+		if (!fmr ||
+			nla_put(skb, IFLA_IPTUN_FMR_IP6_PREFIX,
+				sizeof(c->ip6_prefix), &c->ip6_prefix) ||
+			nla_put(skb, IFLA_IPTUN_FMR_IP4_PREFIX,
+				sizeof(c->ip4_prefix), &c->ip4_prefix) ||
+			nla_put_u8(skb, IFLA_IPTUN_FMR_IP6_PREFIX_LEN, c->ip6_prefix_len) ||
+			nla_put_u8(skb, IFLA_IPTUN_FMR_IP4_PREFIX_LEN, c->ip4_prefix_len) ||
+			nla_put_u8(skb, IFLA_IPTUN_FMR_EA_LEN, c->ea_len) ||
+			nla_put_u8(skb, IFLA_IPTUN_FMR_OFFSET, c->offset))
+				goto nla_put_failure;
+
+		nla_nest_end(skb, fmr);
+	}
+	nla_nest_end(skb, fmrs);
+
 	if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, tunnel->encap.type) ||
 	    nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, tunnel->encap.sport) ||
 	    nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, tunnel->encap.dport) ||
@@ -2150,6 +2481,26 @@
 }
 EXPORT_SYMBOL(ip6_tnl_get_link_net);
 
+bool ip6_tunnel_is_fallback_dev(struct net_device *dev)
+{
+	struct net *net;
+	struct ip6_tnl_net *ip6n;
+	struct net_device *fb_tnl_dev;
+
+	net = dev_net(dev);
+	if (!net)
+		return false;
+
+	ip6n = net_generic(net, ip6_tnl_net_id);
+	if (!ip6n)
+		return false;
+
+	fb_tnl_dev = ip6n->fb_tnl_dev;
+
+	return (fb_tnl_dev == dev);
+}
+EXPORT_SYMBOL(ip6_tunnel_is_fallback_dev);
+
 static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = {
 	[IFLA_IPTUN_LINK]		= { .type = NLA_U32 },
 	[IFLA_IPTUN_LOCAL]		= { .len = sizeof(struct in6_addr) },
@@ -2165,6 +2516,8 @@
 	[IFLA_IPTUN_ENCAP_DPORT]	= { .type = NLA_U16 },
 	[IFLA_IPTUN_COLLECT_METADATA]	= { .type = NLA_FLAG },
 	[IFLA_IPTUN_FWMARK]		= { .type = NLA_U32 },
+	[IFLA_IPTUN_FMRS]		= { .type = NLA_NESTED },
+	[IFLA_IPTUN_DRAFT03]		= { .type = NLA_U8 },
 };
 
 static struct rtnl_link_ops ip6_link_ops __read_mostly = {