--- zzzz-none-000/linux-3.10.107/net/ipv6/ip6_tunnel.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/net/ipv6/ip6_tunnel.c 2021-02-04 17:41:59.000000000 +0000 @@ -16,6 +16,8 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * + * Changes: + * Steven Barth : MAP-E FMR support */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -29,7 +31,6 @@ #include #include #include -#include #include #include #include @@ -41,6 +42,7 @@ #include #include #include +#include #include #include @@ -64,15 +66,6 @@ MODULE_ALIAS_RTNL_LINK("ip6tnl"); MODULE_ALIAS_NETDEV("ip6tnl0"); -#ifdef IP6_TNL_DEBUG -#define IP6_TNL_TRACE(x...) pr_debug("%s:" x "\n", __func__) -#else -#define IP6_TNL_TRACE(x...) do {;} while(0) -#endif - -#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) -#define IPV6_TCLASS_SHIFT 20 - #define HASH_SIZE_SHIFT 5 #define HASH_SIZE (1 << HASH_SIZE_SHIFT) @@ -80,11 +73,9 @@ module_param(log_ecn_error, bool, 0644); MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); -static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2) +static u32 HASH(const struct in6_addr *addr) { - u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2); - - return hash_32(hash, HASH_SIZE_SHIFT); + return hash_32(ipv6_addr_hash(addr), HASH_SIZE_SHIFT); } static int ip6_tnl_dev_init(struct net_device *dev); @@ -103,20 +94,21 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev) { - struct pcpu_tstats tmp, sum = { 0 }; + struct pcpu_sw_netstats tmp, sum = { 0 }; int i; for_each_possible_cpu(i) { unsigned int start; - const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); + const struct pcpu_sw_netstats *tstats = + per_cpu_ptr(dev->tstats, i); do { - start = u64_stats_fetch_begin_bh(&tstats->syncp); + start = u64_stats_fetch_begin_irq(&tstats->syncp); tmp.rx_packets = tstats->rx_packets; tmp.rx_bytes = tstats->rx_bytes; tmp.tx_packets = tstats->tx_packets; tmp.tx_bytes = tstats->tx_bytes; - } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); + } while (u64_stats_fetch_retry_irq(&tstats->syncp, start)); sum.rx_packets += tmp.rx_packets; sum.rx_bytes += tmp.rx_bytes; @@ -131,39 +123,113 @@ } /* + * Update offload stats + */ +void ip6_update_offload_stats(struct net_device *dev, void *ptr) +{ + struct pcpu_sw_netstats *tstats = per_cpu_ptr(dev->tstats, 0); + const struct pcpu_sw_netstats *offload_stats = + (struct pcpu_sw_netstats *)ptr; + + u64_stats_update_begin(&tstats->syncp); + tstats->tx_packets += offload_stats->tx_packets; + tstats->tx_bytes += offload_stats->tx_bytes; + tstats->rx_packets += offload_stats->rx_packets; + tstats->rx_bytes += offload_stats->rx_bytes; + u64_stats_update_end(&tstats->syncp); +} +EXPORT_SYMBOL(ip6_update_offload_stats); + +/* * Locking : hash tables are protected by RCU and RTNL */ -struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) +static void ip6_tnl_per_cpu_dst_set(struct ip6_tnl_dst *idst, + struct dst_entry *dst) { - struct dst_entry *dst = t->dst_cache; + write_seqlock_bh(&idst->lock); + dst_release(rcu_dereference_protected( + idst->dst, + lockdep_is_held(&idst->lock.lock))); + if (dst) { + dst_hold(dst); + idst->cookie = rt6_get_cookie((struct rt6_info *)dst); + } else { + idst->cookie = 0; + } + rcu_assign_pointer(idst->dst, dst); + write_sequnlock_bh(&idst->lock); +} - if (dst && dst->obsolete && - dst->ops->check(dst, t->dst_cookie) == NULL) { - t->dst_cache = NULL; +struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t) +{ + struct ip6_tnl_dst *idst; + struct dst_entry *dst; + unsigned int seq; + u32 cookie; + + idst = raw_cpu_ptr(t->dst_cache); + + rcu_read_lock(); + do { + seq = read_seqbegin(&idst->lock); + dst = rcu_dereference(idst->dst); + cookie = idst->cookie; + } while (read_seqretry(&idst->lock, seq)); + + if (dst && !atomic_inc_not_zero(&dst->__refcnt)) + dst = NULL; + rcu_read_unlock(); + + if (dst && dst->obsolete && !dst->ops->check(dst, cookie)) { + ip6_tnl_per_cpu_dst_set(idst, NULL); dst_release(dst); - return NULL; + dst = NULL; } - return dst; } -EXPORT_SYMBOL_GPL(ip6_tnl_dst_check); +EXPORT_SYMBOL_GPL(ip6_tnl_dst_get); void ip6_tnl_dst_reset(struct ip6_tnl *t) { - dst_release(t->dst_cache); - t->dst_cache = NULL; + int i; + + for_each_possible_cpu(i) + ip6_tnl_per_cpu_dst_set(per_cpu_ptr(t->dst_cache, i), NULL); } EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset); -void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst) +void ip6_tnl_dst_set(struct ip6_tnl *t, struct dst_entry *dst) { - struct rt6_info *rt = (struct rt6_info *) dst; - t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; - dst_release(t->dst_cache); - t->dst_cache = dst; + ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), dst); + } -EXPORT_SYMBOL_GPL(ip6_tnl_dst_store); +EXPORT_SYMBOL_GPL(ip6_tnl_dst_set); + +void ip6_tnl_dst_destroy(struct ip6_tnl *t) +{ + if (!t->dst_cache) + return; + + ip6_tnl_dst_reset(t); + free_percpu(t->dst_cache); +} +EXPORT_SYMBOL_GPL(ip6_tnl_dst_destroy); + +int ip6_tnl_dst_init(struct ip6_tnl *t) +{ + int i; + + t->dst_cache = alloc_percpu(struct ip6_tnl_dst); + if (!t->dst_cache) + return -ENOMEM; + + for_each_possible_cpu(i) + seqlock_init(&per_cpu_ptr(t->dst_cache, i)->lock); + + return 0; +} +EXPORT_SYMBOL_GPL(ip6_tnl_dst_init); /** * ip6_tnl_lookup - fetch tunnel matching the end-point addresses @@ -182,16 +248,44 @@ static struct ip6_tnl * ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local) { - unsigned int hash = HASH(remote, local); + unsigned int hash = HASH(local); struct ip6_tnl *t; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + struct in6_addr any; + struct __ip6_tnl_fmr *fmr; for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { + if (!ipv6_addr_equal(local, &t->parms.laddr) || + !(t->dev->flags & IFF_UP)) + continue; + + if (ipv6_addr_equal(remote, &t->parms.raddr)) + return t; + + for (fmr = t->parms.fmrs; fmr; fmr = fmr->next) { + if (ipv6_prefix_equal(remote, &fmr->ip6_prefix, + fmr->ip6_prefix_len)) + return t; + } + } + + memset(&any, 0, sizeof(any)); + hash = HASH(local); + for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { if (ipv6_addr_equal(local, &t->parms.laddr) && - ipv6_addr_equal(remote, &t->parms.raddr) && + ipv6_addr_any(&t->parms.raddr) && (t->dev->flags & IFF_UP)) return t; } + + hash = HASH(&any); + for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { + if (ipv6_addr_equal(remote, &t->parms.raddr) && + ipv6_addr_any(&t->parms.laddr) && + (t->dev->flags & IFF_UP)) + return t; + } + t = rcu_dereference(ip6n->tnls_wc[0]); if (t && (t->dev->flags & IFF_UP)) return t; @@ -220,7 +314,7 @@ if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) { prio = 1; - h = HASH(remote, local); + h = HASH(local); } return &ip6n->tnls[prio][h]; } @@ -262,6 +356,9 @@ static void ip6_dev_free(struct net_device *dev) { + struct ip6_tnl *t = netdev_priv(dev); + + ip6_tnl_dst_destroy(t); free_percpu(dev->tstats); free_netdev(dev); } @@ -299,7 +396,7 @@ * Create tunnel matching given parameters. * * Return: - * created tunnel or NULL + * created tunnel or error pointer **/ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) @@ -307,21 +404,23 @@ struct net_device *dev; struct ip6_tnl *t; char name[IFNAMSIZ]; - int err; + int err = -ENOMEM; if (p->name[0]) strlcpy(name, p->name, IFNAMSIZ); else sprintf(name, "ip6tnl%%d"); - dev = alloc_netdev(sizeof (*t), name, ip6_tnl_dev_setup); - if (dev == NULL) + dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, + ip6_tnl_dev_setup); + if (!dev) goto failed; dev_net_set(dev, net); t = netdev_priv(dev); t->parms = *p; + t->net = dev_net(dev); err = ip6_tnl_create2(dev); if (err < 0) goto failed_free; @@ -331,7 +430,7 @@ failed_free: ip6_dev_free(dev); failed: - return NULL; + return ERR_PTR(err); } /** @@ -345,7 +444,7 @@ * tunnel device is created and registered for use. * * Return: - * matching tunnel or NULL + * matching tunnel or error pointer **/ static struct ip6_tnl *ip6_tnl_locate(struct net *net, @@ -361,11 +460,15 @@ (t = rtnl_dereference(*tp)) != NULL; tp = &t->next) { if (ipv6_addr_equal(local, &t->parms.laddr) && - ipv6_addr_equal(remote, &t->parms.raddr)) + ipv6_addr_equal(remote, &t->parms.raddr)) { + if (create) + return ERR_PTR(-EEXIST); + return t; + } } if (!create) - return NULL; + return ERR_PTR(-ENODEV); return ip6_tnl_create(net, p); } @@ -381,9 +484,15 @@ ip6_tnl_dev_uninit(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - struct net *net = dev_net(dev); + struct net *net = t->net; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + while (t->parms.fmrs) { + struct __ip6_tnl_fmr *next = t->parms.fmrs->next; + kfree(t->parms.fmrs); + t->parms.fmrs = next; + } + if (dev == ip6n->fb_tnl_dev) RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL); else @@ -480,6 +589,7 @@ int rel_msg = 0; u8 rel_type = ICMPV6_DEST_UNREACH; u8 rel_code = ICMPV6_ADDR_UNREACH; + u8 tproto; __u32 rel_info = 0; __u16 len; int err = -ENOENT; @@ -489,11 +599,12 @@ processing of the error. */ rcu_read_lock(); - if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr, - &ipv6h->saddr)) == NULL) + t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr, &ipv6h->saddr); + if (!t) goto out; - if (t->parms.proto != ipproto && t->parms.proto != 0) + tproto = ACCESS_ONCE(t->parms.proto); + if (tproto != ipproto && tproto != 0) goto out; err = 0; @@ -503,14 +614,14 @@ struct ipv6_tlv_tnl_enc_lim *tel; __u32 mtu; case ICMPV6_DEST_UNREACH: - net_warn_ratelimited("%s: Path to destination invalid or inactive!\n", - t->parms.name); + net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n", + t->parms.name); rel_msg = 1; break; case ICMPV6_TIME_EXCEED: if ((*code) == ICMPV6_EXC_HOPLIMIT) { - net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n", - t->parms.name); + net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n", + t->parms.name); rel_msg = 1; } break; @@ -522,13 +633,13 @@ if (teli && teli == *info - 2) { tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli]; if (tel->encap_limit == 0) { - net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n", - t->parms.name); + net_dbg_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n", + t->parms.name); rel_msg = 1; } } else { - net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n", - t->parms.name); + net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n", + t->parms.name); } break; case ICMPV6_PKT_TOOBIG: @@ -537,7 +648,8 @@ mtu = IPV6_MIN_MTU; t->dev->mtu = mtu; - if ((len = sizeof (*ipv6h) + ntohs(ipv6h->payload_len)) > mtu) { + len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len); + if (len > mtu) { rel_type = ICMPV6_PKT_TOOBIG; rel_code = 0; rel_info = mtu; @@ -758,7 +870,7 @@ { struct __ip6_tnl_parm *p = &t->parms; int ret = 0; - struct net *net = dev_net(t->dev); + struct net *net = t->net; if ((p->flags & IP6_TNL_F_CAP_RCV) || ((p->flags & IP6_TNL_F_CAP_PER_PACKET) && @@ -777,6 +889,127 @@ } EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl); + +/** + * ip4ip6_fmr_calc - calculate target / source IPv6-address based on FMR + * @dest: destination IPv6 address buffer + * @skb: received socket buffer + * @fmr: MAP FMR + * @xmit: Calculate for xmit or rcv + **/ +static void ip4ip6_fmr_calc(struct in6_addr *dest, + const struct iphdr *iph, const uint8_t *end, + const struct __ip6_tnl_fmr *fmr, bool xmit, bool draft03) +{ + int psidlen = fmr->ea_len - (32 - fmr->ip4_prefix_len); + u8 *portp = NULL; + bool use_dest_addr; + const struct iphdr *dsth = iph; + + if ((u8*)dsth >= end) + return; + + /* find significant IP header */ + if (iph->protocol == IPPROTO_ICMP) { + struct icmphdr *ih = (struct icmphdr*)(((u8*)dsth) + dsth->ihl * 4); + if (ih && ((u8*)&ih[1]) <= end && ( + ih->type == ICMP_DEST_UNREACH || + ih->type == ICMP_SOURCE_QUENCH || + ih->type == ICMP_TIME_EXCEEDED || + ih->type == ICMP_PARAMETERPROB || + ih->type == ICMP_REDIRECT)) + dsth = (const struct iphdr*)&ih[1]; + } + + /* in xmit-path use dest port by default and source port only if + this is an ICMP reply to something else; vice versa in rcv-path */ + use_dest_addr = (xmit && dsth == iph) || (!xmit && dsth != iph); + + /* get dst port */ + if (((u8 *)&dsth[1]) <= end && ( + dsth->protocol == IPPROTO_UDP || + dsth->protocol == IPPROTO_TCP || + dsth->protocol == IPPROTO_SCTP || + dsth->protocol == IPPROTO_DCCP)) { + /* for UDP, TCP, SCTP and DCCP source and dest port + follow IPv4 header directly */ + portp = ((u8*)dsth) + dsth->ihl * 4; + + if (use_dest_addr) + portp += sizeof(u16); + } else if (iph->protocol == IPPROTO_ICMP) { + struct icmphdr *ih = (struct icmphdr*)(((u8*)dsth) + dsth->ihl * 4); + + /* use icmp identifier as port */ + if (((u8 *)ih) <= end && ( + (use_dest_addr && ( + ih->type == ICMP_ECHOREPLY || + ih->type == ICMP_TIMESTAMPREPLY || + ih->type == ICMP_INFO_REPLY || + ih->type == ICMP_ADDRESSREPLY)) || + (!use_dest_addr && ( + ih->type == ICMP_ECHO || + ih->type == ICMP_TIMESTAMP || + ih->type == ICMP_INFO_REQUEST || + ih->type == ICMP_ADDRESS) + ))) + portp = (u8*)&ih->un.echo.id; + } + + if ((portp && &portp[2] <= end) || psidlen == 0) { + int frombyte = fmr->ip6_prefix_len / 8; + int fromrem = fmr->ip6_prefix_len % 8; + int bytes = sizeof(struct in6_addr) - frombyte; + const u32 *addr = (use_dest_addr) ? &dsth->daddr : &dsth->saddr; + u64 eabits = ((u64)ntohl(*addr)) << (32 + fmr->ip4_prefix_len); + u64 t = 0; + + /* extract PSID from port and add it to eabits */ + u16 psidbits = 0; + if (psidlen > 0) { + psidbits = ((u16)portp[0]) << 8 | ((u16)portp[1]); + psidbits >>= 16 - psidlen - fmr->offset; + psidbits = (u16)(psidbits << (16 - psidlen)); + eabits |= ((u64)psidbits) << (48 - (fmr->ea_len - psidlen)); + } + + /* rewrite destination address */ + *dest = fmr->ip6_prefix; + memcpy(&dest->s6_addr[10], addr, sizeof(*addr)); + dest->s6_addr16[7] = htons(psidbits >> (16 - psidlen)); + + if (bytes > sizeof(u64)) + bytes = sizeof(u64); + + /* insert eabits */ + memcpy(&t, &dest->s6_addr[frombyte], bytes); + t = be64_to_cpu(t) & ~(((((u64)1) << fmr->ea_len) - 1) + << (64 - fmr->ea_len - fromrem)); + t = cpu_to_be64(t | (eabits >> fromrem)); + memcpy(&dest->s6_addr[frombyte], &t, bytes); + if (draft03) { + /** + * Draft03 IPv6 address format + * +--+---+---+---+---+---+---+---+---+ + * |PL| 8 16 24 32 40 48 56 | + * +--+---+---+---+---+---+---+---+---+ + * |64| u | IPv4 address |PSID |0 | + * +--+---+---+---+---+---+---+---+---+ + * Final specification IPv6 address format + * +--+---+---+---+---+---+---+---+---+ + * |PL| 8 16 24 32 40 48 56 | + * +--+---+---+---+---+---+---+---+---+ + * |64| 0 | IPv4 address |PSID | + * +--+---+---+---+---+---+---+---+---+ + * We need move last six Bytes 1 byte forward + */ + memmove(&dest->s6_addr[9], &dest->s6_addr[10], 6); + dest->s6_addr[15] = 0; + } + } +} + + /** * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally * @skb: received socket buffer @@ -794,15 +1027,16 @@ { struct ip6_tnl *t; const struct ipv6hdr *ipv6h = ipv6_hdr(skb); + u8 tproto; int err; rcu_read_lock(); + t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr); + if (t) { + struct pcpu_sw_netstats *tstats; - if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, - &ipv6h->daddr)) != NULL) { - struct pcpu_tstats *tstats; - - if (t->parms.proto != ipproto && t->parms.proto != 0) { + tproto = ACCESS_ONCE(t->parms.proto); + if (tproto != ipproto && tproto != 0) { rcu_read_unlock(); goto discard; } @@ -817,14 +1051,34 @@ rcu_read_unlock(); goto discard; } - secpath_reset(skb); skb->mac_header = skb->network_header; skb_reset_network_header(skb); skb->protocol = htons(protocol); - skb->pkt_type = PACKET_HOST; memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); + if (protocol == ETH_P_IP && + !ipv6_addr_equal(&ipv6h->saddr, &t->parms.raddr)) { + /* Packet didn't come from BR, so lookup FMR */ + struct __ip6_tnl_fmr *fmr; + struct in6_addr expected = t->parms.raddr; + for (fmr = t->parms.fmrs; fmr; fmr = fmr->next) + if (ipv6_prefix_equal(&ipv6h->saddr, + &fmr->ip6_prefix, fmr->ip6_prefix_len)) + break; + + /* Check that IPv6 matches IPv4 source to prevent spoofing */ + if (fmr) + ip4ip6_fmr_calc(&expected, ip_hdr(skb), + skb_tail_pointer(skb), + fmr, false, + t->parms.draft03); + + if (!ipv6_addr_equal(&ipv6h->saddr, &expected)) { + rcu_read_unlock(); + goto discard; + } + } - __skb_tunnel_rx(skb, t->dev); + __skb_tunnel_rx(skb, t->dev, t->net); err = dscp_ecn_decapsulate(t, ipv6h, skb); if (unlikely(err)) { @@ -846,6 +1100,8 @@ tstats->rx_bytes += skb->len; u64_stats_update_end(&tstats->syncp); + /* Reset the skb_iif to Tunnels interface index */ + skb->skb_iif = t->dev->ifindex; netif_rx(skb); rcu_read_unlock(); @@ -910,24 +1166,28 @@ return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr); } -int ip6_tnl_xmit_ctl(struct ip6_tnl *t) +int ip6_tnl_xmit_ctl(struct ip6_tnl *t, + const struct in6_addr *laddr, + const struct in6_addr *raddr) { struct __ip6_tnl_parm *p = &t->parms; int ret = 0; - struct net *net = dev_net(t->dev); + struct net *net = t->net; - if (p->flags & IP6_TNL_F_CAP_XMIT) { + if ((p->flags & IP6_TNL_F_CAP_XMIT) || + ((p->flags & IP6_TNL_F_CAP_PER_PACKET) && + (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_XMIT))) { struct net_device *ldev = NULL; rcu_read_lock(); if (p->link) ldev = dev_get_by_index_rcu(net, p->link); - if (unlikely(!ipv6_chk_addr(net, &p->laddr, ldev, 0))) + if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0))) pr_warn("%s xmit: Local address not yet configured!\n", p->name); - else if (!ipv6_addr_is_multicast(&p->raddr) && - unlikely(ipv6_chk_addr(net, &p->raddr, NULL, 0))) + else if (!ipv6_addr_is_multicast(raddr) && + unlikely(ipv6_chk_addr(net, raddr, NULL, 0))) pr_warn("%s xmit: Routing loop! Remote address found on this node!\n", p->name); else @@ -964,8 +1224,8 @@ int encap_limit, __u32 *pmtu) { - struct net *net = dev_net(dev); struct ip6_tnl *t = netdev_priv(dev); + struct net *net = t->net; struct net_device_stats *stats = &t->dev->stats; struct ipv6hdr *ipv6h = ipv6_hdr(skb); struct ipv6_tel_txoption opt; @@ -977,7 +1237,29 @@ u8 proto; int err = -1; - if (!(t->parms.flags & + /* NBMA tunnel */ + if (ipv6_addr_any(&t->parms.raddr)) { + struct in6_addr *addr6; + struct neighbour *neigh; + int addr_type; + + if (!skb_dst(skb)) + goto tx_err_link_failure; + + neigh = dst_neigh_lookup(skb_dst(skb), + &ipv6_hdr(skb)->daddr); + if (!neigh) + goto tx_err_link_failure; + + addr6 = (struct in6_addr *)&neigh->primary_key; + addr_type = ipv6_addr_type(addr6); + + if (addr_type == IPV6_ADDR_ANY) + addr6 = &ipv6_hdr(skb)->daddr; + + memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); + neigh_release(neigh); + } else if (!(t->parms.flags & (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) { /* enable the cache only only if the routing decision does * not depend on the current inner header value @@ -986,19 +1268,23 @@ } if (use_cache) - dst = ip6_tnl_dst_check(t); + dst = ip6_tnl_dst_get(t); + + if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr)) + goto tx_err_link_failure; + if (!dst) { - ndst = ip6_route_output(net, NULL, fl6); + dst = ip6_route_output(net, NULL, fl6); - if (ndst->error) + if (dst->error) goto tx_err_link_failure; - ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0); - if (IS_ERR(ndst)) { - err = PTR_ERR(ndst); - ndst = NULL; + dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + dst = NULL; goto tx_err_link_failure; } - dst = ndst; + ndst = dst; } tdev = dst->dev; @@ -1009,7 +1295,7 @@ t->parms.name); goto tx_err_dst_release; } - mtu = dst_mtu(dst) - sizeof (*ipv6h); + mtu = dst_mtu(dst) - sizeof(*ipv6h); if (encap_limit >= 0) { max_headroom += 8; mtu -= 8; @@ -1024,6 +1310,8 @@ goto tx_err_dst_release; } + skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev))); + /* * Okay, now see if we can stuff it in the buffer as-is. */ @@ -1033,7 +1321,8 @@ (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { struct sk_buff *new_skb; - if (!(new_skb = skb_realloc_headroom(skb, max_headroom))) + new_skb = skb_realloc_headroom(skb, max_headroom); + if (!new_skb) goto tx_err_dst_release; if (skb->sk) @@ -1041,13 +1330,11 @@ consume_skb(skb); skb = new_skb; } - skb_dst_drop(skb); - if (!use_cache) { - skb_dst_set(skb, dst); - ndst = NULL; - } else { - skb_dst_set_noref(skb, dst); - } + + if (use_cache && ndst) + ip6_tnl_dst_set(t, ndst); + skb_dst_set(skb, dst); + skb->transport_header = skb->network_header; proto = fl6->flowi6_proto; @@ -1055,23 +1342,31 @@ init_tel_txopt(&opt, encap_limit); ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); } + + if (likely(!skb->encapsulation)) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } + skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); - ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel); + ip6_flow_hdr(ipv6h, dsfield, + ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6)); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; ipv6h->daddr = fl6->daddr; - ip6tunnel_xmit(skb, dev); - if (ndst) - ip6_tnl_dst_store(t, ndst); + + /* Reset the skb_iif to Tunnels interface index */ + skb->skb_iif = dev->ifindex; + ip6tunnel_xmit(NULL, skb, dev); return 0; tx_err_link_failure: stats->tx_carrier_errors++; dst_link_failure(skb); tx_err_dst_release: - dst_release(ndst); + dst_release(dst); return err; } @@ -1084,16 +1379,20 @@ struct flowi6 fl6; __u8 dsfield; __u32 mtu; + u8 tproto; int err; + struct __ip6_tnl_fmr *fmr; - if ((t->parms.proto != IPPROTO_IPIP && t->parms.proto != 0) || - !ip6_tnl_xmit_ctl(t)) + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + + tproto = ACCESS_ONCE(t->parms.proto); + if (tproto != IPPROTO_IPIP && tproto != 0) return -1; if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) encap_limit = t->parms.encap_limit; - memcpy(&fl6, &t->fl.u.ip6, sizeof (fl6)); + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_IPIP; dsfield = ipv4_get_dsfield(iph); @@ -1104,6 +1403,19 @@ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; + /* try to find matching FMR */ + for (fmr = t->parms.fmrs; fmr; fmr = fmr->next) { + unsigned mshift = 32 - fmr->ip4_prefix_len; + if (ntohl(fmr->ip4_prefix.s_addr) >> mshift == + ntohl(iph->daddr) >> mshift) + break; + } + + /* change dstaddr according to FMR */ + if (fmr) + ip4ip6_fmr_calc(&fl6.daddr, iph, skb_tail_pointer(skb), fmr, + true, t->parms.draft03); + err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); if (err != 0) { /* XXX: send ICMP error even if DF is not set. */ @@ -1126,10 +1438,12 @@ struct flowi6 fl6; __u8 dsfield; __u32 mtu; + u8 tproto; int err; - if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) || - !ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h)) + tproto = ACCESS_ONCE(t->parms.proto); + if ((tproto != IPPROTO_IPV6 && tproto != 0) || + ip6_tnl_addr_conflict(t, ipv6h)) return -1; offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); @@ -1145,14 +1459,14 @@ } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) encap_limit = t->parms.encap_limit; - memcpy(&fl6, &t->fl.u.ip6, sizeof (fl6)); + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_IPV6; dsfield = ipv6_get_dsfield(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); + fl6.flowlabel |= net_hdr_word(ipv6h) & IPV6_TCLASS_MASK; if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) - fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK); + fl6.flowlabel |= ip6_flowlabel(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; @@ -1224,26 +1538,24 @@ else dev->flags &= ~IFF_POINTOPOINT; - dev->iflink = p->link; - if (p->flags & IP6_TNL_F_CAP_XMIT) { int strict = (ipv6_addr_type(&p->raddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)); - struct rt6_info *rt = rt6_lookup(dev_net(dev), + struct rt6_info *rt = rt6_lookup(t->net, &p->raddr, &p->laddr, p->link, strict); - if (rt == NULL) + if (!rt) return; if (rt->dst.dev) { dev->hard_header_len = rt->dst.dev->hard_header_len + - sizeof (struct ipv6hdr); + sizeof(struct ipv6hdr); - dev->mtu = rt->dst.dev->mtu - sizeof (struct ipv6hdr); + dev->mtu = rt->dst.dev->mtu - sizeof(struct ipv6hdr); if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) - dev->mtu-=8; + dev->mtu -= 8; if (dev->mtu < IPV6_MIN_MTU) dev->mtu = IPV6_MIN_MTU; @@ -1272,6 +1584,14 @@ t->parms.flowinfo = p->flowinfo; t->parms.link = p->link; t->parms.proto = p->proto; + + while (t->parms.fmrs) { + struct __ip6_tnl_fmr *next = t->parms.fmrs->next; + kfree(t->parms.fmrs); + t->parms.fmrs = next; + } + t->parms.fmrs = p->fmrs; + ip6_tnl_dst_reset(t); ip6_tnl_link_config(t); return 0; @@ -1279,7 +1599,7 @@ static int ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p) { - struct net *net = dev_net(t->dev); + struct net *net = t->net; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); int err; @@ -1291,6 +1611,14 @@ return err; } +static int ip6_tnl0_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p) +{ + /* for default tnl0 device allow to change only the proto */ + t->parms.proto = p->proto; + netdev_state_change(t->dev); + return 0; +} + static void ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u) { @@ -1302,6 +1630,7 @@ p->flowinfo = u->flowinfo; p->link = u->link; p->proto = u->proto; + p->fmrs = NULL; memcpy(p->name, u->name, sizeof(u->name)); } @@ -1353,26 +1682,26 @@ int err = 0; struct ip6_tnl_parm p; struct __ip6_tnl_parm p1; - struct ip6_tnl *t = NULL; - struct net *net = dev_net(dev); + struct ip6_tnl *t = netdev_priv(dev); + struct net *net = t->net; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); switch (cmd) { case SIOCGETTUNNEL: if (dev == ip6n->fb_tnl_dev) { - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) { + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { err = -EFAULT; break; } ip6_tnl_parm_from_user(&p1, &p); t = ip6_tnl_locate(net, &p1, 0); + if (IS_ERR(t)) + t = netdev_priv(dev); } else { memset(&p, 0, sizeof(p)); } - if (t == NULL) - t = netdev_priv(dev); ip6_tnl_parm_to_user(&p, &t->parms); - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) { + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) { err = -EFAULT; } break; @@ -1382,7 +1711,7 @@ if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) break; err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) break; err = -EINVAL; if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP && @@ -1390,25 +1719,28 @@ break; ip6_tnl_parm_from_user(&p1, &p); t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL); - if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) { - if (t != NULL) { + if (cmd == SIOCCHGTUNNEL) { + if (!IS_ERR(t)) { if (t->dev != dev) { err = -EEXIST; break; } } else t = netdev_priv(dev); - - err = ip6_tnl_update(t, &p1); + if (dev == ip6n->fb_tnl_dev) + err = ip6_tnl0_update(t, &p1); + else + err = ip6_tnl_update(t, &p1); } - if (t) { + if (!IS_ERR(t)) { err = 0; ip6_tnl_parm_to_user(&p, &t->parms); if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) err = -EFAULT; - } else - err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); + } else { + err = PTR_ERR(t); + } break; case SIOCDELTUNNEL: err = -EPERM; @@ -1417,12 +1749,12 @@ if (dev == ip6n->fb_tnl_dev) { err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) break; err = -ENOENT; ip6_tnl_parm_from_user(&p1, &p); t = ip6_tnl_locate(net, &p1, 0); - if (t == NULL) + if (IS_ERR(t)) break; err = -EPERM; if (t->dev == ip6n->fb_tnl_dev) @@ -1451,13 +1783,28 @@ static int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) { - if (new_mtu < IPV6_MIN_MTU) { - return -EINVAL; + struct ip6_tnl *tnl = netdev_priv(dev); + + if (tnl->parms.proto == IPPROTO_IPIP) { + if (new_mtu < 68) + return -EINVAL; + } else { + if (new_mtu < IPV6_MIN_MTU) + return -EINVAL; } + if (new_mtu > 0xFFF8 - dev->hard_header_len) + return -EINVAL; dev->mtu = new_mtu; return 0; } +int ip6_tnl_get_iflink(const struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + + return t->parms.link; +} +EXPORT_SYMBOL(ip6_tnl_get_iflink); static const struct net_device_ops ip6_tnl_netdev_ops = { .ndo_init = ip6_tnl_dev_init, @@ -1465,7 +1812,9 @@ .ndo_start_xmit = ip6_tnl_xmit, .ndo_do_ioctl = ip6_tnl_ioctl, .ndo_change_mtu = ip6_tnl_change_mtu, + .ndo_get_stats64 = ip_tunnel_get_stats64, .ndo_get_stats = ip6_get_stats, + .ndo_get_iflink = ip6_tnl_get_iflink, }; @@ -1485,15 +1834,17 @@ dev->destructor = ip6_dev_free; dev->type = ARPHRD_TUNNEL6; - dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr); - dev->mtu = ETH_DATA_LEN - sizeof (struct ipv6hdr); + dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr); + dev->mtu = ETH_DATA_LEN - sizeof(struct ipv6hdr); t = netdev_priv(dev); if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) - dev->mtu-=8; + dev->mtu -= 8; dev->flags |= IFF_NOARP; dev->addr_len = sizeof(struct in6_addr); - dev->features |= NETIF_F_NETNS_LOCAL; - dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; + netif_keep_dst(dev); + /* This perm addr will be used as interface identifier by IPv6 */ + dev->addr_assign_type = NET_ADDR_RANDOM; + eth_random_addr(dev->perm_addr); } @@ -1506,11 +1857,21 @@ ip6_tnl_dev_init_gen(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); + int ret; t->dev = dev; - dev->tstats = alloc_percpu(struct pcpu_tstats); + t->net = dev_net(dev); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; + + ret = ip6_tnl_dst_init(t); + if (ret) { + free_percpu(dev->tstats); + dev->tstats = NULL; + return ret; + } + return 0; } @@ -1566,6 +1927,15 @@ return 0; } +static const struct nla_policy ip6_tnl_fmr_policy[IFLA_IPTUN_FMR_MAX + 1] = { + [IFLA_IPTUN_FMR_IP6_PREFIX] = { .len = sizeof(struct in6_addr) }, + [IFLA_IPTUN_FMR_IP4_PREFIX] = { .len = sizeof(struct in_addr) }, + [IFLA_IPTUN_FMR_IP6_PREFIX_LEN] = { .type = NLA_U8 }, + [IFLA_IPTUN_FMR_IP4_PREFIX_LEN] = { .type = NLA_U8 }, + [IFLA_IPTUN_FMR_EA_LEN] = { .type = NLA_U8 }, + [IFLA_IPTUN_FMR_OFFSET] = { .type = NLA_U8 } +}; + static void ip6_tnl_netlink_parms(struct nlattr *data[], struct __ip6_tnl_parm *parms) { @@ -1578,12 +1948,10 @@ parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]); if (data[IFLA_IPTUN_LOCAL]) - nla_memcpy(&parms->laddr, data[IFLA_IPTUN_LOCAL], - sizeof(struct in6_addr)); + parms->laddr = nla_get_in6_addr(data[IFLA_IPTUN_LOCAL]); if (data[IFLA_IPTUN_REMOTE]) - nla_memcpy(&parms->raddr, data[IFLA_IPTUN_REMOTE], - sizeof(struct in6_addr)); + parms->raddr = nla_get_in6_addr(data[IFLA_IPTUN_REMOTE]); if (data[IFLA_IPTUN_TTL]) parms->hop_limit = nla_get_u8(data[IFLA_IPTUN_TTL]); @@ -1599,18 +1967,62 @@ if (data[IFLA_IPTUN_PROTO]) parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); + + if (data[IFLA_IPTUN_DRAFT03]) + parms->draft03 = nla_get_u8(data[IFLA_IPTUN_DRAFT03]); + + if (data[IFLA_IPTUN_FMRS]) { + unsigned rem; + struct nlattr *fmr; + nla_for_each_nested(fmr, data[IFLA_IPTUN_FMRS], rem) { + struct nlattr *fmrd[IFLA_IPTUN_FMR_MAX + 1], *c; + struct __ip6_tnl_fmr *nfmr; + + nla_parse_nested(fmrd, IFLA_IPTUN_FMR_MAX, + fmr, ip6_tnl_fmr_policy); + + if (!(nfmr = kzalloc(sizeof(*nfmr), GFP_KERNEL))) + continue; + + nfmr->offset = 6; + + if ((c = fmrd[IFLA_IPTUN_FMR_IP6_PREFIX])) + nla_memcpy(&nfmr->ip6_prefix, fmrd[IFLA_IPTUN_FMR_IP6_PREFIX], + sizeof(nfmr->ip6_prefix)); + + if ((c = fmrd[IFLA_IPTUN_FMR_IP4_PREFIX])) + nla_memcpy(&nfmr->ip4_prefix, fmrd[IFLA_IPTUN_FMR_IP4_PREFIX], + sizeof(nfmr->ip4_prefix)); + + if ((c = fmrd[IFLA_IPTUN_FMR_IP6_PREFIX_LEN])) + nfmr->ip6_prefix_len = nla_get_u8(c); + + if ((c = fmrd[IFLA_IPTUN_FMR_IP4_PREFIX_LEN])) + nfmr->ip4_prefix_len = nla_get_u8(c); + + if ((c = fmrd[IFLA_IPTUN_FMR_EA_LEN])) + nfmr->ea_len = nla_get_u8(c); + + if ((c = fmrd[IFLA_IPTUN_FMR_OFFSET])) + nfmr->offset = nla_get_u8(c); + + nfmr->next = parms->fmrs; + parms->fmrs = nfmr; + } + } } static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct net *net = dev_net(dev); - struct ip6_tnl *nt; + struct ip6_tnl *nt, *t; nt = netdev_priv(dev); ip6_tnl_netlink_parms(data, &nt->parms); - if (ip6_tnl_locate(net, &nt->parms, 0)) + t = ip6_tnl_locate(net, &nt->parms, 0); + if (!IS_ERR(t)) return -EEXIST; return ip6_tnl_create2(dev); @@ -1619,9 +2031,9 @@ static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct ip6_tnl *t; + struct ip6_tnl *t = netdev_priv(dev); struct __ip6_tnl_parm p; - struct net *net = dev_net(dev); + struct net *net = t->net; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); if (dev == ip6n->fb_tnl_dev) @@ -1630,8 +2042,7 @@ ip6_tnl_netlink_parms(data, &p); t = ip6_tnl_locate(net, &p, 0); - - if (t) { + if (!IS_ERR(t)) { if (t->dev != dev) return -EEXIST; } else @@ -1651,6 +2062,12 @@ static size_t ip6_tnl_get_size(const struct net_device *dev) { + const struct ip6_tnl *t = netdev_priv(dev); + struct __ip6_tnl_fmr *c; + int fmrs = 0; + for (c = t->parms.fmrs; c; c = c->next) + ++fmrs; + return /* IFLA_IPTUN_LINK */ nla_total_size(4) + @@ -1668,6 +2085,24 @@ nla_total_size(4) + /* IFLA_IPTUN_PROTO */ nla_total_size(1) + + /* IFLA_IPTUN_FMRS */ + nla_total_size(0) + + ( + /* nest */ + nla_total_size(0) + + /* IFLA_IPTUN_FMR_IP6_PREFIX */ + nla_total_size(sizeof(struct in6_addr)) + + /* IFLA_IPTUN_FMR_IP4_PREFIX */ + nla_total_size(sizeof(struct in_addr)) + + /* IFLA_IPTUN_FMR_EA_LEN */ + nla_total_size(1) + + /* IFLA_IPTUN_FMR_IP6_PREFIX_LEN */ + nla_total_size(1) + + /* IFLA_IPTUN_FMR_IP4_PREFIX_LEN */ + nla_total_size(1) + + /* IFLA_IPTUN_FMR_OFFSET */ + nla_total_size(1) + ) * fmrs + 0; } @@ -1677,21 +2112,25 @@ struct __ip6_tnl_parm *parm = &tunnel->parms; if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || - nla_put(skb, IFLA_IPTUN_LOCAL, sizeof(struct in6_addr), - &parm->laddr) || - nla_put(skb, IFLA_IPTUN_REMOTE, sizeof(struct in6_addr), - &parm->raddr) || + nla_put_in6_addr(skb, IFLA_IPTUN_LOCAL, &parm->laddr) || + nla_put_in6_addr(skb, IFLA_IPTUN_REMOTE, &parm->raddr) || nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) || nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) || nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) || nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) || nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto)) - goto nla_put_failure; + return -EMSGSIZE; + return 0; +} + +struct net *ip6_tnl_get_link_net(const struct net_device *dev) +{ + struct ip6_tnl *tunnel = netdev_priv(dev); -nla_put_failure: - return -EMSGSIZE; + return tunnel->net; } +EXPORT_SYMBOL(ip6_tnl_get_link_net); static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = { [IFLA_IPTUN_LINK] = { .type = NLA_U32 }, @@ -1702,6 +2141,7 @@ [IFLA_IPTUN_FLOWINFO] = { .type = NLA_U32 }, [IFLA_IPTUN_FLAGS] = { .type = NLA_U32 }, [IFLA_IPTUN_PROTO] = { .type = NLA_U8 }, + [IFLA_IPTUN_FMRS] = { .type = NLA_NESTED }, }; static struct rtnl_link_ops ip6_link_ops __read_mostly = { @@ -1716,6 +2156,7 @@ .dellink = ip6_tnl_dellink, .get_size = ip6_tnl_get_size, .fill_info = ip6_tnl_fill_info, + .get_link_net = ip6_tnl_get_link_net, }; static struct xfrm6_tunnel ip4ip6_handler __read_mostly = { @@ -1730,22 +2171,30 @@ .priority = 1, }; -static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n) +static void __net_exit ip6_tnl_destroy_tunnels(struct net *net) { + struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + struct net_device *dev, *aux; int h; struct ip6_tnl *t; LIST_HEAD(list); + for_each_netdev_safe(net, dev, aux) + if (dev->rtnl_link_ops == &ip6_link_ops) + unregister_netdevice_queue(dev, &list); + for (h = 0; h < HASH_SIZE; h++) { t = rtnl_dereference(ip6n->tnls_r_l[h]); - while (t != NULL) { - unregister_netdevice_queue(t->dev, &list); + while (t) { + /* If dev is in the same netns, it has already + * been added to the list by the previous loop. + */ + if (!net_eq(dev_net(t->dev), net)) + unregister_netdevice_queue(t->dev, &list); t = rtnl_dereference(t->next); } } - t = rtnl_dereference(ip6n->tnls_wc[0]); - unregister_netdevice_queue(t->dev, &list); unregister_netdevice_many(&list); } @@ -1760,12 +2209,16 @@ err = -ENOMEM; ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0", - ip6_tnl_dev_setup); + NET_NAME_UNKNOWN, ip6_tnl_dev_setup); if (!ip6n->fb_tnl_dev) goto err_alloc_dev; dev_net_set(ip6n->fb_tnl_dev, net); ip6n->fb_tnl_dev->rtnl_link_ops = &ip6_link_ops; + /* FB netdevice is special: we have one, and only one per netns. + * Allowing to move it to another netns is clearly unsafe. + */ + ip6n->fb_tnl_dev->features |= NETIF_F_NETNS_LOCAL; err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev); if (err < 0) @@ -1788,10 +2241,8 @@ static void __net_exit ip6_tnl_exit_net(struct net *net) { - struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); - rtnl_lock(); - ip6_tnl_destroy_tunnels(ip6n); + ip6_tnl_destroy_tunnels(net); rtnl_unlock(); }