--- zzzz-none-000/linux-3.10.107/net/ipv4/ip_tunnel.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/net/ipv4/ip_tunnel.c 2021-02-04 17:41:59.000000000 +0000 @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -54,6 +55,7 @@ #include #include #include +#include #if IS_ENABLED(CONFIG_IPV6) #include @@ -61,57 +63,66 @@ #include #endif -static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn, - __be32 key, __be32 remote) +static unsigned int ip_tunnel_hash(__be32 key, __be32 remote) { return hash_32((__force u32)key ^ (__force u32)remote, IP_TNL_HASH_BITS); } -/* Often modified stats are per cpu, other are shared (netdev->stats) */ -struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *tot) +static void __tunnel_dst_set(struct ip_tunnel_dst *idst, + struct dst_entry *dst, __be32 saddr) { - int i; + struct dst_entry *old_dst; - for_each_possible_cpu(i) { - const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); - u64 rx_packets, rx_bytes, tx_packets, tx_bytes; - unsigned int start; - - do { - start = u64_stats_fetch_begin_bh(&tstats->syncp); - rx_packets = tstats->rx_packets; - tx_packets = tstats->tx_packets; - rx_bytes = tstats->rx_bytes; - tx_bytes = tstats->tx_bytes; - } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); - - tot->rx_packets += rx_packets; - tot->tx_packets += tx_packets; - tot->rx_bytes += rx_bytes; - tot->tx_bytes += tx_bytes; - } - - tot->multicast = dev->stats.multicast; - - tot->rx_crc_errors = dev->stats.rx_crc_errors; - tot->rx_fifo_errors = dev->stats.rx_fifo_errors; - tot->rx_length_errors = dev->stats.rx_length_errors; - tot->rx_frame_errors = dev->stats.rx_frame_errors; - tot->rx_errors = dev->stats.rx_errors; - - tot->tx_fifo_errors = dev->stats.tx_fifo_errors; - tot->tx_carrier_errors = dev->stats.tx_carrier_errors; - tot->tx_dropped = dev->stats.tx_dropped; - tot->tx_aborted_errors = dev->stats.tx_aborted_errors; - tot->tx_errors = dev->stats.tx_errors; + dst_clone(dst); + old_dst = xchg((__force struct dst_entry **)&idst->dst, dst); + dst_release(old_dst); + idst->saddr = saddr; +} - tot->collisions = dev->stats.collisions; +static noinline void tunnel_dst_set(struct ip_tunnel *t, + struct dst_entry *dst, __be32 saddr) +{ + __tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr); +} - return tot; +static void tunnel_dst_reset(struct ip_tunnel *t) +{ + tunnel_dst_set(t, NULL, 0); +} + +void ip_tunnel_dst_reset_all(struct ip_tunnel *t) +{ + int i; + + for_each_possible_cpu(i) + __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0); +} +EXPORT_SYMBOL(ip_tunnel_dst_reset_all); + +static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, + u32 cookie, __be32 *saddr) +{ + struct ip_tunnel_dst *idst; + struct dst_entry *dst; + + rcu_read_lock(); + idst = raw_cpu_ptr(t->dst_cache); + dst = rcu_dereference(idst->dst); + if (dst && !atomic_inc_not_zero(&dst->__refcnt)) + dst = NULL; + if (dst) { + if (!dst->obsolete || dst->ops->check(dst, cookie)) { + *saddr = idst->saddr; + } else { + tunnel_dst_reset(t); + dst_release(dst); + dst = NULL; + } + } + rcu_read_unlock(); + return (struct rtable *)dst; } -EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64); static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p, __be16 flags, __be32 key) @@ -146,7 +157,7 @@ struct ip_tunnel *t, *cand = NULL; struct hlist_head *head; - hash = ip_tunnel_hash(itn, key, remote); + hash = ip_tunnel_hash(key, remote); head = &itn->tunnels[hash]; hlist_for_each_entry_rcu(t, head, hash_node) { @@ -179,7 +190,7 @@ cand = t; } - hash = ip_tunnel_hash(itn, key, 0); + hash = ip_tunnel_hash(key, 0); head = &itn->tunnels[hash]; hlist_for_each_entry_rcu(t, head, hash_node) { @@ -219,10 +230,13 @@ if (cand) return cand; + t = rcu_dereference(itn->collect_md_tun); + if (t) + return t; + if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP) return netdev_priv(itn->fb_tunnel_dev); - return NULL; } EXPORT_SYMBOL_GPL(ip_tunnel_lookup); @@ -232,13 +246,17 @@ { unsigned int h; __be32 remote; + __be32 i_key = parms->i_key; if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr)) remote = parms->iph.daddr; else remote = 0; - h = ip_tunnel_hash(itn, parms->i_key, remote); + if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI)) + i_key = 0; + + h = ip_tunnel_hash(i_key, remote); return &itn->tunnels[h]; } @@ -246,11 +264,15 @@ { struct hlist_head *head = ip_bucket(itn, &t->parms); + if (t->collect_md) + rcu_assign_pointer(itn->collect_md_tun, t); hlist_add_head_rcu(&t->hash_node, head); } -static void ip_tunnel_del(struct ip_tunnel *t) +static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t) { + if (t->collect_md) + rcu_assign_pointer(itn->collect_md_tun, NULL); hlist_del_init_rcu(&t->hash_node); } @@ -261,6 +283,7 @@ __be32 remote = parms->iph.daddr; __be32 local = parms->iph.saddr; __be32 key = parms->i_key; + __be16 flags = parms->i_flags; int link = parms->link; struct ip_tunnel *t = NULL; struct hlist_head *head = ip_bucket(itn, parms); @@ -268,9 +291,9 @@ hlist_for_each_entry_rcu(t, head, hash_node) { if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr && - key == t->parms.i_key && link == t->parms.link && - type == t->dev->type) + type == t->dev->type && + ip_tunnel_key_match(&t->parms, flags, key)) break; } return t; @@ -297,7 +320,7 @@ } ASSERT_RTNL(); - dev = alloc_netdev(ops->priv_size, name, ops->setup); + dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup); if (!dev) { err = -ENOMEM; goto failed; @@ -308,6 +331,7 @@ tunnel = netdev_priv(dev); tunnel->parms = *parms; + tunnel->net = net; err = register_netdevice(dev); if (err) @@ -321,11 +345,10 @@ return ERR_PTR(err); } -static inline struct rtable *ip_route_output_tunnel(struct net *net, - struct flowi4 *fl4, - int proto, - __be32 daddr, __be32 saddr, - __be32 key, __u8 tos, int oif) +static inline void init_tunnel_flow(struct flowi4 *fl4, + int proto, + __be32 daddr, __be32 saddr, + __be32 key, __u8 tos, int oif) { memset(fl4, 0, sizeof(*fl4)); fl4->flowi4_oif = oif; @@ -334,7 +357,6 @@ fl4->flowi4_tos = tos; fl4->flowi4_proto = proto; fl4->fl4_gre_key = key; - return ip_route_output_key(net, fl4); } static int ip_tunnel_bind_dev(struct net_device *dev) @@ -353,14 +375,14 @@ struct flowi4 fl4; struct rtable *rt; - rt = ip_route_output_tunnel(dev_net(dev), &fl4, - tunnel->parms.iph.protocol, - iph->daddr, iph->saddr, - tunnel->parms.o_key, - RT_TOS(iph->tos), - tunnel->parms.link); + init_tunnel_flow(&fl4, iph->protocol, iph->daddr, + iph->saddr, tunnel->parms.o_key, + RT_TOS(iph->tos), tunnel->parms.link); + rt = ip_route_output_key(tunnel->net, &fl4); + if (!IS_ERR(rt)) { tdev = rt->dst.dev; + tunnel_dst_set(tunnel, &rt->dst, fl4.saddr); ip_rt_put(rt); } if (dev->type != ARPHRD_ETHER) @@ -368,13 +390,12 @@ } if (!tdev && tunnel->parms.link) - tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); + tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link); if (tdev) { hlen = tdev->hard_header_len + tdev->needed_headroom; mtu = tdev->mtu; } - dev->iflink = tunnel->parms.link; dev->needed_headroom = t_hlen + hlen; mtu -= (dev->hard_header_len + t_hlen); @@ -389,14 +410,13 @@ struct ip_tunnel_net *itn, struct ip_tunnel_parm *parms) { - struct ip_tunnel *nt, *fbt; + struct ip_tunnel *nt; struct net_device *dev; BUG_ON(!itn->fb_tunnel_dev); - fbt = netdev_priv(itn->fb_tunnel_dev); dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms); if (IS_ERR(dev)) - return NULL; + return ERR_CAST(dev); dev->mtu = ip_tunnel_bind_dev(dev); @@ -406,24 +426,15 @@ } int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, - const struct tnl_ptk_info *tpi, int hdr_len, bool log_ecn_error) + const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst, + bool log_ecn_error) { - struct pcpu_tstats *tstats; + struct pcpu_sw_netstats *tstats; const struct iphdr *iph = ip_hdr(skb); int err; - secpath_reset(skb); - - skb->protocol = tpi->proto; - - skb->mac_header = skb->network_header; - __pskb_pull(skb, hdr_len); - skb_postpull_rcsum(skb, skb_transport_header(skb), tunnel->hlen); #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(iph->daddr)) { - /* Looped back packet, drop it! */ - if (rt_is_output_route(skb_rtable(skb))) - goto drop; tunnel->dev->stats.multicast++; skb->pkt_type = PACKET_BROADCAST; } @@ -446,23 +457,8 @@ tunnel->i_seqno = ntohl(tpi->seq) + 1; } - /* Warning: All skb pointers will be invalidated! */ - if (tunnel->dev->type == ARPHRD_ETHER) { - if (!pskb_may_pull(skb, ETH_HLEN)) { - tunnel->dev->stats.rx_length_errors++; - tunnel->dev->stats.rx_errors++; - goto drop; - } - - iph = ip_hdr(skb); - skb->protocol = eth_type_trans(skb, tunnel->dev); - skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); - } - - skb->pkt_type = PACKET_HOST; - __skb_tunnel_rx(skb, tunnel->dev); - skb_reset_network_header(skb); + err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { if (log_ecn_error) @@ -481,6 +477,18 @@ tstats->rx_bytes += skb->len; u64_stats_update_end(&tstats->syncp); + skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev))); + + if (tunnel->dev->type == ARPHRD_ETHER) { + skb->protocol = eth_type_trans(skb, tunnel->dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); + } else { + skb->dev = tunnel->dev; + } + + if (tun_dst) + skb_dst_set(skb, (struct dst_entry *)tun_dst); + gro_cells_receive(&tunnel->gro_cells, skb); return 0; @@ -490,8 +498,107 @@ } EXPORT_SYMBOL_GPL(ip_tunnel_rcv); +static int ip_encap_hlen(struct ip_tunnel_encap *e) +{ + const struct ip_tunnel_encap_ops *ops; + int hlen = -EINVAL; + + if (e->type == TUNNEL_ENCAP_NONE) + return 0; + + if (e->type >= MAX_IPTUN_ENCAP_OPS) + return -EINVAL; + + rcu_read_lock(); + ops = rcu_dereference(iptun_encaps[e->type]); + if (likely(ops && ops->encap_hlen)) + hlen = ops->encap_hlen(e); + rcu_read_unlock(); + + return hlen; +} + +const struct ip_tunnel_encap_ops __rcu * + iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly; + +int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops, + unsigned int num) +{ + if (num >= MAX_IPTUN_ENCAP_OPS) + return -ERANGE; + + return !cmpxchg((const struct ip_tunnel_encap_ops **) + &iptun_encaps[num], + NULL, ops) ? 0 : -1; +} +EXPORT_SYMBOL(ip_tunnel_encap_add_ops); + +int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops, + unsigned int num) +{ + int ret; + + if (num >= MAX_IPTUN_ENCAP_OPS) + return -ERANGE; + + ret = (cmpxchg((const struct ip_tunnel_encap_ops **) + &iptun_encaps[num], + ops, NULL) == ops) ? 0 : -1; + + synchronize_net(); + + return ret; +} +EXPORT_SYMBOL(ip_tunnel_encap_del_ops); + +int ip_tunnel_encap_setup(struct ip_tunnel *t, + struct ip_tunnel_encap *ipencap) +{ + int hlen; + + memset(&t->encap, 0, sizeof(t->encap)); + + hlen = ip_encap_hlen(ipencap); + if (hlen < 0) + return hlen; + + t->encap.type = ipencap->type; + t->encap.sport = ipencap->sport; + t->encap.dport = ipencap->dport; + t->encap.flags = ipencap->flags; + + t->encap_hlen = hlen; + t->hlen = t->encap_hlen + t->tun_hlen; + + return 0; +} +EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup); + +int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t, + u8 *protocol, struct flowi4 *fl4) +{ + const struct ip_tunnel_encap_ops *ops; + int ret = -EINVAL; + + if (t->encap.type == TUNNEL_ENCAP_NONE) + return 0; + + if (t->encap.type >= MAX_IPTUN_ENCAP_OPS) + return -EINVAL; + + rcu_read_lock(); + ops = rcu_dereference(iptun_encaps[t->encap.type]); + if (likely(ops && ops->build_header)) + ret = ops->build_header(skb, &t->encap, protocol, fl4); + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL(ip_tunnel_encap); + static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, - struct rtable *rt, __be16 df) + struct rtable *rt, __be16 df, + const struct iphdr *inner_iph) { struct ip_tunnel *tunnel = netdev_priv(dev); int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len; @@ -508,7 +615,9 @@ if (skb->protocol == htons(ETH_P_IP)) { if (!skb_is_gso(skb) && - (df & htons(IP_DF)) && mtu < pkt_size) { + (inner_iph->frag_off & htons(IP_DF)) && + mtu < pkt_size) { + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); return -E2BIG; } @@ -538,27 +647,29 @@ } void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, - const struct iphdr *tnl_params) + const struct iphdr *tnl_params, u8 protocol) { struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *inner_iph; - struct iphdr *iph; struct flowi4 fl4; u8 tos, ttl; __be16 df; struct rtable *rt; /* Route to the other host */ - struct net_device *tdev; /* Device to other host */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst; + int err; + bool connected; inner_iph = (const struct iphdr *)skb_inner_network_header(skb); + connected = (tunnel->parms.iph.daddr != 0); + + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - memset(IPCB(skb), 0, sizeof(*IPCB(skb))); dst = tnl_params->daddr; if (dst == 0) { /* NBMA tunnel */ - if (skb_dst(skb) == NULL) { + if (!skb_dst(skb)) { dev->stats.tx_fifo_errors++; goto tx_error; } @@ -576,7 +687,7 @@ neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->daddr); - if (neigh == NULL) + if (!neigh) goto tx_error; addr6 = (const struct in6_addr *)&neigh->primary_key; @@ -600,37 +711,48 @@ #endif else goto tx_error; + + connected = false; } tos = tnl_params->tos; if (tos & 0x1) { tos &= ~0x1; - if (skb->protocol == htons(ETH_P_IP)) + if (skb->protocol == htons(ETH_P_IP)) { tos = inner_iph->tos; - else if (skb->protocol == htons(ETH_P_IPV6)) + connected = false; + } else if (skb->protocol == htons(ETH_P_IPV6)) { tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); + connected = false; + } } - rt = ip_route_output_tunnel(dev_net(dev), &fl4, - tunnel->parms.iph.protocol, - dst, tnl_params->saddr, - tunnel->parms.o_key, - RT_TOS(tos), - tunnel->parms.link); - if (IS_ERR(rt)) { - dev->stats.tx_carrier_errors++; + init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, + tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link); + + if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) goto tx_error; + + rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL; + + if (!rt) { + rt = ip_route_output_key(tunnel->net, &fl4); + + if (IS_ERR(rt)) { + dev->stats.tx_carrier_errors++; + goto tx_error; + } + if (connected) + tunnel_dst_set(tunnel, &rt->dst, fl4.saddr); } - tdev = rt->dst.dev; - if (tdev == dev) { + if (rt->dst.dev == dev) { ip_rt_put(rt); dev->stats.collisions++; goto tx_error; } - - if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) { + if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) { ip_rt_put(rt); goto tx_error; } @@ -640,12 +762,12 @@ tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { tunnel->err_count--; - memset(IPCB(skb), 0, sizeof(*IPCB(skb))); dst_link_failure(skb); } else tunnel->err_count = 0; } + tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); ttl = tnl_params->ttl; if (ttl == 0) { if (skb->protocol == htons(ETH_P_IP)) @@ -662,38 +784,22 @@ if (skb->protocol == htons(ETH_P_IP)) df |= (inner_iph->frag_off&htons(IP_DF)); - max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr) - + rt->dst.header_len; + max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) + + rt->dst.header_len + ip_encap_hlen(&tunnel->encap); if (max_headroom > dev->needed_headroom) dev->needed_headroom = max_headroom; if (skb_cow_head(skb, dev->needed_headroom)) { + ip_rt_put(rt); dev->stats.tx_dropped++; - dev_kfree_skb(skb); + kfree_skb(skb); return; } - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - - /* Push down and install the IP header. */ - skb_push(skb, sizeof(struct iphdr)); - skb_reset_network_header(skb); - - iph = ip_hdr(skb); - inner_iph = (const struct iphdr *)skb_inner_network_header(skb); - - iph->version = 4; - iph->ihl = sizeof(struct iphdr) >> 2; - iph->frag_off = df; - iph->protocol = tnl_params->protocol; - iph->tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); - iph->daddr = fl4.daddr; - iph->saddr = fl4.saddr; - iph->ttl = ttl; - __ip_select_ident(iph, skb_shinfo(skb)->gso_segs ?: 1); + err = iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, + tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); + iptunnel_xmit_stats(err, &dev->stats, dev->tstats); - iptunnel_xmit(skb, dev); return; #if IS_ENABLED(CONFIG_IPV6) @@ -702,7 +808,7 @@ #endif tx_error: dev->stats.tx_errors++; - dev_kfree_skb(skb); + kfree_skb(skb); } EXPORT_SYMBOL_GPL(ip_tunnel_xmit); @@ -712,7 +818,7 @@ struct ip_tunnel_parm *p, bool set_mtu) { - ip_tunnel_del(t); + ip_tunnel_del(itn, t); t->parms.iph.saddr = p->iph.saddr; t->parms.iph.daddr = p->iph.daddr; t->parms.i_key = p->i_key; @@ -735,25 +841,25 @@ if (set_mtu) dev->mtu = mtu; } + ip_tunnel_dst_reset_all(t); netdev_state_change(dev); } int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) { int err = 0; - struct ip_tunnel *t; - struct net *net = dev_net(dev); - struct ip_tunnel *tunnel = netdev_priv(dev); - struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id); + struct ip_tunnel *t = netdev_priv(dev); + struct net *net = t->net; + struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id); BUG_ON(!itn->fb_tunnel_dev); switch (cmd) { case SIOCGETTUNNEL: - t = NULL; - if (dev == itn->fb_tunnel_dev) + if (dev == itn->fb_tunnel_dev) { t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); - if (t == NULL) - t = netdev_priv(dev); + if (!t) + t = netdev_priv(dev); + } memcpy(p, &t->parms, sizeof(*p)); break; @@ -764,18 +870,27 @@ goto done; if (p->iph.ttl) p->iph.frag_off |= htons(IP_DF); - if (!(p->i_flags&TUNNEL_KEY)) - p->i_key = 0; - if (!(p->o_flags&TUNNEL_KEY)) - p->o_key = 0; + if (!(p->i_flags & VTI_ISVTI)) { + if (!(p->i_flags & TUNNEL_KEY)) + p->i_key = 0; + if (!(p->o_flags & TUNNEL_KEY)) + p->o_key = 0; + } t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); - if (!t && (cmd == SIOCADDTUNNEL)) - t = ip_tunnel_create(net, itn, p); + if (cmd == SIOCADDTUNNEL) { + if (!t) { + t = ip_tunnel_create(net, itn, p); + err = PTR_ERR_OR_ZERO(t); + break; + } + err = -EEXIST; + break; + } if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { - if (t != NULL) { + if (t) { if (t->dev != dev) { err = -EEXIST; break; @@ -800,8 +915,9 @@ if (t) { err = 0; ip_tunnel_update(itn, t, dev, p, true); - } else - err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); + } else { + err = -ENOENT; + } break; case SIOCDELTUNNEL: @@ -812,7 +928,7 @@ if (dev == itn->fb_tunnel_dev) { err = -ENOENT; t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); - if (t == NULL) + if (!t) goto done; err = -EPERM; if (t == netdev_priv(itn->fb_tunnel_dev)) @@ -832,17 +948,31 @@ } EXPORT_SYMBOL_GPL(ip_tunnel_ioctl); -int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu) +int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict) { struct ip_tunnel *tunnel = netdev_priv(dev); int t_hlen = tunnel->hlen + sizeof(struct iphdr); + int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen; - if (new_mtu < 68 || - new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen) + if (new_mtu < 68) return -EINVAL; + + if (new_mtu > max_mtu) { + if (strict) + return -EINVAL; + + new_mtu = max_mtu; + } + dev->mtu = new_mtu; return 0; } +EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu); + +int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu) +{ + return __ip_tunnel_change_mtu(dev, new_mtu, true); +} EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu); static void ip_tunnel_dev_free(struct net_device *dev) @@ -850,80 +980,109 @@ struct ip_tunnel *tunnel = netdev_priv(dev); gro_cells_destroy(&tunnel->gro_cells); + free_percpu(tunnel->dst_cache); free_percpu(dev->tstats); free_netdev(dev); } void ip_tunnel_dellink(struct net_device *dev, struct list_head *head) { - struct net *net = dev_net(dev); struct ip_tunnel *tunnel = netdev_priv(dev); struct ip_tunnel_net *itn; - itn = net_generic(net, tunnel->ip_tnl_net_id); + itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id); if (itn->fb_tunnel_dev != dev) { - ip_tunnel_del(netdev_priv(dev)); + ip_tunnel_del(itn, netdev_priv(dev)); unregister_netdevice_queue(dev, head); } } EXPORT_SYMBOL_GPL(ip_tunnel_dellink); +struct net *ip_tunnel_get_link_net(const struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + + return tunnel->net; +} +EXPORT_SYMBOL(ip_tunnel_get_link_net); + +int ip_tunnel_get_iflink(const struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + + return tunnel->parms.link; +} +EXPORT_SYMBOL(ip_tunnel_get_iflink); + int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, struct rtnl_link_ops *ops, char *devname) { struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id); struct ip_tunnel_parm parms; + unsigned int i; - itn->tunnels = kzalloc(IP_TNL_HASH_SIZE * sizeof(struct hlist_head), GFP_KERNEL); - if (!itn->tunnels) - return -ENOMEM; + for (i = 0; i < IP_TNL_HASH_SIZE; i++) + INIT_HLIST_HEAD(&itn->tunnels[i]); if (!ops) { itn->fb_tunnel_dev = NULL; return 0; } + memset(&parms, 0, sizeof(parms)); if (devname) strlcpy(parms.name, devname, IFNAMSIZ); rtnl_lock(); itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms); - rtnl_unlock(); - if (IS_ERR(itn->fb_tunnel_dev)) { - kfree(itn->tunnels); - return PTR_ERR(itn->fb_tunnel_dev); + /* FB netdevice is special: we have one, and only one per netns. + * Allowing to move it to another netns is clearly unsafe. + */ + if (!IS_ERR(itn->fb_tunnel_dev)) { + itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; + itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev); + ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev)); } + rtnl_unlock(); - return 0; + return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev); } EXPORT_SYMBOL_GPL(ip_tunnel_init_net); -static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head) +static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head, + struct rtnl_link_ops *ops) { + struct net *net = dev_net(itn->fb_tunnel_dev); + struct net_device *dev, *aux; int h; + for_each_netdev_safe(net, dev, aux) + if (dev->rtnl_link_ops == ops) + unregister_netdevice_queue(dev, head); + for (h = 0; h < IP_TNL_HASH_SIZE; h++) { struct ip_tunnel *t; struct hlist_node *n; struct hlist_head *thead = &itn->tunnels[h]; hlist_for_each_entry_safe(t, n, thead, hash_node) - unregister_netdevice_queue(t->dev, head); + /* If dev is in the same netns, it has already + * been added to the list by the previous loop. + */ + if (!net_eq(dev_net(t->dev), net)) + unregister_netdevice_queue(t->dev, head); } - if (itn->fb_tunnel_dev) - unregister_netdevice_queue(itn->fb_tunnel_dev, head); } -void ip_tunnel_delete_net(struct ip_tunnel_net *itn) +void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops) { LIST_HEAD(list); rtnl_lock(); - ip_tunnel_destroy(itn, &list); + ip_tunnel_destroy(itn, &list, ops); unregister_netdevice_many(&list); rtnl_unlock(); - kfree(itn->tunnels); } EXPORT_SYMBOL_GPL(ip_tunnel_delete_net); @@ -939,9 +1098,15 @@ nt = netdev_priv(dev); itn = net_generic(net, nt->ip_tnl_net_id); - if (ip_tunnel_find(itn, p, dev->type)) - return -EEXIST; + if (nt->collect_md) { + if (rtnl_dereference(itn->collect_md_tun)) + return -EEXIST; + } else { + if (ip_tunnel_find(itn, p, dev->type)) + return -EEXIST; + } + nt->net = net; nt->parms = *p; err = register_netdevice(dev); if (err) @@ -955,7 +1120,6 @@ dev->mtu = mtu; ip_tunnel_add(itn, nt); - out: return err; } @@ -964,23 +1128,21 @@ int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm *p) { - struct ip_tunnel *t, *nt; - struct net *net = dev_net(dev); + struct ip_tunnel *t; struct ip_tunnel *tunnel = netdev_priv(dev); + struct net *net = tunnel->net; struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id); if (dev == itn->fb_tunnel_dev) return -EINVAL; - nt = netdev_priv(dev); - t = ip_tunnel_find(itn, p, dev->type); if (t) { if (t->dev != dev) return -EEXIST; } else { - t = nt; + t = tunnel; if (dev->type != ARPHRD_ETHER) { unsigned int nflags = 0; @@ -1008,35 +1170,49 @@ int err; dev->destructor = ip_tunnel_dev_free; - dev->tstats = alloc_percpu(struct pcpu_tstats); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; + tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); + if (!tunnel->dst_cache) { + free_percpu(dev->tstats); + return -ENOMEM; + } + err = gro_cells_init(&tunnel->gro_cells, dev); if (err) { + free_percpu(tunnel->dst_cache); free_percpu(dev->tstats); return err; } tunnel->dev = dev; + tunnel->net = dev_net(dev); strcpy(tunnel->parms.name, dev->name); iph->version = 4; iph->ihl = 5; + if (tunnel->collect_md) { + dev->features |= NETIF_F_NETNS_LOCAL; + netif_keep_dst(dev); + } return 0; } EXPORT_SYMBOL_GPL(ip_tunnel_init); void ip_tunnel_uninit(struct net_device *dev) { - struct net *net = dev_net(dev); struct ip_tunnel *tunnel = netdev_priv(dev); + struct net *net = tunnel->net; struct ip_tunnel_net *itn; itn = net_generic(net, tunnel->ip_tnl_net_id); /* fb_tunnel_dev will be unregisted in net-exit call. */ if (itn->fb_tunnel_dev != dev) - ip_tunnel_del(netdev_priv(dev)); + ip_tunnel_del(itn, netdev_priv(dev)); + + ip_tunnel_dst_reset_all(tunnel); } EXPORT_SYMBOL_GPL(ip_tunnel_uninit);