--- zzzz-none-000/linux-3.10.107/net/ipv4/icmp.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/net/ipv4/icmp.c 2021-02-04 17:41:59.000000000 +0000 @@ -96,6 +96,7 @@ #include #include #include +#include /* * Build xmit assembly blocks @@ -190,7 +191,7 @@ */ struct icmp_control { - void (*handler)(struct sk_buff *skb); + bool (*handler)(struct sk_buff *skb); short error; /* This ICMP is classed as an error message */ }; @@ -205,7 +206,7 @@ */ static struct sock *icmp_sk(struct net *net) { - return net->ipv4.icmp_sk[smp_processor_id()]; + return *this_cpu_ptr(net->ipv4.icmp_sk); } static inline struct sock *icmp_xmit_lock(struct net *net) @@ -231,12 +232,62 @@ spin_unlock_bh(&sk->sk_lock.slock); } +int sysctl_icmp_msgs_per_sec __read_mostly = 1000; +int sysctl_icmp_msgs_burst __read_mostly = 50; + +static struct { + spinlock_t lock; + u32 credit; + u32 stamp; +} icmp_global = { + .lock = __SPIN_LOCK_UNLOCKED(icmp_global.lock), +}; + +/** + * icmp_global_allow - Are we allowed to send one more ICMP message ? + * + * Uses a token bucket to limit our ICMP messages to sysctl_icmp_msgs_per_sec. + * Returns false if we reached the limit and can not send another packet. + * Note: called with BH disabled + */ +bool icmp_global_allow(void) +{ + u32 credit, delta, incr = 0, now = (u32)jiffies; + bool rc = false; + + /* Check if token bucket is empty and cannot be refilled + * without taking the spinlock. + */ + if (!icmp_global.credit) { + delta = min_t(u32, now - icmp_global.stamp, HZ); + if (delta < HZ / 50) + return false; + } + + spin_lock(&icmp_global.lock); + delta = min_t(u32, now - icmp_global.stamp, HZ); + if (delta >= HZ / 50) { + incr = sysctl_icmp_msgs_per_sec * delta / HZ ; + if (incr) + icmp_global.stamp = now; + } + credit = min_t(u32, icmp_global.credit + incr, sysctl_icmp_msgs_burst); + if (credit) { + credit--; + rc = true; + } + icmp_global.credit = credit; + spin_unlock(&icmp_global.lock); + return rc; +} +EXPORT_SYMBOL(icmp_global_allow); + /* * Send an ICMP frame. */ -static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, - struct flowi4 *fl4, int type, int code) +static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, + struct flowi4 *fl4, int type, int code) { struct dst_entry *dst = &rt->dst; bool rc = true; @@ -253,8 +304,15 @@ goto out; /* Limit if icmp type is enabled in ratemask. */ - if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) { - struct inet_peer *peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, 1); + if (!((1 << type) & net->ipv4.sysctl_icmp_ratemask)) + goto out; + + rc = false; + if (icmp_global_allow()) { + int vif = l3mdev_master_ifindex(dst->dev); + struct inet_peer *peer; + + peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1); rc = inet_peer_xrlim_allow(peer, net->ipv4.sysctl_icmp_ratelimit); if (peer) @@ -337,22 +395,27 @@ struct sock *sk; struct inet_sock *inet; __be32 daddr, saddr; + u32 mark = IP4_REPLY_MARK(net, skb->mark); if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb)) return; sk = icmp_xmit_lock(net); - if (sk == NULL) + if (!sk) return; inet = inet_sk(sk); icmp_param->data.icmph.checksum = 0; inet->tos = ip_hdr(skb)->tos; + sk->sk_mark = mark; daddr = ipc.addr = ip_hdr(skb)->saddr; saddr = fib_compute_spec_dst(skb); ipc.opt = NULL; ipc.tx_flags = 0; + ipc.ttl = 0; + ipc.tos = -1; + if (icmp_param->replyopts.opt.opt.optlen) { ipc.opt = &icmp_param->replyopts.opt; if (ipc.opt->opt.srr) @@ -361,8 +424,10 @@ memset(&fl4, 0, sizeof(fl4)); fl4.daddr = daddr; fl4.saddr = saddr; + fl4.flowi4_mark = mark; fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); fl4.flowi4_proto = IPPROTO_ICMP; + fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev); security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) @@ -375,11 +440,27 @@ icmp_xmit_unlock(sk); } +#ifdef CONFIG_IP_ROUTE_MULTIPATH + +/* Source and destination is swapped. See ip_multipath_icmp_hash */ +static int icmp_multipath_hash_skb(const struct sk_buff *skb) +{ + const struct iphdr *iph = ip_hdr(skb); + + return fib_multipath_hash(iph->daddr, iph->saddr); +} + +#else + +#define icmp_multipath_hash_skb(skb) (-1) + +#endif + static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4, struct sk_buff *skb_in, const struct iphdr *iph, - __be32 saddr, u8 tos, + __be32 saddr, u8 tos, u32 mark, int type, int code, struct icmp_bxm *param) { @@ -391,12 +472,16 @@ fl4->daddr = (param->replyopts.opt.opt.srr ? param->replyopts.opt.opt.faddr : iph->saddr); fl4->saddr = saddr; + fl4->flowi4_mark = mark; fl4->flowi4_tos = RT_TOS(tos); fl4->flowi4_proto = IPPROTO_ICMP; fl4->fl4_icmp_type = type; fl4->fl4_icmp_code = code; + fl4->flowi4_oif = l3mdev_master_ifindex(skb_in->dev); + security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4)); - rt = __ip_route_output_key(net, fl4); + rt = __ip_route_output_key_hash(net, fl4, + icmp_multipath_hash_skb(skb_in)); if (IS_ERR(rt)) return rt; @@ -417,7 +502,8 @@ if (err) goto relookup_failed; - if (inet_addr_type(net, fl4_dec.saddr) == RTN_LOCAL) { + if (inet_addr_type_dev_table(net, skb_in->dev, + fl4_dec.saddr) == RTN_LOCAL) { rt2 = __ip_route_output_key(net, &fl4_dec); if (IS_ERR(rt2)) err = PTR_ERR(rt2); @@ -433,6 +519,7 @@ } /* Ugh! */ orefdst = skb_in->_skb_refdst; /* save old refdst */ + skb_dst_set(skb_in, NULL); err = ip_route_input(skb_in, fl4_dec.daddr, fl4_dec.saddr, RT_TOS(tos), rt2->dst.dev); @@ -482,12 +569,13 @@ { struct iphdr *iph; int room; - struct icmp_bxm icmp_param; + struct icmp_bxm *icmp_param; struct rtable *rt = skb_rtable(skb_in); struct ipcm_cookie ipc; struct flowi4 fl4; __be32 saddr; u8 tos; + u32 mark; struct net *net; struct sock *sk; @@ -503,7 +591,8 @@ iph = ip_hdr(skb_in); if ((u8 *)iph < skb_in->head || - (skb_in->network_header + sizeof(*iph)) > skb_in->tail) + (skb_network_header(skb_in) + sizeof(*iph)) > + skb_tail_pointer(skb_in)) goto out; /* @@ -544,7 +633,7 @@ skb_in->data, sizeof(_inner_type), &_inner_type); - if (itp == NULL) + if (!itp) goto out; /* @@ -557,10 +646,14 @@ } } - sk = icmp_xmit_lock(net); - if (sk == NULL) + icmp_param = kmalloc(sizeof(*icmp_param), GFP_ATOMIC); + if (!icmp_param) return; + sk = icmp_xmit_lock(net); + if (!sk) + goto out_free; + /* * Construct source address and options. */ @@ -584,8 +677,9 @@ tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) | IPTOS_PREC_INTERNETCONTROL) : iph->tos; + mark = IP4_REPLY_MARK(net, skb_in->mark); - if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in)) + if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb_in)) goto out_unlock; @@ -593,19 +687,22 @@ * Prepare data for ICMP header. */ - icmp_param.data.icmph.type = type; - icmp_param.data.icmph.code = code; - icmp_param.data.icmph.un.gateway = info; - icmp_param.data.icmph.checksum = 0; - icmp_param.skb = skb_in; - icmp_param.offset = skb_network_offset(skb_in); + icmp_param->data.icmph.type = type; + icmp_param->data.icmph.code = code; + icmp_param->data.icmph.un.gateway = info; + icmp_param->data.icmph.checksum = 0; + icmp_param->skb = skb_in; + icmp_param->offset = skb_network_offset(skb_in); inet_sk(sk)->tos = tos; + sk->sk_mark = mark; ipc.addr = iph->saddr; - ipc.opt = &icmp_param.replyopts.opt; + ipc.opt = &icmp_param->replyopts.opt; ipc.tx_flags = 0; + ipc.ttl = 0; + ipc.tos = -1; - rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, - type, code, &icmp_param); + rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark, + type, code, icmp_param); if (IS_ERR(rt)) goto out_unlock; @@ -617,19 +714,21 @@ room = dst_mtu(&rt->dst); if (room > 576) room = 576; - room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen; + room -= sizeof(struct iphdr) + icmp_param->replyopts.opt.opt.optlen; room -= sizeof(struct icmphdr); - icmp_param.data_len = skb_in->len - icmp_param.offset; - if (icmp_param.data_len > room) - icmp_param.data_len = room; - icmp_param.head_len = sizeof(struct icmphdr); + icmp_param->data_len = skb_in->len - icmp_param->offset; + if (icmp_param->data_len > room) + icmp_param->data_len = room; + icmp_param->head_len = sizeof(struct icmphdr); - icmp_push_reply(&icmp_param, &fl4, &ipc, &rt); + icmp_push_reply(icmp_param, &fl4, &ipc, &rt); ende: ip_rt_put(rt); out_unlock: icmp_xmit_unlock(sk); +out_free: + kfree(icmp_param); out:; } EXPORT_SYMBOL(icmp_send); @@ -644,23 +743,34 @@ /* Checkin full IP header plus 8 bytes of protocol to * avoid additional coding at protocol handlers. */ - if (!pskb_may_pull(skb, iph->ihl * 4 + 8)) + if (!pskb_may_pull(skb, iph->ihl * 4 + 8)) { + ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS); return; + } raw_icmp_error(skb, protocol, info); - rcu_read_lock(); ipprot = rcu_dereference(inet_protos[protocol]); if (ipprot && ipprot->err_handler) ipprot->err_handler(skb, info); +} + +static bool icmp_tag_validation(int proto) +{ + bool ok; + + rcu_read_lock(); + ok = rcu_dereference(inet_protos[proto])->icmp_strict_tag_validation; rcu_read_unlock(); + return ok; } /* - * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, and ICMP_QUENCH. + * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, ICMP_QUENCH, and + * ICMP_PARAMETERPROB. */ -static void icmp_unreach(struct sk_buff *skb) +static bool icmp_unreach(struct sk_buff *skb) { const struct iphdr *iph; struct icmphdr *icmph; @@ -692,16 +802,28 @@ case ICMP_PORT_UNREACH: break; case ICMP_FRAG_NEEDED: - if (ipv4_config.no_pmtu_disc) { - LIMIT_NETDEBUG(KERN_INFO pr_fmt("%pI4: fragmentation needed and DF set\n"), - &iph->daddr); - } else { + /* for documentation of the ip_no_pmtu_disc + * values please see + * Documentation/networking/ip-sysctl.txt + */ + switch (net->ipv4.sysctl_ip_no_pmtu_disc) { + default: + net_dbg_ratelimited("%pI4: fragmentation needed and DF set\n", + &iph->daddr); + break; + case 2: + goto out; + case 3: + if (!icmp_tag_validation(iph->protocol)) + goto out; + /* fall through */ + case 0: info = ntohs(icmph->un.frag.mtu); } break; case ICMP_SR_FAILED: - LIMIT_NETDEBUG(KERN_INFO pr_fmt("%pI4: Source Route Failed\n"), - &iph->daddr); + net_dbg_ratelimited("%pI4: Source Route Failed\n", + &iph->daddr); break; default: break; @@ -730,7 +852,7 @@ */ if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses && - inet_addr_type(net, iph->daddr) == RTN_BROADCAST) { + inet_addr_type_dev_table(net, skb->dev, iph->daddr) == RTN_BROADCAST) { net_warn_ratelimited("%pI4 sent an invalid ICMP type %u, code %u error to a broadcast: %pI4 on %s\n", &ip_hdr(skb)->saddr, icmph->type, icmph->code, @@ -741,10 +863,10 @@ icmp_socket_deliver(skb, info); out: - return; + return true; out_err: ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); - goto out; + return false; } @@ -752,17 +874,20 @@ * Handle ICMP_REDIRECT. */ -static void icmp_redirect(struct sk_buff *skb) +static bool icmp_redirect(struct sk_buff *skb) { if (skb->len < sizeof(struct iphdr)) { ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS); - return; + return false; } - if (!pskb_may_pull(skb, sizeof(struct iphdr))) - return; + if (!pskb_may_pull(skb, sizeof(struct iphdr))) { + /* there aught to be a stat */ + return false; + } icmp_socket_deliver(skb, icmp_hdr(skb)->un.gateway); + return true; } /* @@ -777,7 +902,7 @@ * See also WRT handling of options once they are done and working. */ -static void icmp_echo(struct sk_buff *skb) +static bool icmp_echo(struct sk_buff *skb) { struct net *net; @@ -793,6 +918,8 @@ icmp_param.head_len = sizeof(struct icmphdr); icmp_reply(&icmp_param, skb); } + /* should there be an ICMP stat for ignored echos? */ + return true; } /* @@ -802,7 +929,7 @@ * MUST be accurate to a few minutes. * MUST be updated at least at 15Hz. */ -static void icmp_timestamp(struct sk_buff *skb) +static bool icmp_timestamp(struct sk_buff *skb) { struct timespec tv; struct icmp_bxm icmp_param; @@ -829,15 +956,17 @@ icmp_param.data_len = 0; icmp_param.head_len = sizeof(struct icmphdr) + 12; icmp_reply(&icmp_param, skb); -out: - return; + return true; + out_err: ICMP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS); - goto out; + return false; } -static void icmp_discard(struct sk_buff *skb) +static bool icmp_discard(struct sk_buff *skb) { + /* pretend it was a success */ + return true; } /* @@ -848,6 +977,7 @@ struct icmphdr *icmph; struct rtable *rt = skb_rtable(skb); struct net *net = dev_net(rt->dst.dev); + bool success; if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { struct sec_path *sp = skb_sec_path(skb); @@ -871,16 +1001,8 @@ ICMP_INC_STATS_BH(net, ICMP_MIB_INMSGS); - switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: - if (!csum_fold(skb->csum)) - break; - /* fall through */ - case CHECKSUM_NONE: - skb->csum = 0; - if (__skb_checksum_complete(skb)) - goto csum_error; - } + if (skb_checksum_simple_validate(skb)) + goto csum_error; if (!pskb_pull(skb, sizeof(*icmph))) goto error; @@ -922,7 +1044,12 @@ } } - icmp_pointers[icmph->type].handler(skb); + success = icmp_pointers[icmph->type].handler(skb); + + if (success) { + consume_skb(skb); + return 0; + } drop: kfree_skb(skb); @@ -937,7 +1064,8 @@ void icmp_err(struct sk_buff *skb, u32 info) { struct iphdr *iph = (struct iphdr *)skb->data; - struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2)); + int offset = iph->ihl<<2; + struct icmphdr *icmph = (struct icmphdr *)(skb->data + offset); int type = icmp_hdr(skb)->type; int code = icmp_hdr(skb)->code; struct net *net = dev_net(skb->dev); @@ -947,7 +1075,7 @@ * triggered by ICMP_ECHOREPLY which sent from kernel. */ if (icmph->type != ICMP_ECHOREPLY) { - ping_err(skb, info); + ping_err(skb, offset, info); return; } @@ -1036,8 +1164,8 @@ int i; for_each_possible_cpu(i) - inet_ctl_sock_destroy(net->ipv4.icmp_sk[i]); - kfree(net->ipv4.icmp_sk); + inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.icmp_sk, i)); + free_percpu(net->ipv4.icmp_sk); net->ipv4.icmp_sk = NULL; } @@ -1045,9 +1173,8 @@ { int i, err; - net->ipv4.icmp_sk = - kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL); - if (net->ipv4.icmp_sk == NULL) + net->ipv4.icmp_sk = alloc_percpu(struct sock *); + if (!net->ipv4.icmp_sk) return -ENOMEM; for_each_possible_cpu(i) { @@ -1058,7 +1185,7 @@ if (err < 0) goto fail; - net->ipv4.icmp_sk[i] = sk; + *per_cpu_ptr(net->ipv4.icmp_sk, i) = sk; /* Enough space for 2 64K ICMP packets, including * sk_buff/skb_shared_info struct overhead. @@ -1099,8 +1226,8 @@ fail: for_each_possible_cpu(i) - inet_ctl_sock_destroy(net->ipv4.icmp_sk[i]); - kfree(net->ipv4.icmp_sk); + inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.icmp_sk, i)); + free_percpu(net->ipv4.icmp_sk); return err; }