--- zzzz-none-000/linux-3.10.107/include/linux/skbuff.h 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/include/linux/skbuff.h 2021-02-04 17:41:59.000000000 +0000 @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include #include @@ -28,20 +30,129 @@ #include #include #include -#include #include #include #include -#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_AVM_PA +/* Cannot include avm_pa.h */ +struct avm_pa_pkt_info; +/* transitional define to allow older avm_pa tags, to be removed */ +#define SKBUFF_H_NO_AVM_PA_H 1 +#endif + +#ifdef CONFIG_AVM_GENERIC_CONNTRACK +#include +#endif + +/* A. Checksumming of received packets by device. + * + * CHECKSUM_NONE: + * + * Device failed to checksum this packet e.g. due to lack of capabilities. + * The packet contains full (though not verified) checksum in packet but + * not in skb->csum. Thus, skb->csum is undefined in this case. + * + * CHECKSUM_UNNECESSARY: + * + * The hardware you're dealing with doesn't calculate the full checksum + * (as in CHECKSUM_COMPLETE), but it does parse headers and verify checksums + * for specific protocols. For such packets it will set CHECKSUM_UNNECESSARY + * if their checksums are okay. skb->csum is still undefined in this case + * though. It is a bad option, but, unfortunately, nowadays most vendors do + * this. Apparently with the secret goal to sell you new devices, when you + * will add new protocol to your host, f.e. IPv6 8) + * + * CHECKSUM_UNNECESSARY is applicable to following protocols: + * TCP: IPv6 and IPv4. + * UDP: IPv4 and IPv6. A device may apply CHECKSUM_UNNECESSARY to a + * zero UDP checksum for either IPv4 or IPv6, the networking stack + * may perform further validation in this case. + * GRE: only if the checksum is present in the header. + * SCTP: indicates the CRC in SCTP header has been validated. + * + * skb->csum_level indicates the number of consecutive checksums found in + * the packet minus one that have been verified as CHECKSUM_UNNECESSARY. + * For instance if a device receives an IPv6->UDP->GRE->IPv4->TCP packet + * and a device is able to verify the checksums for UDP (possibly zero), + * GRE (checksum flag is set), and TCP-- skb->csum_level would be set to + * two. If the device were only able to verify the UDP checksum and not + * GRE, either because it doesn't support GRE checksum of because GRE + * checksum is bad, skb->csum_level would be set to zero (TCP checksum is + * not considered in this case). + * + * CHECKSUM_COMPLETE: + * + * This is the most generic way. The device supplied checksum of the _whole_ + * packet as seen by netif_rx() and fills out in skb->csum. Meaning, the + * hardware doesn't need to parse L3/L4 headers to implement this. + * + * Note: Even if device supports only some protocols, but is able to produce + * skb->csum, it MUST use CHECKSUM_COMPLETE, not CHECKSUM_UNNECESSARY. + * + * CHECKSUM_PARTIAL: + * + * A checksum is set up to be offloaded to a device as described in the + * output description for CHECKSUM_PARTIAL. This may occur on a packet + * received directly from another Linux OS, e.g., a virtualized Linux kernel + * on the same host, or it may be set in the input path in GRO or remote + * checksum offload. For the purposes of checksum verification, the checksum + * referred to by skb->csum_start + skb->csum_offset and any preceding + * checksums in the packet are considered verified. 
Any checksums in the + * packet that are after the checksum being offloaded are not considered to + * be verified. + * + * B. Checksumming on output. + * + * CHECKSUM_NONE: + * + * The skb was already checksummed by the protocol, or a checksum is not + * required. + * + * CHECKSUM_PARTIAL: + * + * The device is required to checksum the packet as seen by hard_start_xmit() + * from skb->csum_start up to the end, and to record/write the checksum at + * offset skb->csum_start + skb->csum_offset. + * + * The device must show its capabilities in dev->features, set up at device + * setup time, e.g. netdev_features.h: + * + * NETIF_F_HW_CSUM - It's a clever device, it's able to checksum everything. + * NETIF_F_IP_CSUM - Device is dumb, it's able to checksum only TCP/UDP over + * IPv4. Sigh. Vendors like this way for an unknown reason. + * Though, see comment above about CHECKSUM_UNNECESSARY. 8) + * NETIF_F_IPV6_CSUM - About as dumb as the last one but does IPv6 instead. + * NETIF_F_... - Well, you get the picture. + * + * CHECKSUM_UNNECESSARY: + * + * Normally, the device will do per protocol specific checksumming. Protocol + * implementations that do not want the NIC to perform the checksum + * calculation should use this flag in their outgoing skbs. + * + * NETIF_F_FCOE_CRC - This indicates that the device can do FCoE FC CRC + * offload. Correspondingly, the FCoE protocol driver + * stack should use CHECKSUM_UNNECESSARY. + * + * Any questions? No questions, good. --ANK + */ /* Don't change this without changing skb_csum_unnecessary! */ -#define CHECKSUM_NONE 0 -#define CHECKSUM_UNNECESSARY 1 -#define CHECKSUM_COMPLETE 2 -#define CHECKSUM_PARTIAL 3 +#define CHECKSUM_NONE 0 +#define CHECKSUM_UNNECESSARY 1 +#define CHECKSUM_COMPLETE 2 +#define CHECKSUM_PARTIAL 3 -#define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES - 1)) & \ - ~(SMP_CACHE_BYTES - 1)) +/* Maximum value in skb->csum_level */ +#define SKB_MAX_CSUM_LEVEL 3 + +#define SKB_DATA_ALIGN(X) ALIGN(X, SMP_CACHE_BYTES) #define SKB_WITH_OVERHEAD(X) \ ((X) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) #define SKB_MAX_ORDER(X, ORDER) \ @@ -54,61 +165,11 @@ SKB_DATA_ALIGN(sizeof(struct sk_buff)) + \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) -/* A. Checksumming of received packets by device. - * - * NONE: device failed to checksum this packet. - * skb->csum is undefined. - * - * UNNECESSARY: device parsed packet and wouldbe verified checksum. - * skb->csum is undefined. - * It is bad option, but, unfortunately, many of vendors do this. - * Apparently with secret goal to sell you new device, when you - * will add new protocol to your host. F.e. IPv6. 8) - * - * COMPLETE: the most generic way. Device supplied checksum of _all_ - * the packet as seen by netif_rx in skb->csum. - * NOTE: Even if device supports only some protocols, but - * is able to produce some skb->csum, it MUST use COMPLETE, - * not UNNECESSARY. - * - * PARTIAL: identical to the case for output below. This may occur - * on a packet received directly from another Linux OS, e.g., - * a virtualised Linux kernel on the same host. The packet can - * be treated in the same way as UNNECESSARY except that on - * output (i.e., forwarding) the checksum must be filled in - * by the OS or the hardware. - * - * B. Checksumming on output. - * - * NONE: skb is checksummed by protocol or csum is not required. - * - * PARTIAL: device is required to csum packet as seen by hard_start_xmit - * from skb->csum_start to the end and to record the checksum - * at skb->csum_start + skb->csum_offset. 
- * - * Device must show its capabilities in dev->features, set - * at device setup time. - * NETIF_F_HW_CSUM - it is clever device, it is able to checksum - * everything. - * NETIF_F_IP_CSUM - device is dumb. It is able to csum only - * TCP/UDP over IPv4. Sigh. Vendors like this - * way by an unknown reason. Though, see comment above - * about CHECKSUM_UNNECESSARY. 8) - * NETIF_F_IPV6_CSUM about as dumb as the last one but does IPv6 instead. - * - * UNNECESSARY: device will do per protocol specific csum. Protocol drivers - * that do not want net to perform the checksum calculation should use - * this flag in their outgoing skbs. - * NETIF_F_FCOE_CRC this indicates the device can do FCoE FC CRC - * offload. Correspondingly, the FCoE protocol driver - * stack should use CHECKSUM_UNNECESSARY. - * - * Any questions? No questions, good. --ANK - */ - struct net_device; struct scatterlist; struct pipe_inode_info; +struct iov_iter; +struct napi_struct; #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct nf_conntrack { @@ -116,13 +177,33 @@ }; #endif -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) struct nf_bridge_info { atomic_t use; - unsigned int mask; + enum { + BRNF_PROTO_UNCHANGED, + BRNF_PROTO_8021Q, + BRNF_PROTO_PPPOE + } orig_proto:8; + u8 pkt_otherhost:1; + u8 in_prerouting:1; + u8 bridged_dnat:1; + __u16 frag_max_size; struct net_device *physindev; + + /* always valid & non-NULL from FORWARD on, for physdev match */ struct net_device *physoutdev; - unsigned long data[32 / sizeof(unsigned long)]; + union { + /* prerouting: detect dnat in orig/reply direction */ + __be32 ipv4_daddr; + struct in6_addr ipv6_daddr; + + /* after prerouting + nat detected: store original source + * mac since neigh resolution overwrites it, only used while + * skb is out in neigh layer. + */ + char neigh_header[8]; + }; }; #endif @@ -149,6 +230,7 @@ #else #define MAX_SKB_FRAGS (65536/PAGE_SIZE + 1) #endif +extern int sysctl_max_skb_frags; typedef struct skb_frag_struct skb_frag_t; @@ -191,18 +273,9 @@ * struct skb_shared_hwtstamps - hardware time stamps * @hwtstamp: hardware time stamp transformed into duration * since arbitrary point in time - * @syststamp: hwtstamp transformed to system time base * * Software time stamps generated by ktime_get_real() are stored in - * skb->tstamp. The relation between the different kinds of time - * stamps is as follows: - * - * syststamp and tstamp can be compared against each other in - * arbitrary combinations. The accuracy of a - * syststamp/tstamp/"syststamp from other device" comparison is - * limited by the accuracy of the transformation into system time - * base. This depends on the device driver and its underlying - * hardware. + * skb->tstamp. * * hwtstamps can only be compared against other hwtstamps from * the same device. 
@@ -212,7 +285,6 @@ */ struct skb_shared_hwtstamps { ktime_t hwtstamp; - ktime_t syststamp; }; /* Definitions for tx_flags in struct skb_shared_info */ @@ -220,7 +292,7 @@ /* generate hardware time stamp */ SKBTX_HW_TSTAMP = 1 << 0, - /* generate software time stamp */ + /* generate software time stamp when queueing packet to NIC */ SKBTX_SW_TSTAMP = 1 << 1, /* device driver is going to provide hardware time stamp */ @@ -238,8 +310,19 @@ * all frags to avoid possible bad checksum */ SKBTX_SHARED_FRAG = 1 << 5, + + /* generate software time stamp when entering packet scheduling */ + SKBTX_SCHED_TSTAMP = 1 << 6, + + /* generate software timestamp on peer data acknowledgment */ + SKBTX_ACK_TSTAMP = 1 << 7, }; +#define SKBTX_ANY_SW_TSTAMP (SKBTX_SW_TSTAMP | \ + SKBTX_SCHED_TSTAMP | \ + SKBTX_ACK_TSTAMP) +#define SKBTX_ANY_TSTAMP (SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP) + /* * The callback notifies userspace to release buffers when skb DMA is done in * lower device, the skb last reference should be 0 when calling this. @@ -266,6 +349,7 @@ unsigned short gso_type; struct sk_buff *frag_list; struct skb_shared_hwtstamps hwtstamps; + u32 tskey; __be32 ip6_frag_id; /* @@ -297,9 +381,9 @@ enum { - SKB_FCLONE_UNAVAILABLE, - SKB_FCLONE_ORIG, - SKB_FCLONE_CLONE, + SKB_FCLONE_UNAVAILABLE, /* skb has no fclone (from head_cache) */ + SKB_FCLONE_ORIG, /* orig skb (from fclone_cache) */ + SKB_FCLONE_CLONE, /* companion fclone skb (from fclone_cache) */ }; enum { @@ -318,7 +402,17 @@ SKB_GSO_GRE = 1 << 6, - SKB_GSO_UDP_TUNNEL = 1 << 7, + SKB_GSO_GRE_CSUM = 1 << 7, + + SKB_GSO_IPIP = 1 << 8, + + SKB_GSO_SIT = 1 << 9, + + SKB_GSO_UDP_TUNNEL = 1 << 10, + + SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11, + + SKB_GSO_TUNNEL_REMCSUM = 1 << 12, }; #if BITS_PER_LONG > 32 @@ -331,11 +425,72 @@ typedef unsigned char *sk_buff_data_t; #endif +/** + * struct skb_mstamp - multi resolution time stamps + * @stamp_us: timestamp in us resolution + * @stamp_jiffies: timestamp in jiffies + */ +struct skb_mstamp { + union { + u64 v64; + struct { + u32 stamp_us; + u32 stamp_jiffies; + }; + }; +}; + +/** + * skb_mstamp_get - get current timestamp + * @cl: place to store timestamps + */ +static inline void skb_mstamp_get(struct skb_mstamp *cl) +{ + u64 val = local_clock(); + + do_div(val, NSEC_PER_USEC); + cl->stamp_us = (u32)val; + cl->stamp_jiffies = (u32)jiffies; +} + +/** + * skb_mstamp_delta - compute the difference in usec between two skb_mstamp + * @t1: pointer to newest sample + * @t0: pointer to oldest sample + */ +static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, + const struct skb_mstamp *t0) +{ + s32 delta_us = t1->stamp_us - t0->stamp_us; + u32 delta_jiffies = t1->stamp_jiffies - t0->stamp_jiffies; + + /* If delta_us is negative, this might be because interval is too big, + * or local_clock() drift is too big : fallback using jiffies. 
+ */ + if (delta_us <= 0 || + delta_jiffies >= (INT_MAX / (USEC_PER_SEC / HZ))) + + delta_us = jiffies_to_usecs(delta_jiffies); + + return delta_us; +} + +static inline bool skb_mstamp_after(const struct skb_mstamp *t1, + const struct skb_mstamp *t0) +{ + s32 diff = t1->stamp_jiffies - t0->stamp_jiffies; + + if (!diff) + diff = t1->stamp_us - t0->stamp_us; + return diff > 0; +} + /** * struct sk_buff - socket buffer * @next: Next buffer in list * @prev: Previous buffer in list - * @tstamp: Time we arrived + * @tstamp: Time we arrived/left + * @rbnode: RB tree node, alternative to next/prev for netem/tcp * @sk: Socket we are owned by * @dev: Device we arrived on/are leaving by * @cb: Control buffer. Free for use by every layer. Put private vars here @@ -349,7 +504,7 @@ * @csum_start: Offset from skb->head where checksumming should start * @csum_offset: Offset from csum_start where checksum should be stored * @priority: Packet queueing priority - * @local_df: allow local fragmentation + * @ignore_df: allow local fragmentation * @cloned: Head may be cloned (check refcnt to be sure) * @ip_summed: Driver fed us an IP checksum * @nohdr: Payload reference only, must not modify header @@ -367,22 +522,24 @@ * @skb_iif: ifindex of device we arrived on * @tc_index: Traffic control index * @tc_verd: traffic control verdict - * @rxhash: the packet hash computed on receive + * @hash: the packet hash * @queue_mapping: Queue mapping for multiqueue devices + * @xmit_more: More SKBs are pending for this queue * @ndisc_nodetype: router type (from link layer) * @ooo_okay: allow the mapping of a socket to a queue to be changed - * @l4_rxhash: indicate rxhash is a canonical 4-tuple hash over transport + * @l4_hash: indicate hash is a canonical 4-tuple hash over transport * ports. + * @sw_hash: indicates hash was computed in software stack * @wifi_acked_valid: wifi_acked was set * @wifi_acked: whether frame was acked on wifi or not * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS - * @dma_cookie: a cookie to one of several possible DMA operations - * done by skb DMA functions + * @napi_id: id of the NAPI struct this skb came from * @secmark: security marking + * @offload_fwd_mark: fwding offload mark * @mark: Generic packet mark - * @dropcount: total number of sk_receive_queue overflows * @vlan_proto: vlan encapsulation protocol * @vlan_tci: vlan tag control information + * @inner_protocol: Protocol (encapsulation) * @inner_transport_header: Inner transport layer header (encapsulation) * @inner_network_header: Network layer header (encapsulation) * @inner_mac_header: Link layer header (encapsulation) @@ -398,14 +555,24 @@ */ struct sk_buff { - /* These two members must be first. */ - struct sk_buff *next; - struct sk_buff *prev; - - ktime_t tstamp; - + union { + struct { + /* These two members must be first. */ + struct sk_buff *next; + struct sk_buff *prev; + + union { + ktime_t tstamp; + struct skb_mstamp skb_mstamp; + }; + }; + struct rb_node rbnode; /* used in netem & tcp stack */ + }; struct sock *sk; struct net_device *dev; +#if IS_ENABLED(CONFIG_AVM_NET_SKB_INPUT_DEV) + struct net_device *input_dev; +#endif /* * This is the control buffer. It is free to use for every @@ -416,49 +583,89 @@ char cb[48] __aligned(8); unsigned long _skb_refdst; + void (*destructor)(struct sk_buff *skb); #ifdef CONFIG_XFRM struct sec_path *sp; #endif +#ifdef CONFIG_AVM_ENHANCED + /* uniq_id: Attention! This field does not exist in the 2.6.32 kernel + * anymore. 
We just use it to pass port and queue information between + * avm_cpmac and multid/dsld in the upper eight bits. */ + unsigned long uniq_id; +#define AVM_HAVE_SKB_UNIQ_ID +#endif +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + struct nf_conntrack *nfct; +#endif +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + struct nf_bridge_info *nf_bridge; +#endif unsigned int len, data_len; __u16 mac_len, hdr_len; - union { - __wsum csum; - struct { - __u16 csum_start; - __u16 csum_offset; - }; - }; - __u32 priority; + + /* Following fields are _not_ copied in __copy_skb_header() + * Note that queue_mapping is here mostly to fill a hole. + */ kmemcheck_bitfield_begin(flags1); - __u8 local_df:1, - cloned:1, - ip_summed:2, + __u16 queue_mapping; + __u8 cloned:1, nohdr:1, - nfctinfo:3; - __u8 pkt_type:3, fclone:2, - ipvs_property:1, peeked:1, - nf_trace:1; + head_frag:1, + xmit_more:1; + /* one bit hole */ kmemcheck_bitfield_end(flags1); - __be16 protocol; - void (*destructor)(struct sk_buff *skb); -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) - struct nf_conntrack *nfct; -#endif -#ifdef CONFIG_BRIDGE_NETFILTER - struct nf_bridge_info *nf_bridge; + /* fields enclosed in headers_start/headers_end are copied + * using a single memcpy() in __copy_skb_header() + */ + /* private: */ + __u32 headers_start[0]; + /* public: */ + +/* if you move pkt_type around you also must adapt those constants */ +#ifdef __BIG_ENDIAN_BITFIELD +#define PKT_TYPE_MAX (7 << 5) +#else +#define PKT_TYPE_MAX 7 #endif +#define PKT_TYPE_OFFSET() offsetof(struct sk_buff, __pkt_type_offset) - int skb_iif; + __u8 __pkt_type_offset[0]; + __u8 pkt_type:3; + __u8 pfmemalloc:1; + __u8 ignore_df:1; + __u8 nfctinfo:3; - __u32 rxhash; + __u8 nf_trace:1; + __u8 ip_summed:2; + __u8 ooo_okay:1; + __u8 l4_hash:1; + __u8 sw_hash:1; + __u8 wifi_acked_valid:1; + __u8 wifi_acked:1; - __be16 vlan_proto; - __u16 vlan_tci; + __u8 no_fcs:1; + /* Indicates the inner headers are valid in the skbuff. 
*/ + __u8 encapsulation:1; + __u8 encap_hdr_csum:1; + __u8 csum_valid:1; + __u8 csum_complete_sw:1; + __u8 csum_level:2; + __u8 csum_bad:1; + +#ifdef CONFIG_IPV6_NDISC_NODETYPE + __u8 ndisc_nodetype:2; +#endif + __u8 ipvs_property:1; + __u8 inner_protocol_type:1; + __u8 remcsum_offload:1; + __u8 gro_skip:1; + __u8 fast_forwarded:1; + /* 1 or 3 bit hole */ #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ @@ -467,45 +674,64 @@ #endif #endif - __u16 queue_mapping; - kmemcheck_bitfield_begin(flags2); -#ifdef CONFIG_IPV6_NDISC_NODETYPE - __u8 ndisc_nodetype:2; -#endif - __u8 pfmemalloc:1; - __u8 ooo_okay:1; - __u8 l4_rxhash:1; - __u8 wifi_acked_valid:1; - __u8 wifi_acked:1; - __u8 no_fcs:1; - __u8 head_frag:1; - /* Encapsulation protocol and NIC drivers should use - * this flag to indicate to each other if the skb contains - * encapsulated packet or not and maybe use the inner packet - * headers if needed - */ - __u8 encapsulation:1; - /* 7/9 bit hole (depending on ndisc_nodetype presence) */ - kmemcheck_bitfield_end(flags2); - -#ifdef CONFIG_NET_DMA - dma_cookie_t dma_cookie; + union { + __wsum csum; + struct { + __u16 csum_start; + __u16 csum_offset; + }; + }; + __u32 priority; + int skb_iif; + __u32 hash; + __be16 vlan_proto; + __u16 vlan_tci; +#if defined(CONFIG_NET_RX_BUSY_POLL) || defined(CONFIG_XPS) + union { + unsigned int napi_id; + unsigned int sender_cpu; + }; #endif + union { #ifdef CONFIG_NETWORK_SECMARK - __u32 secmark; + __u32 secmark; +#endif +#ifdef CONFIG_NET_SWITCHDEV + __u32 offload_fwd_mark; #endif + }; + union { __u32 mark; - __u32 dropcount; __u32 reserved_tailroom; }; - sk_buff_data_t inner_transport_header; - sk_buff_data_t inner_network_header; - sk_buff_data_t inner_mac_header; - sk_buff_data_t transport_header; - sk_buff_data_t network_header; - sk_buff_data_t mac_header; + union { + __be16 inner_protocol; + __u8 inner_ipproto; + }; + + __u16 inner_transport_header; + __u16 inner_network_header; + __u16 inner_mac_header; + + __be16 protocol; + __u16 transport_header; + __u16 network_header; + __u16 mac_header; + +#ifdef CONFIG_AVM_PA + __u8 avm_pa[256]; +#define AVM_PKT_INFO(skb) ((struct avm_pa_pkt_info *) (skb)->avm_pa) +#endif +#ifdef CONFIG_AVM_GENERIC_CONNTRACK + struct generic_ct *generic_ct; +#endif + + /* private: */ + __u32 headers_end[0]; + /* public: */ + /* These elements must be at the end, see alloc_skb() for details. 
*/ sk_buff_data_t tail; sk_buff_data_t end; @@ -513,6 +739,12 @@ *data; unsigned int truesize; atomic_t users; +#ifdef CONFIG_DEBUG_OBJECTS_SKBUFF +#define DEBUG_OBJECTS_SKBUFF_STACKSIZE 20 + void *free_addr[DEBUG_OBJECTS_SKBUFF_STACKSIZE]; + void *alloc_addr[DEBUG_OBJECTS_SKBUFF_STACKSIZE]; + u32 sum; +#endif }; #ifdef __KERNEL__ @@ -524,6 +756,7 @@ #define SKB_ALLOC_FCLONE 0x01 #define SKB_ALLOC_RX 0x02 +#define SKB_ALLOC_NAPI 0x04 /* Returns true if the skb was allocated from PFMEMALLOC reserves */ static inline bool skb_pfmemalloc(const struct sk_buff *skb) @@ -568,9 +801,6 @@ skb->_skb_refdst = (unsigned long)dst; } -extern void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst, - bool force); - /** * skb_dst_set_noref - sets skb dst, hopefully, without taking reference * @skb: buffer @@ -583,24 +813,8 @@ */ static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) { - __skb_dst_set_noref(skb, dst, false); -} - -/** - * skb_dst_set_noref_force - sets skb dst, without taking reference - * @skb: buffer - * @dst: dst entry - * - * Sets skb dst, assuming a reference was not taken on dst. - * No reference is taken and no dst_release will be called. While for - * cached dsts deferred reclaim is a basic feature, for entries that are - * not cached it is caller's job to guarantee that last dst_release for - * provided dst happens when nobody uses it, eg. after a RCU grace period. - */ -static inline void skb_dst_set_noref_force(struct sk_buff *skb, - struct dst_entry *dst) -{ - __skb_dst_set_noref(skb, dst, true); + WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); + skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF; } /** @@ -617,67 +831,108 @@ return (struct rtable *)skb_dst(skb); } -extern void kfree_skb(struct sk_buff *skb); -extern void kfree_skb_list(struct sk_buff *segs); -extern void skb_tx_error(struct sk_buff *skb); -extern void consume_skb(struct sk_buff *skb); -extern void __kfree_skb(struct sk_buff *skb); +void kfree_skb(struct sk_buff *skb); +void kfree_skb_list(struct sk_buff *segs); +void skb_tx_error(struct sk_buff *skb); +void consume_skb(struct sk_buff *skb); +void __kfree_skb(struct sk_buff *skb); extern struct kmem_cache *skbuff_head_cache; +extern void kfree_skbmem(struct sk_buff *skb); +extern void skb_release_data(struct sk_buff *skb); -extern void kfree_skb_partial(struct sk_buff *skb, bool head_stolen); -extern bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, - bool *fragstolen, int *delta_truesize); - -extern struct sk_buff *__alloc_skb(unsigned int size, - gfp_t priority, int flags, int node); -extern struct sk_buff *build_skb(void *data, unsigned int frag_size); +void kfree_skb_partial(struct sk_buff *skb, bool head_stolen); +bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, + bool *fragstolen, int *delta_truesize); + +struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int flags, + int node); +struct sk_buff *__build_skb(void *data, unsigned int frag_size); +struct sk_buff *build_skb(void *data, unsigned int frag_size); static inline struct sk_buff *alloc_skb(unsigned int size, gfp_t priority) { return __alloc_skb(size, priority, 0, NUMA_NO_NODE); } +struct sk_buff *alloc_skb_with_frags(unsigned long header_len, + unsigned long data_len, + int max_page_order, + int *errcode, + gfp_t gfp_mask); + +/* Layout of fast clones : [skb1][skb2][fclone_ref] */ +struct sk_buff_fclones { + struct sk_buff skb1; + + struct sk_buff skb2; + + atomic_t fclone_ref; +}; + +/** + * 
skb_fclone_busy - check if fclone is busy + * @skb: buffer + * + * Returns true is skb is a fast clone, and its clone is not freed. + * Some drivers call skb_orphan() in their ndo_start_xmit(), + * so we also check that this didnt happen. + */ +static inline bool skb_fclone_busy(const struct sock *sk, + const struct sk_buff *skb) +{ + const struct sk_buff_fclones *fclones; + + fclones = container_of(skb, struct sk_buff_fclones, skb1); + + return skb->fclone == SKB_FCLONE_ORIG && + atomic_read(&fclones->fclone_ref) > 1 && + fclones->skb2.sk == sk; +} + static inline struct sk_buff *alloc_skb_fclone(unsigned int size, gfp_t priority) { return __alloc_skb(size, priority, SKB_ALLOC_FCLONE, NUMA_NO_NODE); } -extern struct sk_buff *__alloc_skb_head(gfp_t priority, int node); +struct sk_buff *__alloc_skb_head(gfp_t priority, int node); static inline struct sk_buff *alloc_skb_head(gfp_t priority) { return __alloc_skb_head(priority, -1); } -extern struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); -extern int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask); -extern struct sk_buff *skb_clone(struct sk_buff *skb, - gfp_t priority); -extern struct sk_buff *skb_copy(const struct sk_buff *skb, - gfp_t priority); -extern struct sk_buff *__pskb_copy(struct sk_buff *skb, - int headroom, gfp_t gfp_mask); - -extern int pskb_expand_head(struct sk_buff *skb, - int nhead, int ntail, - gfp_t gfp_mask); -extern struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, - unsigned int headroom); -extern struct sk_buff *skb_copy_expand(const struct sk_buff *skb, - int newheadroom, int newtailroom, - gfp_t priority); -extern int skb_to_sgvec(struct sk_buff *skb, - struct scatterlist *sg, int offset, - int len); -extern int skb_cow_data(struct sk_buff *skb, int tailbits, - struct sk_buff **trailer); -extern int skb_pad(struct sk_buff *skb, int pad); +struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); +int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask); +struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority); +struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t priority); +struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom, + gfp_t gfp_mask, bool fclone); +static inline struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, + gfp_t gfp_mask) +{ + return __pskb_copy_fclone(skb, headroom, gfp_mask, false); +} + +int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask); +struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, + unsigned int headroom); +struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom, + int newtailroom, gfp_t priority); +int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg, + int offset, int len); +int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, + int len); +int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer); +int skb_pad(struct sk_buff *skb, int pad); #define dev_kfree_skb(a) consume_skb(a) -extern int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, - int getfrag(void *from, char *to, int offset, - int len,int odd, struct sk_buff *skb), - void *from, int length); +int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, + int getfrag(void *from, char *to, int offset, + int len, int odd, struct sk_buff *skb), + void *from, int length); + +int skb_append_pagefrags(struct sk_buff *skb, struct page *page, + int offset, size_t size); struct skb_seq_state { __u32 lower_offset; @@ -689,24 
+944,188 @@ __u8 *frag_data; }; -extern void skb_prepare_seq_read(struct sk_buff *skb, - unsigned int from, unsigned int to, - struct skb_seq_state *st); -extern unsigned int skb_seq_read(unsigned int consumed, const u8 **data, - struct skb_seq_state *st); -extern void skb_abort_seq_read(struct skb_seq_state *st); - -extern unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, - unsigned int to, struct ts_config *config, - struct ts_state *state); +void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, + unsigned int to, struct skb_seq_state *st); +unsigned int skb_seq_read(unsigned int consumed, const u8 **data, + struct skb_seq_state *st); +void skb_abort_seq_read(struct skb_seq_state *st); + +unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, + unsigned int to, struct ts_config *config); + +/* + * Packet hash types specify the type of hash in skb_set_hash. + * + * Hash types refer to the protocol layer addresses which are used to + * construct a packet's hash. The hashes are used to differentiate or identify + * flows of the protocol layer for the hash type. Hash types are either + * layer-2 (L2), layer-3 (L3), or layer-4 (L4). + * + * Properties of hashes: + * + * 1) Two packets in different flows have different hash values + * 2) Two packets in the same flow should have the same hash value + * + * A hash at a higher layer is considered to be more specific. A driver should + * set the most specific hash possible. + * + * A driver cannot indicate a more specific hash than the layer at which a hash + * was computed. For instance an L3 hash cannot be set as an L4 hash. + * + * A driver may indicate a hash level which is less specific than the + * actual layer the hash was computed on. For instance, a hash computed + * at L4 may be considered an L3 hash. This should only be done if the + * driver can't unambiguously determine that the HW computed the hash at + * the higher layer. Note that the "should" in the second property above + * permits this. 
+ */ +enum pkt_hash_types { + PKT_HASH_TYPE_NONE, /* Undefined type */ + PKT_HASH_TYPE_L2, /* Input: src_MAC, dest_MAC */ + PKT_HASH_TYPE_L3, /* Input: src_IP, dst_IP */ + PKT_HASH_TYPE_L4, /* Input: src_IP, dst_IP, src_port, dst_port */ +}; + +static inline void skb_clear_hash(struct sk_buff *skb) +{ + skb->hash = 0; + skb->sw_hash = 0; + skb->l4_hash = 0; +} + +static inline void skb_clear_hash_if_not_l4(struct sk_buff *skb) +{ + if (!skb->l4_hash) + skb_clear_hash(skb); +} + +static inline void +__skb_set_hash(struct sk_buff *skb, __u32 hash, bool is_sw, bool is_l4) +{ + skb->l4_hash = is_l4; + skb->sw_hash = is_sw; + skb->hash = hash; +} + +static inline void +skb_set_hash(struct sk_buff *skb, __u32 hash, enum pkt_hash_types type) +{ + /* Used by drivers to set hash from HW */ + __skb_set_hash(skb, hash, false, type == PKT_HASH_TYPE_L4); +} + +static inline void +__skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4) +{ + __skb_set_hash(skb, hash, true, is_l4); +} + +void __skb_get_hash(struct sk_buff *skb); +u32 __skb_get_hash_symmetric(struct sk_buff *skb); +u32 skb_get_poff(const struct sk_buff *skb); +u32 __skb_get_poff(const struct sk_buff *skb, void *data, + const struct flow_keys *keys, int hlen); +__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, + void *data, int hlen_proto); + +static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, + int thoff, u8 ip_proto) +{ + return __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0); +} + +void skb_flow_dissector_init(struct flow_dissector *flow_dissector, + const struct flow_dissector_key *key, + unsigned int key_count); + +bool __skb_flow_dissect(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container, + void *data, __be16 proto, int nhoff, int hlen, + unsigned int flags); + +static inline bool skb_flow_dissect(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container, unsigned int flags) +{ + return __skb_flow_dissect(skb, flow_dissector, target_container, + NULL, 0, 0, 0, flags); +} + +static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb, + struct flow_keys *flow, + unsigned int flags) +{ + memset(flow, 0, sizeof(*flow)); + return __skb_flow_dissect(skb, &flow_keys_dissector, flow, + NULL, 0, 0, 0, flags); +} + +static inline bool skb_flow_dissect_flow_keys_buf(struct flow_keys *flow, + void *data, __be16 proto, + int nhoff, int hlen, + unsigned int flags) +{ + memset(flow, 0, sizeof(*flow)); + return __skb_flow_dissect(NULL, &flow_keys_buf_dissector, flow, + data, proto, nhoff, hlen, flags); +} + +static inline __u32 skb_get_hash(struct sk_buff *skb) +{ + if (!skb->l4_hash && !skb->sw_hash) + __skb_get_hash(skb); + + return skb->hash; +} + +__u32 __skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6); + +static inline __u32 skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6) +{ + if (!skb->l4_hash && !skb->sw_hash) { + struct flow_keys keys; + __u32 hash = __get_hash_from_flowi6(fl6, &keys); + + __skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys)); + } + + return skb->hash; +} + +__u32 __skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl); + +static inline __u32 skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl4) +{ + if (!skb->l4_hash && !skb->sw_hash) { + struct flow_keys keys; + __u32 hash = __get_hash_from_flowi4(fl4, &keys); + + __skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys)); + } + + return skb->hash; +} + +__u32 
skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb); + +static inline __u32 skb_get_hash_raw(const struct sk_buff *skb) +{ + return skb->hash; +} -extern void __skb_get_rxhash(struct sk_buff *skb); -static inline __u32 skb_get_rxhash(struct sk_buff *skb) +static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from) { - if (!skb->l4_rxhash) - __skb_get_rxhash(skb); + to->hash = from->hash; + to->sw_hash = from->sw_hash; + to->l4_hash = from->l4_hash; +}; - return skb->rxhash; +static inline void skb_sender_cpu_clear(struct sk_buff *skb) +{ +#ifdef CONFIG_XPS + skb->sender_cpu = 0; +#endif } #ifdef NET_SKBUFF_DATA_USES_OFFSET @@ -747,7 +1166,7 @@ */ static inline int skb_queue_empty(const struct sk_buff_head *list) { - return list->next == (struct sk_buff *)list; + return list->next == (const struct sk_buff *) list; } /** @@ -760,7 +1179,7 @@ static inline bool skb_queue_is_last(const struct sk_buff_head *list, const struct sk_buff *skb) { - return skb->next == (struct sk_buff *)list; + return skb->next == (const struct sk_buff *) list; } /** @@ -773,7 +1192,7 @@ static inline bool skb_queue_is_first(const struct sk_buff_head *list, const struct sk_buff *skb) { - return skb->prev == (struct sk_buff *)list; + return skb->prev == (const struct sk_buff *) list; } /** @@ -846,7 +1265,7 @@ static inline int skb_unclone(struct sk_buff *skb, gfp_t pri) { - might_sleep_if(pri & __GFP_WAIT); + might_sleep_if(gfpflags_allow_blocking(pri)); if (skb_cloned(skb)) return pskb_expand_head(skb, 0, 0, pri); @@ -880,6 +1299,7 @@ * Drop a reference to the header part of the buffer. This is done * by acquiring a payload reference. You must not read from the header * part of skb->data after this. + * Note : Check if you can use __skb_header_release() instead. */ static inline void skb_header_release(struct sk_buff *skb) { @@ -889,6 +1309,20 @@ } /** + * __skb_header_release - release reference to header + * @skb: buffer to operate on + * + * Variant of skb_header_release() assuming skb is private to caller. + * We can avoid one atomic operation. + */ +static inline void __skb_header_release(struct sk_buff *skb) +{ + skb->nohdr = 1; + atomic_set(&skb_shinfo(skb)->dataref, 1 + (1 << SKB_DATAREF_SHIFT)); +} + + +/** * skb_shared - is the buffer shared * @skb: buffer to check * @@ -915,7 +1349,7 @@ */ static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri) { - might_sleep_if(pri & __GFP_WAIT); + might_sleep_if(gfpflags_allow_blocking(pri)); if (skb_shared(skb)) { struct sk_buff *nskb = skb_clone(skb, pri); @@ -951,10 +1385,15 @@ static inline struct sk_buff *skb_unshare(struct sk_buff *skb, gfp_t pri) { - might_sleep_if(pri & __GFP_WAIT); + might_sleep_if(gfpflags_allow_blocking(pri)); if (skb_cloned(skb)) { struct sk_buff *nskb = skb_copy(skb, pri); - kfree_skb(skb); /* Free our shared copy */ + + /* Free our shared copy */ + if (likely(nskb)) + consume_skb(skb); + else + kfree_skb(skb); skb = nskb; } return skb; @@ -1078,7 +1517,8 @@ * The "__skb_xxxx()" functions are the non-atomic ones that * can only be called with interrupts disabled. 
*/ -extern void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list); +void skb_insert(struct sk_buff *old, struct sk_buff *newsk, + struct sk_buff_head *list); static inline void __skb_insert(struct sk_buff *newsk, struct sk_buff *prev, struct sk_buff *next, struct sk_buff_head *list) @@ -1184,8 +1624,8 @@ __skb_insert(newsk, prev, prev->next, list); } -extern void skb_append(struct sk_buff *old, struct sk_buff *newsk, - struct sk_buff_head *list); +void skb_append(struct sk_buff *old, struct sk_buff *newsk, + struct sk_buff_head *list); static inline void __skb_queue_before(struct sk_buff_head *list, struct sk_buff *next, @@ -1204,7 +1644,7 @@ * * A buffer cannot be placed on two lists at the same time. */ -extern void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk); +void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk); static inline void __skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) { @@ -1221,7 +1661,7 @@ * * A buffer cannot be placed on two lists at the same time. */ -extern void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk); +void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk); static inline void __skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) { @@ -1232,7 +1672,7 @@ * remove sk_buff from list. _Must_ be called atomically, and with * the list known.. */ -extern void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list); +void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list); static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) { struct sk_buff *next, *prev; @@ -1253,7 +1693,7 @@ * so must be used with appropriate locks held only. The head item is * returned or %NULL if the list is empty. */ -extern struct sk_buff *skb_dequeue(struct sk_buff_head *list); +struct sk_buff *skb_dequeue(struct sk_buff_head *list); static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) { struct sk_buff *skb = skb_peek(list); @@ -1270,7 +1710,7 @@ * so must be used with appropriate locks held only. The tail item is * returned or %NULL if the list is empty. */ -extern struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list); +struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list); static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list) { struct sk_buff *skb = skb_peek_tail(list); @@ -1299,11 +1739,6 @@ return len + skb_headlen(skb); } -static inline bool skb_has_frags(const struct sk_buff *skb) -{ - return skb_shinfo(skb)->nr_frags; -} - /** * __skb_fill_page_desc - initialise a paged fragment in an skb * @skb: buffer containing fragment to be initialised @@ -1323,20 +1758,16 @@ skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; /* - * Propagate page->pfmemalloc to the skb if we can. The problem is - * that not all callers have unique ownership of the page. If - * pfmemalloc is set, we check the mapping as a mapping implies - * page->index is set (index and pfmemalloc share space). - * If it's a valid mapping, we cannot use page->pfmemalloc but we - * do not lose pfmemalloc information as the pages would not be - * allocated using __GFP_MEMALLOC. + * Propagate page pfmemalloc to the skb if we can. The problem is + * that not all callers have unique ownership of the page but rely + * on page_is_pfmemalloc doing the right thing(tm). 
*/ frag->page.p = page; frag->page_offset = off; skb_frag_size_set(frag, size); page = compound_head(page); - if (page->pfmemalloc && !page->mapping) + if (page_is_pfmemalloc(page)) skb->pfmemalloc = true; } @@ -1349,7 +1780,7 @@ * @size: the length of the data * * As per __skb_fill_page_desc() -- initialises the @i'th fragment of - * @skb to point to &size bytes at offset @off within @page. In + * @skb to point to @size bytes at offset @off within @page. In * addition updates @skb such that @i is the last fragment. * * Does not take any additional reference on the fragment. @@ -1361,8 +1792,11 @@ skb_shinfo(skb)->nr_frags = i + 1; } -extern void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, - int off, int size, unsigned int truesize); +void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, + int size, unsigned int truesize); + +void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size, + unsigned int truesize); #define SKB_PAGE_ASSERT(skb) BUG_ON(skb_shinfo(skb)->nr_frags) #define SKB_FRAG_ASSERT(skb) BUG_ON(skb_has_frag_list(skb)) @@ -1384,6 +1818,7 @@ skb_reset_tail_pointer(skb); skb->tail += offset; } + #else /* NET_SKBUFF_DATA_USES_OFFSET */ static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb) { @@ -1405,7 +1840,8 @@ /* * Add data to an sk_buff */ -extern unsigned char *skb_put(struct sk_buff *skb, unsigned int len); +unsigned char *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len); +unsigned char *skb_put(struct sk_buff *skb, unsigned int len); static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len) { unsigned char *tmp = skb_tail_pointer(skb); @@ -1415,7 +1851,53 @@ return tmp; } -extern unsigned char *skb_push(struct sk_buff *skb, unsigned int len); +static inline void *__skb_put_zero(struct sk_buff *skb, unsigned int len) +{ + void *tmp = __skb_put(skb, len); + + memset(tmp, 0, len); + return tmp; +} + +static inline void *__skb_put_data(struct sk_buff *skb, const void *data, + unsigned int len) +{ + void *tmp = __skb_put(skb, len); + + memcpy(tmp, data, len); + return tmp; +} + +static inline void __skb_put_u8(struct sk_buff *skb, u8 val) +{ + *(u8 *)__skb_put(skb, 1) = val; +} + +static inline void *skb_put_zero(struct sk_buff *skb, unsigned int len) +{ + void *tmp = skb_put(skb, len); + + memset(tmp, 0, len); + + return tmp; +} + +static inline void *skb_put_data(struct sk_buff *skb, const void *data, + unsigned int len) +{ + void *tmp = skb_put(skb, len); + + memcpy(tmp, data, len); + + return tmp; +} + +static inline void skb_put_u8(struct sk_buff *skb, u8 val) +{ + *(u8 *)skb_put(skb, 1) = val; +} + +unsigned char *skb_push(struct sk_buff *skb, unsigned int len); static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len) { skb->data -= len; @@ -1423,7 +1905,7 @@ return skb->data; } -extern unsigned char *skb_pull(struct sk_buff *skb, unsigned int len); +unsigned char *skb_pull(struct sk_buff *skb, unsigned int len); static inline unsigned char *__skb_pull(struct sk_buff *skb, unsigned int len) { skb->len -= len; @@ -1436,7 +1918,7 @@ return unlikely(len > skb->len) ? 
NULL : __skb_pull(skb, len); } -extern unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta); +unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta); static inline unsigned char *__pskb_pull(struct sk_buff *skb, unsigned int len) { @@ -1512,6 +1994,47 @@ skb->tail += len; } +/** + * skb_tailroom_reserve - adjust reserved_tailroom + * @skb: buffer to alter + * @mtu: maximum amount of headlen permitted + * @needed_tailroom: minimum amount of reserved_tailroom + * + * Set reserved_tailroom so that headlen can be as large as possible but + * not larger than mtu and tailroom cannot be smaller than + * needed_tailroom. + * The required headroom should already have been reserved before using + * this function. + */ +static inline void skb_tailroom_reserve(struct sk_buff *skb, unsigned int mtu, + unsigned int needed_tailroom) +{ + SKB_LINEAR_ASSERT(skb); + if (mtu < skb_tailroom(skb) - needed_tailroom) + /* use at most mtu */ + skb->reserved_tailroom = skb_tailroom(skb) - mtu; + else + /* use up to all available space */ + skb->reserved_tailroom = needed_tailroom; +} + +#define ENCAP_TYPE_ETHER 0 +#define ENCAP_TYPE_IPPROTO 1 + +static inline void skb_set_inner_protocol(struct sk_buff *skb, + __be16 protocol) +{ + skb->inner_protocol = protocol; + skb->inner_protocol_type = ENCAP_TYPE_ETHER; +} + +static inline void skb_set_inner_ipproto(struct sk_buff *skb, + __u8 ipproto) +{ + skb->inner_ipproto = ipproto; + skb->inner_protocol_type = ENCAP_TYPE_IPPROTO; +} + static inline void skb_reset_inner_headers(struct sk_buff *skb) { skb->inner_mac_header = skb->mac_header; @@ -1524,7 +2047,6 @@ skb->mac_len = skb->network_header - skb->mac_header; } -#ifdef NET_SKBUFF_DATA_USES_OFFSET static inline unsigned char *skb_inner_transport_header(const struct sk_buff *skb) { @@ -1578,7 +2100,7 @@ } static inline bool skb_transport_header_was_set(const struct sk_buff *skb) { - return skb->transport_header != ~0U; + return skb->transport_header != (typeof(skb->transport_header))~0U; } static inline unsigned char *skb_transport_header(const struct sk_buff *skb) @@ -1621,7 +2143,7 @@ static inline int skb_mac_header_was_set(const struct sk_buff *skb) { - return skb->mac_header != ~0U; + return skb->mac_header != (typeof(skb->mac_header))~0U; } static inline void skb_reset_mac_header(struct sk_buff *skb) @@ -1635,112 +2157,6 @@ skb->mac_header += offset; } -#else /* NET_SKBUFF_DATA_USES_OFFSET */ -static inline unsigned char *skb_inner_transport_header(const struct sk_buff - *skb) -{ - return skb->inner_transport_header; -} - -static inline void skb_reset_inner_transport_header(struct sk_buff *skb) -{ - skb->inner_transport_header = skb->data; -} - -static inline void skb_set_inner_transport_header(struct sk_buff *skb, - const int offset) -{ - skb->inner_transport_header = skb->data + offset; -} - -static inline unsigned char *skb_inner_network_header(const struct sk_buff *skb) -{ - return skb->inner_network_header; -} - -static inline void skb_reset_inner_network_header(struct sk_buff *skb) -{ - skb->inner_network_header = skb->data; -} - -static inline void skb_set_inner_network_header(struct sk_buff *skb, - const int offset) -{ - skb->inner_network_header = skb->data + offset; -} - -static inline unsigned char *skb_inner_mac_header(const struct sk_buff *skb) -{ - return skb->inner_mac_header; -} - -static inline void skb_reset_inner_mac_header(struct sk_buff *skb) -{ - skb->inner_mac_header = skb->data; -} - -static inline void skb_set_inner_mac_header(struct sk_buff *skb, - const int offset) 
-{ - skb->inner_mac_header = skb->data + offset; -} -static inline bool skb_transport_header_was_set(const struct sk_buff *skb) -{ - return skb->transport_header != NULL; -} - -static inline unsigned char *skb_transport_header(const struct sk_buff *skb) -{ - return skb->transport_header; -} - -static inline void skb_reset_transport_header(struct sk_buff *skb) -{ - skb->transport_header = skb->data; -} - -static inline void skb_set_transport_header(struct sk_buff *skb, - const int offset) -{ - skb->transport_header = skb->data + offset; -} - -static inline unsigned char *skb_network_header(const struct sk_buff *skb) -{ - return skb->network_header; -} - -static inline void skb_reset_network_header(struct sk_buff *skb) -{ - skb->network_header = skb->data; -} - -static inline void skb_set_network_header(struct sk_buff *skb, const int offset) -{ - skb->network_header = skb->data + offset; -} - -static inline unsigned char *skb_mac_header(const struct sk_buff *skb) -{ - return skb->mac_header; -} - -static inline int skb_mac_header_was_set(const struct sk_buff *skb) -{ - return skb->mac_header != NULL; -} - -static inline void skb_reset_mac_header(struct sk_buff *skb) -{ - skb->mac_header = skb->data; -} - -static inline void skb_set_mac_header(struct sk_buff *skb, const int offset) -{ - skb->mac_header = skb->data + offset; -} -#endif /* NET_SKBUFF_DATA_USES_OFFSET */ - static inline void skb_pop_mac_header(struct sk_buff *skb) { skb->mac_header = skb->network_header; @@ -1753,8 +2169,8 @@ if (skb_transport_header_was_set(skb)) return; - else if (skb_flow_dissect(skb, &keys)) - skb_set_transport_header(skb, keys.thoff); + else if (skb_flow_dissect_flow_keys(skb, &keys, 0)) + skb_set_transport_header(skb, keys.control.thoff); else skb_set_transport_header(skb, offset_hint); } @@ -1849,10 +2265,10 @@ * NET_IP_ALIGN(2) + ethernet_header(14) + IP_header(20/40) + ports(8) */ #ifndef NET_SKB_PAD -#define NET_SKB_PAD max(32, L1_CACHE_BYTES) +#define NET_SKB_PAD max(64, L1_CACHE_BYTES) #endif -extern int ___pskb_trim(struct sk_buff *skb, unsigned int len); +int ___pskb_trim(struct sk_buff *skb, unsigned int len); static inline void __skb_trim(struct sk_buff *skb, unsigned int len) { @@ -1864,7 +2280,7 @@ skb_set_tail_pointer(skb, len); } -extern void skb_trim(struct sk_buff *skb, unsigned int len); +void skb_trim(struct sk_buff *skb, unsigned int len); static inline int __pskb_trim(struct sk_buff *skb, unsigned int len) { @@ -1879,6 +2295,10 @@ return (len < skb->len) ? __pskb_trim(skb, len) : 0; } +extern struct sk_buff *__netdev_alloc_skb_ip_align(struct net_device *dev, + unsigned int length, gfp_t gfp); + + /** * pskb_trim_unique - remove end from a paged unique (not cloned) buffer * @skb: buffer to alter @@ -1894,6 +2314,24 @@ BUG_ON(err); } +/* + * Caller wants to reduce memory needs before queueing skb + * The (expensive) copy should not be be done in fast path. 
+ */ +static inline struct sk_buff *skb_reduce_truesize(struct sk_buff *skb) +{ + if (skb->truesize > 2 * SKB_TRUESIZE(skb->len)) { + struct sk_buff *nskb; + nskb = skb_copy_expand(skb, skb_headroom(skb), 0, + GFP_ATOMIC | __GFP_NOWARN); + if (nskb) { + __kfree_skb(skb); + skb = nskb; + } + } + return skb; +} + /** * skb_orphan - orphan a buffer * @skb: buffer to orphan @@ -1904,10 +2342,13 @@ */ static inline void skb_orphan(struct sk_buff *skb) { - if (skb->destructor) + if (skb->destructor) { skb->destructor(skb); - skb->destructor = NULL; - skb->sk = NULL; + skb->destructor = NULL; + skb->sk = NULL; + } else { + BUG_ON(skb->sk); + } } /** @@ -1934,7 +2375,7 @@ * the list and one reference dropped. This function does not take the * list lock and the caller must hold the relevant locks to use it. */ -extern void skb_queue_purge(struct sk_buff_head *list); +void skb_queue_purge(struct sk_buff_head *list); static inline void __skb_queue_purge(struct sk_buff_head *list) { struct sk_buff *skb; @@ -1942,15 +2383,10 @@ kfree_skb(skb); } -#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768) -#define NETDEV_FRAG_PAGE_MAX_SIZE (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER) -#define NETDEV_PAGECNT_MAX_BIAS NETDEV_FRAG_PAGE_MAX_SIZE - -extern void *netdev_alloc_frag(unsigned int fragsz); - -extern struct sk_buff *__netdev_alloc_skb(struct net_device *dev, - unsigned int length, - gfp_t gfp_mask); +void *netdev_alloc_frag(unsigned int fragsz); + +struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length, + gfp_t gfp_mask); /** * netdev_alloc_skb - allocate an skbuff for rx on a specific device @@ -1985,63 +2421,72 @@ } -static inline struct sk_buff *__netdev_alloc_skb_ip_align(struct net_device *dev, - unsigned int length, gfp_t gfp) +static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev, + unsigned int length) { - struct sk_buff *skb = __netdev_alloc_skb(dev, length + NET_IP_ALIGN, gfp); + return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC); +} - if (NET_IP_ALIGN && skb) - skb_reserve(skb, NET_IP_ALIGN); - return skb; +static inline void skb_free_frag(void *addr) +{ + __free_page_frag(addr); } -static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev, - unsigned int length) +void *napi_alloc_frag(unsigned int fragsz); +struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, + unsigned int length, gfp_t gfp_mask); +static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi, + unsigned int length) { - return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC); + return __napi_alloc_skb(napi, length, GFP_ATOMIC); } -/* - * __skb_alloc_page - allocate pages for ps-rx on a skb and preserve pfmemalloc data - * @gfp_mask: alloc_pages_node mask. Set __GFP_NOMEMALLOC if not for network packet RX - * @skb: skb to set pfmemalloc on if __GFP_MEMALLOC is used - * @order: size of the allocation +/** + * __dev_alloc_pages - allocate page for network Rx + * @gfp_mask: allocation priority. Set __GFP_NOMEMALLOC if not for network Rx + * @order: size of the allocation * - * Allocate a new page. + * Allocate a new page. * - * %NULL is returned if there is no free memory. + * %NULL is returned if there is no free memory. 
*/ -static inline struct page *__skb_alloc_pages(gfp_t gfp_mask, - struct sk_buff *skb, - unsigned int order) +static inline struct page *__dev_alloc_pages(gfp_t gfp_mask, + unsigned int order) { - struct page *page; - - gfp_mask |= __GFP_COLD; - - if (!(gfp_mask & __GFP_NOMEMALLOC)) - gfp_mask |= __GFP_MEMALLOC; + /* This piece of code contains several assumptions. + * 1. This is for device Rx, therefor a cold page is preferred. + * 2. The expectation is the user wants a compound page. + * 3. If requesting a order 0 page it will not be compound + * due to the check to see if order has a value in prep_new_page + * 4. __GFP_MEMALLOC is ignored if __GFP_NOMEMALLOC is set due to + * code in gfp_to_alloc_flags that should be enforcing this. + */ + gfp_mask |= __GFP_COLD | __GFP_COMP | __GFP_MEMALLOC; - page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order); - if (skb && page && page->pfmemalloc) - skb->pfmemalloc = true; + return alloc_pages_node(NUMA_NO_NODE, gfp_mask, order); +} - return page; +static inline struct page *dev_alloc_pages(unsigned int order) +{ + return __dev_alloc_pages(GFP_ATOMIC, order); } /** - * __skb_alloc_page - allocate a page for ps-rx for a given skb and preserve pfmemalloc data - * @gfp_mask: alloc_pages_node mask. Set __GFP_NOMEMALLOC if not for network packet RX - * @skb: skb to set pfmemalloc on if __GFP_MEMALLOC is used + * __dev_alloc_page - allocate a page for network Rx + * @gfp_mask: allocation priority. Set __GFP_NOMEMALLOC if not for network Rx * - * Allocate a new page. + * Allocate a new page. * - * %NULL is returned if there is no free memory. + * %NULL is returned if there is no free memory. */ -static inline struct page *__skb_alloc_page(gfp_t gfp_mask, - struct sk_buff *skb) +static inline struct page *__dev_alloc_page(gfp_t gfp_mask) { - return __skb_alloc_pages(gfp_mask, skb, 0); + return __dev_alloc_pages(gfp_mask, 0); +} + +static inline struct page *dev_alloc_page(void) +{ + return __dev_alloc_page(GFP_ATOMIC); } /** @@ -2052,12 +2497,12 @@ static inline void skb_propagate_pfmemalloc(struct page *page, struct sk_buff *skb) { - if (page && page->pfmemalloc) + if (page_is_pfmemalloc(page)) skb->pfmemalloc = true; } /** - * skb_frag_page - retrieve the page refered to by a paged fragment + * skb_frag_page - retrieve the page referred to by a paged fragment * @frag: the paged fragment * * Returns the &struct page associated with @frag. @@ -2167,6 +2612,8 @@ __skb_frag_set_page(&skb_shinfo(skb)->frags[f], page); } +bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio); + /** * skb_frag_dma_map - maps a paged fragment via the DMA API * @dev: the device to map the fragment to @@ -2193,6 +2640,14 @@ return __pskb_copy(skb, skb_headroom(skb), gfp_mask); } + +static inline struct sk_buff *pskb_copy_for_clone(struct sk_buff *skb, + gfp_t gfp_mask) +{ + return __pskb_copy_fclone(skb, skb_headroom(skb), gfp_mask, true); +} + + /** * skb_clone_writable - is the header of a clone writable * @skb: buffer to check @@ -2207,6 +2662,13 @@ skb_headroom(skb) + len <= skb->hdr_len; } +static inline int skb_try_make_writable(struct sk_buff *skb, + unsigned int write_len) +{ + return skb_cloned(skb) && !skb_clone_writable(skb, write_len) && + pskb_expand_head(skb, 0, 0, GFP_ATOMIC); +} + static inline int __skb_cow(struct sk_buff *skb, unsigned int headroom, int cloned) { @@ -2263,7 +2725,6 @@ * is untouched. Otherwise it is extended. Returns zero on * success. The skb is freed on error. 
*/ - static inline int skb_padto(struct sk_buff *skb, unsigned int len) { unsigned int size = skb->len; @@ -2272,20 +2733,42 @@ return skb_pad(skb, len - size); } +/** + * skb_put_padto - increase size and pad an skbuff up to a minimal size + * @skb: buffer to pad + * @len: minimal length + * + * Pads up a buffer to ensure the trailing bytes exist and are + * blanked. If the buffer already contains sufficient data it + * is untouched. Otherwise it is extended. Returns zero on + * success. The skb is freed on error. + */ +static inline int skb_put_padto(struct sk_buff *skb, unsigned int len) +{ + unsigned int size = skb->len; + + if (unlikely(size < len)) { + len -= size; + if (skb_pad(skb, len)) + return -ENOMEM; + __skb_put(skb, len); + } + return 0; +} + static inline int skb_add_data(struct sk_buff *skb, - char __user *from, int copy) + struct iov_iter *from, int copy) { const int off = skb->len; if (skb->ip_summed == CHECKSUM_NONE) { - int err = 0; - __wsum csum = csum_and_copy_from_user(from, skb_put(skb, copy), - copy, 0, &err); - if (!err) { + __wsum csum = 0; + if (csum_and_copy_from_iter(skb_put(skb, copy), copy, + &csum, from) == copy) { skb->csum = csum_block_add(skb->csum, csum, off); return 0; } - } else if (!copy_from_user(skb_put(skb, copy), from, copy)) + } else if (copy_from_iter(skb_put(skb, copy), copy, from) == copy) return 0; __skb_trim(skb, off); @@ -2370,6 +2853,42 @@ unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len); +static inline void skb_postpush_rcsum(struct sk_buff *skb, + const void *start, unsigned int len) +{ + /* For performing the reverse operation to skb_postpull_rcsum(), + * we can instead of ... + * + * skb->csum = csum_add(skb->csum, csum_partial(start, len, 0)); + * + * ... just use this equivalent version here to save a few + * instructions. Feeding csum of 0 in csum_partial() and later + * on adding skb->csum is equivalent to feed skb->csum in the + * first place. + */ + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_partial(start, len, skb->csum); +} + +/** + * skb_push_rcsum - push skb and update receive checksum + * @skb: buffer to update + * @len: length of data pulled + * + * This function performs an skb_push on the packet and updates + * the CHECKSUM_COMPLETE checksum. It should be used on + * receive path processing instead of skb_push unless you know + * that the checksum difference is zero (e.g., a valid IP header) + * or you are setting ip_summed to CHECKSUM_NONE. 
+ */ +static inline unsigned char *skb_push_rcsum(struct sk_buff *skb, + unsigned int len) +{ + skb_push(skb, len); + skb_postpush_rcsum(skb, skb->data, len); + return skb->data; +} + /** * pskb_trim_rcsum - trim received skb and update checksum * @skb: buffer to trim @@ -2432,81 +2951,116 @@ skb_shinfo(skb)->frag_list = NULL; } -static inline void skb_frag_add_head(struct sk_buff *skb, struct sk_buff *frag) -{ - frag->next = skb_shinfo(skb)->frag_list; - skb_shinfo(skb)->frag_list = frag; -} - #define skb_walk_frags(skb, iter) \ for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next) -extern struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, - int *peeked, int *off, int *err); -extern struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, - int noblock, int *err); -extern unsigned int datagram_poll(struct file *file, struct socket *sock, - struct poll_table_struct *wait); -extern int skb_copy_datagram_iovec(const struct sk_buff *from, - int offset, struct iovec *to, - int size); -extern int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, - int hlen, - struct iovec *iov); -extern int skb_copy_datagram_from_iovec(struct sk_buff *skb, - int offset, - const struct iovec *from, - int from_offset, - int len); -extern int skb_copy_datagram_const_iovec(const struct sk_buff *from, - int offset, - const struct iovec *to, - int to_offset, - int size); -extern void skb_free_datagram(struct sock *sk, struct sk_buff *skb); -extern void skb_free_datagram_locked(struct sock *sk, - struct sk_buff *skb); -extern int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, - unsigned int flags); -extern __wsum skb_checksum(const struct sk_buff *skb, int offset, - int len, __wsum csum); -extern int skb_copy_bits(const struct sk_buff *skb, int offset, - void *to, int len); -extern int skb_store_bits(struct sk_buff *skb, int offset, - const void *from, int len); -extern __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, - int offset, u8 *to, int len, - __wsum csum); -extern int skb_splice_bits(struct sk_buff *skb, - unsigned int offset, - struct pipe_inode_info *pipe, - unsigned int len, - unsigned int flags); -extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); -extern void skb_split(struct sk_buff *skb, - struct sk_buff *skb1, const u32 len); -extern int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, - int shiftlen); - -extern struct sk_buff *skb_segment(struct sk_buff *skb, - netdev_features_t features); - +struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, + int *peeked, int *off, int *err); +struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, + int *err); +unsigned int datagram_poll(struct file *file, struct socket *sock, + struct poll_table_struct *wait); +int skb_copy_datagram_iter(const struct sk_buff *from, int offset, + struct iov_iter *to, int size); +static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset, + struct msghdr *msg, int size) +{ + return skb_copy_datagram_iter(from, offset, &msg->msg_iter, size); +} +int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, int hlen, + struct msghdr *msg); +int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, + struct iov_iter *from, int len); +int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm); +void skb_free_datagram(struct sock *sk, struct sk_buff *skb); +void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb); +int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, 
unsigned int flags); +int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); +int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len); +__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, + int len, __wsum csum); +ssize_t skb_socket_splice(struct sock *sk, + struct pipe_inode_info *pipe, + struct splice_pipe_desc *spd); +int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, + struct pipe_inode_info *pipe, unsigned int len, + unsigned int flags, + ssize_t (*splice_cb)(struct sock *, + struct pipe_inode_info *, + struct splice_pipe_desc *)); +void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); +unsigned int skb_zerocopy_headlen(const struct sk_buff *from); +int skb_zerocopy(struct sk_buff *to, struct sk_buff *from, + int len, int hlen); +void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); +int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); +void skb_scrub_packet(struct sk_buff *skb, bool xnet); unsigned int skb_gso_transport_seglen(const struct sk_buff *skb); +struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); +struct sk_buff *skb_vlan_untag(struct sk_buff *skb); +int skb_ensure_writable(struct sk_buff *skb, int write_len); +int skb_vlan_pop(struct sk_buff *skb); +int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci); -static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, - int len, void *buffer) +static inline int memcpy_from_msg(void *data, struct msghdr *msg, int len) { - int hlen = skb_headlen(skb); + return copy_from_iter(data, len, &msg->msg_iter) == len ? 0 : -EFAULT; +} + +static inline int memcpy_to_msg(struct msghdr *msg, void *data, int len) +{ + return copy_to_iter(data, len, &msg->msg_iter) == len ? 0 : -EFAULT; +} +struct skb_checksum_ops { + __wsum (*update)(const void *mem, int len, __wsum wsum); + __wsum (*combine)(__wsum csum, __wsum csum2, int offset, int len); +}; + +__wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, + __wsum csum, const struct skb_checksum_ops *ops); +__wsum skb_checksum(const struct sk_buff *skb, int offset, int len, + __wsum csum); + +static inline void * __must_check +__skb_header_pointer(const struct sk_buff *skb, int offset, + int len, void *data, int hlen, void *buffer) +{ if (hlen - offset >= len) - return skb->data + offset; + return data + offset; - if (skb_copy_bits(skb, offset, buffer, len) < 0) + if (!skb || + skb_copy_bits(skb, offset, buffer, len) < 0) return NULL; return buffer; } +static inline void * __must_check +skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer) +{ + return __skb_header_pointer(skb, offset, len, skb->data, + skb_headlen(skb), buffer); +} + +/** + * skb_needs_linearize - check if we need to linearize a given skb + * depending on the given device features. + * @skb: socket buffer to check + * @features: net device features + * + * Returns true if either: + * 1. skb has frag_list and the device doesn't support FRAGLIST, or + * 2. skb is fragmented and the device does not support SG. 
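skb_header_pointer(), refactored above around __skb_header_pointer(), is the usual way to read a header that may be split across fragments: it returns a pointer into the linear area when possible and otherwise copies into a caller-provided buffer. A short usage sketch (the helper name and the DNS port check are illustrative only):

#include <linux/skbuff.h>
#include <linux/udp.h>

static bool my_is_dns_packet(const struct sk_buff *skb, int thoff)
{
	struct udphdr _uh;
	const struct udphdr *uh;

	/* NULL means the packet is shorter than a full UDP header. */
	uh = skb_header_pointer(skb, thoff, sizeof(_uh), &_uh);
	if (!uh)
		return false;

	return uh->dest == htons(53) || uh->source == htons(53);
}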
+ */ +static inline bool skb_needs_linearize(struct sk_buff *skb, + netdev_features_t features) +{ + return skb_is_nonlinear(skb) && + ((skb_has_frag_list(skb) && !(features & NETIF_F_FRAGLIST)) || + (skb_shinfo(skb)->nr_frags && !(features & NETIF_F_SG))); +} + static inline void skb_copy_from_linear_data(const struct sk_buff *skb, void *to, const unsigned int len) @@ -2536,7 +3090,7 @@ memcpy(skb->data + offset, from, len); } -extern void skb_init(void); +void skb_init(void); static inline ktime_t skb_get_ktime(const struct sk_buff *skb) { @@ -2579,12 +3133,12 @@ return ktime_set(0, 0); } -extern void skb_timestamping_init(void); +struct sk_buff *skb_clone_sk(struct sk_buff *skb); #ifdef CONFIG_NETWORK_PHY_TIMESTAMPING -extern void skb_clone_tx_timestamp(struct sk_buff *skb); -extern bool skb_defer_rx_timestamp(struct sk_buff *skb); +void skb_clone_tx_timestamp(struct sk_buff *skb); +bool skb_defer_rx_timestamp(struct sk_buff *skb); #else /* CONFIG_NETWORK_PHY_TIMESTAMPING */ @@ -2604,16 +3158,20 @@ * * PHY drivers may accept clones of transmitted packets for * timestamping via their phy_driver.txtstamp method. These drivers - * must call this function to return the skb back to the stack, with - * or without a timestamp. + * must call this function to return the skb back to the stack with a + * timestamp. * * @skb: clone of the the original outgoing packet - * @hwtstamps: hardware time stamps, may be NULL if not available + * @hwtstamps: hardware time stamps * */ void skb_complete_tx_timestamp(struct sk_buff *skb, struct skb_shared_hwtstamps *hwtstamps); +void __skb_tstamp_tx(struct sk_buff *orig_skb, + struct skb_shared_hwtstamps *hwtstamps, + struct sock *sk, int tstype); + /** * skb_tstamp_tx - queue clone of skb with send time stamps * @orig_skb: the original outgoing packet @@ -2625,8 +3183,8 @@ * generates a software time stamp (otherwise), then queues the clone * to the error queue of the socket. Errors are silently ignored. */ -extern void skb_tstamp_tx(struct sk_buff *orig_skb, - struct skb_shared_hwtstamps *hwtstamps); +void skb_tstamp_tx(struct sk_buff *orig_skb, + struct skb_shared_hwtstamps *hwtstamps); static inline void sw_tx_timestamp(struct sk_buff *skb) { @@ -2641,6 +3199,10 @@ * Ethernet MAC Drivers should call this function in their hard_xmit() * function immediately before giving the sk_buff to the MAC hardware. * + * Specifically, one should make absolutely sure that this function is + * called before TX completion of this packet can trigger. Otherwise + * the packet could potentially already be freed. + * * @skb: A socket buffer. 
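As a usage illustration (not taken from the patch), a driver transmit routine would typically combine skb_needs_linearize() and skb_tx_timestamp() as below; the hardware hand-off is left as a comment since it is device specific.

#include <linux/skbuff.h>
#include <linux/netdevice.h>

static netdev_tx_t my_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	/* Fall back to a linear skb when the device lacks SG/FRAGLIST. */
	if (skb_needs_linearize(skb, dev->features) && skb_linearize(skb)) {
		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;
	}

	/* Take the timestamp strictly before the hardware can complete
	 * (and free) the packet, as required by the comment above.
	 */
	skb_tx_timestamp(skb);

	/* ... post the skb to the hardware TX ring here ... */

	return NETDEV_TX_OK;
}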
*/ static inline void skb_tx_timestamp(struct sk_buff *skb) @@ -2658,12 +3220,15 @@ */ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked); -extern __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len); -extern __sum16 __skb_checksum_complete(struct sk_buff *skb); +__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len); +__sum16 __skb_checksum_complete(struct sk_buff *skb); static inline int skb_csum_unnecessary(const struct sk_buff *skb) { - return skb->ip_summed & CHECKSUM_UNNECESSARY; + return ((skb->ip_summed == CHECKSUM_UNNECESSARY) || + skb->csum_valid || + (skb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_start_offset(skb) >= 0)); } /** @@ -2688,8 +3253,242 @@ 0 : __skb_checksum_complete(skb); } +static inline void __skb_decr_checksum_unnecessary(struct sk_buff *skb) +{ + if (skb->ip_summed == CHECKSUM_UNNECESSARY) { + if (skb->csum_level == 0) + skb->ip_summed = CHECKSUM_NONE; + else + skb->csum_level--; + } +} + +static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb) +{ + if (skb->ip_summed == CHECKSUM_UNNECESSARY) { + if (skb->csum_level < SKB_MAX_CSUM_LEVEL) + skb->csum_level++; + } else if (skb->ip_summed == CHECKSUM_NONE) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->csum_level = 0; + } +} + +static inline void __skb_mark_checksum_bad(struct sk_buff *skb) +{ + /* Mark current checksum as bad (typically called from GRO + * path). In the case that ip_summed is CHECKSUM_NONE + * this must be the first checksum encountered in the packet. + * When ip_summed is CHECKSUM_UNNECESSARY, this is the first + * checksum after the last one validated. For UDP, a zero + * checksum can not be marked as bad. + */ + + if (skb->ip_summed == CHECKSUM_NONE || + skb->ip_summed == CHECKSUM_UNNECESSARY) + skb->csum_bad = 1; +} + +/* Check if we need to perform checksum complete validation. + * + * Returns true if checksum complete is needed, false otherwise + * (either checksum is unnecessary or zero checksum is allowed). + */ +static inline bool __skb_checksum_validate_needed(struct sk_buff *skb, + bool zero_okay, + __sum16 check) +{ + if (skb_csum_unnecessary(skb) || (zero_okay && !check)) { + skb->csum_valid = 1; + __skb_decr_checksum_unnecessary(skb); + return false; + } + + return true; +} + +/* For small packets <= CHECKSUM_BREAK peform checksum complete directly + * in checksum_init. + */ +#define CHECKSUM_BREAK 76 + +/* Unset checksum-complete + * + * Unset checksum complete can be done when packet is being modified + * (uncompressed for instance) and checksum-complete value is + * invalidated. + */ +static inline void skb_checksum_complete_unset(struct sk_buff *skb) +{ + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->ip_summed = CHECKSUM_NONE; +} + +/* Validate (init) checksum based on checksum complete. + * + * Return values: + * 0: checksum is validated or try to in skb_checksum_complete. 
In the latter + * case the ip_summed will not be CHECKSUM_UNNECESSARY and the pseudo + * checksum is stored in skb->csum for use in __skb_checksum_complete + * non-zero: value of invalid checksum + * + */ +static inline __sum16 __skb_checksum_validate_complete(struct sk_buff *skb, + bool complete, + __wsum psum) +{ + if (skb->ip_summed == CHECKSUM_COMPLETE) { + if (!csum_fold(csum_add(psum, skb->csum))) { + skb->csum_valid = 1; + return 0; + } + } else if (skb->csum_bad) { + /* ip_summed == CHECKSUM_NONE in this case */ + return (__force __sum16)1; + } + + skb->csum = psum; + + if (complete || skb->len <= CHECKSUM_BREAK) { + __sum16 csum; + + csum = __skb_checksum_complete(skb); + skb->csum_valid = !csum; + return csum; + } + + return 0; +} + +static inline __wsum null_compute_pseudo(struct sk_buff *skb, int proto) +{ + return 0; +} + +/* Perform checksum validate (init). Note that this is a macro since we only + * want to calculate the pseudo header which is an input function if necessary. + * First we try to validate without any computation (checksum unnecessary) and + * then calculate based on checksum complete calling the function to compute + * pseudo header. + * + * Return values: + * 0: checksum is validated or try to in skb_checksum_complete + * non-zero: value of invalid checksum + */ +#define __skb_checksum_validate(skb, proto, complete, \ + zero_okay, check, compute_pseudo) \ +({ \ + __sum16 __ret = 0; \ + skb->csum_valid = 0; \ + if (__skb_checksum_validate_needed(skb, zero_okay, check)) \ + __ret = __skb_checksum_validate_complete(skb, \ + complete, compute_pseudo(skb, proto)); \ + __ret; \ +}) + +#define skb_checksum_init(skb, proto, compute_pseudo) \ + __skb_checksum_validate(skb, proto, false, false, 0, compute_pseudo) + +#define skb_checksum_init_zero_check(skb, proto, check, compute_pseudo) \ + __skb_checksum_validate(skb, proto, false, true, check, compute_pseudo) + +#define skb_checksum_validate(skb, proto, compute_pseudo) \ + __skb_checksum_validate(skb, proto, true, false, 0, compute_pseudo) + +#define skb_checksum_validate_zero_check(skb, proto, check, \ + compute_pseudo) \ + __skb_checksum_validate(skb, proto, true, true, check, compute_pseudo) + +#define skb_checksum_simple_validate(skb) \ + __skb_checksum_validate(skb, 0, true, false, 0, null_compute_pseudo) + +static inline bool __skb_checksum_convert_check(struct sk_buff *skb) +{ + return (skb->ip_summed == CHECKSUM_NONE && + skb->csum_valid && !skb->csum_bad); +} + +static inline void __skb_checksum_convert(struct sk_buff *skb, + __sum16 check, __wsum pseudo) +{ + skb->csum = ~pseudo; + skb->ip_summed = CHECKSUM_COMPLETE; +} + +#define skb_checksum_try_convert(skb, proto, check, compute_pseudo) \ +do { \ + if (__skb_checksum_convert_check(skb)) \ + __skb_checksum_convert(skb, check, \ + compute_pseudo(skb, proto)); \ +} while (0) + +static inline void skb_remcsum_adjust_partial(struct sk_buff *skb, void *ptr, + u16 start, u16 offset) +{ + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = ((unsigned char *)ptr + start) - skb->head; + skb->csum_offset = offset - start; +} + +/* Update skbuf and packet to reflect the remote checksum offload operation. + * When called, ptr indicates the starting point for skb->csum when + * ip_summed is CHECKSUM_COMPLETE. If we need create checksum complete + * here, skb_postpull_rcsum is done so skb->csum start is ptr. 
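A sketch of how a receive path could plug into the validation macros above: the pseudo-header callback is only evaluated when a checksum-complete verification is actually needed. All my_ names are illustrative; a real protocol would use its own pseudo-header helper.

#include <linux/skbuff.h>
#include <linux/ip.h>
#include <net/checksum.h>

static __wsum my_v4_compute_pseudo(struct sk_buff *skb, int proto)
{
	const struct iphdr *iph = ip_hdr(skb);

	/* Assumes the network header is set and skb->len minus the
	 * transport offset is the L4 length covered by the checksum.
	 */
	return csum_tcpudp_nofold(iph->saddr, iph->daddr,
				  skb->len - skb_transport_offset(skb),
				  proto, 0);
}

static int my_rcv_check(struct sk_buff *skb, int proto)
{
	/* 0 when the checksum is (or becomes) valid; a non-zero value is
	 * the folded checksum of a corrupt packet.
	 */
	return skb_checksum_init(skb, proto, my_v4_compute_pseudo);
}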
+ */ +static inline void skb_remcsum_process(struct sk_buff *skb, void *ptr, + int start, int offset, bool nopartial) +{ + __wsum delta; + + if (!nopartial) { + skb_remcsum_adjust_partial(skb, ptr, start, offset); + return; + } + + if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE)) { + __skb_checksum_complete(skb); + skb_postpull_rcsum(skb, skb->data, ptr - (void *)skb->data); + } + + delta = remcsum_adjust(ptr, skb->csum, start, offset); + + /* Adjust skb->csum since we changed the packet */ + skb->csum = csum_add(skb->csum, delta); +} + +#ifdef CONFIG_AVM_GENERIC_CONNTRACK +static inline enum generic_ct_dir skb_get_ct_dir(struct sk_buff *skb) +{ + return skb->nfctinfo ? GENERIC_CT_DIR_REPLY : GENERIC_CT_DIR_ORIGINAL; +} + +static inline void skb_set_ct_dir(struct sk_buff *skb, enum generic_ct_dir dir) +{ + skb->nfctinfo = (dir & 1); +} + +static inline void generic_ct_reset(struct sk_buff *skb __maybe_unused) +{ + generic_ct_put(skb->generic_ct); + skb->generic_ct = NULL; +} + +static inline void __generic_ct_copy(struct sk_buff *dst __maybe_unused, + const struct sk_buff *src __maybe_unused) +{ + dst->generic_ct = generic_ct_get(src->generic_ct); + dst->nfctinfo = src->nfctinfo; +} + +static inline void generic_ct_copy(struct sk_buff *dst, const struct sk_buff *src) +{ + generic_ct_put(dst->generic_ct); + __generic_ct_copy(dst, src); +} +#endif + #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) -extern void nf_conntrack_destroy(struct nf_conntrack *nfct); +void nf_conntrack_destroy(struct nf_conntrack *nfct); static inline void nf_conntrack_put(struct nf_conntrack *nfct) { if (nfct && atomic_dec_and_test(&nfct->use)) @@ -2701,7 +3500,7 @@ atomic_inc(&nfct->use); } #endif -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge) { if (nf_bridge && atomic_dec_and_test(&nf_bridge->use)) @@ -2713,48 +3512,80 @@ atomic_inc(&nf_bridge->use); } #endif /* CONFIG_BRIDGE_NETFILTER */ -static inline void nf_reset(struct sk_buff *skb) +static inline void nf_reset_no_generic_ct(struct sk_buff *skb __maybe_unused) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_conntrack_put(skb->nfct); skb->nfct = NULL; #endif -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) nf_bridge_put(skb->nf_bridge); skb->nf_bridge = NULL; #endif } +static inline void nf_reset(struct sk_buff *skb) +{ + nf_reset_no_generic_ct(skb); +#ifdef CONFIG_AVM_GENERIC_CONNTRACK + generic_ct_reset(skb); +#endif +} + static inline void nf_reset_trace(struct sk_buff *skb) { -#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || defined(CONFIG_NF_TABLES) skb->nf_trace = 0; #endif } /* Note: This doesn't put any conntrack and bridge info in dst. 
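For illustration only (the helper name is made up): a packet re-injected into an unrelated device should first drop its conntrack, bridge-netfilter and, when CONFIG_AVM_GENERIC_CONNTRACK is enabled, generic_ct references via nf_reset(), so no stale state follows it.

#include <linux/skbuff.h>
#include <linux/netdevice.h>

static int my_reinject(struct sk_buff *skb, struct net_device *out)
{
	/* Puts skb->nfct / skb->nf_bridge and, if configured, the generic
	 * connection-tracking entry attached by the AVM extensions.
	 */
	nf_reset(skb);
	nf_reset_trace(skb);

	skb->dev = out;
	return dev_queue_xmit(skb);
}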
*/ -static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src) +static inline void __nf_copy_no_generic_ct(struct sk_buff *dst, const struct sk_buff *src, + bool copy) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) dst->nfct = src->nfct; nf_conntrack_get(src->nfct); - dst->nfctinfo = src->nfctinfo; + if (copy) + dst->nfctinfo = src->nfctinfo; #endif -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) dst->nf_bridge = src->nf_bridge; nf_bridge_get(src->nf_bridge); #endif +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || defined(CONFIG_NF_TABLES) + if (copy) + dst->nf_trace = src->nf_trace; +#endif } -static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src) +/* Note: This doesn't put any conntrack and bridge info in dst. */ +static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src, + bool copy) +{ + __nf_copy_no_generic_ct(dst, src, copy); +#ifdef CONFIG_AVM_GENERIC_CONNTRACK + __generic_ct_copy(dst, src); +#endif +} + +static inline void nf_copy_no_generic_ct(struct sk_buff *dst, const struct sk_buff *src) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_conntrack_put(dst->nfct); #endif -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) nf_bridge_put(dst->nf_bridge); #endif - __nf_copy(dst, src); + __nf_copy_no_generic_ct(dst, src, true); +} + +static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src) +{ + nf_copy_no_generic_ct(dst, src); +#ifdef CONFIG_AVM_GENERIC_CONNTRACK + generic_ct_copy(dst, src); +#endif } #ifdef CONFIG_NETWORK_SECMARK @@ -2775,6 +3606,19 @@ { } #endif +static inline bool skb_irq_freeable(const struct sk_buff *skb) +{ + return !skb->destructor && +#if IS_ENABLED(CONFIG_XFRM) + !skb->sp && +#endif +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + !skb->nfct && +#endif + !skb->_skb_refdst && + !skb_has_frag_list(skb); +} + static inline void skb_set_queue_mapping(struct sk_buff *skb, u16 queue_mapping) { skb->queue_mapping = queue_mapping; @@ -2805,30 +3649,28 @@ return skb->queue_mapping != 0; } -extern u16 __skb_tx_hash(const struct net_device *dev, - const struct sk_buff *skb, - unsigned int num_tx_queues); - -#ifdef CONFIG_XFRM static inline struct sec_path *skb_sec_path(struct sk_buff *skb) { +#ifdef CONFIG_XFRM return skb->sp; -} #else -static inline struct sec_path *skb_sec_path(struct sk_buff *skb) -{ return NULL; -} #endif +} /* Keeps track of mac header offset relative to skb->head. * It is useful for TSO of Tunneling protocol. e.g. GRE. * For non-tunnel skb it points to skb_mac_header() and for - * tunnel skb it points to outer mac header. */ + * tunnel skb it points to outer mac header. + * Keeps track of level of encapsulation of network headers. + */ struct skb_gso_cb { - int mac_offset; + int mac_offset; + int encap_level; + __u16 csum_start; }; -#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)(skb)->cb) +#define SKB_SGO_CB_OFFSET 32 +#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)((skb)->cb + SKB_SGO_CB_OFFSET)) static inline int skb_tnl_header_len(const struct sk_buff *inner_skb) { @@ -2851,17 +3693,39 @@ return 0; } +/* Compute the checksum for a gso segment. First compute the checksum value + * from the start of transport header to SKB_GSO_CB(skb)->csum_start, and + * then add in skb->csum (checksum from csum_start to end of packet). 
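The move of the GSO control block to SKB_SGO_CB_OFFSET within skb->cb[] relies on the 48-byte cb area holding both the lower per-protocol region and the GSO state; a compile-time check along these lines (hypothetical helper, not part of the patch) makes that assumption explicit.

#include <linux/skbuff.h>
#include <linux/kernel.h>
#include <linux/bug.h>

static inline void my_check_gso_cb_layout(void)
{
	/* Compile-time only: the GSO cb starting at byte 32 must still fit
	 * entirely inside skb->cb[].
	 */
	BUILD_BUG_ON(SKB_SGO_CB_OFFSET + sizeof(struct skb_gso_cb) >
		     FIELD_SIZEOF(struct sk_buff, cb));
}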
+ * skb->csum and csum_start are then updated to reflect the checksum of the + * resultant packet starting from the transport header-- the resultant checksum + * is in the res argument (i.e. normally zero or ~ of checksum of a pseudo + * header). + */ +static inline __sum16 gso_make_checksum(struct sk_buff *skb, __wsum res) +{ + int plen = SKB_GSO_CB(skb)->csum_start - skb_headroom(skb) - + skb_transport_offset(skb); + __wsum partial; + + partial = csum_partial(skb_transport_header(skb), plen, skb->csum); + skb->csum = res; + SKB_GSO_CB(skb)->csum_start -= plen; + + return csum_fold(partial); +} + static inline bool skb_is_gso(const struct sk_buff *skb) { return skb_shinfo(skb)->gso_size; } +/* Note: Should be called only if skb_is_gso(skb) is true */ static inline bool skb_is_gso_v6(const struct sk_buff *skb) { return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6; } -extern void __skb_warn_lro_forwarding(const struct sk_buff *skb); +void __skb_warn_lro_forwarding(const struct sk_buff *skb); static inline bool skb_warn_if_lro(const struct sk_buff *skb) { @@ -2901,7 +3765,10 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off); -u32 __skb_get_poff(const struct sk_buff *skb); +int skb_checksum_setup(struct sk_buff *skb, bool recalculate); +struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb, + unsigned int transport_len, + __sum16(*skb_chkf)(struct sk_buff *skb)); /** * skb_head_is_locked - Determine if the skb->head is locked down @@ -2933,5 +3800,6 @@ skb_network_header(skb); return hdr_len + skb_gso_transport_seglen(skb); } + #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */
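As a closing usage note (a sketch, not taken from this header): the typical consumer of skb_is_gso() together with skb_gso_network_seglen() is a forwarding path that must judge a GSO packet by the size of its future segments rather than by skb->len when checking an egress MTU.

#include <linux/skbuff.h>

static bool my_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* A GSO packet fits as long as each resulting segment fits. */
	if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
		return false;

	return true;
}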