--- zzzz-none-000/linux-3.10.107/include/net/tcp.h 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/include/net/tcp.h 2021-02-04 17:41:59.000000000 +0000 @@ -27,10 +27,10 @@ #include #include #include -#include #include #include #include +#include #include #include @@ -50,32 +50,34 @@ extern struct inet_hashinfo tcp_hashinfo; extern struct percpu_counter tcp_orphan_count; -extern void tcp_time_wait(struct sock *sk, int state, int timeo); +void tcp_time_wait(struct sock *sk, int state, int timeo); #define MAX_TCP_HEADER (128 + MAX_HEADER) #define MAX_TCP_OPTION_SPACE 40 +#define TCP_MIN_SND_MSS 48 +#define TCP_MIN_GSO_SIZE (TCP_MIN_SND_MSS - MAX_TCP_OPTION_SPACE) -/* +/* * Never offer a window over 32767 without using window scaling. Some - * poor stacks do signed 16bit maths! + * poor stacks do signed 16bit maths! */ #define MAX_TCP_WINDOW 32767U -/* Offer an initial receive window of 10 mss. */ -#define TCP_DEFAULT_INIT_RCVWND 10 - /* Minimal accepted MSS. It is (60+60+8) - (20+20). */ #define TCP_MIN_MSS 88U /* The least MTU to use for probing */ -#define TCP_BASE_MSS 512 +#define TCP_BASE_MSS 1024 + +/* probing interval, default to 10 minutes as per RFC4821 */ +#define TCP_PROBE_INTERVAL 600 + +/* Specify interval when tcp mtu probing will stop */ +#define TCP_PROBE_THRESHOLD 8 /* After receiving this amount of duplicate ACKs fast retransmit starts. */ #define TCP_FASTRETRANS_THRESH 3 -/* Maximal reordering. */ -#define TCP_MAX_REORDERING 127 - /* Maximal number of ACKs sent quickly to accelerate slow-start. */ #define TCP_MAX_QUICKACKS 16U @@ -170,7 +172,7 @@ /* * TCP option */ - + #define TCPOPT_NOP 1 /* Padding */ #define TCPOPT_EOL 0 /* End of options */ #define TCPOPT_MSS 2 /* Segment size negotiating */ @@ -179,6 +181,7 @@ #define TCPOPT_SACK 5 /* SACK Block */ #define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */ #define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */ +#define TCPOPT_FASTOPEN 34 /* Fast open (RFC7413) */ #define TCPOPT_EXP 254 /* Experimental */ /* Magic number to be after the option value for sharing TCP * experimental options. See draft-ietf-tcpm-experimental-options-00.txt @@ -194,11 +197,8 @@ #define TCPOLEN_SACK_PERM 2 #define TCPOLEN_TIMESTAMP 10 #define TCPOLEN_MD5SIG 18 +#define TCPOLEN_FASTOPEN_BASE 2 #define TCPOLEN_EXP_FASTOPEN_BASE 4 -#define TCPOLEN_COOKIE_BASE 2 /* Cookie-less header extension */ -#define TCPOLEN_COOKIE_PAIR 3 /* Cookie pair header extension */ -#define TCPOLEN_COOKIE_MIN (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MIN) -#define TCPOLEN_COOKIE_MAX (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MAX) /* But this is what stacks really send out. */ #define TCPOLEN_TSTAMP_ALIGNED 12 @@ -226,8 +226,6 @@ #define TFO_SERVER_ENABLE 2 #define TFO_CLIENT_NO_COOKIE 4 /* Data in SYN w/o cookie option */ -/* Process SYN data but skip cookie validation */ -#define TFO_SERVER_COOKIE_NOT_CHKED 0x100 /* Accept SYN data w/o any cookie option */ #define TFO_SERVER_COOKIE_NOT_REQD 0x200 @@ -236,10 +234,6 @@ */ #define TFO_SERVER_WO_SOCKOPT1 0x400 #define TFO_SERVER_WO_SOCKOPT2 0x800 -/* Always create TFO child sockets on a TFO listener even when - * cookie/data not present. (For testing purpose!) 
- */ -#define TFO_SERVER_ALWAYS 0x1000 extern struct inet_timewait_death_row tcp_death_row; @@ -265,7 +259,9 @@ extern int sysctl_tcp_max_orphans; extern int sysctl_tcp_fack; extern int sysctl_tcp_reordering; +extern int sysctl_tcp_max_reordering; extern int sysctl_tcp_dsack; +extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; extern int sysctl_tcp_app_win; @@ -273,26 +269,36 @@ extern int sysctl_tcp_tw_reuse; extern int sysctl_tcp_frto; extern int sysctl_tcp_low_latency; -extern int sysctl_tcp_dma_copybreak; extern int sysctl_tcp_nometrics_save; extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_tso_win_divisor; -extern int sysctl_tcp_mtu_probing; -extern int sysctl_tcp_base_mss; extern int sysctl_tcp_workaround_signed_windows; extern int sysctl_tcp_slow_start_after_idle; -extern int sysctl_tcp_max_ssthresh; extern int sysctl_tcp_thin_linear_timeouts; extern int sysctl_tcp_thin_dupack; extern int sysctl_tcp_early_retrans; extern int sysctl_tcp_limit_output_bytes; extern int sysctl_tcp_challenge_ack_limit; +extern unsigned int sysctl_tcp_notsent_lowat; extern int sysctl_tcp_min_tso_segs; +extern int sysctl_tcp_min_rtt_wlen; +extern int sysctl_tcp_autocorking; +extern int sysctl_tcp_invalid_ratelimit; +extern int sysctl_tcp_pacing_ss_ratio; +extern int sysctl_tcp_pacing_ca_ratio; extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; extern int tcp_memory_pressure; +/* optimized version of sk_under_memory_pressure() for TCP sockets */ +static inline bool tcp_under_memory_pressure(const struct sock *sk) +{ + if (mem_cgroup_sockets_enabled && sk->sk_cgrp) + return !!sk->sk_cgrp->memory_pressure; + + return tcp_memory_pressure; +} /* * The next routines deal with comparing 32 bit unsigned ints * and worry about wraparound (automatic with unsigned arithmetic). @@ -318,6 +324,8 @@ return false; } +void sk_forced_mem_schedule(struct sock *sk, int size); + static inline bool tcp_too_many_orphans(struct sock *sk, int shift) { struct percpu_counter *ocp = sk->sk_prot->orphan_count; @@ -331,20 +339,8 @@ return false; } -extern bool tcp_check_oom(struct sock *sk, int shift); +bool tcp_check_oom(struct sock *sk, int shift); -/* syncookies: remember time of last synqueue overflow */ -static inline void tcp_synq_overflow(struct sock *sk) -{ - tcp_sk(sk)->rx_opt.ts_recent_stamp = jiffies; -} - -/* syncookies: no recent synqueue overflow on this listening socket? 
*/ -static inline bool tcp_synq_no_recent_overflow(const struct sock *sk) -{ - unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; - return time_after(jiffies, last_overflow + TCP_TIMEOUT_FALLBACK); -} extern struct proto tcp_prot; @@ -354,38 +350,33 @@ #define TCP_ADD_STATS_USER(net, field, val) SNMP_ADD_STATS_USER((net)->mib.tcp_statistics, field, val) #define TCP_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val) -extern void tcp_init_mem(struct net *net); +void tcp_tasklet_init(void); -extern void tcp_tasklet_init(void); +void tcp_v4_err(struct sk_buff *skb, u32); -extern void tcp_v4_err(struct sk_buff *skb, u32); +void tcp_shutdown(struct sock *sk, int how); -extern void tcp_shutdown (struct sock *sk, int how); +void tcp_v4_early_demux(struct sk_buff *skb); +int tcp_v4_rcv(struct sk_buff *skb); -extern void tcp_v4_early_demux(struct sk_buff *skb); -extern int tcp_v4_rcv(struct sk_buff *skb); - -extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); -extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t size); -extern int tcp_sendpage(struct sock *sk, struct page *page, int offset, - size_t size, int flags); -extern void tcp_release_cb(struct sock *sk); -extern void tcp_wfree(struct sk_buff *skb); -extern void tcp_write_timer_handler(struct sock *sk); -extern void tcp_delack_timer_handler(struct sock *sk); -extern int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg); -extern int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, - const struct tcphdr *th, unsigned int len); -extern int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, - const struct tcphdr *th, unsigned int len); -extern void tcp_rcv_space_adjust(struct sock *sk); -extern void tcp_cleanup_rbuf(struct sock *sk, int copied); -extern int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp); -extern void tcp_twsk_destructor(struct sock *sk); -extern ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, - unsigned int flags); +int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); +int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); +int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, + int flags); +void tcp_release_cb(struct sock *sk); +void tcp_wfree(struct sk_buff *skb); +void tcp_write_timer_handler(struct sock *sk); +void tcp_delack_timer_handler(struct sock *sk); +int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg); +int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb); +void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, + const struct tcphdr *th, unsigned int len); +void tcp_rcv_space_adjust(struct sock *sk); +int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp); +void tcp_twsk_destructor(struct sock *sk); +ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags); static inline void tcp_dec_quickack_mode(struct sock *sk, const unsigned int pkts) @@ -415,149 +406,182 @@ }; -extern enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, - struct sk_buff *skb, - const struct tcphdr *th); -extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb, - struct request_sock *req, - struct request_sock **prev, - bool fastopen); -extern int tcp_child_process(struct sock *parent, struct sock *child, - struct sk_buff *skb); -extern void tcp_enter_loss(struct sock *sk, int 
how); -extern void tcp_clear_retrans(struct tcp_sock *tp); -extern void tcp_update_metrics(struct sock *sk); -extern void tcp_init_metrics(struct sock *sk); -extern void tcp_metrics_init(void); -extern bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, bool paws_check); -extern bool tcp_remember_stamp(struct sock *sk); -extern bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw); -extern void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst); -extern void tcp_disable_fack(struct tcp_sock *tp); -extern void tcp_close(struct sock *sk, long timeout); -extern void tcp_init_sock(struct sock *sk); -extern unsigned int tcp_poll(struct file * file, struct socket *sock, - struct poll_table_struct *wait); -extern int tcp_getsockopt(struct sock *sk, int level, int optname, +enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, + struct sk_buff *skb, + const struct tcphdr *th); +struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, bool fastopen); +int tcp_child_process(struct sock *parent, struct sock *child, + struct sk_buff *skb); +void tcp_enter_loss(struct sock *sk); +void tcp_clear_retrans(struct tcp_sock *tp); +void tcp_update_metrics(struct sock *sk); +void tcp_init_metrics(struct sock *sk); +void tcp_metrics_init(void); +bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, + bool paws_check, bool timestamps); +bool tcp_remember_stamp(struct sock *sk); +bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw); +void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst); +void tcp_disable_fack(struct tcp_sock *tp); +void tcp_close(struct sock *sk, long timeout); +void tcp_init_sock(struct sock *sk); +unsigned int tcp_poll(struct file *file, struct socket *sock, + struct poll_table_struct *wait); +int tcp_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen); +int tcp_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, unsigned int optlen); +int compat_tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); -extern int tcp_setsockopt(struct sock *sk, int level, int optname, +int compat_tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); -extern int compat_tcp_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen); -extern int compat_tcp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen); -extern void tcp_set_keepalive(struct sock *sk, int val); -extern void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req); -extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int nonblock, int flags, int *addr_len); -extern void tcp_parse_options(const struct sk_buff *skb, - struct tcp_options_received *opt_rx, - int estab, struct tcp_fastopen_cookie *foc); -extern const u8 *tcp_parse_md5sig_option(const struct tcphdr *th); +void tcp_set_keepalive(struct sock *sk, int val); +void tcp_syn_ack_timeout(const struct request_sock *req); +int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, + int flags, int *addr_len); +void tcp_parse_options(const struct sk_buff *skb, + struct tcp_options_received *opt_rx, + int estab, struct tcp_fastopen_cookie *foc); +const u8 *tcp_parse_md5sig_option(const struct tcphdr *th); /* * TCP v4 functions exported for the inet6 API */ -extern 
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb); +void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb); void tcp_v4_mtu_reduced(struct sock *sk); -extern int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb); -extern struct sock * tcp_create_openreq_child(struct sock *sk, - struct request_sock *req, - struct sk_buff *skb); -extern struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, - struct dst_entry *dst); -extern int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb); -extern int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, - int addr_len); -extern int tcp_connect(struct sock *sk); -extern struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, - struct request_sock *req, - struct tcp_fastopen_cookie *foc); -extern int tcp_disconnect(struct sock *sk, int flags); +void tcp_req_err(struct sock *sk, u32 seq, bool abort); +int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb); +struct sock *tcp_create_openreq_child(const struct sock *sk, + struct request_sock *req, + struct sk_buff *skb); +void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst); +struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct dst_entry *dst, + struct request_sock *req_unhash, + bool *own_req); +int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb); +int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); +int tcp_connect(struct sock *sk); +struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, + struct request_sock *req, + struct tcp_fastopen_cookie *foc, + bool attach_req); +int tcp_disconnect(struct sock *sk, int flags); -void tcp_connect_init(struct sock *sk); void tcp_finish_connect(struct sock *sk, struct sk_buff *skb); int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size); void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb); /* From syncookies.c */ -extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS]; -extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, - struct ip_options *opt); +struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct dst_entry *dst); +int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, + u32 cookie); +struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb); #ifdef CONFIG_SYN_COOKIES -extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, - __u16 *mss); -#else -static inline __u32 cookie_v4_init_sequence(struct sock *sk, - struct sk_buff *skb, - __u16 *mss) + +/* Syncookies use a monotonic timer which increments every 60 seconds. + * This counter is used both as a hash input and partially encoded into + * the cookie value. A cookie is only validated further if the delta + * between the current counter value and the encoded one is less than this, + * i.e. a sent cookie is valid only at most for 2*60 seconds (or less if + * the counter advances immediately after a cookie is generated). + */ +#define MAX_SYNCOOKIE_AGE 2 +#define TCP_SYNCOOKIE_PERIOD (60 * HZ) +#define TCP_SYNCOOKIE_VALID (MAX_SYNCOOKIE_AGE * TCP_SYNCOOKIE_PERIOD) + +/* syncookies: remember time of last synqueue overflow + * But do not dirty this field too often (once per second is enough) + * It is racy as we do not hold a lock, but race is very minor. 
+ */ +static inline void tcp_synq_overflow(const struct sock *sk) { - return 0; + unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; + unsigned long now = jiffies; + + if (time_after(now, last_overflow + HZ)) + tcp_sk(sk)->rx_opt.ts_recent_stamp = now; } -#endif -extern __u32 cookie_init_timestamp(struct request_sock *req); -extern bool cookie_check_timestamp(struct tcp_options_received *opt, - struct net *net, bool *ecn_ok); +/* syncookies: no recent synqueue overflow on this listening socket? */ +static inline bool tcp_synq_no_recent_overflow(const struct sock *sk) +{ + unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; -/* From net/ipv6/syncookies.c */ -extern struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb); -#ifdef CONFIG_SYN_COOKIES -extern __u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb, - __u16 *mss); -#else -static inline __u32 cookie_v6_init_sequence(struct sock *sk, - struct sk_buff *skb, - __u16 *mss) + return time_after(jiffies, last_overflow + TCP_SYNCOOKIE_VALID); +} + +static inline u32 tcp_cookie_time(void) { - return 0; + u64 val = get_jiffies_64(); + + do_div(val, TCP_SYNCOOKIE_PERIOD); + return val; } + +u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th, + u16 *mssp); +__u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss); +__u32 cookie_init_timestamp(struct request_sock *req); +bool cookie_timestamp_decode(struct tcp_options_received *opt); +bool cookie_ecn_ok(const struct tcp_options_received *opt, + const struct net *net, const struct dst_entry *dst); + +/* From net/ipv6/syncookies.c */ +int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th, + u32 cookie); +struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb); + +u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph, + const struct tcphdr *th, u16 *mssp); +__u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss); #endif /* tcp_output.c */ -extern void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, - int nonagle); -extern bool tcp_may_send_now(struct sock *sk); -extern int __tcp_retransmit_skb(struct sock *, struct sk_buff *); -extern int tcp_retransmit_skb(struct sock *, struct sk_buff *); -extern void tcp_retransmit_timer(struct sock *sk); -extern void tcp_xmit_retransmit_queue(struct sock *); -extern void tcp_simple_retransmit(struct sock *); -extern int tcp_trim_head(struct sock *, struct sk_buff *, u32); -extern int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int); - -extern void tcp_send_probe0(struct sock *); -extern void tcp_send_partial(struct sock *); -extern int tcp_write_wakeup(struct sock *); -extern void tcp_send_fin(struct sock *sk); -extern void tcp_send_active_reset(struct sock *sk, gfp_t priority); -extern int tcp_send_synack(struct sock *); -extern bool tcp_syn_flood_action(struct sock *sk, - const struct sk_buff *skb, - const char *proto); -extern void tcp_push_one(struct sock *, unsigned int mss_now); -extern void tcp_send_ack(struct sock *sk); -extern void tcp_send_delayed_ack(struct sock *sk); -extern void tcp_send_loss_probe(struct sock *sk); -extern bool tcp_schedule_loss_probe(struct sock *sk); +void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, + int nonagle); +bool tcp_may_send_now(struct sock *sk); +int __tcp_retransmit_skb(struct sock *, struct sk_buff *); +int tcp_retransmit_skb(struct sock *, struct sk_buff *); +void tcp_retransmit_timer(struct sock *sk); +void 
tcp_xmit_retransmit_queue(struct sock *); +void tcp_simple_retransmit(struct sock *); +int tcp_trim_head(struct sock *, struct sk_buff *, u32); +int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int, gfp_t); + +void tcp_send_probe0(struct sock *); +void tcp_send_partial(struct sock *); +int tcp_write_wakeup(struct sock *, int mib); +void tcp_send_fin(struct sock *sk); +void tcp_send_active_reset(struct sock *sk, gfp_t priority); +int tcp_send_synack(struct sock *); +void tcp_push_one(struct sock *, unsigned int mss_now); +void tcp_send_ack(struct sock *sk); +void tcp_send_delayed_ack(struct sock *sk); +void tcp_send_loss_probe(struct sock *sk); +bool tcp_schedule_loss_probe(struct sock *sk); /* tcp_input.c */ -extern void tcp_cwnd_application_limited(struct sock *sk); -extern void tcp_resume_early_retransmit(struct sock *sk); -extern void tcp_rearm_rto(struct sock *sk); -extern void tcp_reset(struct sock *sk); +void tcp_resume_early_retransmit(struct sock *sk); +void tcp_rearm_rto(struct sock *sk); +void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req); +void tcp_reset(struct sock *sk); +void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb); /* tcp_timer.c */ -extern void tcp_init_xmit_timers(struct sock *); +void tcp_init_xmit_timers(struct sock *); static inline void tcp_clear_xmit_timers(struct sock *sk) { inet_csk_clear_xmit_timers(sk); } -extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu); -extern unsigned int tcp_current_mss(struct sock *sk); +unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu); +unsigned int tcp_current_mss(struct sock *sk); /* Bound MSS / TSO packet size with the half of the window */ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) @@ -583,21 +607,20 @@ } /* tcp.c */ -extern void tcp_get_info(const struct sock *, struct tcp_info *); +void tcp_get_info(struct sock *, struct tcp_info *); /* Read 'sendfile()'-style from a TCP socket */ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, unsigned int, size_t); -extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, - sk_read_actor_t recv_actor); +int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, + sk_read_actor_t recv_actor); -extern void tcp_initialize_rcv_mss(struct sock *sk); +void tcp_initialize_rcv_mss(struct sock *sk); -extern int tcp_mtu_to_mss(struct sock *sk, int pmtu); -extern int tcp_mss_to_mtu(struct sock *sk, int mss); -extern void tcp_mtup_init(struct sock *sk); -extern void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt); -extern void tcp_init_buffer_space(struct sock *sk); +int tcp_mtu_to_mss(struct sock *sk, int pmtu); +int tcp_mss_to_mtu(struct sock *sk, int mss); +void tcp_mtup_init(struct sock *sk); +void tcp_init_buffer_space(struct sock *sk); static inline void tcp_bound_rto(const struct sock *sk) { @@ -607,11 +630,9 @@ static inline u32 __tcp_set_rto(const struct tcp_sock *tp) { - return (tp->srtt >> 3) + tp->rttvar; + return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us); } -extern void tcp_set_rto(struct sock *sk); - static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) { tp->pred_flags = htonl((tp->tcp_header_len << 26) | @@ -646,6 +667,22 @@ return rto_min; } +static inline u32 tcp_rto_min_us(struct sock *sk) +{ + return jiffies_to_usecs(tcp_rto_min(sk)); +} + +static inline bool tcp_ca_dst_locked(const struct dst_entry *dst) +{ + return dst_metric_locked(dst, RTAX_CC_ALGO); +} + +/* Minimum RTT in usec. ~0 means not available. 
*/ +static inline u32 tcp_min_rtt(const struct tcp_sock *tp) +{ + return tp->rtt_min[0].rtt; +} + /* Compute the actual receive window we are currently advertising. * Rcv_nxt can be after the window if our peer push more data * than the offered window. @@ -663,7 +700,7 @@ * scaling applied to the result. The caller does these things * if necessary. This is a "raw" window selection. */ -extern u32 __tcp_select_window(struct sock *sk); +u32 __tcp_select_window(struct sock *sk); void tcp_send_window_probe(struct sock *sk); @@ -675,6 +712,12 @@ */ #define tcp_time_stamp ((__u32)(jiffies)) +static inline u32 tcp_skb_timestamp(const struct sk_buff *skb) +{ + return skb->skb_mstamp.stamp_jiffies; +} + + #define tcp_flag_byte(th) (((u_int8_t *)th)[13]) #define TCPHDR_FIN 0x01 @@ -686,6 +729,8 @@ #define TCPHDR_ECE 0x40 #define TCPHDR_CWR 0x80 +#define TCPHDR_SYN_ECN (TCPHDR_SYN | TCPHDR_ECE | TCPHDR_CWR) + /* This is what the send packet queuing engine uses to pass * TCP per-packet control information to the transmission code. * We also store the host-order sequence numbers in here too. @@ -693,15 +738,21 @@ * If this grows please adjust skbuff.h:skbuff->cb[xxx] size appropriately. */ struct tcp_skb_cb { - union { - struct inet_skb_parm h4; -#if IS_ENABLED(CONFIG_IPV6) - struct inet6_skb_parm h6; -#endif - } header; /* For incoming frames */ __u32 seq; /* Starting sequence number */ __u32 end_seq; /* SEQ + FIN + SYN + datalen */ - __u32 when; /* used to compute rtt's */ + union { + /* Note : tcp_tw_isn is used in input path only + * (isn chosen by tcp_timewait_state_process()) + * + * tcp_gso_segs/size are used in write queue only, + * cf tcp_skb_pcount()/tcp_skb_mss() + */ + __u32 tcp_tw_isn; + struct { + u16 tcp_gso_segs; + u16 tcp_gso_size; + }; + }; __u8 tcp_flags; /* TCP header flags. (tcp[13]) */ __u8 sacked; /* State flags for SACK/FACK. */ @@ -709,45 +760,57 @@ #define TCPCB_SACKED_RETRANS 0x02 /* SKB retransmitted */ #define TCPCB_LOST 0x04 /* SKB is lost */ #define TCPCB_TAGBITS 0x07 /* All tag bits */ +#define TCPCB_REPAIRED 0x10 /* SKB repaired (no skb_mstamp) */ #define TCPCB_EVER_RETRANS 0x80 /* Ever retransmitted frame */ -#define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS) +#define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS| \ + TCPCB_REPAIRED) __u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */ /* 1 byte hole */ __u32 ack_seq; /* Sequence number ACK'd */ + union { + struct inet_skb_parm h4; +#if IS_ENABLED(CONFIG_IPV6) + struct inet6_skb_parm h6; +#endif + } header; /* For incoming frames */ }; #define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0])) -/* RFC3168 : 6.1.1 SYN packets must not have ECT/ECN bits set - * - * If we receive a SYN packet with these bits set, it means a network is - * playing bad games with TOS bits. In order to avoid possible false congestion - * notifications, we disable TCP ECN negociation. 
- */ -static inline void -TCP_ECN_create_request(struct request_sock *req, const struct sk_buff *skb, - struct net *net) -{ - const struct tcphdr *th = tcp_hdr(skb); - - if (net->ipv4.sysctl_tcp_ecn && th->ece && th->cwr && - INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield)) - inet_rsk(req)->ecn_ok = 1; + +#if IS_ENABLED(CONFIG_IPV6) +/* This is the variant of inet6_iif() that must be used by TCP, + * as TCP moves IP6CB into a different location in skb->cb[] + */ +static inline int tcp_v6_iif(const struct sk_buff *skb) +{ + return TCP_SKB_CB(skb)->header.h6.iif; } +#endif /* Due to TSO, an SKB can be composed of multiple actual * packets. To keep these tracked properly, we use this. */ static inline int tcp_skb_pcount(const struct sk_buff *skb) { - return skb_shinfo(skb)->gso_segs; + return TCP_SKB_CB(skb)->tcp_gso_segs; } -/* This is valid iff tcp_skb_pcount() > 1. */ +static inline void tcp_skb_pcount_set(struct sk_buff *skb, int segs) +{ + TCP_SKB_CB(skb)->tcp_gso_segs = segs; +} + +static inline void tcp_skb_pcount_add(struct sk_buff *skb, int segs) +{ + TCP_SKB_CB(skb)->tcp_gso_segs += segs; +} + +/* This is valid iff skb is in write queue and tcp_skb_pcount() > 1. */ static inline int tcp_skb_mss(const struct sk_buff *skb) { - return skb_shinfo(skb)->gso_size; + return TCP_SKB_CB(skb)->tcp_gso_size; } /* Events passed to congestion control interface */ @@ -756,8 +819,17 @@ CA_EVENT_CWND_RESTART, /* congestion window restart */ CA_EVENT_COMPLETE_CWR, /* end of congestion recovery */ CA_EVENT_LOSS, /* loss timeout */ - CA_EVENT_FAST_ACK, /* in sequence ack */ - CA_EVENT_SLOW_ACK, /* other ack */ + CA_EVENT_ECN_NO_CE, /* ECT set, but not CE marked */ + CA_EVENT_ECN_IS_CE, /* received CE marked IP packet */ + CA_EVENT_DELAYED_ACK, /* Delayed ack is sent */ + CA_EVENT_NON_DELAYED_ACK, +}; + +/* Information about inbound ACK, passed to cong_ops->in_ack_event() */ +enum tcp_ca_ack_event_flags { + CA_ACK_SLOWPATH = (1 << 0), /* In slow path processing */ + CA_ACK_WIN_UPDATE = (1 << 1), /* ACK updated window */ + CA_ACK_ECE = (1 << 2), /* ECE bit is set on ack */ }; /* @@ -767,12 +839,19 @@ #define TCP_CA_MAX 128 #define TCP_CA_BUF_MAX (TCP_CA_NAME_MAX*TCP_CA_MAX) +#define TCP_CA_UNSPEC 0 + +/* Algorithm can be set on socket without CAP_NET_ADMIN privileges */ #define TCP_CONG_NON_RESTRICTED 0x1 -#define TCP_CONG_RTT_STAMP 0x2 +/* Requires ECN/ECT set on all packets */ +#define TCP_CONG_NEEDS_ECN 0x2 + +union tcp_cc_info; struct tcp_congestion_ops { struct list_head list; - unsigned long flags; + u32 key; + u32 flags; /* initialize private data (optional) */ void (*init)(struct sock *sk); @@ -781,45 +860,63 @@ /* return slow start threshold (required) */ u32 (*ssthresh)(struct sock *sk); - /* lower bound for congestion window (optional) */ - u32 (*min_cwnd)(const struct sock *sk); /* do new cwnd calculation (required) */ - void (*cong_avoid)(struct sock *sk, u32 ack, u32 in_flight); + void (*cong_avoid)(struct sock *sk, u32 ack, u32 acked); /* call before changing ca_state (optional) */ void (*set_state)(struct sock *sk, u8 new_state); /* call when cwnd event occurs (optional) */ void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev); + /* call when ack arrives (optional) */ + void (*in_ack_event)(struct sock *sk, u32 flags); /* new value of cwnd after loss (optional) */ u32 (*undo_cwnd)(struct sock *sk); /* hook for packet ack accounting (optional) */ void (*pkts_acked)(struct sock *sk, u32 num_acked, s32 rtt_us); /* get info for inet_diag (optional) */ - void (*get_info)(struct sock *sk, 
u32 ext, struct sk_buff *skb); + size_t (*get_info)(struct sock *sk, u32 ext, int *attr, + union tcp_cc_info *info); char name[TCP_CA_NAME_MAX]; struct module *owner; }; -extern int tcp_register_congestion_control(struct tcp_congestion_ops *type); -extern void tcp_unregister_congestion_control(struct tcp_congestion_ops *type); +int tcp_register_congestion_control(struct tcp_congestion_ops *type); +void tcp_unregister_congestion_control(struct tcp_congestion_ops *type); + +void tcp_assign_congestion_control(struct sock *sk); +void tcp_init_congestion_control(struct sock *sk); +void tcp_cleanup_congestion_control(struct sock *sk); +int tcp_set_default_congestion_control(const char *name); +void tcp_get_default_congestion_control(char *name); +void tcp_get_available_congestion_control(char *buf, size_t len); +void tcp_get_allowed_congestion_control(char *buf, size_t len); +int tcp_set_allowed_congestion_control(char *allowed); +int tcp_set_congestion_control(struct sock *sk, const char *name); +u32 tcp_slow_start(struct tcp_sock *tp, u32 acked); +void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked); -extern void tcp_init_congestion_control(struct sock *sk); -extern void tcp_cleanup_congestion_control(struct sock *sk); -extern int tcp_set_default_congestion_control(const char *name); -extern void tcp_get_default_congestion_control(char *name); -extern void tcp_get_available_congestion_control(char *buf, size_t len); -extern void tcp_get_allowed_congestion_control(char *buf, size_t len); -extern int tcp_set_allowed_congestion_control(char *allowed); -extern int tcp_set_congestion_control(struct sock *sk, const char *name); -extern void tcp_slow_start(struct tcp_sock *tp); -extern void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w); - -extern struct tcp_congestion_ops tcp_init_congestion_ops; -extern u32 tcp_reno_ssthresh(struct sock *sk); -extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight); -extern u32 tcp_reno_min_cwnd(const struct sock *sk); +u32 tcp_reno_ssthresh(struct sock *sk); +void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked); extern struct tcp_congestion_ops tcp_reno; +struct tcp_congestion_ops *tcp_ca_find_key(u32 key); +u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca); +#ifdef CONFIG_INET +char *tcp_ca_get_name_by_key(u32 key, char *buffer); +#else +static inline char *tcp_ca_get_name_by_key(u32 key, char *buffer) +{ + return NULL; +} +#endif + +static inline bool tcp_ca_needs_ecn(const struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + + return icsk->icsk_ca_ops->flags & TCP_CONG_NEEDS_ECN; +} + static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -906,6 +1003,11 @@ #define TCP_INFINITE_SSTHRESH 0x7fffffff +static inline bool tcp_in_slow_start(const struct tcp_sock *tp) +{ + return tp->snd_cwnd < tp->snd_ssthresh; +} + static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp) { return tp->snd_ssthresh >= TCP_INFINITE_SSTHRESH; @@ -936,8 +1038,8 @@ /* Use define here intentionally to get WARN_ON location shown at the caller */ #define tcp_verify_left_out(tp) WARN_ON(tcp_left_out(tp) > tp->packets_out) -extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh); -extern __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst); +void tcp_enter_cwr(struct sock *sk); +__u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst); /* The maximum number of MSS of available cwnd for 
which TSO defers * sending if not using sysctl_tcp_tso_win_divisor. @@ -963,23 +1065,56 @@ { return tp->snd_una + tp->snd_wnd; } -extern bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight); -static inline void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss, - const struct sk_buff *skb) +/* We follow the spirit of RFC2861 to validate cwnd but implement a more + * flexible approach. The RFC suggests cwnd should not be raised unless + * it was fully used previously. And that's exactly what we do in + * congestion avoidance mode. But in slow start we allow cwnd to grow + * as long as the application has used half the cwnd. + * Example : + * cwnd is 10 (IW10), but application sends 9 frames. + * We allow cwnd to reach 18 when all frames are ACKed. + * This check is safe because it's as aggressive as slow start which already + * risks 100% overshoot. The advantage is that we discourage application to + * either send more filler packets or data to artificially blow up the cwnd + * usage, and allow application-limited process to probe bw more aggressively. + */ +static inline bool tcp_is_cwnd_limited(const struct sock *sk) { - if (skb->len < mss) - tp->snd_sml = TCP_SKB_CB(skb)->end_seq; + const struct tcp_sock *tp = tcp_sk(sk); + + /* If in slow start, ensure cwnd grows to twice what was ACKed. */ + if (tcp_in_slow_start(tp)) + return tp->snd_cwnd < 2 * tp->max_packets_out; + + return tp->is_cwnd_limited; } -static inline void tcp_check_probe_timer(struct sock *sk) +/* Something is really bad, we could not queue an additional packet, + * because qdisc is full or receiver sent a 0 window. + * We do not want to add fuel to the fire, or abort too early, + * so make sure the timer we arm now is at least 200ms in the future, + * regardless of current icsk_rto value (as it could be ~2ms) + */ +static inline unsigned long tcp_probe0_base(const struct sock *sk) { - const struct tcp_sock *tp = tcp_sk(sk); - const struct inet_connection_sock *icsk = inet_csk(sk); + return max_t(unsigned long, inet_csk(sk)->icsk_rto, TCP_RTO_MIN); +} + +/* Variant of inet_csk_rto_backoff() used for zero window probes */ +static inline unsigned long tcp_probe0_when(const struct sock *sk, + unsigned long max_when) +{ + u64 when = (u64)tcp_probe0_base(sk) << inet_csk(sk)->icsk_backoff; - if (!tp->packets_out && !icsk->icsk_pending) + return (unsigned long)min_t(u64, when, max_when); +} + +static inline void tcp_check_probe_timer(struct sock *sk) +{ + if (!tcp_sk(sk)->packets_out && !inet_csk(sk)->icsk_pending) inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, - icsk->icsk_rto, TCP_RTO_MAX); + tcp_probe0_base(sk), TCP_RTO_MAX); } static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq) @@ -1020,15 +1155,9 @@ tp->ucopy.len = 0; tp->ucopy.memory = 0; skb_queue_head_init(&tp->ucopy.prequeue); -#ifdef CONFIG_NET_DMA - tp->ucopy.dma_chan = NULL; - tp->ucopy.wakeup = 0; - tp->ucopy.pinned_list = NULL; - tp->ucopy.dma_cookie = 0; -#endif } -extern bool tcp_prequeue(struct sock *sk, struct sk_buff *skb); +bool tcp_prequeue(struct sock *sk, struct sk_buff *skb); int tcp_filter(struct sock *sk, struct sk_buff *skb); #undef STATE_TRACE @@ -1040,9 +1169,9 @@ "Close Wait","Last ACK","Listen","Closing" }; #endif -extern void tcp_set_state(struct sock *sk, int state); +void tcp_set_state(struct sock *sk, int state); -extern void tcp_done(struct sock *sk); +void tcp_done(struct sock *sk); static inline void tcp_sack_reset(struct tcp_options_received *rx_opt) { @@ -1050,11 +1179,25 @@ rx_opt->num_sacks = 0; } +u32 
tcp_default_init_rwnd(u32 mss); +void tcp_cwnd_restart(struct sock *sk, s32 delta); + +static inline void tcp_slow_start_after_idle_check(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + s32 delta; + + if (!sysctl_tcp_slow_start_after_idle || tp->packets_out) + return; + delta = tcp_time_stamp - tp->lsndtime; + if (delta > inet_csk(sk)->icsk_rto) + tcp_cwnd_restart(sk, delta); +} + /* Determine a window scaling and initial window to offer. */ -extern void tcp_select_initial_window(int __space, __u32 mss, - __u32 *rcv_wnd, __u32 *window_clamp, - int wscale_ok, __u8 *rcv_wscale, - __u32 init_rcv_wnd); +void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd, + __u32 *window_clamp, int wscale_ok, + __u8 *rcv_wscale, __u32 init_rcv_wnd); static inline int tcp_win_from_space(int space) { @@ -1063,51 +1206,23 @@ space - (space>>sysctl_tcp_adv_win_scale); } -/* Note: caller must be prepared to deal with negative returns */ +/* Note: caller must be prepared to deal with negative returns */ static inline int tcp_space(const struct sock *sk) { return tcp_win_from_space(sk->sk_rcvbuf - atomic_read(&sk->sk_rmem_alloc)); -} +} static inline int tcp_full_space(const struct sock *sk) { - return tcp_win_from_space(sk->sk_rcvbuf); + return tcp_win_from_space(sk->sk_rcvbuf); } -static inline void tcp_openreq_init(struct request_sock *req, - struct tcp_options_received *rx_opt, - struct sk_buff *skb) -{ - struct inet_request_sock *ireq = inet_rsk(req); - - req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */ - req->cookie_ts = 0; - tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq; - tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; - tcp_rsk(req)->snt_synack = 0; - req->mss = rx_opt->mss_clamp; - req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0; - ireq->tstamp_ok = rx_opt->tstamp_ok; - ireq->sack_ok = rx_opt->sack_ok; - ireq->snd_wscale = rx_opt->snd_wscale; - ireq->wscale_ok = rx_opt->wscale_ok; - ireq->acked = 0; - ireq->ecn_ok = 0; - ireq->rmt_port = tcp_hdr(skb)->source; - ireq->loc_port = tcp_hdr(skb)->dest; -} - -/* Compute time elapsed between SYNACK and the ACK completing 3WHS */ -static inline void tcp_synack_rtt_meas(struct sock *sk, - struct request_sock *req) -{ - if (tcp_rsk(req)->snt_synack) - tcp_valid_rtt_meas(sk, - tcp_time_stamp - tcp_rsk(req)->snt_synack); -} +extern void tcp_openreq_init_rwin(struct request_sock *req, + const struct sock *sk_listener, + const struct dst_entry *dst); -extern void tcp_enter_memory_pressure(struct sock *sk); +void tcp_enter_memory_pressure(struct sock *sk); static inline int keepalive_intvl_when(const struct tcp_sock *tp) { @@ -1183,6 +1298,9 @@ return true; } +bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb, + int mib_idx, u32 *last_oow_ack_time); + static inline void tcp_mib_init(struct net *net) { /* See RFC 2012 */ @@ -1196,7 +1314,6 @@ static inline void tcp_clear_retrans_hints_partial(struct tcp_sock *tp) { tp->lost_skb_hint = NULL; - tp->scoreboard_skb_hint = NULL; } static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp) @@ -1261,24 +1378,22 @@ }; /* - functions */ -extern int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, - const struct sock *sk, - const struct request_sock *req, - const struct sk_buff *skb); -extern int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, - int family, const u8 *newkey, - u8 newkeylen, gfp_t gfp); -extern int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, - int family); -extern struct tcp_md5sig_key 
*tcp_v4_md5_lookup(struct sock *sk, - struct sock *addr_sk); +int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, + const struct sock *sk, const struct sk_buff *skb); +int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, + int family, const u8 *newkey, u8 newkeylen, gfp_t gfp); +int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, + int family); +struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, + const struct sock *addr_sk); #ifdef CONFIG_TCP_MD5SIG -extern struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, - const union tcp_md5_addr *addr, int family); +struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk, + const union tcp_md5_addr *addr, + int family); #define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key) #else -static inline struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, +static inline struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk, const union tcp_md5_addr *addr, int family) { @@ -1287,25 +1402,27 @@ #define tcp_twsk_md5_key(twsk) NULL #endif -extern struct tcp_md5sig_pool __percpu *tcp_alloc_md5sig_pool(struct sock *); -extern void tcp_free_md5sig_pool(void); +bool tcp_alloc_md5sig_pool(void); -extern struct tcp_md5sig_pool *tcp_get_md5sig_pool(void); -extern void tcp_put_md5sig_pool(void); +struct tcp_md5sig_pool *tcp_get_md5sig_pool(void); +static inline void tcp_put_md5sig_pool(void) +{ + local_bh_enable(); +} -extern int tcp_md5_hash_header(struct tcp_md5sig_pool *, const struct tcphdr *); -extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff *, - unsigned int header_len); -extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, - const struct tcp_md5sig_key *key); +int tcp_md5_hash_header(struct tcp_md5sig_pool *, const struct tcphdr *); +int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff *, + unsigned int header_len); +int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, + const struct tcp_md5sig_key *key); /* From tcp_fastopen.c */ -extern void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, - struct tcp_fastopen_cookie *cookie, - int *syn_loss, unsigned long *last_syn_loss); -extern void tcp_fastopen_cache_set(struct sock *sk, u16 mss, - struct tcp_fastopen_cookie *cookie, - bool syn_lost); +void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, + struct tcp_fastopen_cookie *cookie, int *syn_loss, + unsigned long *last_syn_loss); +void tcp_fastopen_cache_set(struct sock *sk, u16 mss, + struct tcp_fastopen_cookie *cookie, bool syn_lost, + u16 try_exp); struct tcp_fastopen_request { /* Fast Open cookie. 
Size 0 means a cookie request */ struct tcp_fastopen_cookie cookie; @@ -1317,15 +1434,18 @@ extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; int tcp_fastopen_reset_cipher(void *key, unsigned int len); -void tcp_fastopen_cookie_gen(__be32 addr, struct tcp_fastopen_cookie *foc); - +struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct tcp_fastopen_cookie *foc, + struct dst_entry *dst); +void tcp_fastopen_init_key_once(bool publish); #define TCP_FASTOPEN_KEY_LENGTH 16 /* Fastopen key context */ struct tcp_fastopen_context { - struct crypto_cipher __rcu *tfm; - __u8 key[TCP_FASTOPEN_KEY_LENGTH]; - struct rcu_head rcu; + struct crypto_cipher *tfm; + __u8 key[TCP_FASTOPEN_KEY_LENGTH]; + struct rcu_head rcu; }; /* write queue abstraction */ @@ -1514,9 +1634,7 @@ /* /proc */ enum tcp_seq_states { TCP_SEQ_STATE_LISTENING, - TCP_SEQ_STATE_OPENREQ, TCP_SEQ_STATE_ESTABLISHED, - TCP_SEQ_STATE_TIME_WAIT, }; int tcp_seq_open(struct inode *inode, struct file *file); @@ -1534,62 +1652,158 @@ enum tcp_seq_states state; struct sock *syn_wait_sk; int bucket, offset, sbucket, num; - kuid_t uid; loff_t last_pos; }; -extern int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo); -extern void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo); +int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo); +void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo); extern struct request_sock_ops tcp_request_sock_ops; extern struct request_sock_ops tcp6_request_sock_ops; -extern void tcp_v4_destroy_sock(struct sock *sk); +void tcp_v4_destroy_sock(struct sock *sk); + +struct sk_buff *tcp_gso_segment(struct sk_buff *skb, + netdev_features_t features); +struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb); +int tcp_gro_complete(struct sk_buff *skb); + +void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr); -extern int tcp_v4_gso_send_check(struct sk_buff *skb); -extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb, - netdev_features_t features); -extern struct sk_buff **tcp_gro_receive(struct sk_buff **head, - struct sk_buff *skb); -extern struct sk_buff **tcp4_gro_receive(struct sk_buff **head, - struct sk_buff *skb); -extern int tcp_gro_complete(struct sk_buff *skb); -extern int tcp4_gro_complete(struct sk_buff *skb); +static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) +{ + return tp->notsent_lowat ?: sysctl_tcp_notsent_lowat; +} + +static inline bool tcp_stream_memory_free(const struct sock *sk) +{ + const struct tcp_sock *tp = tcp_sk(sk); + u32 notsent_bytes = tp->write_seq - tp->snd_nxt; + + return notsent_bytes < tcp_notsent_lowat(tp); +} #ifdef CONFIG_PROC_FS -extern int tcp4_proc_init(void); -extern void tcp4_proc_exit(void); +int tcp4_proc_init(void); +void tcp4_proc_exit(void); #endif +int tcp_rtx_synack(const struct sock *sk, struct request_sock *req); +int tcp_conn_request(struct request_sock_ops *rsk_ops, + const struct tcp_request_sock_ops *af_ops, + struct sock *sk, struct sk_buff *skb); + /* TCP af-specific functions */ struct tcp_sock_af_ops { #ifdef CONFIG_TCP_MD5SIG - struct tcp_md5sig_key *(*md5_lookup) (struct sock *sk, - struct sock *addr_sk); - int (*calc_md5_hash) (char *location, - struct tcp_md5sig_key *md5, - const struct sock *sk, - const struct request_sock *req, - const struct sk_buff *skb); - int (*md5_parse) (struct sock *sk, - char __user *optval, - int optlen); + struct tcp_md5sig_key *(*md5_lookup) (const 
struct sock *sk, + const struct sock *addr_sk); + int (*calc_md5_hash)(char *location, + const struct tcp_md5sig_key *md5, + const struct sock *sk, + const struct sk_buff *skb); + int (*md5_parse)(struct sock *sk, + char __user *optval, + int optlen); #endif }; struct tcp_request_sock_ops { + u16 mss_clamp; #ifdef CONFIG_TCP_MD5SIG - struct tcp_md5sig_key *(*md5_lookup) (struct sock *sk, - struct request_sock *req); - int (*calc_md5_hash) (char *location, - struct tcp_md5sig_key *md5, - const struct sock *sk, - const struct request_sock *req, - const struct sk_buff *skb); + struct tcp_md5sig_key *(*req_md5_lookup)(const struct sock *sk, + const struct sock *addr_sk); + int (*calc_md5_hash) (char *location, + const struct tcp_md5sig_key *md5, + const struct sock *sk, + const struct sk_buff *skb); #endif + void (*init_req)(struct request_sock *req, + const struct sock *sk_listener, + struct sk_buff *skb); +#ifdef CONFIG_SYN_COOKIES + __u32 (*cookie_init_seq)(const struct sk_buff *skb, + __u16 *mss); +#endif + struct dst_entry *(*route_req)(const struct sock *sk, struct flowi *fl, + const struct request_sock *req, + bool *strict); + __u32 (*init_seq)(const struct sk_buff *skb); + int (*send_synack)(const struct sock *sk, struct dst_entry *dst, + struct flowi *fl, struct request_sock *req, + struct tcp_fastopen_cookie *foc, + bool attach_req); }; -extern void tcp_v4_init(void); -extern void tcp_init(void); +#ifdef CONFIG_SYN_COOKIES +static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops, + const struct sock *sk, struct sk_buff *skb, + __u16 *mss) +{ + tcp_synq_overflow(sk); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); + return ops->cookie_init_seq(skb, mss); +} +#else +static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops, + const struct sock *sk, struct sk_buff *skb, + __u16 *mss) +{ + return 0; +} +#endif + +int tcpv4_offload_init(void); + +void tcp_v4_init(void); +void tcp_init(void); + +/* tcp_recovery.c */ + +/* Flags to enable various loss recovery features. See below */ +extern int sysctl_tcp_recovery; + +/* Use TCP RACK to detect (some) tail and retransmit losses */ +#define TCP_RACK_LOST_RETRANS 0x1 + +extern int tcp_rack_mark_lost(struct sock *sk); + +extern void tcp_rack_advance(struct tcp_sock *tp, + const struct skb_mstamp *xmit_time, u8 sacked); + +/* + * Save and compile IPv4 options, return a pointer to it + */ +static inline struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb) +{ + const struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt; + struct ip_options_rcu *dopt = NULL; + + if (opt->optlen) { + int opt_size = sizeof(*dopt) + opt->optlen; + + dopt = kmalloc(opt_size, GFP_ATOMIC); + if (dopt && __ip_options_echo(&dopt->opt, skb, opt)) { + kfree(dopt); + dopt = NULL; + } + } + return dopt; +} + +/* locally generated TCP pure ACKs have skb->truesize == 2 + * (check tcp_send_ack() in net/ipv4/tcp_output.c ) + * This is much faster than dissecting the packet to find out. + * (Think of GRE encapsulations, IPv4, IPv6, ...) + */ +static inline bool skb_is_tcp_pure_ack(const struct sk_buff *skb) +{ + return skb->truesize == 2; +} + +static inline void skb_set_tcp_pure_ack(struct sk_buff *skb) +{ + skb->truesize = 2; +} #endif /* _TCP_H */
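
Note on the syncookie hunks above: the patch replaces the old single-timestamp overflow check with a 60-second monotonic counter (TCP_SYNCOOKIE_PERIOD), a bounded validity window (MAX_SYNCOOKIE_AGE), and a once-per-second rate limit on dirtying the overflow stamp. The following user-space sketch models that logic so the window arithmetic can be checked in isolation; HZ, jiffies and ts_recent_stamp are simulated stand-ins (HZ=1000 is an assumption), so treat this as an illustration of the check, not kernel code.

/* Standalone model of tcp_synq_overflow()/tcp_synq_no_recent_overflow()
 * as introduced by this patch. Illustrative only: "jiffies" and the
 * listener's ts_recent_stamp field are plain variables here.
 */
#include <stdbool.h>
#include <stdio.h>

#define HZ                   1000UL              /* assumed tick rate */
#define TCP_SYNCOOKIE_PERIOD (60 * HZ)           /* 60 s counter period */
#define MAX_SYNCOOKIE_AGE    2
#define TCP_SYNCOOKIE_VALID  (MAX_SYNCOOKIE_AGE * TCP_SYNCOOKIE_PERIOD)

static unsigned long jiffies;                    /* simulated clock */
static unsigned long ts_recent_stamp;            /* last overflow time */

/* Wrap-safe "a is later than b", like the kernel's time_after(). */
static bool time_after_ul(unsigned long a, unsigned long b)
{
	return (long)(b - a) < 0;
}

/* Record a synqueue overflow, but dirty the shared field at most once
 * per second -- the rate limit the patch adds to tcp_synq_overflow(). */
static void synq_overflow(void)
{
	if (time_after_ul(jiffies, ts_recent_stamp + HZ))
		ts_recent_stamp = jiffies;
}

/* No overflow within the last 2 * 60 s => cookie validation can be
 * skipped, per tcp_synq_no_recent_overflow(). */
static bool synq_no_recent_overflow(void)
{
	return time_after_ul(jiffies, ts_recent_stamp + TCP_SYNCOOKIE_VALID);
}

int main(void)
{
	jiffies = 10 * HZ;
	synq_overflow();             /* SYN queue overflows at t = 10 s */
	jiffies += 60 * HZ;          /* 60 s later: still inside window */
	printf("t=70s  recent overflow? %s\n",
	       synq_no_recent_overflow() ? "no" : "yes");
	jiffies += 120 * HZ;         /* 3 min later: window has expired */
	printf("t=190s recent overflow? %s\n",
	       synq_no_recent_overflow() ? "no" : "yes");
	return 0;
}

Once the window expires, tcp_synq_no_recent_overflow() lets the listener stop honouring cookies entirely, which is why a sent cookie is valid for at most 2*60 seconds as the patch comment states.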
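
The zero-window probe rework (tcp_probe0_base()/tcp_probe0_when() in the hunks above) similarly deserves a worked illustration: it floors the probe timer at TCP_RTO_MIN so an icsk_rto of ~2 ms cannot rearm the timer too aggressively, then applies exponential backoff clamped to a maximum. A minimal sketch under assumed values HZ=1000 (so jiffies equal milliseconds), TCP_RTO_MIN=HZ/5 and TCP_RTO_MAX=120*HZ:

#include <stdio.h>

#define HZ          1000UL
#define TCP_RTO_MIN (HZ / 5)         /* 200 ms floor */
#define TCP_RTO_MAX (120 * HZ)       /* 120 s cap   */

/* Never arm the probe timer below 200 ms, regardless of icsk_rto. */
static unsigned long probe0_base(unsigned long icsk_rto)
{
	return icsk_rto > TCP_RTO_MIN ? icsk_rto : TCP_RTO_MIN;
}

/* Exponential backoff on the base, clamped to max_when. */
static unsigned long probe0_when(unsigned long icsk_rto,
				 unsigned int icsk_backoff,
				 unsigned long max_when)
{
	unsigned long long when =
		(unsigned long long)probe0_base(icsk_rto) << icsk_backoff;

	return when < max_when ? (unsigned long)when : max_when;
}

int main(void)
{
	/* icsk_rto of 2 jiffies (~2 ms): the floor and clamp dominate. */
	for (unsigned int backoff = 0; backoff <= 12; backoff++)
		printf("backoff=%2u -> %lu ms\n", backoff,
		       probe0_when(2, backoff, TCP_RTO_MAX));
	return 0;
}

The 64-bit intermediate guards the left shift against overflow before clamping, mirroring the u64 arithmetic tcp_probe0_when() uses in the header.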