--- zzzz-none-000/linux-4.1.38/net/ipv4/tcp_output.c	2017-01-18 18:48:06.000000000 +0000
+++ bcm63-7582-715/linux-4.1.38/net/ipv4/tcp_output.c	2020-11-25 10:06:48.000000000 +0000
@@ -36,6 +36,14 @@
 
 #define pr_fmt(fmt) "TCP: " fmt
 
+#if defined(CONFIG_BCM_MPTCP) && defined(CONFIG_BCM_KF_MPTCP)
+#include <net/mptcp.h>
+#include <net/mptcp_v4.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/mptcp_v6.h>
+#endif
+#include <net/ipv6.h>
+#endif
 #include <net/tcp.h>
 
 #include <linux/compiler.h>
@@ -65,11 +73,17 @@
 unsigned int sysctl_tcp_notsent_lowat __read_mostly = UINT_MAX;
 EXPORT_SYMBOL(sysctl_tcp_notsent_lowat);
 
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
 static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
                            int push_one, gfp_t gfp);
+#endif
 
 /* Account for new data that has been sent to the network. */
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
 static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
+#else
+void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
+#endif
 {
         struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
@@ -211,7 +225,11 @@
 void tcp_select_initial_window(int __space, __u32 mss,
                                __u32 *rcv_wnd, __u32 *window_clamp,
                                int wscale_ok, __u8 *rcv_wscale,
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
                                __u32 init_rcv_wnd)
+#else
+                               __u32 init_rcv_wnd, const struct sock *sk)
+#endif
 {
         unsigned int space = (__space < 0 ? 0 : __space);
 
@@ -266,12 +284,25 @@
  * value can be stuffed directly into th->window for an outgoing
  * frame.
  */
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
 static u16 tcp_select_window(struct sock *sk)
+#else
+u16 tcp_select_window(struct sock *sk)
+#endif
 {
         struct tcp_sock *tp = tcp_sk(sk);
         u32 old_win = tp->rcv_wnd;
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
         u32 cur_win = tcp_receive_window(tp);
         u32 new_win = __tcp_select_window(sk);
+#else
+        /* The window must never shrink at the meta-level. At the subflow we
+         * have to allow this. Otherwise we may announce a window too large
+         * for the current meta-level sk_rcvbuf.
+         */
+        u32 cur_win = tcp_receive_window(mptcp(tp) ? tcp_sk(mptcp_meta_sk(sk)) : tp);
+        u32 new_win = tp->ops->__select_window(sk);
+#endif
 
         /* Never shrink the offered window */
         if (new_win < cur_win) {
@@ -287,6 +318,9 @@
                              LINUX_MIB_TCPWANTZEROWINDOWADV);
                 new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
         }
+#if defined(CONFIG_BCM_MPTCP) && defined(CONFIG_BCM_KF_MPTCP)
+
+#endif
         tp->rcv_wnd = new_win;
         tp->rcv_wup = tp->rcv_nxt;
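Throughout this patch the MPTCP build replaces direct calls (tcp_select_window(), tcp_write_xmit(), tcp_select_initial_window(), ...) with indirect calls through tp->ops, so the meta-socket and the individual subflows can plug in different implementations. A rough sketch of the shape of that operations table, inferred only from the call sites visible in this diff (the name and exact layout are assumptions; the real definition lives in the MPTCP headers included at the top of the patched file):

struct tcp_sock_ops {   /* name and member order assumed from the call sites */
        u32  (*__select_window)(struct sock *sk);
        u16  (*select_window)(struct sock *sk);
        void (*select_initial_window)(int __space, __u32 mss, __u32 *rcv_wnd,
                                      __u32 *window_clamp, int wscale_ok,
                                      __u8 *rcv_wscale, __u32 init_rcv_wnd,
                                      const struct sock *sk);
        bool (*write_xmit)(struct sock *sk, unsigned int mss_now, int nonagle,
                           int push_one, gfp_t gfp);
        int  (*write_wakeup)(struct sock *sk);
        void (*cwnd_validate)(struct sock *sk, bool is_cwnd_limited);
};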
@@ -391,7 +425,11 @@
 /* Constructs common control bits of non-data skb. If SYN/FIN is present,
  * auto increment end seqno.
  */
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
 static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
+#else
+void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
+#endif
 {
         struct skb_shared_info *shinfo = skb_shinfo(skb);
 
@@ -411,7 +449,11 @@
         TCP_SKB_CB(skb)->end_seq = seq;
 }
 
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
 static inline bool tcp_urg_mode(const struct tcp_sock *tp)
+#else
+bool tcp_urg_mode(const struct tcp_sock *tp)
+#endif
 {
         return tp->snd_una != tp->snd_up;
 }
@@ -421,6 +463,7 @@
 #define OPTION_MD5              (1 << 2)
 #define OPTION_WSCALE           (1 << 3)
 #define OPTION_FAST_OPEN_COOKIE (1 << 8)
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
 
 struct tcp_out_options {
         u16 options;            /* bit field of OPTION_* */
@@ -432,6 +475,9 @@
         __u32 tsval, tsecr;     /* need to include OPTION_TS */
         struct tcp_fastopen_cookie *fastopen_cookie;    /* Fast open cookie */
 };
+#else
+/* Before adding here - take a look at OPTION_MPTCP in include/net/mptcp.h */
+#endif
 
 /* Write previously computed TCP options to the packet.
  *
@@ -447,7 +493,11 @@
  * (but it may well be that other scenarios fail similarly).
  */
 static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
                               struct tcp_out_options *opts)
+#else
+                              struct tcp_out_options *opts, struct sk_buff *skb)
+#endif
 {
         u16 options = opts->options;    /* mungable copy */
 
@@ -539,6 +589,11 @@
                 }
                 ptr += (len + 3) >> 2;
         }
+#if defined(CONFIG_BCM_MPTCP) && defined(CONFIG_BCM_KF_MPTCP)
+
+        if (unlikely(OPTION_MPTCP & opts->options))
+                mptcp_options_write(ptr, tp, opts, skb);
+#endif
 }
 
 /* Compute TCP options for SYN packets. This is not the final
@@ -590,6 +645,10 @@
                 if (unlikely(!(OPTION_TS & opts->options)))
                         remaining -= TCPOLEN_SACKPERM_ALIGNED;
         }
+#if defined(CONFIG_BCM_MPTCP) && defined(CONFIG_BCM_KF_MPTCP)
+        if (tp->request_mptcp || mptcp(tp))
+                mptcp_syn_options(sk, opts, &remaining);
+#endif
 
         if (fastopen && fastopen->cookie.len >= 0) {
                 u32 need = fastopen->cookie.len;
@@ -667,6 +726,11 @@
                 }
         }
 
+#if defined(CONFIG_BCM_MPTCP) && defined(CONFIG_BCM_KF_MPTCP)
+        if (ireq->saw_mpc)
+                mptcp_synack_options(req, opts, &remaining);
+
+#endif
         return MAX_TCP_OPTION_SPACE - remaining;
 }
 
@@ -699,9 +763,14 @@
                 opts->tsecr = tp->rx_opt.ts_recent;
                 size += TCPOLEN_TSTAMP_ALIGNED;
         }
+#if defined(CONFIG_BCM_MPTCP) && defined(CONFIG_BCM_KF_MPTCP)
+        if (mptcp(tp))
+                mptcp_established_options(sk, skb, opts, &size);
+#endif
 
         eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
         if (unlikely(eff_sacks)) {
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
                 const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
                 opts->num_sack_blocks =
                         min_t(unsigned int, eff_sacks,
@@ -709,12 +778,26 @@
                               TCPOLEN_SACK_PERBLOCK);
                 size += TCPOLEN_SACK_BASE_ALIGNED +
                         opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
+#else
+                const unsigned remaining = MAX_TCP_OPTION_SPACE - size;
+                if (remaining < TCPOLEN_SACK_BASE_ALIGNED)
+                        opts->num_sack_blocks = 0;
+                else
+                        opts->num_sack_blocks =
+                                min_t(unsigned int, eff_sacks,
+                                      (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
+                                      TCPOLEN_SACK_PERBLOCK);
+                if (opts->num_sack_blocks)
+                        size += TCPOLEN_SACK_BASE_ALIGNED +
+                                opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
+#endif
         }
 
         return size;
 }
 
+#if !defined(CONFIG_BCM_KF_TCP_NO_TSQ)
 /* TCP SMALL QUEUES (TSQ)
  *
  * TSQ goal is to keep small amount of skbs per tcp flow in tx queues (qdisc+dev)
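The reworked SACK sizing above guards against an unsigned underflow: once mptcp_established_options() has consumed part of the 40-byte option budget, `remaining` can drop below TCPOLEN_SACK_BASE_ALIGNED and the original expression would wrap around. A small illustration with hypothetical sizes (the three constants are the real values from include/net/tcp.h: 40, 4 and 8):

/* Illustration only: suppose timestamps plus MPTCP options leave size = 38. */
unsigned int remaining = MAX_TCP_OPTION_SPACE - 38;               /* 2            */
unsigned int wrapped   = (remaining - TCPOLEN_SACK_BASE_ALIGNED)  /* 2 - 4 wraps  */
                         / TCPOLEN_SACK_PERBLOCK;                 /* huge count   */
unsigned int clamped   = remaining < TCPOLEN_SACK_BASE_ALIGNED ? 0 :
                         (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
                         TCPOLEN_SACK_PERBLOCK;                   /* 0 blocks     */

The replacement code computes `clamped`, and only adds the SACK base length to `size` when at least one block actually fits.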
@@ -740,8 +823,13 @@
         if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 |
                                    TCPF_CLOSING | TCPF_CLOSE_WAIT | TCPF_LAST_ACK))
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
                 tcp_write_xmit(sk, tcp_current_mss(sk), tcp_sk(sk)->nonagle,
                                0, GFP_ATOMIC);
+#else
+                tcp_sk(sk)->ops->write_xmit(sk, tcp_current_mss(sk),
+                                            tcp_sk(sk)->nonagle, 0, GFP_ATOMIC);
+#endif
 }
 
 /*
  * One tasklet per cpu tries to send more skbs.
@@ -756,7 +844,11 @@
         unsigned long flags;
         struct list_head *q, *n;
         struct tcp_sock *tp;
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
         struct sock *sk;
+#else
+        struct sock *sk, *meta_sk;
+#endif
 
         local_irq_save(flags);
         list_splice_init(&tsq->head, &list);
@@ -767,25 +859,77 @@
                 list_del(&tp->tsq_node);
 
                 sk = (struct sock *)tp;
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
                 bh_lock_sock(sk);
+#else
+                meta_sk = mptcp(tp) ? mptcp_meta_sk(sk) : sk;
+                bh_lock_sock(meta_sk);
+#endif
 
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
                 if (!sock_owned_by_user(sk)) {
+#else
+                if (!sock_owned_by_user(meta_sk)) {
+#endif
                         tcp_tsq_handler(sk);
+#if defined(CONFIG_BCM_MPTCP) && defined(CONFIG_BCM_KF_MPTCP)
+                        if (mptcp(tp))
+                                tcp_tsq_handler(meta_sk);
+#endif
                 } else {
+#if defined(CONFIG_BCM_MPTCP) && defined(CONFIG_BCM_KF_MPTCP)
+                        if (mptcp(tp) && sk->sk_state == TCP_CLOSE)
+                                goto exit;
+
+#endif
                         /* defer the work to tcp_release_cb() */
                         set_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags);
+#if defined(CONFIG_BCM_MPTCP) && defined(CONFIG_BCM_KF_MPTCP)
+
+                        if (mptcp(tp))
+                                mptcp_tsq_flags(sk);
+#endif
                 }
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
                 bh_unlock_sock(sk);
+#else
+exit:
+                bh_unlock_sock(meta_sk);
+#endif
 
                 clear_bit(TSQ_QUEUED, &tp->tsq_flags);
                 sk_free(sk);
         }
 }
 
+#if defined(CONFIG_BCM_MPTCP) && defined(CONFIG_BCM_KF_MPTCP)
 #define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) |           \
                           (1UL << TCP_WRITE_TIMER_DEFERRED) |   \
                           (1UL << TCP_DELACK_TIMER_DEFERRED) |  \
+                          (1UL << MPTCP_PATH_MANAGER) |         \
+                          (1UL << MPTCP_SUB_DEFERRED) |         \
+                          (1UL << TCP_MTU_REDUCED_DEFERRED))
+#else
+#define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) |           \
+                          (1UL << TCP_WRITE_TIMER_DEFERRED) |   \
+                          (1UL << TCP_DELACK_TIMER_DEFERRED) |  \
+                          (1UL << TCP_MTU_REDUCED_DEFERRED))
+
+#endif
+#else
+
+#if defined(CONFIG_BCM_MPTCP) && defined(CONFIG_BCM_KF_MPTCP)
+#define TCP_DEFERRED_ALL ((1UL << TCP_WRITE_TIMER_DEFERRED) |   \
+                          (1UL << TCP_DELACK_TIMER_DEFERRED) |  \
+                          (1UL << MPTCP_PATH_MANAGER) |         \
+                          (1UL << MPTCP_SUB_DEFERRED) |         \
                           (1UL << TCP_MTU_REDUCED_DEFERRED))
+#else
+#define TCP_DEFERRED_ALL ((1UL << TCP_WRITE_TIMER_DEFERRED) |   \
+                          (1UL << TCP_DELACK_TIMER_DEFERRED) |  \
+                          (1UL << TCP_MTU_REDUCED_DEFERRED))
+#endif
+#endif
 /**
  * tcp_release_cb - tcp release_sock() callback
  * @sk: socket
@@ -806,8 +950,10 @@
                 nflags = flags & ~TCP_DEFERRED_ALL;
         } while (cmpxchg(&tp->tsq_flags, flags, nflags) != flags);
 
+#if !defined(CONFIG_BCM_KF_TCP_NO_TSQ)
         if (flags & (1UL << TCP_TSQ_DEFERRED))
                 tcp_tsq_handler(sk);
+#endif
 
         /* Here begins the tricky part :
          * We are called from release_sock() with :
@@ -832,9 +978,19 @@
                 inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
                 __sock_put(sk);
         }
+#if defined(CONFIG_BCM_MPTCP) && defined(CONFIG_BCM_KF_MPTCP)
+        if (flags & (1UL << MPTCP_PATH_MANAGER)) {
+                if (tcp_sk(sk)->mpcb->pm_ops->release_sock)
+                        tcp_sk(sk)->mpcb->pm_ops->release_sock(sk);
+                __sock_put(sk);
+        }
+        if (flags & (1UL << MPTCP_SUB_DEFERRED))
+                mptcp_tsq_sub_deferred(sk);
+#endif
 }
 EXPORT_SYMBOL(tcp_release_cb);
 
+#if !defined(CONFIG_BCM_KF_TCP_NO_TSQ)
 void __init tcp_tasklet_init(void)
 {
         int i;
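The MPTCP bits reuse the existing deferral scheme: work that cannot run while the socket is owned by user space is recorded as a bit in tp->tsq_flags together with a socket reference, and tcp_release_cb() later claims all pending bits with one cmpxchg() and runs the handlers, as the MPTCP_PATH_MANAGER / MPTCP_SUB_DEFERRED branches above do. A minimal sketch of the producer side (the helper name is made up for illustration; the real producers live in the MPTCP core, e.g. behind mptcp_tsq_flags()):

static void example_defer_path_manager_work(struct sock *sk)
{
        /* Called from BH context while the socket is owned by user space:
         * record the pending work and pin the socket until tcp_release_cb()
         * consumes the bit, calls pm_ops->release_sock() and drops the
         * reference with __sock_put(). */
        if (!test_and_set_bit(MPTCP_PATH_MANAGER, &tcp_sk(sk)->tsq_flags))
                sock_hold(sk);
}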
@@ -848,12 +1004,30 @@
                              (unsigned long)tsq);
         }
 }
+#endif
 
 /*
  * Write buffer destructor automatically called from kfree_skb.
  * We can't xmit new skbs from this context, as we might already
  * hold qdisc lock.
  */
+#if defined(CONFIG_BCM_KF_TCP_NO_TSQ)
+void tcp_wfree(struct sk_buff *skb)
+{
+        struct sock *sk = skb->sk;
+
+        /*
+         * Keep a reference on sk_wmem_alloc, this will be released
+         * after sk_write_space() call
+         */
+        atomic_sub_return(skb->truesize - 1, &sk->sk_wmem_alloc);
+
+        if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
+                sk->sk_write_space(sk);
+        }
+        sk_free(sk);
+}
+#else
 void tcp_wfree(struct sk_buff *skb)
 {
         struct sock *sk = skb->sk;
@@ -891,6 +1065,7 @@
 out:
         sk_free(sk);
 }
+#endif
 
 /* This routine actually transmits TCP packets queued in by
  * tcp_do_sendmsg(). This is used by both the initial
@@ -903,8 +1078,13 @@
  * We are working here with either a clone of the original
  * SKB, or a fresh unique copy made by the retransmit engine.
  */
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
 static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
                             gfp_t gfp_mask)
+#else
+int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
+                     gfp_t gfp_mask)
+#endif
 {
         const struct inet_connection_sock *icsk = inet_csk(sk);
         struct inet_sock *inet;
@@ -956,11 +1136,15 @@
         skb_push(skb, tcp_header_size);
         skb_reset_transport_header(skb);
 
+#if defined(CONFIG_BCM_KF_TCP_NO_TSQ)
+        skb_set_owner_w(skb, sk);
+#else
         skb_orphan(skb);
         skb->sk = sk;
         skb->destructor = skb_is_tcp_pure_ack(skb) ? sock_wfree : tcp_wfree;
         skb_set_hash_from_sk(skb, sk);
         atomic_add(skb->truesize, &sk->sk_wmem_alloc);
+#endif
 
         /* Build TCP header and checksum it. */
         th = tcp_hdr(skb);
@@ -977,7 +1161,11 @@
                  */
                 th->window      = htons(min(tp->rcv_wnd, 65535U));
         } else {
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
                 th->window      = htons(tcp_select_window(sk));
+#else
+                th->window      = htons(tp->ops->select_window(sk));
+#endif
         }
         th->check               = 0;
         th->urg_ptr             = 0;
@@ -993,7 +1181,11 @@
                 }
         }
 
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
         tcp_options_write((__be32 *)(th + 1), tp, &opts);
+#else
+        tcp_options_write((__be32 *)(th + 1), tp, &opts, skb);
+#endif
         if (likely((tcb->tcp_flags & TCPHDR_SYN) == 0))
                 tcp_ecn_send(sk, skb, tcp_header_size);
@@ -1043,7 +1235,11 @@
  * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames,
  * otherwise socket can stall.
  */
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
 static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
+#else
+void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
+#endif
 {
         struct tcp_sock *tp = tcp_sk(sk);
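In the CONFIG_BCM_KF_TCP_NO_TSQ build, tcp_transmit_skb() charges the clone to the socket with skb_set_owner_w() instead of open-coding the ownership transfer and installing the TSQ-aware tcp_wfree destructor. For reference, skb_set_owner_w() amounts to roughly the following (simplified sketch of the helper in net/core/sock.c; the full version also handles non-full sockets and flow-hash setup):

static void example_skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
{
        skb_orphan(skb);                          /* drop any previous owner   */
        skb->sk = sk;
        skb->destructor = sock_wfree;             /* plain wfree, no TSQ logic */
        atomic_add(skb->truesize, &sk->sk_wmem_alloc);
}

So the practical difference between the two branches is which destructor runs at free time: plain sock_wfree, or the TSQ/pure-ACK aware tcp_wfree defined above.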
@@ -1056,15 +1252,25 @@
 }
 
 /* Initialize TSO segments for a packet. */
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
 static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
                                  unsigned int mss_now)
+#else
+void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
+                          unsigned int mss_now)
+#endif
 {
         struct skb_shared_info *shinfo = skb_shinfo(skb);
 
         /* Make sure we own this skb before messing gso_size/gso_segs */
         WARN_ON_ONCE(skb_cloned(skb));
 
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
         if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) {
+#else
+        if (skb->len <= mss_now || (is_meta_sk(sk) && !mptcp_sk_can_gso(sk)) ||
+            (!is_meta_sk(sk) && !sk_can_gso(sk)) || skb->ip_summed == CHECKSUM_NONE) {
+#endif
                 /* Avoid the costly divide in the normal
                  * non-TSO case.
                  */
@@ -1096,7 +1302,11 @@
 /* Pcount in the middle of the write queue got changed, we need to do various
  * tweaks to fix counters
  */
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
 static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr)
+#else
+void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr)
+#endif
 {
         struct tcp_sock *tp = tcp_sk(sk);
 
@@ -1159,6 +1369,11 @@
         if (nsize < 0)
                 nsize = 0;
 
+        if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf)) {
+                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG);
+                return -ENOMEM;
+        }
+
         if (skb_unclone(skb, gfp))
                 return -ENOMEM;
 
@@ -1231,7 +1446,11 @@
  * eventually). The difference is that pulled data not copied, but
  * immediately discarded.
  */
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
 static void __pskb_trim_head(struct sk_buff *skb, int len)
+#else
+void __pskb_trim_head(struct sk_buff *skb, int len)
+#endif
 {
         struct skb_shared_info *shinfo;
         int i, k, eat;
@@ -1320,8 +1539,7 @@
         mss_now -= icsk->icsk_ext_hdr_len;
 
         /* Then reserve room for full set of TCP options and 8 bytes of data */
-        if (mss_now < 48)
-                mss_now = 48;
+        mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss);
         return mss_now;
 }
 
@@ -1449,6 +1667,9 @@
 
         return mss_now;
 }
+#if defined(CONFIG_BCM_MPTCP) && defined(CONFIG_BCM_KF_MPTCP)
+EXPORT_SYMBOL(tcp_current_mss);
+#endif
 
 /* RFC2861, slow part. Adjust cwnd, after it was not full during one rto.
  * As additional protections, we do not touch cwnd in retransmission phases,
@@ -1472,7 +1693,11 @@
         tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
 static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
+#else
+void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
+#endif
 {
         struct tcp_sock *tp = tcp_sk(sk);
 
@@ -1516,8 +1741,13 @@
  * But we can avoid doing the divide again given we already have
  * skb_pcount = skb->len / mss_now
  */
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
 static void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
                                 const struct sk_buff *skb)
+#else
+void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
+                         const struct sk_buff *skb)
+#endif
 {
         if (skb->len < tcp_skb_pcount(skb) * mss_now)
                 tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
@@ -1559,11 +1789,19 @@
 }
 
 /* Returns the portion of skb which can be sent right away */
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
 static unsigned int tcp_mss_split_point(const struct sock *sk,
                                         const struct sk_buff *skb,
                                         unsigned int mss_now,
                                         unsigned int max_segs,
                                         int nonagle)
+#else
+unsigned int tcp_mss_split_point(const struct sock *sk,
+                                 const struct sk_buff *skb,
+                                 unsigned int mss_now,
+                                 unsigned int max_segs,
+                                 int nonagle)
+#endif
 {
         const struct tcp_sock *tp = tcp_sk(sk);
         u32 partial, needed, window, max_len;
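The new check in tcp_fragment() above refuses to split a packet once the write queue already holds more than twice sk_sndbuf worth of memory, so a peer cannot use pathological SACK patterns to shatter the retransmit queue into an unbounded number of tiny skbs; LINUX_MIB_TCPWQUEUETOOBIG counts how often it trips. Together with the tcp_mtu_to_mss() change from a hard-coded 48-byte floor to sysctl_tcp_min_snd_mss, this mirrors the upstream SACK-hardening fixes. The shift is just an overflow-safe way of writing the comparison:

/* Illustration only: the guard above, written out as a predicate. */
static bool example_tcp_wqueue_too_big(const struct sock *sk)
{
        /* (sk_wmem_queued >> 1) > sk_sndbuf is equivalent to
         * sk_wmem_queued > 2 * sk_sndbuf, without risking overflow
         * when doubling sk_sndbuf. */
        return (sk->sk_wmem_queued >> 1) > sk->sk_sndbuf;
}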
@@ -1593,13 +1831,23 @@
 /* Can at least one segment of SKB be sent right now, according to the
  * congestion window rules? If so, return how many segments are allowed.
  */
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
 static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
                                          const struct sk_buff *skb)
+#else
+unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
+                           const struct sk_buff *skb)
+#endif
 {
         u32 in_flight, cwnd, halfcwnd;
 
         /* Don't be strict about the congestion window for the final FIN.
          */
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
         if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
+#else
+        if (skb &&
+            (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
+#endif
             tcp_skb_pcount(skb) == 1)
                 return 1;
 
@@ -1619,8 +1867,13 @@
  * This must be invoked the first time we consider transmitting
  * SKB onto the wire.
  */
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
 static int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb,
                              unsigned int mss_now)
+#else
+int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb,
+                      unsigned int mss_now)
+#endif
 {
         int tso_segs = tcp_skb_pcount(skb);
 
@@ -1635,8 +1888,13 @@
 /* Return true if the Nagle test allows this packet to be
  * sent now.
  */
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
 static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
                                   unsigned int cur_mss, int nonagle)
+#else
+bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
+                    unsigned int cur_mss, int nonagle)
+#endif
 {
         /* Nagle rule does not apply to frames, which sit in the middle of the
          * write_queue (they have no chances to get new data).
@@ -1648,7 +1906,12 @@
                 return true;
 
         /* Don't use the nagle rule for urgent data (or for the final FIN). */
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
         if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
+#else
+        if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) ||
+            mptcp_is_data_fin(skb))
+#endif
                 return true;
 
         if (!tcp_nagle_check(skb->len < cur_mss, tp, nonagle))
@@ -1658,9 +1921,14 @@
 }
 
 /* Does at least the first segment of SKB fit into the send window? */
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
 static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
                              const struct sk_buff *skb,
                              unsigned int cur_mss)
+#else
+bool tcp_snd_wnd_test(const struct tcp_sock *tp, const struct sk_buff *skb,
+                      unsigned int cur_mss)
+#endif
 {
         u32 end_seq = TCP_SKB_CB(skb)->end_seq;
 
@@ -1774,7 +2042,11 @@
         struct sk_buff *head;
         int win_divisor;
 
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
         if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
+#else
+        if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) || mptcp_is_data_fin(skb))
+#endif
                 goto send_now;
 
         if (!((1 << icsk->icsk_ca_state) & (TCPF_CA_Open | TCPF_CA_CWR)))
@@ -2029,7 +2301,11 @@
  * Returns true, if no segments are in flight and we have queued segments,
  * but cannot send anything now because of SWS or another problem.
  */
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
 static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
+#else
+bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
+#endif
                            int push_one, gfp_t gfp)
 {
         struct tcp_sock *tp = tcp_sk(sk);
@@ -2042,7 +2318,15 @@
 
         sent_pkts = 0;
 
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
         if (!push_one) {
+#else
+        /* pmtu not yet supported with MPTCP. Should be possible, by early
+         * exiting the loop inside tcp_mtu_probe, making sure that only one
+         * single DSS-mapping gets probed.
+         */
+        if (!push_one && !mptcp(tp)) {
+#endif
                 /* Do MTU probing. */
                 result = tcp_mtu_probe(sk);
                 if (!result) {
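Because tcp_write_xmit() stops being static in the MPTCP build, every former direct call in this file (tcp_tsq_handler(), tcp_send_loss_probe(), __tcp_push_pending_frames(), tcp_push_one()) is rewritten to go through tp->ops->write_xmit, letting the meta-socket substitute the MPTCP scheduler. The repeated pattern is equivalent to a small dispatch helper along these lines (hypothetical helper, shown only to summarise the transformation):

static inline bool example_write_xmit(struct sock *sk, unsigned int mss_now,
                                      int nonagle, int push_one, gfp_t gfp)
{
#if defined(CONFIG_BCM_MPTCP) && defined(CONFIG_BCM_KF_MPTCP)
        /* meta-socket or subflow specific implementation */
        return tcp_sk(sk)->ops->write_xmit(sk, mss_now, nonagle, push_one, gfp);
#else
        return tcp_write_xmit(sk, mss_now, nonagle, push_one, gfp);
#endif
}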
@@ -2102,6 +2386,7 @@
                     unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
                         break;
 
+#if !defined(CONFIG_BCM_KF_TCP_NO_TSQ)
                 /* TCP Small Queues :
                  * Control number of packets in qdisc/devices to two packets / or ~1 ms.
                  * This allows for :
@@ -2126,6 +2411,7 @@
                         break;
                 }
 
+#endif
                 if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
                         break;
 
@@ -2149,7 +2435,12 @@
                 /* Send one loss probe per tail loss episode. */
                 if (push_one != 2)
                         tcp_schedule_loss_probe(sk);
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
                 tcp_cwnd_validate(sk, is_cwnd_limited);
+#else
+                if (tp->ops->cwnd_validate)
+                        tp->ops->cwnd_validate(sk, is_cwnd_limited);
+#endif
                 return false;
         }
         return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk));
@@ -2241,7 +2532,12 @@
         int err = -1;
 
         if (tcp_send_head(sk)) {
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
                 err = tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
+#else
+                err = tp->ops->write_xmit(sk, mss, TCP_NAGLE_OFF, 2,
+                                          GFP_ATOMIC);
+#endif
                 goto rearm_timer;
         }
 
@@ -2301,8 +2597,13 @@
         if (unlikely(sk->sk_state == TCP_CLOSE))
                 return;
 
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
         if (tcp_write_xmit(sk, cur_mss, nonagle, 0,
                            sk_gfp_atomic(sk, GFP_ATOMIC)))
+#else
+        if (tcp_sk(sk)->ops->write_xmit(sk, cur_mss, nonagle, 0,
+                                        sk_gfp_atomic(sk, GFP_ATOMIC)))
+#endif
                 tcp_check_probe_timer(sk);
 }
 
@@ -2315,7 +2616,12 @@
         BUG_ON(!skb || skb->len < mss_now);
 
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
         tcp_write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1, sk->sk_allocation);
+#else
+        tcp_sk(sk)->ops->write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1,
+                                    sk->sk_allocation);
+#endif
 }
 
 /* This function returns the amount that we can raise the
@@ -2528,6 +2834,12 @@
         if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
                 return;
 
+#if defined(CONFIG_BCM_MPTCP) && defined(CONFIG_BCM_KF_MPTCP)
+        /* Currently not supported for MPTCP - but it should be possible */
+        if (mptcp(tp))
+                return;
+
+#endif
         tcp_for_write_queue_from_safe(skb, tmp, sk) {
                 if (!tcp_can_collapse(sk, skb))
                         break;
@@ -3011,7 +3323,11 @@
         /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
         th->window = htons(min(req->rcv_wnd, 65535U));
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
         tcp_options_write((__be32 *)(th + 1), tp, &opts);
+#else
+        tcp_options_write((__be32 *)(th + 1), tp, &opts, skb);
+#endif
         th->doff = (tcp_header_size >> 2);
         TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_OUTSEGS);
 
@@ -3088,6 +3404,7 @@
             (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
                 tp->window_clamp = tcp_full_space(sk);
 
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
         tcp_select_initial_window(tcp_full_space(sk),
                                   tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
                                   &tp->rcv_wnd,
@@ -3095,6 +3412,15 @@
                                   sysctl_tcp_window_scaling,
                                   &rcv_wscale,
                                   dst_metric(dst, RTAX_INITRWND));
+#else
+        tp->ops->select_initial_window(tcp_full_space(sk),
+                                       tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
+                                       &tp->rcv_wnd,
+                                       &tp->window_clamp,
+                                       sysctl_tcp_window_scaling,
+                                       &rcv_wscale,
+                                       dst_metric(dst, RTAX_INITRWND), sk);
+#endif
 
         tp->rx_opt.rcv_wscale = rcv_wscale;
         tp->rcv_ssthresh = tp->rcv_wnd;
 
@@ -3118,6 +3444,38 @@
         inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
         inet_csk(sk)->icsk_retransmits = 0;
         tcp_clear_retrans(tp);
+#if defined(CONFIG_BCM_MPTCP) && defined(CONFIG_BCM_KF_MPTCP)
+
+#ifdef CONFIG_MPTCP
+        if (sock_flag(sk, SOCK_MPTCP) && mptcp_doit(sk)) {
+                if (is_master_tp(tp)) {
+                        tp->request_mptcp = 1;
+                        mptcp_connect_init(sk);
+                } else if (tp->mptcp) {
+                        struct inet_sock *inet = inet_sk(sk);
+
+                        tp->mptcp->snt_isn = tp->write_seq;
+                        tp->mptcp->init_rcv_wnd = tp->rcv_wnd;
+
+                        /* Set nonce for new subflows */
+                        if (sk->sk_family == AF_INET)
+                                tp->mptcp->mptcp_loc_nonce = mptcp_v4_get_nonce(
+                                                        inet->inet_saddr,
+                                                        inet->inet_daddr,
+                                                        inet->inet_sport,
+                                                        inet->inet_dport);
+#if IS_ENABLED(CONFIG_IPV6)
+                        else
+                                tp->mptcp->mptcp_loc_nonce = mptcp_v6_get_nonce(
+                                                        inet6_sk(sk)->saddr.s6_addr32,
+                                                        sk->sk_v6_daddr.s6_addr32,
+                                                        inet->inet_sport,
+                                                        inet->inet_dport);
+#endif
+                }
+        }
+#endif
+#endif
 }
 
 static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
@@ -3386,7 +3744,11 @@
  * one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is
  * out-of-date with SND.UNA-1 to probe window.
  */
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
 static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
+#else
+int tcp_xmit_probe_skb(struct sock *sk, int urgent)
+#endif
 {
         struct tcp_sock *tp = tcp_sk(sk);
         struct sk_buff *skb;
 
@@ -3468,7 +3830,11 @@
         unsigned long probe_max;
         int err;
 
+#if !defined(CONFIG_BCM_MPTCP) || !defined(CONFIG_BCM_KF_MPTCP)
         err = tcp_write_wakeup(sk);
+#else
+        err = tp->ops->write_wakeup(sk);
+#endif
 
         if (tp->packets_out || !tcp_send_head(sk)) {
                 /* Cancel probe timer, if it is not required. */
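In tcp_connect_init() above, every additional subflow derives a local nonce from its own 4-tuple before the SYN goes out; that nonce is what the MP_JOIN handshake on that subflow later relies on. The address-family split could equally be written as a small helper (hypothetical refactoring, using only the calls already visible in the hunk):

static u32 example_subflow_local_nonce(const struct sock *sk)
{
        const struct inet_sock *inet = inet_sk(sk);

#if IS_ENABLED(CONFIG_IPV6)
        if (sk->sk_family != AF_INET)
                return mptcp_v6_get_nonce(inet6_sk(sk)->saddr.s6_addr32,
                                          sk->sk_v6_daddr.s6_addr32,
                                          inet->inet_sport, inet->inet_dport);
#endif
        return mptcp_v4_get_nonce(inet->inet_saddr, inet->inet_daddr,
                                  inet->inet_sport, inet->inet_dport);
}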