--- zzzz-none-000/linux-4.19.183/net/ipv4/tcp_input.c 2021-03-24 10:07:39.000000000 +0000 +++ bcm63-7530ax-756/linux-4.19.183/net/ipv4/tcp_input.c 2023-06-28 08:54:21.000000000 +0000 @@ -76,12 +76,18 @@ #include #include #include +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) +#include +#include +#include +#endif #include #include #include int sysctl_tcp_max_orphans __read_mostly = NR_FILE; +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) #define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ #define FLAG_DATA_ACKED 0x04 /* This ACK acknowledged new data. */ @@ -105,6 +111,7 @@ #define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE|FLAG_DSACKING_ACK) #define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED) +#endif #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH)) @@ -343,8 +350,17 @@ per_mss = roundup_pow_of_two(per_mss) + SKB_DATA_ALIGN(sizeof(struct sk_buff)); +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd); nr_segs = max_t(u32, nr_segs, tp->reordering + 1); +#else + if (mptcp(tp)) { + nr_segs = mptcp_check_snd_buf(tp); + } else { + nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd); + nr_segs = max_t(u32, nr_segs, tp->reordering + 1); + } +#endif /* Fast Recovery (RFC 5681 3.2) : * Cubic needs 1.7 factor, rounded to 2 to include @@ -353,8 +369,22 @@ sndmem = ca_ops->sndbuf_expand ? 
ca_ops->sndbuf_expand(sk) : 2; sndmem *= nr_segs * per_mss; +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) if (sk->sk_sndbuf < sndmem) +#else + /* MPTCP: after this sndmem is the new contribution of the + * current subflow to the aggregated sndbuf */ + if (sk->sk_sndbuf < sndmem) { + int old_sndbuf = sk->sk_sndbuf; +#endif sk->sk_sndbuf = min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]); +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + /* MPTCP: ok, the subflow sndbuf has grown, reflect + * this in the aggregate buffer.*/ + if (mptcp(tp) && old_sndbuf != sk->sk_sndbuf) + mptcp_update_sndbuf(tp); + } +#endif } /* 2. Tuning advertised window (window_clamp, rcv_ssthresh) @@ -403,9 +433,20 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + struct sock *meta_sk = mptcp(tp) ? mptcp_meta_sk(sk) : sk; + struct tcp_sock *meta_tp = tcp_sk(meta_sk); +#endif int room; +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) room = min_t(int, tp->window_clamp, tcp_space(sk)) - tp->rcv_ssthresh; +#else + if (is_meta_sk(sk)) + return; + + room = min_t(int, meta_tp->window_clamp, tcp_space(meta_sk)) - meta_tp->rcv_ssthresh; +#endif /* Check #1 */ if (room > 0 && !tcp_under_memory_pressure(sk)) { @@ -415,13 +456,25 @@ * will fit to rcvbuf in future. 
*/ if (tcp_win_from_space(sk, skb->truesize) <= skb->len) +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) incr = 2 * tp->advmss; +#else + incr = 2 * meta_tp->advmss; +#endif else +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) incr = __tcp_grow_window(sk, skb); +#else + incr = __tcp_grow_window(meta_sk, skb); +#endif if (incr) { incr = max_t(int, incr, 2 * skb->len); +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) tp->rcv_ssthresh += min(room, incr); +#else + meta_tp->rcv_ssthresh += min(room, incr); +#endif inet_csk(sk)->icsk_ack.quick |= 1; } } @@ -602,7 +655,14 @@ tcp_mstamp_refresh(tp); time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time); +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0) +#else + if (mptcp(tp)) { + if (mptcp_check_rtt(tp, time)) + return; + } else if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0) +#endif return; /* Number of bytes copied to user in last RTT */ @@ -821,7 +881,11 @@ /* Calculate rto without backoff. This is the second half of Van Jacobson's * routine referred to above. */ +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) static void tcp_set_rto(struct sock *sk) +#else +void tcp_set_rto(struct sock *sk) +#endif { const struct tcp_sock *tp = tcp_sk(sk); /* Old crap is replaced with new one. 8) @@ -1393,6 +1457,15 @@ int len; int in_sack; +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + /* For MPTCP we cannot shift skb-data and remove one skb from the + * send-queue, because this will make us lose the DSS-option (which + * is stored in TCP_SKB_CB(skb)->dss) of the skb we are removing. 
+ */ + if (mptcp(tp)) + goto fallback; + +#endif /* Normally R but no L won't result in plain S */ if (!dup_sack && (TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_RETRANS)) == TCPCB_SACKED_RETRANS) @@ -2945,7 +3018,11 @@ */ tcp_update_rtt_min(sk, ca_rtt_us, flag); tcp_rtt_estimator(sk, seq_rtt_us); +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) tcp_set_rto(sk); +#else + tp->ops->set_rto(sk); +#endif /* RFC6298: only reset backoff on valid RTT measurement. */ inet_csk(sk)->icsk_backoff = 0; @@ -3013,7 +3090,11 @@ } /* If we get here, the whole TSO packet has not been acked. */ +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) +#else +u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) +#endif { struct tcp_sock *tp = tcp_sk(sk); u32 packets_acked; @@ -3139,6 +3220,10 @@ */ if (likely(!(scb->tcp_flags & TCPHDR_SYN))) { flag |= FLAG_DATA_ACKED; +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + if (mptcp(tp) && mptcp_is_data_seq(skb)) + flag |= MPTCP_FLAG_DATA_ACKED; +#endif } else { flag |= FLAG_SYN_ACKED; tp->retrans_stamp = 0; @@ -3258,7 +3343,11 @@ return flag; } +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) static void tcp_ack_probe(struct sock *sk) +#else +void tcp_ack_probe(struct sock *sk) +#endif { struct inet_connection_sock *icsk = inet_csk(sk); struct sk_buff *head = tcp_send_head(sk); @@ -3330,9 +3419,14 @@ /* Check that window update is acceptable. * The function assumes that snd_una<=ack<=snd_next. 
*/ +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) static inline bool tcp_may_update_window(const struct tcp_sock *tp, const u32 ack, const u32 ack_seq, const u32 nwin) +#else +bool tcp_may_update_window(const struct tcp_sock *tp, const u32 ack, + const u32 ack_seq, const u32 nwin) +#endif { return after(ack, tp->snd_una) || after(ack_seq, tp->snd_wl1) || @@ -3570,7 +3664,11 @@ } /* This routine deals with incoming acks, but not outgoing ones. */ +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) +#else +static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) +#endif { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -3692,6 +3790,18 @@ tcp_rack_update_reo_wnd(sk, &rs); +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + if (mptcp(tp)) { + if (mptcp_fallback_infinite(sk, flag)) { + pr_debug("%s resetting flow\n", __func__); + mptcp_send_reset(sk); + goto invalid_ack; + } + + mptcp_clean_rtx_infinite(skb, sk); + } + +#endif if (tp->tlp_high_seq) tcp_process_tlp_ack(sk, ack, flag); @@ -3792,8 +3902,15 @@ */ void tcp_parse_options(const struct net *net, const struct sk_buff *skb, +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) struct tcp_options_received *opt_rx, int estab, struct tcp_fastopen_cookie *foc) +#else + struct tcp_options_received *opt_rx, + struct mptcp_options_received *mopt, + int estab, struct tcp_fastopen_cookie *foc, + struct tcp_sock *tp) +#endif { const unsigned char *ptr; const struct tcphdr *th = tcp_hdr(skb); @@ -3877,6 +3994,12 @@ */ break; #endif +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + case TCPOPT_MPTCP: + mptcp_parse_options(ptr - 2, opsize, mopt, skb, tp); + break; + +#endif case TCPOPT_FASTOPEN: tcp_parse_fastopen_option( opsize - TCPOLEN_FASTOPEN_BASE, @@ -3944,7 +4067,13 @@ return true; } +#if !defined(CONFIG_BCM_KF_MPTCP) || 
!defined(CONFIG_BCM_MPTCP) tcp_parse_options(net, skb, &tp->rx_opt, 1, NULL); +#else + tcp_parse_options(net, skb, &tp->rx_opt, + mptcp(tp) ? &tp->mptcp->rx_opt : NULL, 1, NULL, tp); + +#endif if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) tp->rx_opt.rcv_tsecr -= tp->tsoffset; @@ -4103,6 +4232,13 @@ { struct tcp_sock *tp = tcp_sk(sk); +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + if (is_meta_sk(sk)) { + mptcp_fin(sk); + return; + } + +#endif inet_csk_schedule_ack(sk); sk->sk_shutdown |= RCV_SHUTDOWN; @@ -4113,6 +4249,12 @@ case TCP_ESTABLISHED: /* Move to CLOSE_WAIT */ tcp_set_state(sk, TCP_CLOSE_WAIT); +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + + if (mptcp(tp)) + mptcp_sub_close_passive(sk); + +#endif inet_csk(sk)->icsk_ack.pingpong = 1; break; @@ -4135,9 +4277,22 @@ tcp_set_state(sk, TCP_CLOSING); break; case TCP_FIN_WAIT2: +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + if (mptcp(tp)) { + /* The socket will get closed by mptcp_data_ready. + * We first have to process all data-sequences. + */ + tp->close_it = 1; + break; + } +#endif /* Received a FIN -- send ACK and enter TIME_WAIT. */ tcp_send_ack(sk); +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) tcp_time_wait(sk, TCP_TIME_WAIT, 0); +#else + tp->ops->time_wait(sk, TCP_TIME_WAIT, 0); +#endif break; default: /* Only TCP_LISTEN and TCP_CLOSE are left, in these @@ -4159,6 +4314,12 @@ if (!sock_flag(sk, SOCK_DEAD)) { sk->sk_state_change(sk); +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + /* Don't wake up MPTCP-subflows */ + if (mptcp(tp)) + return; + +#endif /* Do not send POLL_HUP for half duplex close. 
*/ if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == TCP_CLOSE) @@ -4361,6 +4522,11 @@ *fragstolen = false; +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + if (mptcp(tcp_sk(sk)) && !is_meta_sk(sk)) + return false; + +#endif /* Its possible this segment overlaps with prior segment in queue */ if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq) return false; @@ -4415,7 +4581,11 @@ /* This one checks to see if we can put data from the * out_of_order queue into the receive_queue. */ +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) static void tcp_ofo_queue(struct sock *sk) +#else +void tcp_ofo_queue(struct sock *sk) +#endif { struct tcp_sock *tp = tcp_sk(sk); __u32 dsack_high = tp->rcv_nxt; @@ -4438,7 +4608,18 @@ p = rb_next(p); rb_erase(&skb->rbnode, &tp->out_of_order_queue); +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) { +#else + /* In case of MPTCP, the segment may be empty if it's a + * non-data DATA_FIN. (see beginning of tcp_data_queue) + * + * But this only holds true for subflows, not for the + * meta-socket. 
+ */ + if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt) && + (is_meta_sk(sk) || !mptcp(tp) || TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq))) { +#endif SOCK_DEBUG(sk, "ofo packet was already received\n"); tcp_drop(sk, skb); continue; @@ -4472,6 +4653,11 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb, unsigned int size) { +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + if (mptcp(tcp_sk(sk))) + sk = mptcp_meta_sk(sk); + +#endif if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || !sk_rmem_schedule(sk, skb, size)) { @@ -4486,7 +4672,11 @@ return 0; } +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) +#else +void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) +#endif { struct tcp_sock *tp = tcp_sk(sk); struct rb_node **p, *parent; @@ -4559,7 +4749,12 @@ continue; } if (before(seq, TCP_SKB_CB(skb1)->end_seq)) { +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { +#else + if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq) && + (is_meta_sk(sk) || !mptcp(tp) || end_seq != seq)) { +#endif /* All the bits are present. Drop. 
*/ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE); @@ -4606,6 +4801,13 @@ end_seq); break; } +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + /* MPTCP allows non-data data-fin to be in the ofo-queue */ + if (mptcp(tp) && !is_meta_sk(sk) && TCP_SKB_CB(skb1)->seq == TCP_SKB_CB(skb1)->end_seq) { + skb = skb1; + continue; + } +#endif rb_erase(&skb1->rbnode, &tp->out_of_order_queue); tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq); @@ -4617,7 +4819,11 @@ tp->ooo_last_skb = skb; add_sack: +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) if (tcp_is_sack(tp)) +#else + if (tcp_is_sack(tp) && seq != end_seq) +#endif tcp_sack_new_ofo_skb(sk, seq, end_seq); end: if (skb) { @@ -4631,8 +4837,13 @@ } } +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen, bool *fragstolen) +#else +int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen, + bool *fragstolen) +#endif { int eaten; struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue); @@ -4706,9 +4917,13 @@ const struct tcp_sock *tp = tcp_sk(sk); int avail = tp->rcv_nxt - tp->copied_seq; +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) && !sock_flag(sk, SOCK_DONE) && tcp_receive_window(tp) > inet_csk(sk)->icsk_ack.rcv_mss) +#else + if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) && !sock_flag(sk, SOCK_DONE) && !mptcp(tp)) +#endif return; sk->sk_data_ready(sk); @@ -4720,10 +4935,20 @@ bool fragstolen; int eaten; +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) { +#else + /* If no data is present, but a data_fin is in the options, we still + * have to call mptcp_queue_skb later on. 
*/ + if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq && + !(mptcp(tp) && mptcp_is_data_fin(skb))) { +#endif __kfree_skb(skb); return; } +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + +#endif skb_dst_drop(skb); __skb_pull(skb, tcp_hdr(skb)->doff * 4); @@ -4750,7 +4975,11 @@ } eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen); +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) if (skb->len) +#else + if (skb->len || mptcp_is_data_fin(skb)) +#endif tcp_event_data_recv(sk, skb); if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) tcp_fin(sk); @@ -4772,7 +5001,15 @@ if (eaten > 0) kfree_skb_partial(skb, fragstolen); +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) if (!sock_flag(sk, SOCK_DEAD)) +#else + if (!sock_flag(sk, SOCK_DEAD) || mptcp(tp)) + /* MPTCP: we always have to call data_ready, because + * we may be about to receive a data-fin, which still + * must get queued. + */ +#endif tcp_data_ready(sk); return; } @@ -5120,7 +5357,11 @@ return -1; } +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) static bool tcp_should_expand_sndbuf(const struct sock *sk) +#else +bool tcp_should_expand_sndbuf(const struct sock *sk) +#endif { const struct tcp_sock *tp = tcp_sk(sk); @@ -5155,7 +5396,11 @@ { struct tcp_sock *tp = tcp_sk(sk); +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) if (tcp_should_expand_sndbuf(sk)) { +#else + if (tp->ops->should_expand_sndbuf(sk)) { +#endif tcp_sndbuf_expand(sk); tp->snd_cwnd_stamp = tcp_jiffies32; } @@ -5169,10 +5414,20 @@ sock_reset_flag(sk, SOCK_QUEUE_SHRUNK); /* pairs with tcp_poll() */ smp_mb(); +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) if (sk->sk_socket && test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { +#else + if (mptcp(tcp_sk(sk)) || + (sk->sk_socket && + test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))) { +#endif tcp_new_space(sk); +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) if (!test_bit(SOCK_NOSPACE, 
&sk->sk_socket->flags)) +#else + if (sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) +#endif tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED); } } @@ -5191,6 +5446,10 @@ { struct tcp_sock *tp = tcp_sk(sk); unsigned long rtt, delay; +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + struct sock *meta_sk = mptcp(tp) ? mptcp_meta_sk(sk) : sk; + struct tcp_sock *meta_tp = tcp_sk(meta_sk); +#endif /* More than one full frame received... */ if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss && @@ -5199,8 +5458,13 @@ * If application uses SO_RCVLOWAT, we want send ack now if * we have not received enough bytes to satisfy the condition. */ +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat || __tcp_select_window(sk) >= tp->rcv_wnd)) || +#else + (meta_tp->rcv_nxt - meta_tp->copied_seq < meta_sk->sk_rcvlowat || + tp->ops->__select_window(sk) >= tp->rcv_wnd)) || +#endif /* We ACK each frame or... */ tcp_in_quickack_mode(sk) || /* Protocol state mandates a one-time immediate ACK */ @@ -5335,6 +5599,12 @@ { struct tcp_sock *tp = tcp_sk(sk); +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + /* MPTCP urgent data is not yet supported */ + if (mptcp(tp)) + return; + +#endif /* Check if we get a new urgent pointer - normally not. */ if (th->urg) tcp_check_urg(sk, th); @@ -5477,9 +5747,19 @@ goto discard; } +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + /* If valid: post process the received MPTCP options. */ + if (mptcp(tp) && mptcp_handle_options(sk, th, skb)) + goto discard; + +#endif return true; discard: +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + if (mptcp(tp)) + mptcp_reset_mopt(tp); +#endif tcp_drop(sk, skb); return false; } @@ -5536,6 +5816,12 @@ tp->rx_opt.saw_tstamp = 0; +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + /* MPTCP: force slowpath. 
*/ + if (mptcp(tp)) + goto slow_path; + +#endif /* pred_flags is 0xS?10 << 16 + snd_wnd * if header_prediction is to be made * 'S' will always be tp->tcp_header_len >> 2 @@ -5721,17 +6007,34 @@ struct tcp_fastopen_cookie *cookie) { struct tcp_sock *tp = tcp_sk(sk); +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) struct sk_buff *data = tp->syn_data ? tcp_rtx_queue_head(sk) : NULL; +#else + struct sk_buff *data = NULL; +#endif u16 mss = tp->rx_opt.mss_clamp, try_exp = 0; bool syn_drop = false; +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + if (tp->syn_data) { + if (mptcp(tp)) + data = tcp_write_queue_head(mptcp_meta_sk(sk)); + else + data = tcp_rtx_queue_head(sk); + } + +#endif if (mss == tp->rx_opt.user_mss) { struct tcp_options_received opt; /* Get original SYNACK MSS value if user MSS sets mss_clamp */ tcp_clear_options(&opt); opt.user_mss = opt.mss_clamp = 0; +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) tcp_parse_options(sock_net(sk), synack, &opt, 0, NULL); +#else + tcp_parse_options(sock_net(sk), synack, &opt, NULL, 0, NULL, NULL); +#endif mss = opt.mss_clamp; } @@ -5755,7 +6058,15 @@ tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp); +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) if (data) { /* Retransmit unacked data in SYN */ +#else + /* In mptcp case, we do not rely on "retransmit", but instead on + * "transmit", because if fastopen data is not acked, the retransmission + * becomes the first MPTCP data (see mptcp_rcv_synsent_fastopen). 
+ */ + if (data && !mptcp(tp)) { /* Retransmit unacked data in SYN */ +#endif skb_rbtree_walk_from(data) { if (__tcp_retransmit_skb(sk, data, 1)) break; @@ -5795,9 +6106,19 @@ struct tcp_sock *tp = tcp_sk(sk); struct tcp_fastopen_cookie foc = { .len = -1 }; int saved_clamp = tp->rx_opt.mss_clamp; +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + struct mptcp_options_received mopt; +#endif bool fastopen_fail; +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc); +#else + mptcp_init_mp_opt(&mopt); + + tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, + mptcp(tp) ? &tp->mptcp->rx_opt : &mopt, 0, &foc, tp); +#endif if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) tp->rx_opt.rcv_tsecr -= tp->tsoffset; @@ -5857,6 +6178,37 @@ tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); tcp_ack(sk, skb, FLAG_SLOWPATH); +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + if (tp->request_mptcp || mptcp(tp)) { + int ret; + + rcu_read_lock(); + local_bh_disable(); + ret = mptcp_rcv_synsent_state_process(sk, &sk, + skb, &mopt); + local_bh_enable(); + rcu_read_unlock(); + + /* May have changed if we support MPTCP */ + tp = tcp_sk(sk); + icsk = inet_csk(sk); + + if (ret == 1) + goto reset_and_undo; + if (ret == 2) + goto discard; + } + + if (mptcp(tp) && !is_master_tp(tp)) { + /* Timer for repeating the ACK until an answer + * arrives. Used only when establishing an additional + * subflow inside of an MPTCP connection. + */ + sk_reset_timer(sk, &tp->mptcp->mptcp_ack_timer, + jiffies + icsk->icsk_rto); + } + +#endif /* Ok.. it's good. Set up sequence numbers and * move to established. 
*/ @@ -5883,6 +6235,13 @@ tp->tcp_header_len = sizeof(struct tcphdr); } +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + if (mptcp(tp)) { + tp->tcp_header_len += MPTCP_SUB_LEN_DSM_ALIGN; + tp->advmss -= MPTCP_SUB_LEN_DSM_ALIGN; + } + +#endif tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); tcp_initialize_rcv_mss(sk); @@ -5906,9 +6265,20 @@ } if (fastopen_fail) return -1; +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) if (sk->sk_write_pending || +#else + /* With MPTCP we cannot send data on the third ack due to the + * lack of option-space to combine with an MP_CAPABLE. + */ + if (!mptcp(tp) && (sk->sk_write_pending || +#endif icsk->icsk_accept_queue.rskq_defer_accept || +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) icsk->icsk_ack.pingpong) { +#else + icsk->icsk_ack.pingpong)) { +#endif /* Save one ACK. Data will be ready after * several ticks, if write_pending is set. * @@ -5947,6 +6317,9 @@ tcp_paws_reject(&tp->rx_opt, 0)) goto discard_and_undo; +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + /* TODO - check this here for MPTCP */ +#endif if (th->syn) { /* We see SYN without ACK. It is attempt of * simultaneous connect with crossed SYNs. 
@@ -5963,6 +6336,13 @@ tp->tcp_header_len = sizeof(struct tcphdr); } +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + if (mptcp(tp)) { + tp->tcp_header_len += MPTCP_SUB_LEN_DSM_ALIGN; + tp->advmss -= MPTCP_SUB_LEN_DSM_ALIGN; + } + +#endif WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1); WRITE_ONCE(tp->copied_seq, tp->rcv_nxt); tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1; @@ -6021,6 +6401,9 @@ */ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + __releases(&sk->sk_lock.slock) +#endif { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); @@ -6063,6 +6446,18 @@ tp->rx_opt.saw_tstamp = 0; tcp_mstamp_refresh(tp); queued = tcp_rcv_synsent_state_process(sk, skb, th); +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + if (is_meta_sk(sk)) { + sk = tcp_sk(sk)->mpcb->master_sk; + tp = tcp_sk(sk); + + /* Need to call it here, because it will announce new + * addresses, which can only be done after the third ack + * of the 3-way handshake. + */ + mptcp_update_metasocket(tp->meta_sk); + } +#endif if (queued >= 0) return queued; @@ -6145,6 +6540,10 @@ if (tp->rx_opt.tstamp_ok) tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + if (mptcp(tp)) + tp->advmss -= MPTCP_SUB_LEN_DSM_ALIGN; +#endif if (!inet_csk(sk)->icsk_ca_ops->cong_control) tcp_update_pacing_rate(sk); @@ -6154,6 +6553,32 @@ tcp_initialize_rcv_mss(sk); tcp_fast_path_on(tp); +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + + /* Send an ACK when establishing a new MPTCP subflow, i.e. + * using an MP_JOIN subtype. + */ + if (mptcp(tp)) { + if (is_master_tp(tp)) { + mptcp_update_metasocket(mptcp_meta_sk(sk)); + } else { + struct sock *meta_sk = mptcp_meta_sk(sk); + + tcp_send_ack(sk); + + /* Update RTO as it might be worse/better */ + mptcp_set_rto(sk); + + /* If the new RTO would fire earlier, pull it in! 
*/ + if (tcp_sk(meta_sk)->packets_out && + icsk->icsk_timeout > inet_csk(meta_sk)->icsk_rto + jiffies) { + tcp_rearm_rto(meta_sk); + } + + mptcp_push_pending_frames(mptcp_meta_sk(sk)); + } + } +#endif break; case TCP_FIN_WAIT1: { @@ -6201,7 +6626,12 @@ tmo = tcp_fin_time(sk); if (tmo > TCP_TIMEWAIT_LEN) { inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) } else if (th->fin || sock_owned_by_user(sk)) { +#else + } else if (th->fin || mptcp_is_data_fin(skb) || + sock_owned_by_user(sk)) { +#endif /* Bad case. We could lose such FIN otherwise. * It is not a big problem, but it looks confusing * and not so rare event. We still can lose it now, @@ -6210,7 +6640,11 @@ */ inet_csk_reset_keepalive_timer(sk, tmo); } else { +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); +#else + tp->ops->time_wait(sk, TCP_FIN_WAIT2, tmo); +#endif goto discard; } break; @@ -6218,7 +6652,11 @@ case TCP_CLOSING: if (tp->snd_una == tp->write_seq) { +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) tcp_time_wait(sk, TCP_TIME_WAIT, 0); +#else + tp->ops->time_wait(sk, TCP_TIME_WAIT, 0); +#endif goto discard; } break; @@ -6230,6 +6668,11 @@ goto discard; } break; +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + case TCP_CLOSE: + if (tp->mp_killed) + goto discard; +#endif } /* step 6: check the URG bit */ @@ -6251,7 +6694,12 @@ */ if (sk->sk_shutdown & RCV_SHUTDOWN) { if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) { +#else + after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt) && + !mptcp(tp)) { +#endif NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA); tcp_reset(sk); return 1; @@ -6348,6 +6796,10 @@ ireq->wscale_ok = rx_opt->wscale_ok; ireq->acked = 0; ireq->ecn_ok = 0; +#if defined(CONFIG_BCM_KF_MPTCP) && 
defined(CONFIG_BCM_MPTCP) + ireq->mptcp_rqsk = 0; + ireq->saw_mpc = 0; +#endif ireq->ir_rmt_port = tcp_hdr(skb)->source; ireq->ir_num = ntohs(tcp_hdr(skb)->dest); ireq->ir_mark = inet_request_mark(sk, skb); @@ -6446,11 +6898,24 @@ * limitations, they conserve resources and peer is * evidently real one. */ +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + /* MPTCP: new subflows cannot be established in a stateless manner.*/ +#endif + +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) if ((net->ipv4.sysctl_tcp_syncookies == 2 || +#else + if (((!is_meta_sk(sk) && net->ipv4.sysctl_tcp_syncookies == 2) || +#endif inet_csk_reqsk_queue_is_full(sk)) && !isn) { want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name); if (!want_cookie) goto drop; +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + + if (is_meta_sk(sk)) + goto drop; +#endif } if (sk_acceptq_is_full(sk)) { @@ -6468,8 +6933,13 @@ tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = af_ops->mss_clamp; tmp_opt.user_mss = tp->rx_opt.user_mss; +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, want_cookie ? NULL : &foc); +#else + tcp_parse_options(sock_net(sk), skb, &tmp_opt, NULL, 0, + want_cookie ? 
NULL : &foc, NULL); +#endif if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); @@ -6484,7 +6954,12 @@ /* Note: tcp_v6_init_req() might override ir_iif for link locals */ inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb); +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) af_ops->init_req(req, sk, skb); +#else + if (af_ops->init_req(req, sk, skb, want_cookie)) + goto drop_and_free; +#endif if (security_inet_conn_request(sk, skb, req)) goto drop_and_free; @@ -6520,7 +6995,11 @@ tcp_ecn_create_request(req, skb, sk, dst); if (want_cookie) { +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) isn = cookie_init_sequence(af_ops, sk, skb, &req->mss); +#else + isn = cookie_init_sequence(af_ops, req, sk, skb, &req->mss); +#endif req->cookie_ts = tmp_opt.tstamp_ok; if (!tmp_opt.tstamp_ok) inet_rsk(req)->ecn_ok = 0; @@ -6535,18 +7014,36 @@ fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst); } if (fastopen_sk) { +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + struct sock *meta_sk = fastopen_sk; + + if (mptcp(tcp_sk(fastopen_sk))) + meta_sk = mptcp_meta_sk(fastopen_sk); +#endif af_ops->send_synack(fastopen_sk, dst, &fl, req, &foc, TCP_SYNACK_FASTOPEN); /* Add the child socket directly into the accept queue */ +#if !defined(CONFIG_BCM_KF_MPTCP) || !defined(CONFIG_BCM_MPTCP) if (!inet_csk_reqsk_queue_add(sk, req, fastopen_sk)) { +#else + if (!inet_csk_reqsk_queue_add(sk, req, meta_sk)) { +#endif reqsk_fastopen_remove(fastopen_sk, req, false); bh_unlock_sock(fastopen_sk); +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + if (meta_sk != fastopen_sk) + bh_unlock_sock(meta_sk); +#endif sock_put(fastopen_sk); reqsk_put(req); goto drop; } sk->sk_data_ready(sk); bh_unlock_sock(fastopen_sk); +#if defined(CONFIG_BCM_KF_MPTCP) && defined(CONFIG_BCM_MPTCP) + if (meta_sk != fastopen_sk) + bh_unlock_sock(meta_sk); +#endif sock_put(fastopen_sk); } else { tcp_rsk(req)->tfo_listener = false;