--- zzzz-none-000/linux-4.9.276/net/ipv4/tcp_output.c 2021-07-20 14:21:16.000000000 +0000
+++ falcon-5530-750/linux-4.9.276/net/ipv4/tcp_output.c 2023-04-05 08:19:02.000000000 +0000
@@ -42,6 +42,9 @@
 #include <linux/gfp.h>
 #include <linux/module.h>
 
+#if IS_ENABLED(CONFIG_PPA_TCP_LITEPATH)
+extern int32_t (*ppa_sw_litepath_tcp_send_hook)(struct sk_buff *skb);
+#endif
 /* People can turn this off for buggy TCP's found in printers etc. */
 int sysctl_tcp_retrans_collapse __read_mostly = 1;
 
@@ -777,25 +780,27 @@
                 list_del(&tp->tsq_node);
 
                 sk = (struct sock *)tp;
-                bh_lock_sock(sk);
+                smp_mb__before_atomic();
+                clear_bit(TSQ_QUEUED, &sk->sk_tsq_flags);
 
-                if (!sock_owned_by_user(sk)) {
-                        tcp_tsq_handler(sk);
-                } else {
-                        /* defer the work to tcp_release_cb() */
-                        set_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags);
+                if (!sk->sk_lock.owned &&
+                    test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags)) {
+                        bh_lock_sock(sk);
+                        if (!sock_owned_by_user(sk)) {
+                                clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags);
+                                tcp_tsq_handler(sk);
+                        }
+                        bh_unlock_sock(sk);
                 }
-                bh_unlock_sock(sk);
 
-                clear_bit(TSQ_QUEUED, &tp->tsq_flags);
                 sk_free(sk);
         }
 }
 
-#define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) |          \
-                          (1UL << TCP_WRITE_TIMER_DEFERRED) |  \
-                          (1UL << TCP_DELACK_TIMER_DEFERRED) | \
-                          (1UL << TCP_MTU_REDUCED_DEFERRED))
+#define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED |           \
+                          TCPF_WRITE_TIMER_DEFERRED |   \
+                          TCPF_DELACK_TIMER_DEFERRED |  \
+                          TCPF_MTU_REDUCED_DEFERRED)
 /**
  * tcp_release_cb - tcp release_sock() callback
  * @sk: socket
@@ -805,18 +810,17 @@
  */
 void tcp_release_cb(struct sock *sk)
 {
-        struct tcp_sock *tp = tcp_sk(sk);
         unsigned long flags, nflags;
 
         /* perform an atomic operation only if at least one flag is set */
         do {
-                flags = tp->tsq_flags;
+                flags = sk->sk_tsq_flags;
                 if (!(flags & TCP_DEFERRED_ALL))
                         return;
                 nflags = flags & ~TCP_DEFERRED_ALL;
-        } while (cmpxchg(&tp->tsq_flags, flags, nflags) != flags);
+        } while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags);
 
-        if (flags & (1UL << TCP_TSQ_DEFERRED))
+        if (flags & TCPF_TSQ_DEFERRED)
                 tcp_tsq_handler(sk);
 
         /* Here begins the tricky part :
@@ -830,15 +834,15 @@
          */
         sock_release_ownership(sk);
 
-        if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) {
+        if (flags & TCPF_WRITE_TIMER_DEFERRED) {
                 tcp_write_timer_handler(sk);
                 __sock_put(sk);
         }
-        if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) {
+        if (flags & TCPF_DELACK_TIMER_DEFERRED) {
                 tcp_delack_timer_handler(sk);
                 __sock_put(sk);
         }
-        if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) {
+        if (flags & TCPF_MTU_REDUCED_DEFERRED) {
                 inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
                 __sock_put(sk);
         }
@@ -868,6 +872,7 @@
 {
         struct sock *sk = skb->sk;
         struct tcp_sock *tp = tcp_sk(sk);
+        unsigned long flags, nval, oval;
         int wmem;
 
         /* Keep one reference on sk_wmem_alloc.
@@ -885,16 +890,25 @@
         if (wmem >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current)
                 goto out;
 
-        if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) &&
-            !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) {
-                unsigned long flags;
+        for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) {
                 struct tsq_tasklet *tsq;
+                bool empty;
+
+                if (!(oval & TSQF_THROTTLED) || (oval & TSQF_QUEUED))
+                        goto out;
+
+                nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED;
+                nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
+                if (nval != oval)
+                        continue;
 
                 /* queue this socket to tasklet queue */
                 local_irq_save(flags);
                 tsq = this_cpu_ptr(&tsq_tasklet);
+                empty = list_empty(&tsq->head);
                 list_add(&tp->tsq_node, &tsq->head);
-                tasklet_schedule(&tsq->tasklet);
+                if (empty)
+                        tasklet_schedule(&tsq->tasklet);
                 local_irq_restore(flags);
                 return;
         }
@@ -972,6 +986,10 @@
         skb_set_hash_from_sk(skb, sk);
         atomic_add(skb->truesize, &sk->sk_wmem_alloc);
 
+#ifdef CONFIG_AVM_SK_TC_INDEX
+        skb->tc_index = sk->sk_tc_index;
+#endif
+
         /* Build TCP header and checksum it. */
         th = (struct tcphdr *)skb->data;
         th->source = inet->inet_sport;
@@ -1040,9 +1058,20 @@
         /* Cleanup our debris for IP stacks */
         memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
                                sizeof(struct inet6_skb_parm)));
-
+#if IS_ENABLED(CONFIG_PPA_TCP_LITEPATH)
+        if (sk->sk_state == TCP_ESTABLISHED) {
+                if (ppa_sw_litepath_tcp_send_hook) {
+                        err = ppa_sw_litepath_tcp_send_hook(skb);
+                        if (!err)
+                                goto xmit_done;
+                }
+        }
+#endif
         err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
 
+#if IS_ENABLED(CONFIG_PPA_TCP_LITEPATH)
+xmit_done:
+#endif
         if (unlikely(err > 0)) {
                 tcp_enter_cwr(sk);
                 err = net_xmit_eval(err);
@@ -1216,6 +1245,10 @@
         buff->truesize += nlen;
         skb->truesize -= nlen;
 
+#ifdef CONFIG_AVM_SK_TC_INDEX
+        skb->tc_index = sk->sk_tc_index;
+#endif
+
         /* Correct the sequence numbers. */
         TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
         TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
@@ -1599,7 +1632,7 @@
 {
         u32 bytes, segs;
 
-        bytes = min(sk->sk_pacing_rate >> 10,
+        bytes = min(sk->sk_pacing_rate >> sk->sk_pacing_shift,
                     sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
 
         /* Goal is to send at least one packet per ms,
@@ -1967,26 +2000,26 @@
  */
 static int tcp_mtu_probe(struct sock *sk)
 {
-        struct tcp_sock *tp = tcp_sk(sk);
         struct inet_connection_sock *icsk = inet_csk(sk);
+        struct tcp_sock *tp = tcp_sk(sk);
         struct sk_buff *skb, *nskb, *next;
         struct net *net = sock_net(sk);
-        int len;
         int probe_size;
         int size_needed;
-        int copy;
+        int copy, len;
         int mss_now;
         int interval;
 
         /* Not currently probing/verifying,
          * not in recovery,
          * have enough cwnd, and
-         * not SACKing (the variable headers throw things off) */
-        if (!icsk->icsk_mtup.enabled ||
-            icsk->icsk_mtup.probe_size ||
-            inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
-            tp->snd_cwnd < 11 ||
-            tp->rx_opt.num_sacks || tp->rx_opt.dsack)
+         * not SACKing (the variable headers throw things off)
+         */
+        if (likely(!icsk->icsk_mtup.enabled ||
+                   icsk->icsk_mtup.probe_size ||
+                   inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
+                   tp->snd_cwnd < 11 ||
+                   tp->rx_opt.num_sacks || tp->rx_opt.dsack))
                 return -1;
 
         /* Use binary search for probe_size between tcp_mss_base,
@@ -2130,12 +2163,21 @@
 {
         unsigned int limit;
 
-        limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
+        limit = max(2 * skb->truesize, sk->sk_pacing_rate >> sk->sk_pacing_shift);
         limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
         limit <<= factor;
 
         if (atomic_read(&sk->sk_wmem_alloc) > limit) {
-                set_bit(TSQ_THROTTLED, &tcp_sk(sk)->tsq_flags);
+                /* Always send the 1st or 2nd skb in write queue.
+                 * No need to wait for TX completion to call us back,
+                 * after softirq/tasklet schedule.
+                 * This helps when TX completions are delayed too much.
+                 */
+                if (skb == sk->sk_write_queue.next ||
+                    skb->prev == sk->sk_write_queue.next)
+                        return false;
+
+                set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
                 /* It is possible TX completion already happened
                  * before we set TSQ_THROTTLED, so we must
                  * test again the condition.
@@ -2233,6 +2275,8 @@
                     unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
                         break;
 
+                if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
+                        clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags);
                 if (tcp_small_queue_check(sk, skb, 0))
                         break;
 
@@ -3009,8 +3053,14 @@
          * Note: in the latter case, FIN packet will be sent after a timeout,
          * as TCP stack thinks it has already been transmitted.
          */
+#ifdef CONFIG_LTQ_TOE_DRIVER
+        /* don't piggyback the FIN for large packets (TSO)
+           bug in HW will add FIN flag for all the segmented packets */
+        if (tskb && (tcp_send_head(sk) || tcp_under_memory_pressure(sk)) && (tskb->len < tcp_current_mss(sk))) {
+#else
         if (tskb && (tcp_send_head(sk) || tcp_under_memory_pressure(sk))) {
-coalesce:
+#endif
+        coalesce:
                 TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
                 TCP_SKB_CB(tskb)->end_seq++;
                 tp->write_seq++;
@@ -3556,8 +3606,6 @@
         /* We do not want pure acks influencing TCP Small Queues or fq/pacing
          * too much.
          * SKB_TRUESIZE(max(1 .. 66, MAX_TCP_HEADER)) is unfortunately ~784
-         * We also avoid tcp_wfree() overhead (cache line miss accessing
-         * tp->tsq_flags) by using regular sock_wfree()
          */
         skb_set_tcp_pure_ack(buff);
 
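For reference, the core technique this backport applies in tcp_wfree() and tcp_release_cb() is a lock-free "read flags, compute new value, compare-and-swap, retry" loop on sk->sk_tsq_flags. Below is a minimal standalone sketch of that pattern, not part of the patch: the flag bit values and the helper name try_queue_socket() are invented for the illustration, and C11 atomic_compare_exchange_weak() stands in for the kernel's cmpxchg().

/* Build with: cc -std=c11 tsq_flags_demo.c */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define TSQF_THROTTLED    (1UL << 0)   /* illustrative bit values only */
#define TSQF_QUEUED       (1UL << 1)
#define TCPF_TSQ_DEFERRED (1UL << 2)

static _Atomic unsigned long sk_tsq_flags = TSQF_THROTTLED;

/* Mirrors the shape of the patched tcp_wfree() loop: queue the socket
 * at most once, and only if it is currently throttled; otherwise leave
 * the flag word untouched and bail out.
 */
static bool try_queue_socket(void)
{
        unsigned long oval = atomic_load(&sk_tsq_flags);
        unsigned long nval;

        do {
                if (!(oval & TSQF_THROTTLED) || (oval & TSQF_QUEUED))
                        return false;   /* nothing to do, like "goto out" */

                nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED;
                /* On CAS failure, oval is reloaded with the current value
                 * and the conditions above are re-checked.
                 */
        } while (!atomic_compare_exchange_weak(&sk_tsq_flags, &oval, nval));

        return true;    /* caller would now add the socket to the tasklet list */
}

int main(void)
{
        printf("first attempt queued:  %d\n", try_queue_socket());
        printf("second attempt queued: %d\n", try_queue_socket());
        printf("flags now: %#lx\n", atomic_load(&sk_tsq_flags));
        return 0;
}

The point of the single compare-and-swap, as in the patch, is that clearing the throttled bit, marking the socket queued, and recording the deferred work all happen in one atomic update instead of the two separate test_and_*_bit() operations used by the original 4.9 code.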