--- zzzz-none-000/linux-3.10.107/net/rds/tcp_listen.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/net/rds/tcp_listen.c 2021-02-04 17:41:59.000000000 +0000 @@ -38,22 +38,49 @@ #include "rds.h" #include "tcp.h" -/* - * cheesy, but simple.. - */ -static void rds_tcp_accept_worker(struct work_struct *work); -static DECLARE_WORK(rds_tcp_listen_work, rds_tcp_accept_worker); -static struct socket *rds_tcp_listen_sock; +int rds_tcp_keepalive(struct socket *sock) +{ + /* values below based on xs_udp_default_timeout */ + int keepidle = 5; /* send a probe 'keepidle' secs after last data */ + int keepcnt = 5; /* number of unack'ed probes before declaring dead */ + int keepalive = 1; + int ret = 0; + + ret = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, + (char *)&keepalive, sizeof(keepalive)); + if (ret < 0) + goto bail; + + ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPCNT, + (char *)&keepcnt, sizeof(keepcnt)); + if (ret < 0) + goto bail; + + ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPIDLE, + (char *)&keepidle, sizeof(keepidle)); + if (ret < 0) + goto bail; + + /* KEEPINTVL is the interval between successive probes. We follow + * the model in xs_tcp_finish_connecting() and re-use keepidle. + */ + ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPINTVL, + (char *)&keepidle, sizeof(keepidle)); +bail: + return ret; +} -static int rds_tcp_accept_one(struct socket *sock) +int rds_tcp_accept_one(struct socket *sock) { struct socket *new_sock = NULL; struct rds_connection *conn; int ret; struct inet_sock *inet; + struct rds_tcp_connection *rs_tcp; - ret = sock_create_lite(sock->sk->sk_family, sock->sk->sk_type, - sock->sk->sk_protocol, &new_sock); + ret = sock_create_kern(sock_net(sock->sk), sock->sk->sk_family, + sock->sk->sk_type, sock->sk->sk_protocol, + &new_sock); if (ret) goto out; @@ -63,6 +90,10 @@ if (ret < 0) goto out; + ret = rds_tcp_keepalive(new_sock); + if (ret < 0) + goto out; + rds_tcp_tune(new_sock); inet = inet_sk(new_sock->sk); @@ -71,26 +102,35 @@ &inet->inet_saddr, ntohs(inet->inet_sport), &inet->inet_daddr, ntohs(inet->inet_dport)); - conn = rds_conn_create(inet->inet_saddr, inet->inet_daddr, + conn = rds_conn_create(sock_net(sock->sk), + inet->inet_saddr, inet->inet_daddr, &rds_tcp_transport, GFP_KERNEL); if (IS_ERR(conn)) { ret = PTR_ERR(conn); goto out; } - - /* - * see the comment above rds_queue_delayed_reconnect() + /* An incoming SYN request came in, and TCP just accepted it. + * + * If the client reboots, this conn will need to be cleaned up. + * rds_tcp_state_change() will do that cleanup */ - if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) { - if (rds_conn_state(conn) == RDS_CONN_UP) - rds_tcp_stats_inc(s_tcp_listen_closed_stale); - else - rds_tcp_stats_inc(s_tcp_connect_raced); - rds_conn_drop(conn); + rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data; + if (rs_tcp->t_sock && + ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr)) { + struct sock *nsk = new_sock->sk; + + nsk->sk_user_data = NULL; + nsk->sk_prot->disconnect(nsk, 0); + tcp_done(nsk); + new_sock = NULL; ret = 0; goto out; + } else if (rs_tcp->t_sock) { + rds_tcp_restore_callbacks(rs_tcp->t_sock, rs_tcp); + conn->c_outgoing = 0; } + rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING); rds_tcp_set_callbacks(new_sock, conn); rds_connect_complete(conn); new_sock = NULL; @@ -102,15 +142,9 @@ return ret; } -static void rds_tcp_accept_worker(struct work_struct *work) +void rds_tcp_listen_data_ready(struct sock *sk) { - while (rds_tcp_accept_one(rds_tcp_listen_sock) == 0) - cond_resched(); -} - -void rds_tcp_listen_data_ready(struct sock *sk, int bytes) -{ - void (*ready)(struct sock *sk, int bytes); + void (*ready)(struct sock *sk); rdsdebug("listen data ready sk %p\n", sk); @@ -128,20 +162,20 @@ * socket */ if (sk->sk_state == TCP_LISTEN) - queue_work(rds_wq, &rds_tcp_listen_work); + rds_tcp_accept_work(sk); out: read_unlock(&sk->sk_callback_lock); - ready(sk, bytes); + ready(sk); } -int rds_tcp_listen_init(void) +struct socket *rds_tcp_listen_init(struct net *net) { struct sockaddr_in sin; struct socket *sock = NULL; int ret; - ret = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); + ret = sock_create_kern(net, PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); if (ret < 0) goto out; @@ -153,7 +187,7 @@ sock->sk->sk_data_ready = rds_tcp_listen_data_ready; write_unlock_bh(&sock->sk->sk_callback_lock); - sin.sin_family = PF_INET, + sin.sin_family = PF_INET; sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY); sin.sin_port = (__force u16)htons(RDS_TCP_PORT); @@ -165,17 +199,15 @@ if (ret < 0) goto out; - rds_tcp_listen_sock = sock; - sock = NULL; + return sock; out: if (sock) sock_release(sock); - return ret; + return NULL; } -void rds_tcp_listen_stop(void) +void rds_tcp_listen_stop(struct socket *sock) { - struct socket *sock = rds_tcp_listen_sock; struct sock *sk; if (!sock) @@ -196,5 +228,4 @@ /* wait for accepts to stop and close the socket */ flush_workqueue(rds_wq); sock_release(sock); - rds_tcp_listen_sock = NULL; }