--- zzzz-none-000/linux-3.10.107/net/sunrpc/svc_xprt.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/net/sunrpc/svc_xprt.c 2021-02-04 17:41:59.000000000 +0000 @@ -15,6 +15,7 @@ #include #include #include +#include #define RPCDBG_FACILITY RPCDBG_SVCXPRT @@ -40,7 +41,7 @@ * svc_pool->sp_lock protects most of the fields of that pool. * svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt. * when both need to be taken (rare), svc_serv->sv_lock is first. - * BKL protects svc_serv->sv_nrthread. + * The "service mutex" protects svc_serv->sv_nrthread. * svc_sock->sk_lock protects the svc_sock->sk_deferred list * and the ->sk_info_authunix cache. * @@ -65,7 +66,6 @@ * that no other thread will be using the transport or will * try to set XPT_DEAD. */ - int svc_reg_xprt_class(struct svc_xprt_class *xcl) { struct svc_xprt_class *cl; @@ -218,15 +218,18 @@ */ static void svc_xprt_received(struct svc_xprt *xprt) { - WARN_ON_ONCE(!test_bit(XPT_BUSY, &xprt->xpt_flags)); - if (!test_bit(XPT_BUSY, &xprt->xpt_flags)) + if (!test_bit(XPT_BUSY, &xprt->xpt_flags)) { + WARN_ONCE(1, "xprt=0x%p already busy!", xprt); return; + } + /* As soon as we clear busy, the xprt could be closed and - * 'put', so we need a reference to call svc_xprt_enqueue with: + * 'put', so we need a reference to call svc_enqueue_xprt with: */ svc_xprt_get(xprt); + smp_mb__before_atomic(); clear_bit(XPT_BUSY, &xprt->xpt_flags); - svc_xprt_enqueue(xprt); + xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt); svc_xprt_put(xprt); } @@ -307,25 +310,6 @@ } EXPORT_SYMBOL_GPL(svc_print_addr); -/* - * Queue up an idle server thread. Must have pool->sp_lock held. - * Note: this is really a stack rather than a queue, so that we only - * use as many different threads as we need, and the rest don't pollute - * the cache. - */ -static void svc_thread_enqueue(struct svc_pool *pool, struct svc_rqst *rqstp) -{ - list_add(&rqstp->rq_list, &pool->sp_threads); -} - -/* - * Dequeue an nfsd thread. Must have pool->sp_lock held. - */ -static void svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp) -{ - list_del(&rqstp->rq_list); -} - static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt) { if (xprt->xpt_flags & ((1<xpt_server, cpu); - put_cpu(); - - spin_lock_bh(&pool->sp_lock); - - if (!list_empty(&pool->sp_threads) && - !list_empty(&pool->sp_sockets)) - printk(KERN_ERR - "svc_xprt_enqueue: " - "threads and transports both waiting??\n"); - - pool->sp_stats.packets++; + goto out; /* Mark transport as busy. It will remain in this state until * the provider calls svc_xprt_received. We update XPT_BUSY @@ -371,52 +337,108 @@ if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) { /* Don't enqueue transport while already enqueued */ dprintk("svc: transport %p busy, not enqueued\n", xprt); - goto out_unlock; + goto out; } - if (!list_empty(&pool->sp_threads)) { - rqstp = list_entry(pool->sp_threads.next, - struct svc_rqst, - rq_list); - dprintk("svc: transport %p served by daemon %p\n", - xprt, rqstp); - svc_thread_dequeue(pool, rqstp); - if (rqstp->rq_xprt) - printk(KERN_ERR - "svc_xprt_enqueue: server %p, rq_xprt=%p!\n", - rqstp, rqstp->rq_xprt); - rqstp->rq_xprt = xprt; - svc_xprt_get(xprt); - pool->sp_stats.threads_woken++; - wake_up(&rqstp->rq_wait); - } else { + cpu = get_cpu(); + pool = svc_pool_for_cpu(xprt->xpt_server, cpu); + + atomic_long_inc(&pool->sp_stats.packets); + +redo_search: + /* find a thread for this xprt */ + rcu_read_lock(); + list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) { + /* Do a lockless check first */ + if (test_bit(RQ_BUSY, &rqstp->rq_flags)) + continue; + + /* + * Once the xprt has been queued, it can only be dequeued by + * the task that intends to service it. All we can do at that + * point is to try to wake this thread back up so that it can + * do so. + */ + if (!queued) { + spin_lock_bh(&rqstp->rq_lock); + if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) { + /* already busy, move on... */ + spin_unlock_bh(&rqstp->rq_lock); + continue; + } + + /* this one will do */ + rqstp->rq_xprt = xprt; + svc_xprt_get(xprt); + spin_unlock_bh(&rqstp->rq_lock); + } + rcu_read_unlock(); + + atomic_long_inc(&pool->sp_stats.threads_woken); + wake_up_process(rqstp->rq_task); + put_cpu(); + goto out; + } + rcu_read_unlock(); + + /* + * We didn't find an idle thread to use, so we need to queue the xprt. + * Do so and then search again. If we find one, we can't hook this one + * up to it directly but we can wake the thread up in the hopes that it + * will pick it up once it searches for a xprt to service. + */ + if (!queued) { + queued = true; dprintk("svc: transport %p put into queue\n", xprt); + spin_lock_bh(&pool->sp_lock); list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); pool->sp_stats.sockets_queued++; + spin_unlock_bh(&pool->sp_lock); + goto redo_search; } + rqstp = NULL; + put_cpu(); +out: + trace_svc_xprt_do_enqueue(xprt, rqstp); +} +EXPORT_SYMBOL_GPL(svc_xprt_do_enqueue); -out_unlock: - spin_unlock_bh(&pool->sp_lock); +/* + * Queue up a transport with data pending. If there are idle nfsd + * processes, wake 'em up. + * + */ +void svc_xprt_enqueue(struct svc_xprt *xprt) +{ + if (test_bit(XPT_BUSY, &xprt->xpt_flags)) + return; + xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt); } EXPORT_SYMBOL_GPL(svc_xprt_enqueue); /* - * Dequeue the first transport. Must be called with the pool->sp_lock held. + * Dequeue the first transport, if there is one. */ static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool) { - struct svc_xprt *xprt; + struct svc_xprt *xprt = NULL; if (list_empty(&pool->sp_sockets)) - return NULL; - - xprt = list_entry(pool->sp_sockets.next, - struct svc_xprt, xpt_ready); - list_del_init(&xprt->xpt_ready); + goto out; - dprintk("svc: transport %p dequeued, inuse=%d\n", - xprt, atomic_read(&xprt->xpt_ref.refcount)); + spin_lock_bh(&pool->sp_lock); + if (likely(!list_empty(&pool->sp_sockets))) { + xprt = list_first_entry(&pool->sp_sockets, + struct svc_xprt, xpt_ready); + list_del_init(&xprt->xpt_ready); + svc_xprt_get(xprt); + dprintk("svc: transport %p dequeued, inuse=%d\n", + xprt, atomic_read(&xprt->xpt_ref.refcount)); + } + spin_unlock_bh(&pool->sp_lock); +out: + trace_svc_xprt_dequeue(xprt); return xprt; } @@ -439,6 +461,8 @@ atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved); rqstp->rq_reserved = space; + if (xprt->xpt_ops->xpo_adjust_wspace) + xprt->xpt_ops->xpo_adjust_wspace(xprt); svc_xprt_enqueue(xprt); } } @@ -475,34 +499,36 @@ } /* - * External function to wake up a server waiting for data - * This really only makes sense for services like lockd - * which have exactly one thread anyway. + * Some svc_serv's will have occasional work to do, even when a xprt is not + * waiting to be serviced. This function is there to "kick" a task in one of + * those services so that it can wake up and do that work. Note that we only + * bother with pool 0 as we don't need to wake up more than one thread for + * this purpose. */ void svc_wake_up(struct svc_serv *serv) { struct svc_rqst *rqstp; - unsigned int i; struct svc_pool *pool; - for (i = 0; i < serv->sv_nrpools; i++) { - pool = &serv->sv_pools[i]; + pool = &serv->sv_pools[0]; - spin_lock_bh(&pool->sp_lock); - if (!list_empty(&pool->sp_threads)) { - rqstp = list_entry(pool->sp_threads.next, - struct svc_rqst, - rq_list); - dprintk("svc: daemon %p woken up.\n", rqstp); - /* - svc_thread_dequeue(pool, rqstp); - rqstp->rq_xprt = NULL; - */ - wake_up(&rqstp->rq_wait); - } else - pool->sp_task_pending = 1; - spin_unlock_bh(&pool->sp_lock); + rcu_read_lock(); + list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) { + /* skip any that aren't queued */ + if (test_bit(RQ_BUSY, &rqstp->rq_flags)) + continue; + rcu_read_unlock(); + dprintk("svc: daemon %p woken up.\n", rqstp); + wake_up_process(rqstp->rq_task); + trace_svc_wake_up(rqstp->rq_task->pid); + return; } + rcu_read_unlock(); + + /* No free entries available */ + set_bit(SP_TASK_PENDING, &pool->sp_flags); + smp_wmb(); + trace_svc_wake_up(0); } EXPORT_SYMBOL_GPL(svc_wake_up); @@ -571,7 +597,7 @@ } } -int svc_alloc_arg(struct svc_rqst *rqstp) +static int svc_alloc_arg(struct svc_rqst *rqstp) { struct svc_serv *serv = rqstp->rq_server; struct xdr_buf *arg; @@ -597,6 +623,7 @@ } rqstp->rq_pages[i] = p; } + rqstp->rq_page_end = &rqstp->rq_pages[i]; rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */ /* Make arg->head point to first page and arg->pages point to rest */ @@ -612,86 +639,89 @@ return 0; } -struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) +static bool +rqst_should_sleep(struct svc_rqst *rqstp) +{ + struct svc_pool *pool = rqstp->rq_pool; + + /* did someone call svc_wake_up? */ + if (test_and_clear_bit(SP_TASK_PENDING, &pool->sp_flags)) + return false; + + /* was a socket queued? */ + if (!list_empty(&pool->sp_sockets)) + return false; + + /* are we shutting down? */ + if (signalled() || kthread_should_stop()) + return false; + + /* are we freezing? */ + if (freezing(current)) + return false; + + return true; +} + +static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) { struct svc_xprt *xprt; struct svc_pool *pool = rqstp->rq_pool; - DECLARE_WAITQUEUE(wait, current); - long time_left; + long time_left = 0; + + /* rq_xprt should be clear on entry */ + WARN_ON_ONCE(rqstp->rq_xprt); /* Normally we will wait up to 5 seconds for any required * cache information to be provided. */ rqstp->rq_chandle.thread_wait = 5*HZ; - spin_lock_bh(&pool->sp_lock); xprt = svc_xprt_dequeue(pool); if (xprt) { rqstp->rq_xprt = xprt; - svc_xprt_get(xprt); /* As there is a shortage of threads and this request * had to be queued, don't allow the thread to wait so * long for cache updates. */ rqstp->rq_chandle.thread_wait = 1*HZ; - pool->sp_task_pending = 0; - } else { - if (pool->sp_task_pending) { - pool->sp_task_pending = 0; - spin_unlock_bh(&pool->sp_lock); - return ERR_PTR(-EAGAIN); - } - /* No data pending. Go to sleep */ - svc_thread_enqueue(pool, rqstp); + clear_bit(SP_TASK_PENDING, &pool->sp_flags); + return xprt; + } - /* - * We have to be able to interrupt this wait - * to bring down the daemons ... - */ - set_current_state(TASK_INTERRUPTIBLE); + /* + * We have to be able to interrupt this wait + * to bring down the daemons ... + */ + set_current_state(TASK_INTERRUPTIBLE); + clear_bit(RQ_BUSY, &rqstp->rq_flags); + smp_mb(); - /* - * checking kthread_should_stop() here allows us to avoid - * locking and signalling when stopping kthreads that call - * svc_recv. If the thread has already been woken up, then - * we can exit here without sleeping. If not, then it - * it'll be woken up quickly during the schedule_timeout - */ - if (kthread_should_stop()) { - set_current_state(TASK_RUNNING); - spin_unlock_bh(&pool->sp_lock); - return ERR_PTR(-EINTR); - } + if (likely(rqst_should_sleep(rqstp))) + time_left = schedule_timeout(timeout); + else + __set_current_state(TASK_RUNNING); - add_wait_queue(&rqstp->rq_wait, &wait); - spin_unlock_bh(&pool->sp_lock); + try_to_freeze(); - time_left = schedule_timeout(timeout); + spin_lock_bh(&rqstp->rq_lock); + set_bit(RQ_BUSY, &rqstp->rq_flags); + spin_unlock_bh(&rqstp->rq_lock); - try_to_freeze(); + xprt = rqstp->rq_xprt; + if (xprt != NULL) + return xprt; - spin_lock_bh(&pool->sp_lock); - remove_wait_queue(&rqstp->rq_wait, &wait); - if (!time_left) - pool->sp_stats.threads_timedout++; - - xprt = rqstp->rq_xprt; - if (!xprt) { - svc_thread_dequeue(pool, rqstp); - spin_unlock_bh(&pool->sp_lock); - dprintk("svc: server %p, no data yet\n", rqstp); - if (signalled() || kthread_should_stop()) - return ERR_PTR(-EINTR); - else - return ERR_PTR(-EAGAIN); - } - } - spin_unlock_bh(&pool->sp_lock); - return xprt; + if (!time_left) + atomic_long_inc(&pool->sp_stats.threads_timedout); + + if (signalled() || kthread_should_stop()) + return ERR_PTR(-EINTR); + return ERR_PTR(-EAGAIN); } -void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt) +static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt) { spin_lock_bh(&serv->sv_lock); set_bit(XPT_TEMP, &newxpt->xpt_flags); @@ -717,7 +747,7 @@ dprintk("svc_recv: found XPT_CLOSE\n"); svc_delete_xprt(xprt); /* Leave XPT_BUSY set on the dead xprt: */ - return 0; + goto out; } if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) { struct svc_xprt *newxpt; @@ -732,7 +762,7 @@ svc_add_new_temp_xprt(serv, newxpt); else module_put(xprt->xpt_class->xcl_owner); - } else if (xprt->xpt_ops->xpo_has_wspace(xprt)) { + } else { /* XPT_DATA|XPT_DEFERRED case: */ dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", rqstp, rqstp->rq_pool->sp_id, xprt, @@ -748,6 +778,8 @@ } /* clear XPT_BUSY: */ svc_xprt_received(xprt); +out: + trace_svc_handle_xprt(xprt, len); return len; } @@ -769,42 +801,49 @@ printk(KERN_ERR "svc_recv: service %p, transport not NULL!\n", rqstp); - if (waitqueue_active(&rqstp->rq_wait)) - printk(KERN_ERR - "svc_recv: service %p, wait queue active!\n", - rqstp); err = svc_alloc_arg(rqstp); if (err) - return err; + goto out; try_to_freeze(); cond_resched(); + err = -EINTR; if (signalled() || kthread_should_stop()) - return -EINTR; + goto out; xprt = svc_get_next_xprt(rqstp, timeout); - if (IS_ERR(xprt)) - return PTR_ERR(xprt); + if (IS_ERR(xprt)) { + err = PTR_ERR(xprt); + goto out; + } len = svc_handle_xprt(rqstp, xprt); /* No data, incomplete (TCP) read, or accept() */ + err = -EAGAIN; if (len <= 0) - goto out; + goto out_release; clear_bit(XPT_OLD, &xprt->xpt_flags); - rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp)); + if (xprt->xpt_ops->xpo_secure_port(rqstp)) + set_bit(RQ_SECURE, &rqstp->rq_flags); + else + clear_bit(RQ_SECURE, &rqstp->rq_flags); rqstp->rq_chandle.defer = svc_defer; + rqstp->rq_xid = svc_getu32(&rqstp->rq_arg.head[0]); if (serv->sv_stats) serv->sv_stats->netcnt++; + trace_svc_recv(rqstp, len); return len; -out: +out_release: rqstp->rq_res.len = 0; svc_xprt_release(rqstp); - return -EAGAIN; +out: + trace_svc_recv(rqstp, err); + return err; } EXPORT_SYMBOL_GPL(svc_recv); @@ -824,12 +863,12 @@ int svc_send(struct svc_rqst *rqstp) { struct svc_xprt *xprt; - int len; + int len = -EFAULT; struct xdr_buf *xb; xprt = rqstp->rq_xprt; if (!xprt) - return -EFAULT; + goto out; /* release the receive skb before sending the reply */ rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp); @@ -852,7 +891,9 @@ svc_xprt_release(rqstp); if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN) - return 0; + len = 0; +out: + trace_svc_send(rqstp, len); return len; } @@ -887,7 +928,6 @@ continue; list_del_init(le); set_bit(XPT_CLOSE, &xprt->xpt_flags); - set_bit(XPT_DETACHED, &xprt->xpt_flags); dprintk("queuing xprt %p for closing\n", xprt); /* a thread will dequeue and close it soon */ @@ -927,8 +967,7 @@ xprt->xpt_ops->xpo_detach(xprt); spin_lock_bh(&serv->sv_lock); - if (!test_and_set_bit(XPT_DETACHED, &xprt->xpt_flags)) - list_del_init(&xprt->xpt_list); + list_del_init(&xprt->xpt_list); WARN_ON_ONCE(!list_empty(&xprt->xpt_ready)); if (test_bit(XPT_TEMP, &xprt->xpt_flags)) serv->sv_tmpcnt--; @@ -1072,7 +1111,7 @@ struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle); struct svc_deferred_req *dr; - if (rqstp->rq_arg.page_len || !rqstp->rq_usedeferral) + if (rqstp->rq_arg.page_len || !test_bit(RQ_USEDEFERRAL, &rqstp->rq_flags)) return NULL; /* if more than a page, give up FIXME */ if (rqstp->rq_deferred) { dr = rqstp->rq_deferred; @@ -1101,7 +1140,7 @@ } svc_xprt_get(rqstp->rq_xprt); dr->xprt = rqstp->rq_xprt; - rqstp->rq_dropme = true; + set_bit(RQ_DROPME, &rqstp->rq_flags); dr->handle.revisit = svc_revisit; return &dr->handle; @@ -1303,10 +1342,10 @@ seq_printf(m, "%u %lu %lu %lu %lu\n", pool->sp_id, - pool->sp_stats.packets, + (unsigned long)atomic_long_read(&pool->sp_stats.packets), pool->sp_stats.sockets_queued, - pool->sp_stats.threads_woken, - pool->sp_stats.threads_timedout); + (unsigned long)atomic_long_read(&pool->sp_stats.threads_woken), + (unsigned long)atomic_long_read(&pool->sp_stats.threads_timedout)); return 0; }