--- zzzz-none-000/linux-3.10.107/fs/nfsd/nfscache.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/fs/nfsd/nfscache.c 2021-02-04 17:41:59.000000000 +0000 @@ -27,8 +27,12 @@ */ #define TARGET_BUCKET_SIZE 64 -static struct hlist_head * cache_hash; -static struct list_head lru_head; +struct nfsd_drc_bucket { + struct list_head lru_head; + spinlock_t cache_lock; +}; + +static struct nfsd_drc_bucket *drc_hashtbl; static struct kmem_cache *drc_slab; /* max number of entries allowed in the cache */ @@ -36,6 +40,7 @@ /* number of significant bits in the hash value */ static unsigned int maskbits; +static unsigned int drc_hashsize; /* * Stats and other tracking of on the duplicate reply cache. All of these and @@ -43,7 +48,7 @@ */ /* total number of entries */ -static unsigned int num_drc_entries; +static atomic_t num_drc_entries; /* cache misses due only to checksum comparison failures */ static unsigned int payload_misses; @@ -58,24 +63,18 @@ static unsigned int longest_chain_cachesize; static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); -static void cache_cleaner_func(struct work_struct *unused); -static int nfsd_reply_cache_shrink(struct shrinker *shrink, - struct shrink_control *sc); +static unsigned long nfsd_reply_cache_count(struct shrinker *shrink, + struct shrink_control *sc); +static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink, + struct shrink_control *sc); static struct shrinker nfsd_reply_cache_shrinker = { - .shrink = nfsd_reply_cache_shrink, + .scan_objects = nfsd_reply_cache_scan, + .count_objects = nfsd_reply_cache_count, .seeks = 1, }; /* - * locking for the reply cache: - * A cache entry is "single use" if c_state == RC_INPROG - * Otherwise, it when accessing _prev or _next, the lock must be held. - */ -static DEFINE_SPINLOCK(cache_lock); -static DECLARE_DELAYED_WORK(cache_cleaner, cache_cleaner_func); - -/* * Put a cap on the size of the DRC based on the amount of available * low memory in the machine. * @@ -113,6 +112,12 @@ return roundup_pow_of_two(limit / TARGET_BUCKET_SIZE); } +static u32 +nfsd_cache_hash(__be32 xid) +{ + return hash_32(be32_to_cpu(xid), maskbits); +} + static struct svc_cacherep * nfsd_reply_cache_alloc(void) { @@ -123,7 +128,6 @@ rp->c_state = RC_UNUSED; rp->c_type = RC_NOCACHE; INIT_LIST_HEAD(&rp->c_lru); - INIT_HLIST_NODE(&rp->c_hash); } return rp; } @@ -135,41 +139,48 @@ drc_mem_usage -= rp->c_replvec.iov_len; kfree(rp->c_replvec.iov_base); } - if (!hlist_unhashed(&rp->c_hash)) - hlist_del(&rp->c_hash); list_del(&rp->c_lru); - --num_drc_entries; + atomic_dec(&num_drc_entries); drc_mem_usage -= sizeof(*rp); kmem_cache_free(drc_slab, rp); } static void -nfsd_reply_cache_free(struct svc_cacherep *rp) +nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp) { - spin_lock(&cache_lock); + spin_lock(&b->cache_lock); nfsd_reply_cache_free_locked(rp); - spin_unlock(&cache_lock); + spin_unlock(&b->cache_lock); } int nfsd_reply_cache_init(void) { unsigned int hashsize; + unsigned int i; + int status = 0; - INIT_LIST_HEAD(&lru_head); max_drc_entries = nfsd_cache_size_limit(); - num_drc_entries = 0; + atomic_set(&num_drc_entries, 0); hashsize = nfsd_hashsize(max_drc_entries); maskbits = ilog2(hashsize); - register_shrinker(&nfsd_reply_cache_shrinker); + status = register_shrinker(&nfsd_reply_cache_shrinker); + if (status) + return status; + drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep), 0, 0, NULL); if (!drc_slab) goto out_nomem; - cache_hash = kcalloc(hashsize, sizeof(struct hlist_head), GFP_KERNEL); - if (!cache_hash) + drc_hashtbl = kcalloc(hashsize, sizeof(*drc_hashtbl), GFP_KERNEL); + if (!drc_hashtbl) goto out_nomem; + for (i = 0; i < hashsize; i++) { + INIT_LIST_HEAD(&drc_hashtbl[i].lru_head); + spin_lock_init(&drc_hashtbl[i].cache_lock); + } + drc_hashsize = hashsize; return 0; out_nomem: @@ -181,22 +192,24 @@ void nfsd_reply_cache_shutdown(void) { struct svc_cacherep *rp; + unsigned int i; unregister_shrinker(&nfsd_reply_cache_shrinker); - cancel_delayed_work_sync(&cache_cleaner); - while (!list_empty(&lru_head)) { - rp = list_entry(lru_head.next, struct svc_cacherep, c_lru); - nfsd_reply_cache_free_locked(rp); + for (i = 0; i < drc_hashsize; i++) { + struct list_head *head = &drc_hashtbl[i].lru_head; + while (!list_empty(head)) { + rp = list_first_entry(head, struct svc_cacherep, c_lru); + nfsd_reply_cache_free_locked(rp); + } } - kfree (cache_hash); - cache_hash = NULL; + kfree (drc_hashtbl); + drc_hashtbl = NULL; + drc_hashsize = 0; - if (drc_slab) { - kmem_cache_destroy(drc_slab); - drc_slab = NULL; - } + kmem_cache_destroy(drc_slab); + drc_slab = NULL; } /* @@ -204,79 +217,67 @@ * not already scheduled. */ static void -lru_put_end(struct svc_cacherep *rp) +lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp) { rp->c_timestamp = jiffies; - list_move_tail(&rp->c_lru, &lru_head); - schedule_delayed_work(&cache_cleaner, RC_EXPIRE); + list_move_tail(&rp->c_lru, &b->lru_head); } -/* - * Move a cache entry from one hash list to another - */ -static void -hash_refile(struct svc_cacherep *rp) -{ - hlist_del_init(&rp->c_hash); - hlist_add_head(&rp->c_hash, cache_hash + hash_32(rp->c_xid, maskbits)); -} - -/* - * Walk the LRU list and prune off entries that are older than RC_EXPIRE. - * Also prune the oldest ones when the total exceeds the max number of entries. - */ -static void -prune_cache_entries(void) +static long +prune_bucket(struct nfsd_drc_bucket *b) { struct svc_cacherep *rp, *tmp; + long freed = 0; - list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) { + list_for_each_entry_safe(rp, tmp, &b->lru_head, c_lru) { /* * Don't free entries attached to calls that are still * in-progress, but do keep scanning the list. */ if (rp->c_state == RC_INPROG) continue; - if (num_drc_entries <= max_drc_entries && + if (atomic_read(&num_drc_entries) <= max_drc_entries && time_before(jiffies, rp->c_timestamp + RC_EXPIRE)) break; nfsd_reply_cache_free_locked(rp); + freed++; } - - /* - * Conditionally rearm the job. If we cleaned out the list, then - * cancel any pending run (since there won't be any work to do). - * Otherwise, we rearm the job or modify the existing one to run in - * RC_EXPIRE since we just ran the pruner. - */ - if (list_empty(&lru_head)) - cancel_delayed_work(&cache_cleaner); - else - mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE); + return freed; } -static void -cache_cleaner_func(struct work_struct *unused) +/* + * Walk the LRU list and prune off entries that are older than RC_EXPIRE. + * Also prune the oldest ones when the total exceeds the max number of entries. + */ +static long +prune_cache_entries(void) { - spin_lock(&cache_lock); - prune_cache_entries(); - spin_unlock(&cache_lock); -} + unsigned int i; + long freed = 0; -static int -nfsd_reply_cache_shrink(struct shrinker *shrink, struct shrink_control *sc) -{ - unsigned int num; + for (i = 0; i < drc_hashsize; i++) { + struct nfsd_drc_bucket *b = &drc_hashtbl[i]; - spin_lock(&cache_lock); - if (sc->nr_to_scan) - prune_cache_entries(); - num = num_drc_entries; - spin_unlock(&cache_lock); + if (list_empty(&b->lru_head)) + continue; + spin_lock(&b->cache_lock); + freed += prune_bucket(b); + spin_unlock(&b->cache_lock); + } + return freed; +} - return num; +static unsigned long +nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc) +{ + return atomic_read(&num_drc_entries); } +static unsigned long +nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc) +{ + return prune_cache_entries(); +} /* * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes */ @@ -313,20 +314,24 @@ static bool nfsd_cache_match(struct svc_rqst *rqstp, __wsum csum, struct svc_cacherep *rp) { - /* Check RPC header info first */ - if (rqstp->rq_xid != rp->c_xid || rqstp->rq_proc != rp->c_proc || - rqstp->rq_prot != rp->c_prot || rqstp->rq_vers != rp->c_vers || - rqstp->rq_arg.len != rp->c_len || - !rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) || - rpc_get_port(svc_addr(rqstp)) != rpc_get_port((struct sockaddr *)&rp->c_addr)) + /* Check RPC XID first */ + if (rqstp->rq_xid != rp->c_xid) return false; - /* compare checksum of NFS data */ if (csum != rp->c_csum) { ++payload_misses; return false; } + /* Other discriminators */ + if (rqstp->rq_proc != rp->c_proc || + rqstp->rq_prot != rp->c_prot || + rqstp->rq_vers != rp->c_vers || + rqstp->rq_arg.len != rp->c_len || + !rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) || + rpc_get_port(svc_addr(rqstp)) != rpc_get_port((struct sockaddr *)&rp->c_addr)) + return false; + return true; } @@ -336,14 +341,14 @@ * NULL on failure. */ static struct svc_cacherep * -nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum) +nfsd_cache_search(struct nfsd_drc_bucket *b, struct svc_rqst *rqstp, + __wsum csum) { struct svc_cacherep *rp, *ret = NULL; - struct hlist_head *rh; + struct list_head *rh = &b->lru_head; unsigned int entries = 0; - rh = &cache_hash[hash_32(rqstp->rq_xid, maskbits)]; - hlist_for_each_entry(rp, rh, c_hash) { + list_for_each_entry(rp, rh, c_lru) { ++entries; if (nfsd_cache_match(rqstp, csum, rp)) { ret = rp; @@ -354,11 +359,12 @@ /* tally hash chain length stats */ if (entries > longest_chain) { longest_chain = entries; - longest_chain_cachesize = num_drc_entries; + longest_chain_cachesize = atomic_read(&num_drc_entries); } else if (entries == longest_chain) { /* prefer to keep the smallest cachesize possible here */ - longest_chain_cachesize = min(longest_chain_cachesize, - num_drc_entries); + longest_chain_cachesize = min_t(unsigned int, + longest_chain_cachesize, + atomic_read(&num_drc_entries)); } return ret; @@ -380,6 +386,8 @@ vers = rqstp->rq_vers, proc = rqstp->rq_proc; __wsum csum; + u32 hash = nfsd_cache_hash(xid); + struct nfsd_drc_bucket *b = &drc_hashtbl[hash]; unsigned long age; int type = rqstp->rq_cachetype; int rtn = RC_DOIT; @@ -397,16 +405,16 @@ * preallocate an entry. */ rp = nfsd_reply_cache_alloc(); - spin_lock(&cache_lock); + spin_lock(&b->cache_lock); if (likely(rp)) { - ++num_drc_entries; + atomic_inc(&num_drc_entries); drc_mem_usage += sizeof(*rp); } /* go ahead and prune the cache */ - prune_cache_entries(); + prune_bucket(b); - found = nfsd_cache_search(rqstp, csum); + found = nfsd_cache_search(b, rqstp, csum); if (found) { if (likely(rp)) nfsd_reply_cache_free_locked(rp); @@ -431,8 +439,7 @@ rp->c_len = rqstp->rq_arg.len; rp->c_csum = csum; - hash_refile(rp); - lru_put_end(rp); + lru_put_end(b, rp); /* release any buffer */ if (rp->c_type == RC_REPLBUFF) { @@ -442,14 +449,14 @@ } rp->c_type = RC_NOCACHE; out: - spin_unlock(&cache_lock); + spin_unlock(&b->cache_lock); return rtn; found_entry: nfsdstats.rchits++; /* We found a matching entry which is either in progress or done. */ age = jiffies - rp->c_timestamp; - lru_put_end(rp); + lru_put_end(b, rp); rtn = RC_DROPIT; /* Request being processed or excessive rexmits */ @@ -459,7 +466,7 @@ /* From the hall of fame of impractical attacks: * Is this a user who tries to snoop on the cache? */ rtn = RC_DOIT; - if (!rqstp->rq_secure && rp->c_secure) + if (!test_bit(RQ_SECURE, &rqstp->rq_flags) && rp->c_secure) goto out; /* Compose RPC reply header */ @@ -504,18 +511,23 @@ { struct svc_cacherep *rp = rqstp->rq_cacherep; struct kvec *resv = &rqstp->rq_res.head[0], *cachv; + u32 hash; + struct nfsd_drc_bucket *b; int len; size_t bufsize = 0; if (!rp) return; + hash = nfsd_cache_hash(rp->c_xid); + b = &drc_hashtbl[hash]; + len = resv->iov_len - ((char*)statp - (char*)resv->iov_base); len >>= 2; /* Don't cache excessive amounts of data and XDR failures */ if (!statp || len > (256 >> 2)) { - nfsd_reply_cache_free(rp); + nfsd_reply_cache_free(b, rp); return; } @@ -530,23 +542,23 @@ bufsize = len << 2; cachv->iov_base = kmalloc(bufsize, GFP_KERNEL); if (!cachv->iov_base) { - nfsd_reply_cache_free(rp); + nfsd_reply_cache_free(b, rp); return; } cachv->iov_len = bufsize; memcpy(cachv->iov_base, statp, bufsize); break; case RC_NOCACHE: - nfsd_reply_cache_free(rp); + nfsd_reply_cache_free(b, rp); return; } - spin_lock(&cache_lock); + spin_lock(&b->cache_lock); drc_mem_usage += bufsize; - lru_put_end(rp); - rp->c_secure = rqstp->rq_secure; + lru_put_end(b, rp); + rp->c_secure = test_bit(RQ_SECURE, &rqstp->rq_flags); rp->c_type = cachetype; rp->c_state = RC_DONE; - spin_unlock(&cache_lock); + spin_unlock(&b->cache_lock); return; } @@ -577,9 +589,9 @@ */ static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) { - spin_lock(&cache_lock); seq_printf(m, "max entries: %u\n", max_drc_entries); - seq_printf(m, "num entries: %u\n", num_drc_entries); + seq_printf(m, "num entries: %u\n", + atomic_read(&num_drc_entries)); seq_printf(m, "hash buckets: %u\n", 1 << maskbits); seq_printf(m, "mem usage: %u\n", drc_mem_usage); seq_printf(m, "cache hits: %u\n", nfsdstats.rchits); @@ -588,7 +600,6 @@ seq_printf(m, "payload misses: %u\n", payload_misses); seq_printf(m, "longest chain len: %u\n", longest_chain); seq_printf(m, "cachesize at longest: %u\n", longest_chain_cachesize); - spin_unlock(&cache_lock); return 0; }