--- zzzz-none-000/linux-3.10.107/fs/ocfs2/dlm/dlmmaster.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/fs/ocfs2/dlm/dlmmaster.c 2021-02-04 17:41:59.000000000 +0000 @@ -82,9 +82,9 @@ return 1; } -static struct kmem_cache *dlm_lockres_cache = NULL; -static struct kmem_cache *dlm_lockname_cache = NULL; -static struct kmem_cache *dlm_mle_cache = NULL; +static struct kmem_cache *dlm_lockres_cache; +static struct kmem_cache *dlm_lockname_cache; +static struct kmem_cache *dlm_mle_cache; static void dlm_mle_release(struct kref *kref); static void dlm_init_mle(struct dlm_master_list_entry *mle, @@ -342,16 +342,13 @@ { struct dlm_master_list_entry *tmpmle; struct hlist_head *bucket; - struct hlist_node *list; unsigned int hash; assert_spin_locked(&dlm->master_lock); hash = dlm_lockid_hash(name, namelen); bucket = dlm_master_hash(dlm, hash); - hlist_for_each(list, bucket) { - tmpmle = hlist_entry(list, struct dlm_master_list_entry, - master_hash_node); + hlist_for_each_entry(tmpmle, bucket, master_hash_node) { if (!dlm_mle_equal(dlm, tmpmle, name, namelen)) continue; dlm_get_mle(tmpmle); @@ -475,11 +472,15 @@ void dlm_destroy_master_caches(void) { - if (dlm_lockname_cache) + if (dlm_lockname_cache) { kmem_cache_destroy(dlm_lockname_cache); + dlm_lockname_cache = NULL; + } - if (dlm_lockres_cache) + if (dlm_lockres_cache) { kmem_cache_destroy(dlm_lockres_cache); + dlm_lockres_cache = NULL; + } } static void dlm_lockres_release(struct kref *kref) @@ -497,16 +498,6 @@ mlog(0, "destroying lockres %.*s\n", res->lockname.len, res->lockname.name); - spin_lock(&dlm->track_lock); - if (!list_empty(&res->tracking)) - list_del_init(&res->tracking); - else { - mlog(ML_ERROR, "Resource %.*s not on the Tracking list\n", - res->lockname.len, res->lockname.name); - dlm_print_one_lock_resource(res); - } - spin_unlock(&dlm->track_lock); - atomic_dec(&dlm->res_cur_count); if (!hlist_unhashed(&res->hash_node) || @@ -580,6 +571,7 @@ atomic_set(&res->asts_reserved, 0); res->migration_pending = 0; res->inflight_locks = 0; + res->inflight_assert_workers = 0; res->dlm = dlm; @@ -623,9 +615,6 @@ return res; error: - if (res && res->lockname.name) - kmem_cache_free(dlm_lockname_cache, (void *)res->lockname.name); - if (res) kmem_cache_free(dlm_lockres_cache, res); return NULL; @@ -686,6 +675,35 @@ wake_up(&res->wq); } +void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res) +{ + assert_spin_locked(&res->spinlock); + res->inflight_assert_workers++; + mlog(0, "%s:%.*s: inflight assert worker++: now %u\n", + dlm->name, res->lockname.len, res->lockname.name, + res->inflight_assert_workers); +} + +static void __dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res) +{ + assert_spin_locked(&res->spinlock); + BUG_ON(res->inflight_assert_workers == 0); + res->inflight_assert_workers--; + mlog(0, "%s:%.*s: inflight assert worker--: now %u\n", + dlm->name, res->lockname.len, res->lockname.name, + res->inflight_assert_workers); +} + +static void dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res) +{ + spin_lock(&res->spinlock); + __dlm_lockres_drop_inflight_worker(dlm, res); + spin_unlock(&res->spinlock); +} + /* * lookup a lock resource by name. * may already exist in the hashtable. @@ -767,8 +785,18 @@ dlm_lockres_grab_inflight_ref(dlm, tmpres); spin_unlock(&tmpres->spinlock); - if (res) + if (res) { + spin_lock(&dlm->track_lock); + if (!list_empty(&res->tracking)) + list_del_init(&res->tracking); + else + mlog(ML_ERROR, "Resource %.*s not " + "on the Tracking list\n", + res->lockname.len, + res->lockname.name); + spin_unlock(&dlm->track_lock); dlm_lockres_put(res); + } res = tmpres; goto leave; } @@ -1411,6 +1439,7 @@ int found, ret; int set_maybe; int dispatch_assert = 0; + int dispatched = 0; if (!dlm_grab(dlm)) return DLM_MASTER_RESP_NO; @@ -1437,6 +1466,18 @@ /* take care of the easy cases up front */ spin_lock(&res->spinlock); + + /* + * Right after dlm spinlock was released, dlm_thread could have + * purged the lockres. Check if lockres got unhashed. If so + * start over. + */ + if (hlist_unhashed(&res->hash_node)) { + spin_unlock(&res->spinlock); + dlm_lockres_put(res); + goto way_up_top; + } + if (res->state & (DLM_LOCK_RES_RECOVERING| DLM_LOCK_RES_MIGRATING)) { spin_unlock(&res->spinlock); @@ -1611,19 +1652,26 @@ } mlog(0, "%u is the owner of %.*s, cleaning everyone else\n", dlm->node_num, res->lockname.len, res->lockname.name); + spin_lock(&res->spinlock); ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx, DLM_ASSERT_MASTER_MLE_CLEANUP); if (ret < 0) { mlog(ML_ERROR, "failed to dispatch assert master work\n"); response = DLM_MASTER_RESP_ERROR; + spin_unlock(&res->spinlock); dlm_lockres_put(res); + } else { + dispatched = 1; + __dlm_lockres_grab_inflight_worker(dlm, res); + spin_unlock(&res->spinlock); } } else { if (res) dlm_lockres_put(res); } - dlm_put(dlm); + if (!dispatched) + dlm_put(dlm); return response; } @@ -1903,8 +1951,10 @@ * up nodes that this node contacted */ while ((nn = find_next_bit (mle->response_map, O2NM_MAX_NODES, nn+1)) < O2NM_MAX_NODES) { - if (nn != dlm->node_num && nn != assert->node_idx) + if (nn != dlm->node_num && nn != assert->node_idx) { master_request = 1; + break; + } } } mle->master = assert->node_idx; @@ -2010,6 +2060,10 @@ "and killing the other node now! This node is OK and can continue.\n"); __dlm_print_one_lock_resource(res); spin_unlock(&res->spinlock); + spin_lock(&dlm->master_lock); + if (mle) + __dlm_put_mle(mle); + spin_unlock(&dlm->master_lock); spin_unlock(&dlm->spinlock); *ret_data = (void *)res; dlm_put(dlm); @@ -2041,7 +2095,6 @@ /* queue up work for dlm_assert_master_worker */ - dlm_grab(dlm); /* get an extra ref for the work item */ dlm_init_work_item(dlm, item, dlm_assert_master_worker, NULL); item->u.am.lockres = res; /* already have a ref */ /* can optionally ignore node numbers higher than this node */ @@ -2130,6 +2183,8 @@ dlm_lockres_release_ast(dlm, res); put: + dlm_lockres_drop_inflight_worker(dlm, res); + dlm_lockres_put(res); mlog(0, "finished with dlm_assert_master_worker\n"); @@ -2372,6 +2427,14 @@ assert_spin_locked(&res->spinlock); + /* delay migration when the lockres is in MIGRATING state */ + if (res->state & DLM_LOCK_RES_MIGRATING) + return 0; + + /* delay migration when the lockres is in RECOCERING state */ + if (res->state & DLM_LOCK_RES_RECOVERING) + return 0; + if (res->owner != dlm->node_num) return 0; @@ -2775,6 +2838,8 @@ res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY; if (!ret) BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING)); + else + res->migration_pending = 0; spin_unlock(&res->spinlock); /* @@ -3091,11 +3156,15 @@ /* remove it so that only one mle will be found */ __dlm_unlink_mle(dlm, tmp); __dlm_mle_detach_hb_events(dlm, tmp); - ret = DLM_MIGRATE_RESPONSE_MASTERY_REF; - mlog(0, "%s:%.*s: master=%u, newmaster=%u, " - "telling master to get ref for cleared out mle " - "during migration\n", dlm->name, namelen, name, - master, new_master); + if (tmp->type == DLM_MLE_MASTER) { + ret = DLM_MIGRATE_RESPONSE_MASTERY_REF; + mlog(0, "%s:%.*s: master=%u, newmaster=%u, " + "telling master to get ref " + "for cleared out mle during " + "migration\n", dlm->name, + namelen, name, master, + new_master); + } } spin_unlock(&tmp->spinlock); } @@ -3193,7 +3262,7 @@ struct dlm_master_list_entry *mle; struct dlm_lock_resource *res; struct hlist_head *bucket; - struct hlist_node *list; + struct hlist_node *tmp; unsigned int i; mlog(0, "dlm=%s, dead node=%u\n", dlm->name, dead_node); @@ -3204,10 +3273,7 @@ spin_lock(&dlm->master_lock); for (i = 0; i < DLM_HASH_BUCKETS; i++) { bucket = dlm_master_hash(dlm, i); - hlist_for_each(list, bucket) { - mle = hlist_entry(list, struct dlm_master_list_entry, - master_hash_node); - + hlist_for_each_entry_safe(mle, tmp, bucket, master_hash_node) { BUG_ON(mle->type != DLM_MLE_BLOCK && mle->type != DLM_MLE_MASTER && mle->type != DLM_MLE_MIGRATION); @@ -3397,7 +3463,7 @@ int i; struct hlist_head *bucket; struct dlm_master_list_entry *mle; - struct hlist_node *tmp, *list; + struct hlist_node *tmp; /* * We notified all other nodes that we are exiting the domain and @@ -3413,9 +3479,7 @@ for (i = 0; i < DLM_HASH_BUCKETS; i++) { bucket = dlm_master_hash(dlm, i); - hlist_for_each_safe(list, tmp, bucket) { - mle = hlist_entry(list, struct dlm_master_list_entry, - master_hash_node); + hlist_for_each_entry_safe(mle, tmp, bucket, master_hash_node) { if (mle->type != DLM_MLE_BLOCK) { mlog(ML_ERROR, "bad mle: %p\n", mle); dlm_print_one_mle(mle);