--- zzzz-none-000/linux-3.10.107/drivers/md/dm-log-userspace-base.c	2017-06-27 09:49:32.000000000 +0000
+++ scorpion-7490-727/linux-3.10.107/drivers/md/dm-log-userspace-base.c	2021-02-04 17:41:59.000000000 +0000
@@ -6,16 +6,20 @@
 
 #include <linux/bio.h>
 #include <linux/slab.h>
+#include <linux/jiffies.h>
 #include <linux/dm-dirty-log.h>
 #include <linux/device-mapper.h>
 #include <linux/dm-log-userspace.h>
 #include <linux/module.h>
+#include <linux/workqueue.h>
 
 #include "dm-log-userspace-transfer.h"
 
-#define DM_LOG_USERSPACE_VSN "1.1.0"
+#define DM_LOG_USERSPACE_VSN "1.3.0"
 
-struct flush_entry {
+#define FLUSH_ENTRY_POOL_SIZE 16
+
+struct dm_dirty_log_flush_entry {
 	int type;
 	region_t region;
 	struct list_head list;
@@ -32,13 +36,24 @@
 struct log_c {
 	struct dm_target *ti;
 	struct dm_dev *log_dev;
+
+	char *usr_argv_str;
+	uint32_t usr_argc;
+
 	uint32_t region_size;
 	region_t region_count;
 	uint64_t luid;
 	char uuid[DM_UUID_LEN];
 
-	char *usr_argv_str;
-	uint32_t usr_argc;
+	/*
+	 * Mark and clear requests are held until a flush is issued
+	 * so that we can group, and thereby limit, the amount of
+	 * network traffic between kernel and userspace.  The 'flush_lock'
+	 * is used to protect these lists.
+	 */
+	spinlock_t flush_lock;
+	struct list_head mark_list;
+	struct list_head clear_list;
 
 	/*
 	 * in_sync_hint gets set when doing is_remote_recovering.  It
@@ -50,27 +65,21 @@
 	uint64_t in_sync_hint;
 
 	/*
-	 * Mark and clear requests are held until a flush is issued
-	 * so that we can group, and thereby limit, the amount of
-	 * network traffic between kernel and userspace.  The 'flush_lock'
-	 * is used to protect these lists.
+	 * Workqueue for flush of clear region requests.
 	 */
-	spinlock_t flush_lock;
-	struct list_head mark_list;
-	struct list_head clear_list;
-};
+	struct workqueue_struct *dmlog_wq;
+	struct delayed_work flush_log_work;
+	atomic_t sched_flush;
 
-static mempool_t *flush_entry_pool;
+	/*
+	 * Combine userspace flush and mark requests for efficiency.
+	 */
+	uint32_t integrated_flush;
 
-static void *flush_entry_alloc(gfp_t gfp_mask, void *pool_data)
-{
-	return kmalloc(sizeof(struct flush_entry), gfp_mask);
-}
+	mempool_t *flush_entry_pool;
+};
 
-static void flush_entry_free(void *element, void *pool_data)
-{
-	kfree(element);
-}
+static struct kmem_cache *_flush_entry_cache;
 
 static int userspace_do_request(struct log_c *lc, const char *uuid,
 				int request_type, char *data, size_t data_size,
@@ -122,6 +131,9 @@
 
 	*ctr_str = NULL;
 
+	/*
+	 * Determine overall size of the string.
+	 */
 	for (i = 0, str_size = 0; i < argc; i++)
 		str_size += strlen(argv[i]) + 1; /* +1 for space between args */
 
@@ -141,18 +153,39 @@
 	return str_size;
 }
 
+static void do_flush(struct work_struct *work)
+{
+	int r;
+	struct log_c *lc = container_of(work, struct log_c, flush_log_work.work);
+
+	atomic_set(&lc->sched_flush, 0);
+
+	r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH, NULL, 0, NULL, NULL);
+
+	if (r)
+		dm_table_event(lc->ti->table);
+}
+
 /*
  * userspace_ctr
  *
  * argv contains:
- *	<UUID> <other args>
- * Where 'other args' is the userspace implementation specific log
- * arguments.  An example might be:
- *	<UUID> clustered-disk <arg count> <log dev> <region_size> [[no]sync]
- *
- * So, this module will strip off the <UUID> for identification purposes
- * when communicating with userspace about a log; but will pass on everything
- * else.
+ *	<UUID> [integrated_flush] <other args>
+ * Where 'other args' are the userspace implementation-specific log
+ * arguments.
+ *
+ * Example:
+ *	<UUID> [integrated_flush] clustered-disk <arg count> <log dev>
+ *	<region_size> [[no]sync]
+ *
+ * This module strips off the <UUID> and uses it for identification
+ * purposes when communicating with userspace about a log.
+ *
+ * If integrated_flush is defined, the kernel combines flush
+ * and mark requests.
+ *
+ * The rest of the line, beginning with 'clustered-disk', is passed
+ * to the userspace ctr function.
 */
 static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
 			 unsigned argc, char **argv)
@@ -188,12 +221,22 @@
 		return -EINVAL;
 	}
 
+	lc->usr_argc = argc;
+
 	strncpy(lc->uuid, argv[0], DM_UUID_LEN);
+	argc--;
+	argv++;
 	spin_lock_init(&lc->flush_lock);
 	INIT_LIST_HEAD(&lc->mark_list);
 	INIT_LIST_HEAD(&lc->clear_list);
 
-	str_size = build_constructor_string(ti, argc - 1, argv + 1, &ctr_str);
+	if (!strcasecmp(argv[0], "integrated_flush")) {
+		lc->integrated_flush = 1;
+		argc--;
+		argv++;
+	}
+
+	str_size = build_constructor_string(ti, argc, argv, &ctr_str);
 	if (str_size < 0) {
 		kfree(lc);
 		return str_size;
 	}
@@ -206,6 +249,14 @@
 		goto out;
 	}
 
+	lc->flush_entry_pool = mempool_create_slab_pool(FLUSH_ENTRY_POOL_SIZE,
+							_flush_entry_cache);
+	if (!lc->flush_entry_pool) {
+		DMERR("Failed to create flush_entry_pool");
+		r = -ENOMEM;
+		goto out;
+	}
+
 	/*
 	 * Send table string and get back any opened device.
 	 */
@@ -246,14 +297,27 @@
 		DMERR("Failed to register %s with device-mapper",
 		      devices_rdata);
 	}
+
+	if (lc->integrated_flush) {
+		lc->dmlog_wq = alloc_workqueue("dmlogd", WQ_MEM_RECLAIM, 0);
+		if (!lc->dmlog_wq) {
+			DMERR("couldn't start dmlogd");
+			r = -ENOMEM;
+			goto out;
+		}
+
+		INIT_DELAYED_WORK(&lc->flush_log_work, do_flush);
+		atomic_set(&lc->sched_flush, 0);
+	}
+
 out:
 	kfree(devices_rdata);
 	if (r) {
+		mempool_destroy(lc->flush_entry_pool);
 		kfree(lc);
 		kfree(ctr_str);
 	} else {
 		lc->usr_argv_str = ctr_str;
-		lc->usr_argc = argc;
 		log->context = lc;
 	}
 
@@ -264,13 +328,22 @@
 {
 	struct log_c *lc = log->context;
 
+	if (lc->integrated_flush) {
+		/* flush workqueue */
+		if (atomic_read(&lc->sched_flush))
+			flush_delayed_work(&lc->flush_log_work);
+
+		destroy_workqueue(lc->dmlog_wq);
+	}
+
 	(void) dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_DTR,
-				    NULL, 0,
-				    NULL, NULL);
+				    NULL, 0, NULL, NULL);
 
 	if (lc->log_dev)
 		dm_put_device(lc->ti, lc->log_dev);
 
+	mempool_destroy(lc->flush_entry_pool);
+
 	kfree(lc->usr_argv_str);
 	kfree(lc);
 
@@ -283,8 +356,7 @@
 	struct log_c *lc = log->context;
 
 	r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_PRESUSPEND,
-				 NULL, 0,
-				 NULL, NULL);
+				 NULL, 0, NULL, NULL);
 
 	return r;
 }
@@ -294,9 +366,14 @@
 	int r;
 	struct log_c *lc = log->context;
 
+	/*
+	 * Run planned flush earlier.
+	 */
+	if (lc->integrated_flush && atomic_read(&lc->sched_flush))
+		flush_delayed_work(&lc->flush_log_work);
+
 	r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_POSTSUSPEND,
-				 NULL, 0,
-				 NULL, NULL);
+				 NULL, 0, NULL, NULL);
 
 	return r;
 }
@@ -308,8 +385,7 @@
 
 	lc->in_sync_hint = 0;
 	r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_RESUME,
-				 NULL, 0,
-				 NULL, NULL);
+				 NULL, 0, NULL, NULL);
 
 	return r;
 }
@@ -391,7 +467,7 @@
 static int flush_one_by_one(struct log_c *lc, struct list_head *flush_list)
 {
 	int r = 0;
-	struct flush_entry *fe;
+	struct dm_dirty_log_flush_entry *fe;
 
 	list_for_each_entry(fe, flush_list, list) {
 		r = userspace_do_request(lc, lc->uuid, fe->type,
@@ -405,12 +481,13 @@
 	return r;
 }
 
-static int flush_by_group(struct log_c *lc, struct list_head *flush_list)
+static int flush_by_group(struct log_c *lc, struct list_head *flush_list,
+			  int flush_with_payload)
 {
 	int r = 0;
 	int count;
 	uint32_t type = 0;
-	struct flush_entry *fe, *tmp_fe;
+	struct dm_dirty_log_flush_entry *fe, *tmp_fe;
 	LIST_HEAD(tmp_list);
 	uint64_t group[MAX_FLUSH_GROUP_COUNT];
 
@@ -431,15 +508,29 @@
 			break;
 		}
 
-		r = userspace_do_request(lc, lc->uuid, type,
-					 (char *)(group),
-					 count * sizeof(uint64_t),
-					 NULL, NULL);
-		if (r) {
-			/* Group send failed.  Attempt one-by-one. */
-			list_splice_init(&tmp_list, flush_list);
-			r = flush_one_by_one(lc, flush_list);
-			break;
+		if (flush_with_payload) {
+			r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH,
+						 (char *)(group),
+						 count * sizeof(uint64_t),
+						 NULL, NULL);
+			/*
+			 * Integrated flush failed.
+			 */
+			if (r)
+				break;
+		} else {
+			r = userspace_do_request(lc, lc->uuid, type,
+						 (char *)(group),
+						 count * sizeof(uint64_t),
+						 NULL, NULL);
+			if (r) {
+				/*
+				 * Group send failed.  Attempt one-by-one.
+				 */
+				list_splice_init(&tmp_list, flush_list);
+				r = flush_one_by_one(lc, flush_list);
+				break;
+			}
 		}
 	}
 
@@ -476,30 +567,61 @@
 	struct log_c *lc = log->context;
 	LIST_HEAD(mark_list);
 	LIST_HEAD(clear_list);
-	struct flush_entry *fe, *tmp_fe;
+	int mark_list_is_empty;
+	int clear_list_is_empty;
+	struct dm_dirty_log_flush_entry *fe, *tmp_fe;
+	mempool_t *flush_entry_pool = lc->flush_entry_pool;
 
 	spin_lock_irqsave(&lc->flush_lock, flags);
 	list_splice_init(&lc->mark_list, &mark_list);
 	list_splice_init(&lc->clear_list, &clear_list);
 	spin_unlock_irqrestore(&lc->flush_lock, flags);
 
-	if (list_empty(&mark_list) && list_empty(&clear_list))
+	mark_list_is_empty = list_empty(&mark_list);
+	clear_list_is_empty = list_empty(&clear_list);
+
+	if (mark_list_is_empty && clear_list_is_empty)
 		return 0;
 
-	r = flush_by_group(lc, &mark_list);
+	r = flush_by_group(lc, &clear_list, 0);
 	if (r)
-		goto fail;
+		goto out;
+
+	if (!lc->integrated_flush) {
+		r = flush_by_group(lc, &mark_list, 0);
+		if (r)
+			goto out;
+		r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH,
+					 NULL, 0, NULL, NULL);
+		goto out;
+	}
 
-	r = flush_by_group(lc, &clear_list);
+	/*
+	 * Send integrated flush request with mark_list as payload.
+	 */
+	r = flush_by_group(lc, &mark_list, 1);
 	if (r)
-		goto fail;
+		goto out;
 
-	r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH,
-				 NULL, 0, NULL, NULL);
+	if (mark_list_is_empty && !atomic_read(&lc->sched_flush)) {
+		/*
+		 * When there are only clear region requests,
+		 * we schedule a flush in the future.
+		 */
+		queue_delayed_work(lc->dmlog_wq, &lc->flush_log_work, 3 * HZ);
+		atomic_set(&lc->sched_flush, 1);
+	} else {
+		/*
+		 * Cancel pending flush because we
+		 * have already flushed in mark_region.
+		 */
+		cancel_delayed_work(&lc->flush_log_work);
+		atomic_set(&lc->sched_flush, 0);
	}
 
-fail:
+out:
 	/*
-	 * We can safely remove these entries, even if failure.
+	 * We can safely remove these entries, even after failure.
 	 * Calling code will receive an error and will know that
 	 * the log facility has failed.
 	 */
@@ -528,10 +650,10 @@
 {
 	unsigned long flags;
 	struct log_c *lc = log->context;
-	struct flush_entry *fe;
+	struct dm_dirty_log_flush_entry *fe;
 
 	/* Wait for an allocation, but _never_ fail */
-	fe = mempool_alloc(flush_entry_pool, GFP_NOIO);
+	fe = mempool_alloc(lc->flush_entry_pool, GFP_NOIO);
 	BUG_ON(!fe);
 
 	spin_lock_irqsave(&lc->flush_lock, flags);
@@ -557,7 +679,7 @@
 {
 	unsigned long flags;
 	struct log_c *lc = log->context;
-	struct flush_entry *fe;
+	struct dm_dirty_log_flush_entry *fe;
 
 	/*
 	 * If we fail to allocate, we skip the clearing of
@@ -565,7 +687,7 @@
 	 * to cause the region to be resync'ed when the
 	 * device is activated next time.
 	 */
-	fe = mempool_alloc(flush_entry_pool, GFP_ATOMIC);
+	fe = mempool_alloc(lc->flush_entry_pool, GFP_ATOMIC);
 	if (!fe) {
 		DMERR("Failed to allocate memory to clear region.");
 		return;
 	}
@@ -603,8 +725,7 @@
 
 	rdata_size = sizeof(pkg);
 	r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_RESYNC_WORK,
-				 NULL, 0,
-				 (char *)&pkg, &rdata_size);
+				 NULL, 0, (char *)&pkg, &rdata_size);
 
 	*region = pkg.r;
 	return (r) ? r : (int)pkg.i;
@@ -619,7 +740,6 @@
 static void userspace_set_region_sync(struct dm_dirty_log *log,
 				      region_t region, int in_sync)
 {
-	int r;
 	struct log_c *lc = log->context;
 	struct {
 		region_t r;
@@ -629,13 +749,12 @@
 	pkg.r = region;
 	pkg.i = (int64_t)in_sync;
 
-	r = userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC,
-				 (char *)&pkg, sizeof(pkg),
-				 NULL, NULL);
+	(void) userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC,
+				    (char *)&pkg, sizeof(pkg), NULL, NULL);
 
 	/*
 	 * It would be nice to be able to report failures.
-	 * However, it is easy emough to detect and resolve.
+	 * However, it is easy enough to detect and resolve.
 	 */
 	return;
 }
@@ -657,8 +776,7 @@
 	rdata_size = sizeof(sync_count);
 	r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_SYNC_COUNT,
-				 NULL, 0,
-				 (char *)&sync_count, &rdata_size);
+				 NULL, 0, (char *)&sync_count, &rdata_size);
 
 	if (r)
 		return 0;
 
@@ -685,8 +803,7 @@
 	switch (status_type) {
 	case STATUSTYPE_INFO:
 		r = userspace_do_request(lc, lc->uuid, DM_ULOG_STATUS_INFO,
-					 NULL, 0,
-					 result, &sz);
+					 NULL, 0, result, &sz);
 
 		if (r) {
 			sz = 0;
@@ -699,8 +816,10 @@
 		BUG_ON(!table_args); /* There will always be a ' ' */
 		table_args++;
 
-		DMEMIT("%s %u %s %s ", log->type->name, lc->usr_argc,
-		       lc->uuid, table_args);
+		DMEMIT("%s %u %s ", log->type->name, lc->usr_argc, lc->uuid);
+		if (lc->integrated_flush)
+			DMEMIT("integrated_flush ");
+		DMEMIT("%s ", table_args);
 		break;
 	}
 	return (r) ? 0 : (int)sz;
@@ -717,7 +836,7 @@
 	int r;
 	uint64_t region64 = region;
 	struct log_c *lc = log->context;
-	static unsigned long long limit;
+	static unsigned long limit;
 	struct {
 		int64_t is_recovering;
 		uint64_t in_sync_hint;
@@ -733,7 +852,7 @@
 	 */
 	if (region < lc->in_sync_hint)
 		return 0;
-	else if (jiffies < limit)
+	else if (time_after(limit, jiffies))
 		return 1;
 
 	limit = jiffies + (HZ / 4);
@@ -773,18 +892,16 @@
 {
 	int r = 0;
 
-	flush_entry_pool = mempool_create(100, flush_entry_alloc,
-					  flush_entry_free, NULL);
-
-	if (!flush_entry_pool) {
-		DMWARN("Unable to create flush_entry_pool: No memory.");
+	_flush_entry_cache = KMEM_CACHE(dm_dirty_log_flush_entry, 0);
+	if (!_flush_entry_cache) {
+		DMWARN("Unable to create flush_entry_cache: No memory.");
 		return -ENOMEM;
 	}
 
 	r = dm_ulog_tfr_init();
 	if (r) {
 		DMWARN("Unable to initialize userspace log communications");
-		mempool_destroy(flush_entry_pool);
+		kmem_cache_destroy(_flush_entry_cache);
 		return r;
 	}
 
@@ -792,7 +909,7 @@
 	if (r) {
 		DMWARN("Couldn't register userspace dirty log type");
 		dm_ulog_tfr_exit();
-		mempool_destroy(flush_entry_pool);
+		kmem_cache_destroy(_flush_entry_cache);
 		return r;
 	}
 
@@ -804,7 +921,7 @@
 {
 	dm_dirty_log_type_unregister(&_userspace_type);
 	dm_ulog_tfr_exit();
-	mempool_destroy(flush_entry_pool);
+	kmem_cache_destroy(_flush_entry_cache);
 
 	DMINFO("version " DM_LOG_USERSPACE_VSN " unloaded");
 	return;
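
Note on the protocol change above (an illustrative sketch, not part of the patch): with 'integrated_flush' enabled, flush_by_group() sends the final batch of pending mark requests as the payload of the DM_ULOG_FLUSH request itself -- an array of up to MAX_FLUSH_GROUP_COUNT uint64_t region numbers -- instead of issuing separate DM_ULOG_MARK_REGION requests followed by a payload-free flush. A userspace log daemon might decode such a payload as sketched below; mark_region_dirty() and commit_log_to_disk() are hypothetical stand-ins, not part of the dm-log-userspace API.

	/*
	 * Sketch only: decoding an integrated DM_ULOG_FLUSH payload in a
	 * userspace log server.  The kernel side (flush_by_group() above)
	 * sends 'count * sizeof(uint64_t)' bytes holding the region
	 * numbers of the pending mark requests.
	 */
	#include <stdint.h>
	#include <stddef.h>

	void mark_region_dirty(uint64_t region);	/* hypothetical helper */
	int commit_log_to_disk(void);			/* hypothetical helper */

	int handle_integrated_flush(const char *data, size_t data_size)
	{
		const uint64_t *region = (const uint64_t *)data;
		size_t i, count = data_size / sizeof(uint64_t);

		/* Apply every mark carried in the flush payload... */
		for (i = 0; i < count; i++)
			mark_region_dirty(region[i]);

		/* ...then make the whole log durable in one round trip. */
		return commit_log_to_disk();
	}

Note also that when a flush cycle carries only clear requests, the kernel defers the real flush by queueing flush_log_work roughly three seconds out (3 * HZ), so a userspace implementation must still expect plain DM_ULOG_FLUSH requests with no payload.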