--- zzzz-none-000/linux-3.10.107/drivers/md/bitmap.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/drivers/md/bitmap.c 2021-02-04 17:41:59.000000000 +0000 @@ -72,6 +72,19 @@ /* this page has not been allocated yet */ spin_unlock_irq(&bitmap->lock); + /* It is possible that this is being called inside a + * prepare_to_wait/finish_wait loop from raid5c:make_request(). + * In general it is not permitted to sleep in that context as it + * can cause the loop to spin freely. + * That doesn't apply here as we can only reach this point + * once with any loop. + * When this function completes, either bp[page].map or + * bp[page].hijacked. In either case, this function will + * abort before getting to this point again. So there is + * no risk of a free-spin, and so it is safe to assert + * that sleeping here is allowed. + */ + sched_annotate_sleep(); mappage = kzalloc(PAGE_SIZE, GFP_NOIO); spin_lock_irq(&bitmap->lock); @@ -164,11 +177,16 @@ * nr_pending is 0 and In_sync is clear, the entries we return will * still be in the same position on the list when we re-enter * list_for_each_entry_continue_rcu. + * + * Note that if entered with 'rdev == NULL' to start at the + * beginning, we temporarily assign 'rdev' to an address which + * isn't really an rdev, but which can be used by + * list_for_each_entry_continue_rcu() to find the first entry. */ rcu_read_lock(); if (rdev == NULL) /* start at the beginning */ - rdev = list_entry_rcu(&mddev->disks, struct md_rdev, same_set); + rdev = list_entry(&mddev->disks, struct md_rdev, same_set); else { /* release the previous rdev and start from there. */ rdev_dec_pending(rdev, mddev); @@ -192,6 +210,10 @@ struct block_device *bdev; struct mddev *mddev = bitmap->mddev; struct bitmap_storage *store = &bitmap->storage; + int node_offset = 0; + + if (mddev_is_clustered(bitmap->mddev)) + node_offset = bitmap->cluster_slot * store->file_pages; while ((rdev = next_active_rdev(rdev, mddev)) != NULL) { int size = PAGE_SIZE; @@ -420,6 +442,7 @@ /* This might have been changed by a reshape */ sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors); sb->chunksize = cpu_to_le32(bitmap->mddev->bitmap_info.chunksize); + sb->nodes = cpu_to_le32(bitmap->mddev->bitmap_info.nodes); sb->sectors_reserved = cpu_to_le32(bitmap->mddev-> bitmap_info.space); kunmap_atomic(sb); @@ -471,7 +494,7 @@ bitmap_super_t *sb; unsigned long chunksize, daemon_sleep, write_behind; - bitmap->storage.sb_page = alloc_page(GFP_KERNEL); + bitmap->storage.sb_page = alloc_page(GFP_KERNEL | __GFP_ZERO); if (bitmap->storage.sb_page == NULL) return -ENOMEM; bitmap->storage.sb_page->index = 0; @@ -518,6 +541,7 @@ sb->state = cpu_to_le32(bitmap->flags); bitmap->events_cleared = bitmap->mddev->events; sb->events_cleared = cpu_to_le64(bitmap->mddev->events); + bitmap->mddev->bitmap_info.nodes = 0; kunmap_atomic(sb); @@ -531,9 +555,11 @@ bitmap_super_t *sb; unsigned long chunksize, daemon_sleep, write_behind; unsigned long long events; + int nodes = 0; unsigned long sectors_reserved = 0; int err = -EINVAL; struct page *sb_page; + loff_t offset = bitmap->mddev->bitmap_info.offset; if (!bitmap->storage.file && !bitmap->mddev->bitmap_info.offset) { chunksize = 128 * 1024 * 1024; @@ -549,6 +575,22 @@ return -ENOMEM; bitmap->storage.sb_page = sb_page; +re_read: + /* If cluster_slot is set, the cluster is setup */ + if (bitmap->cluster_slot >= 0) { + sector_t bm_blocks = bitmap->mddev->resync_max_sectors; + + sector_div(bm_blocks, + bitmap->mddev->bitmap_info.chunksize >> 9); + /* bits to bytes */ + bm_blocks = ((bm_blocks+7) >> 3) + sizeof(bitmap_super_t); + /* to 4k blocks */ + bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 4096); + offset = bitmap->mddev->bitmap_info.offset + (bitmap->cluster_slot * (bm_blocks << 3)); + pr_info("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__, + bitmap->cluster_slot, offset); + } + if (bitmap->storage.file) { loff_t isize = i_size_read(bitmap->storage.file->f_mapping->host); int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize; @@ -557,7 +599,7 @@ bitmap, bytes, sb_page); } else { err = read_sb_page(bitmap->mddev, - bitmap->mddev->bitmap_info.offset, + offset, sb_page, 0, sizeof(bitmap_super_t)); } @@ -565,19 +607,26 @@ return err; err = -EINVAL; - sb = kmap_atomic(sb_page); chunksize = le32_to_cpu(sb->chunksize); daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ; write_behind = le32_to_cpu(sb->write_behind); sectors_reserved = le32_to_cpu(sb->sectors_reserved); + /* Setup nodes/clustername only if bitmap version is + * cluster-compatible + */ + if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) { + nodes = le32_to_cpu(sb->nodes); + strlcpy(bitmap->mddev->bitmap_info.cluster_name, + sb->cluster_name, 64); + } /* verify that the bitmap-specific fields are valid */ if (sb->magic != cpu_to_le32(BITMAP_MAGIC)) reason = "bad magic"; else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO || - le32_to_cpu(sb->version) > BITMAP_MAJOR_HI) + le32_to_cpu(sb->version) > BITMAP_MAJOR_CLUSTERED) reason = "unrecognized superblock version"; else if (chunksize < 512) reason = "bitmap chunksize too small"; @@ -608,7 +657,7 @@ goto out; } events = le64_to_cpu(sb->events); - if (events < bitmap->mddev->events) { + if (!nodes && (events < bitmap->mddev->events)) { printk(KERN_INFO "%s: bitmap file is out of date (%llu < %llu) " "-- forcing full recovery\n", @@ -623,20 +672,40 @@ if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN) set_bit(BITMAP_HOSTENDIAN, &bitmap->flags); bitmap->events_cleared = le64_to_cpu(sb->events_cleared); + strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64); err = 0; + out: kunmap_atomic(sb); + /* Assiging chunksize is required for "re_read" */ + bitmap->mddev->bitmap_info.chunksize = chunksize; + if (err == 0 && nodes && (bitmap->cluster_slot < 0)) { + err = md_setup_cluster(bitmap->mddev, nodes); + if (err) { + pr_err("%s: Could not setup cluster service (%d)\n", + bmname(bitmap), err); + goto out_no_sb; + } + bitmap->cluster_slot = md_cluster_ops->slot_number(bitmap->mddev); + goto re_read; + } + + out_no_sb: if (test_bit(BITMAP_STALE, &bitmap->flags)) bitmap->events_cleared = bitmap->mddev->events; bitmap->mddev->bitmap_info.chunksize = chunksize; bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep; bitmap->mddev->bitmap_info.max_write_behind = write_behind; + bitmap->mddev->bitmap_info.nodes = nodes; if (bitmap->mddev->bitmap_info.space == 0 || bitmap->mddev->bitmap_info.space > sectors_reserved) bitmap->mddev->bitmap_info.space = sectors_reserved; - if (err) + if (err) { bitmap_print_sb(bitmap); + if (bitmap->cluster_slot < 0) + md_cluster_stop(bitmap->mddev); + } return err; } @@ -671,23 +740,20 @@ /* * return a pointer to the page in the filemap that contains the given bit * - * this lookup is complicated by the fact that the bitmap sb might be exactly - * 1 page (e.g., x86) or less than 1 page -- so the bitmap might start on page - * 0 or page 1 */ static inline struct page *filemap_get_page(struct bitmap_storage *store, unsigned long chunk) { if (file_page_index(store, chunk) >= store->file_pages) return NULL; - return store->filemap[file_page_index(store, chunk) - - file_page_index(store, 0)]; + return store->filemap[file_page_index(store, chunk)]; } static int bitmap_storage_alloc(struct bitmap_storage *store, - unsigned long chunks, int with_super) + unsigned long chunks, int with_super, + int slot_number) { - int pnum; + int pnum, offset = 0; unsigned long num_pages; unsigned long bytes; @@ -696,6 +762,7 @@ bytes += sizeof(bitmap_super_t); num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE); + offset = slot_number * (num_pages - 1); store->filemap = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL); @@ -706,20 +773,22 @@ store->sb_page = alloc_page(GFP_KERNEL|__GFP_ZERO); if (store->sb_page == NULL) return -ENOMEM; - store->sb_page->index = 0; } + pnum = 0; if (store->sb_page) { store->filemap[0] = store->sb_page; pnum = 1; + store->sb_page->index = offset; } + for ( ; pnum < num_pages; pnum++) { store->filemap[pnum] = alloc_page(GFP_KERNEL|__GFP_ZERO); if (!store->filemap[pnum]) { store->file_pages = pnum; return -ENOMEM; } - store->filemap[pnum]->index = pnum; + store->filemap[pnum]->index = pnum + offset; } store->file_pages = pnum; @@ -778,7 +847,7 @@ if (bitmap->storage.file) { path = kmalloc(PAGE_SIZE, GFP_KERNEL); if (path) - ptr = d_path(&bitmap->storage.file->f_path, + ptr = file_path(bitmap->storage.file, path, PAGE_SIZE); printk(KERN_ALERT @@ -878,6 +947,28 @@ } } +static int bitmap_file_test_bit(struct bitmap *bitmap, sector_t block) +{ + unsigned long bit; + struct page *page; + void *paddr; + unsigned long chunk = block >> bitmap->counts.chunkshift; + int set = 0; + + page = filemap_get_page(&bitmap->storage, chunk); + if (!page) + return -EINVAL; + bit = file_page_offset(&bitmap->storage, chunk); + paddr = kmap_atomic(page); + if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) + set = test_bit(bit, paddr); + else + set = test_bit_le(bit, paddr); + kunmap_atomic(paddr); + return set; +} + + /* this gets called when the md device is ready to unplug its underlying * (slave) device queues -- before we let any writes go down, we need to * sync the dirty pages of the bitmap file to disk */ @@ -928,7 +1019,7 @@ */ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) { - unsigned long i, chunks, index, oldindex, bit; + unsigned long i, chunks, index, oldindex, bit, node_offset = 0; struct page *page = NULL; unsigned long bit_cnt = 0; struct file *file; @@ -974,6 +1065,9 @@ if (!bitmap->mddev->bitmap_info.external) offset = sizeof(bitmap_super_t); + if (mddev_is_clustered(bitmap->mddev)) + node_offset = bitmap->cluster_slot * (DIV_ROUND_UP(store->bytes, PAGE_SIZE)); + for (i = 0; i < chunks; i++) { int b; index = file_page_index(&bitmap->storage, i); @@ -994,7 +1088,7 @@ bitmap->mddev, bitmap->mddev->bitmap_info.offset, page, - index, count); + index + node_offset, count); if (ret) goto err; @@ -1200,7 +1294,6 @@ j < bitmap->storage.file_pages && !test_bit(BITMAP_STALE, &bitmap->flags); j++) { - if (test_page_attr(bitmap, j, BITMAP_PAGE_DIRTY)) /* bitmap_unplug will handle the rest */ @@ -1477,7 +1570,7 @@ } EXPORT_SYMBOL(bitmap_close_sync); -void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector) +void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force) { sector_t s = 0; sector_t blocks; @@ -1488,7 +1581,7 @@ bitmap->last_end_sync = jiffies; return; } - if (time_before(jiffies, (bitmap->last_end_sync + if (!force && time_before(jiffies, (bitmap->last_end_sync + bitmap->mddev->bitmap_info.daemon_sleep))) return; wait_event(bitmap->mddev->recovery_wait, @@ -1523,11 +1616,13 @@ return; } if (!*bmc) { - *bmc = 2 | (needed ? NEEDED_MASK : 0); + *bmc = 2; bitmap_count_page(&bitmap->counts, offset, 1); bitmap_set_pending(&bitmap->counts, offset); bitmap->allclean = 0; } + if (needed) + *bmc |= NEEDED_MASK; spin_unlock_irq(&bitmap->counts.lock); } @@ -1584,6 +1679,10 @@ if (!bitmap) /* there was no bitmap */ return; + if (mddev_is_clustered(bitmap->mddev) && bitmap->mddev->cluster_info && + bitmap->cluster_slot == md_cluster_ops->slot_number(bitmap->mddev)) + md_cluster_stop(bitmap->mddev); + /* Shouldn't be needed - but just in case.... */ wait_event(bitmap->write_wait, atomic_read(&bitmap->pending_writes) == 0); @@ -1612,7 +1711,9 @@ return; mutex_lock(&mddev->bitmap_info.mutex); + spin_lock(&mddev->lock); mddev->bitmap = NULL; /* disconnect from the md device */ + spin_unlock(&mddev->lock); mutex_unlock(&mddev->bitmap_info.mutex); if (mddev->thread) mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; @@ -1627,13 +1728,13 @@ * initialize the bitmap structure * if this returns an error, bitmap_destroy must be called to do clean up */ -int bitmap_create(struct mddev *mddev) +struct bitmap *bitmap_create(struct mddev *mddev, int slot) { struct bitmap *bitmap; sector_t blocks = mddev->resync_max_sectors; struct file *file = mddev->bitmap_info.file; int err; - struct sysfs_dirent *bm = NULL; + struct kernfs_node *bm = NULL; BUILD_BUG_ON(sizeof(bitmap_super_t) != 256); @@ -1641,7 +1742,7 @@ bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL); if (!bitmap) - return -ENOMEM; + return ERR_PTR(-ENOMEM); spin_lock_init(&bitmap->counts.lock); atomic_set(&bitmap->pending_writes, 0); @@ -1650,11 +1751,12 @@ init_waitqueue_head(&bitmap->behind_wait); bitmap->mddev = mddev; + bitmap->cluster_slot = slot; if (mddev->kobj.sd) - bm = sysfs_get_dirent(mddev->kobj.sd, NULL, "bitmap"); + bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap"); if (bm) { - bitmap->sysfs_can_clear = sysfs_get_dirent(bm, NULL, "can_clear"); + bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear"); sysfs_put(bm); } else bitmap->sysfs_can_clear = NULL; @@ -1697,12 +1799,14 @@ printk(KERN_INFO "created bitmap (%lu pages) for device %s\n", bitmap->counts.pages, bmname(bitmap)); - mddev->bitmap = bitmap; - return test_bit(BITMAP_WRITE_ERROR, &bitmap->flags) ? -EIO : 0; + err = test_bit(BITMAP_WRITE_ERROR, &bitmap->flags) ? -EIO : 0; + if (err) + goto error; + return bitmap; error: bitmap_free(bitmap); - return err; + return ERR_PTR(err); } int bitmap_load(struct mddev *mddev) @@ -1756,6 +1860,56 @@ } EXPORT_SYMBOL_GPL(bitmap_load); +/* Loads the bitmap associated with slot and copies the resync information + * to our bitmap + */ +int bitmap_copy_from_slot(struct mddev *mddev, int slot, + sector_t *low, sector_t *high, bool clear_bits) +{ + int rv = 0, i, j; + sector_t block, lo = 0, hi = 0; + struct bitmap_counts *counts; + struct bitmap *bitmap = bitmap_create(mddev, slot); + + if (IS_ERR(bitmap)) + return PTR_ERR(bitmap); + + rv = bitmap_init_from_disk(bitmap, 0); + if (rv) + goto err; + + counts = &bitmap->counts; + for (j = 0; j < counts->chunks; j++) { + block = (sector_t)j << counts->chunkshift; + if (bitmap_file_test_bit(bitmap, block)) { + if (!lo) + lo = block; + hi = block; + bitmap_file_clear_bit(bitmap, block); + bitmap_set_memory_bits(mddev->bitmap, block, 1); + bitmap_file_set_bit(mddev->bitmap, block); + } + } + + if (clear_bits) { + bitmap_update_sb(bitmap); + /* Setting this for the ev_page should be enough. + * And we do not require both write_all and PAGE_DIRT either + */ + for (i = 0; i < bitmap->storage.file_pages; i++) + set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY); + bitmap_write_all(bitmap); + bitmap_unplug(bitmap); + } + *low = lo; + *high = hi; +err: + bitmap_free(bitmap); + return rv; +} +EXPORT_SYMBOL_GPL(bitmap_copy_from_slot); + + void bitmap_status(struct seq_file *seq, struct bitmap *bitmap) { unsigned long chunk_kb; @@ -1777,7 +1931,7 @@ chunk_kb ? "KB" : "B"); if (bitmap->storage.file) { seq_printf(seq, ", file: "); - seq_path(seq, &bitmap->storage.file->f_path, " \t\n"); + seq_file_path(seq, bitmap->storage.file, " \t\n"); } seq_printf(seq, "\n"); @@ -1840,7 +1994,9 @@ memset(&store, 0, sizeof(store)); if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file) ret = bitmap_storage_alloc(&store, chunks, - !bitmap->mddev->bitmap_info.external); + !bitmap->mddev->bitmap_info.external, + mddev_is_clustered(bitmap->mddev) + ? bitmap->cluster_slot : 0); if (ret) goto err; @@ -1986,7 +2142,6 @@ if (mddev->bitmap_info.file) { struct file *f = mddev->bitmap_info.file; mddev->bitmap_info.file = NULL; - restore_bitmap_write_access(f); fput(f); } } else { @@ -2000,9 +2155,9 @@ } else { int rv; if (buf[0] == '+') - rv = strict_strtoll(buf+1, 10, &offset); + rv = kstrtoll(buf+1, 10, &offset); else - rv = strict_strtoll(buf, 10, &offset); + rv = kstrtoll(buf, 10, &offset); if (rv) return rv; if (offset == 0) @@ -2013,13 +2168,18 @@ return -EINVAL; mddev->bitmap_info.offset = offset; if (mddev->pers) { + struct bitmap *bitmap; mddev->pers->quiesce(mddev, 1); - rv = bitmap_create(mddev); - if (!rv) + bitmap = bitmap_create(mddev, -1); + if (IS_ERR(bitmap)) + rv = PTR_ERR(bitmap); + else { + mddev->bitmap = bitmap; rv = bitmap_load(mddev); - if (rv) { - bitmap_destroy(mddev); - mddev->bitmap_info.offset = 0; + if (rv) { + bitmap_destroy(mddev); + mddev->bitmap_info.offset = 0; + } } mddev->pers->quiesce(mddev, 0); if (rv) @@ -2137,7 +2297,7 @@ backlog_store(struct mddev *mddev, const char *buf, size_t len) { unsigned long backlog; - int rv = strict_strtoul(buf, 10, &backlog); + int rv = kstrtoul(buf, 10, &backlog); if (rv) return rv; if (backlog > COUNTER_MAX) @@ -2163,7 +2323,7 @@ unsigned long csize; if (mddev->bitmap) return -EBUSY; - rv = strict_strtoul(buf, 10, &csize); + rv = kstrtoul(buf, 10, &csize); if (rv) return rv; if (csize < 512 || @@ -2178,6 +2338,8 @@ static ssize_t metadata_show(struct mddev *mddev, char *page) { + if (mddev_is_clustered(mddev)) + return sprintf(page, "clustered\n"); return sprintf(page, "%s\n", (mddev->bitmap_info.external ? "external" : "internal")); } @@ -2190,7 +2352,8 @@ return -EBUSY; if (strncmp(buf, "external", 8) == 0) mddev->bitmap_info.external = 1; - else if (strncmp(buf, "internal", 8) == 0) + else if ((strncmp(buf, "internal", 8) == 0) || + (strncmp(buf, "clustered", 9) == 0)) mddev->bitmap_info.external = 0; else return -EINVAL; @@ -2203,11 +2366,13 @@ static ssize_t can_clear_show(struct mddev *mddev, char *page) { int len; + spin_lock(&mddev->lock); if (mddev->bitmap) len = sprintf(page, "%s\n", (mddev->bitmap->need_sync ? "false" : "true")); else len = sprintf(page, "\n"); + spin_unlock(&mddev->lock); return len; } @@ -2232,10 +2397,15 @@ static ssize_t behind_writes_used_show(struct mddev *mddev, char *page) { + ssize_t ret; + spin_lock(&mddev->lock); if (mddev->bitmap == NULL) - return sprintf(page, "0\n"); - return sprintf(page, "%lu\n", - mddev->bitmap->behind_writes_used); + ret = sprintf(page, "0\n"); + else + ret = sprintf(page, "%lu\n", + mddev->bitmap->behind_writes_used); + spin_unlock(&mddev->lock); + return ret; } static ssize_t