--- zzzz-none-000/linux-3.10.107/drivers/md/dm-thin-metadata.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/drivers/md/dm-thin-metadata.c 2021-02-04 17:41:59.000000000 +0000 @@ -76,7 +76,7 @@ #define THIN_SUPERBLOCK_MAGIC 27022010 #define THIN_SUPERBLOCK_LOCATION 0 -#define THIN_VERSION 1 +#define THIN_VERSION 2 #define THIN_METADATA_CACHE_SIZE 64 #define SECTOR_TO_BLOCK_SHIFT 3 @@ -184,7 +184,6 @@ uint64_t trans_id; unsigned long flags; sector_t data_block_size; - bool read_only:1; /* * Set if a transaction has to be aborted but the attempt to roll back @@ -192,6 +191,13 @@ * operation possible in this state is the closing of the device. */ bool fail_io:1; + + /* + * Reading the space map roots can fail, so we read it into these + * buffers before the superblock is locked and updated. + */ + __u8 data_space_map_root[SPACE_MAP_ROOT_SIZE]; + __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE]; }; struct dm_thin_device { @@ -390,7 +396,9 @@ } } - return dm_bm_unlock(b); + dm_bm_unlock(b); + + return 0; } static void __setup_btree_details(struct dm_pool_metadata *pmd) @@ -431,26 +439,53 @@ pmd->details_info.value_type.equal = NULL; } +static int save_sm_roots(struct dm_pool_metadata *pmd) +{ + int r; + size_t len; + + r = dm_sm_root_size(pmd->metadata_sm, &len); + if (r < 0) + return r; + + r = dm_sm_copy_root(pmd->metadata_sm, &pmd->metadata_space_map_root, len); + if (r < 0) + return r; + + r = dm_sm_root_size(pmd->data_sm, &len); + if (r < 0) + return r; + + return dm_sm_copy_root(pmd->data_sm, &pmd->data_space_map_root, len); +} + +static void copy_sm_roots(struct dm_pool_metadata *pmd, + struct thin_disk_superblock *disk) +{ + memcpy(&disk->metadata_space_map_root, + &pmd->metadata_space_map_root, + sizeof(pmd->metadata_space_map_root)); + + memcpy(&disk->data_space_map_root, + &pmd->data_space_map_root, + sizeof(pmd->data_space_map_root)); +} + static int __write_initial_superblock(struct dm_pool_metadata *pmd) { int r; struct dm_block *sblock; - size_t metadata_len, data_len; struct thin_disk_superblock *disk_super; sector_t bdev_size = i_size_read(pmd->bdev->bd_inode) >> SECTOR_SHIFT; if (bdev_size > THIN_METADATA_MAX_SECTORS) bdev_size = THIN_METADATA_MAX_SECTORS; - r = dm_sm_root_size(pmd->metadata_sm, &metadata_len); - if (r < 0) - return r; - - r = dm_sm_root_size(pmd->data_sm, &data_len); + r = dm_sm_commit(pmd->data_sm); if (r < 0) return r; - r = dm_sm_commit(pmd->data_sm); + r = save_sm_roots(pmd); if (r < 0) return r; @@ -471,27 +506,15 @@ disk_super->trans_id = 0; disk_super->held_root = 0; - r = dm_sm_copy_root(pmd->metadata_sm, &disk_super->metadata_space_map_root, - metadata_len); - if (r < 0) - goto bad_locked; - - r = dm_sm_copy_root(pmd->data_sm, &disk_super->data_space_map_root, - data_len); - if (r < 0) - goto bad_locked; + copy_sm_roots(pmd, disk_super); disk_super->data_mapping_root = cpu_to_le64(pmd->root); disk_super->device_details_root = cpu_to_le64(pmd->details_root); - disk_super->metadata_block_size = cpu_to_le32(THIN_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); + disk_super->metadata_block_size = cpu_to_le32(THIN_METADATA_BLOCK_SIZE); disk_super->metadata_nr_blocks = cpu_to_le64(bdev_size >> SECTOR_TO_BLOCK_SHIFT); disk_super->data_block_size = cpu_to_le32(pmd->data_block_size); return dm_tm_commit(pmd->tm, sblock); - -bad_locked: - dm_bm_unlock(sblock); - return r; } static int __format_metadata(struct dm_pool_metadata *pmd) @@ -629,7 +652,9 @@ } __setup_btree_details(pmd); - return dm_bm_unlock(sblock); + dm_bm_unlock(sblock); + + return 0; bad_cleanup_data_sm: dm_sm_destroy(pmd->data_sm); @@ -660,7 +685,7 @@ { int r; - pmd->bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE, + pmd->bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE << SECTOR_SHIFT, THIN_METADATA_CACHE_SIZE, THIN_MAX_CONCURRENT_LOCKS); if (IS_ERR(pmd->bm)) { @@ -778,6 +803,10 @@ if (r < 0) return r; + r = save_sm_roots(pmd); + if (r < 0) + return r; + r = superblock_lock(pmd, &sblock); if (r) return r; @@ -789,21 +818,9 @@ disk_super->trans_id = cpu_to_le64(pmd->trans_id); disk_super->flags = cpu_to_le32(pmd->flags); - r = dm_sm_copy_root(pmd->metadata_sm, &disk_super->metadata_space_map_root, - metadata_len); - if (r < 0) - goto out_locked; - - r = dm_sm_copy_root(pmd->data_sm, &disk_super->data_space_map_root, - data_len); - if (r < 0) - goto out_locked; + copy_sm_roots(pmd, disk_super); return dm_tm_commit(pmd->tm, sblock); - -out_locked: - dm_bm_unlock(sblock); - return r; } struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, @@ -822,7 +839,6 @@ init_rwsem(&pmd->root_lock); pmd->time = 0; INIT_LIST_HEAD(&pmd->thin_devices); - pmd->read_only = false; pmd->fail_io = false; pmd->bdev = bdev; pmd->data_block_size = data_block_size; @@ -866,7 +882,7 @@ return -EBUSY; } - if (!pmd->read_only && !pmd->fail_io) { + if (!dm_bm_is_read_only(pmd->bm) && !pmd->fail_io) { r = __commit_transaction(pmd); if (r < 0) DMWARN("%s: __commit_transaction() failed, error = %d", @@ -1291,7 +1307,9 @@ dm_btree_del(&pmd->details_info, le64_to_cpu(disk_super->device_details_root)); dm_sm_dec_block(pmd->metadata_sm, held_root); - return dm_tm_unlock(pmd->tm, copy); + dm_tm_unlock(pmd->tm, copy); + + return 0; } int dm_pool_release_metadata_snap(struct dm_pool_metadata *pmd) @@ -1321,7 +1339,9 @@ disk_super = dm_block_data(sblock); *result = le64_to_cpu(disk_super->held_root); - return dm_bm_unlock(sblock); + dm_bm_unlock(sblock); + + return 0; } int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd, @@ -1376,45 +1396,99 @@ } int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block, - int can_block, struct dm_thin_lookup_result *result) + int can_issue_io, struct dm_thin_lookup_result *result) { - int r = -EINVAL; - uint64_t block_time = 0; + int r; __le64 value; struct dm_pool_metadata *pmd = td->pmd; dm_block_t keys[2] = { td->id, block }; struct dm_btree_info *info; - if (can_block) { - down_read(&pmd->root_lock); + down_read(&pmd->root_lock); + if (pmd->fail_io) { + up_read(&pmd->root_lock); + return -EINVAL; + } + + if (can_issue_io) { info = &pmd->info; - } else if (down_read_trylock(&pmd->root_lock)) + } else info = &pmd->nb_info; - else - return -EWOULDBLOCK; - - if (pmd->fail_io) - goto out; r = dm_btree_lookup(info, pmd->root, keys, &value); - if (!r) - block_time = le64_to_cpu(value); - -out: - up_read(&pmd->root_lock); - if (!r) { + uint64_t block_time = 0; dm_block_t exception_block; uint32_t exception_time; + + block_time = le64_to_cpu(value); unpack_block_time(block_time, &exception_block, &exception_time); result->block = exception_block; result->shared = __snapshotted_since(td, exception_time); } + up_read(&pmd->root_lock); return r; } +/* FIXME: write a more efficient one in btree */ +int dm_thin_find_mapped_range(struct dm_thin_device *td, + dm_block_t begin, dm_block_t end, + dm_block_t *thin_begin, dm_block_t *thin_end, + dm_block_t *pool_begin, bool *maybe_shared) +{ + int r; + dm_block_t pool_end; + struct dm_thin_lookup_result lookup; + + if (end < begin) + return -ENODATA; + + /* + * Find first mapped block. + */ + while (begin < end) { + r = dm_thin_find_block(td, begin, true, &lookup); + if (r) { + if (r != -ENODATA) + return r; + } else + break; + + begin++; + } + + if (begin == end) + return -ENODATA; + + *thin_begin = begin; + *pool_begin = lookup.block; + *maybe_shared = lookup.shared; + + begin++; + pool_end = *pool_begin + 1; + while (begin != end) { + r = dm_thin_find_block(td, begin, true, &lookup); + if (r) { + if (r == -ENODATA) + break; + else + return r; + } + + if ((lookup.block != pool_end) || + (lookup.shared != *maybe_shared)) + break; + + pool_end++; + begin++; + } + + *thin_end = begin; + return 0; +} + static int __insert(struct dm_thin_device *td, dm_block_t block, dm_block_t data_block) { @@ -1467,6 +1541,65 @@ return 0; } +static int __remove_range(struct dm_thin_device *td, dm_block_t begin, dm_block_t end) +{ + int r; + unsigned count, total_count = 0; + struct dm_pool_metadata *pmd = td->pmd; + dm_block_t keys[1] = { td->id }; + __le64 value; + dm_block_t mapping_root; + + /* + * Find the mapping tree + */ + r = dm_btree_lookup(&pmd->tl_info, pmd->root, keys, &value); + if (r) + return r; + + /* + * Remove from the mapping tree, taking care to inc the + * ref count so it doesn't get deleted. + */ + mapping_root = le64_to_cpu(value); + dm_tm_inc(pmd->tm, mapping_root); + r = dm_btree_remove(&pmd->tl_info, pmd->root, keys, &pmd->root); + if (r) + return r; + + /* + * Remove leaves stops at the first unmapped entry, so we have to + * loop round finding mapped ranges. + */ + while (begin < end) { + r = dm_btree_lookup_next(&pmd->bl_info, mapping_root, &begin, &begin, &value); + if (r == -ENODATA) + break; + + if (r) + return r; + + if (begin >= end) + break; + + r = dm_btree_remove_leaves(&pmd->bl_info, mapping_root, &begin, end, &mapping_root, &count); + if (r) + return r; + + total_count += count; + } + + td->mapped_blocks -= total_count; + td->changed = 1; + + /* + * Reinsert the mapping tree. + */ + value = cpu_to_le64(mapping_root); + __dm_bless_for_disk(&value); + return dm_btree_insert(&pmd->tl_info, pmd->root, keys, &value, &pmd->root); +} + int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block) { int r = -EINVAL; @@ -1479,6 +1612,19 @@ return r; } +int dm_thin_remove_range(struct dm_thin_device *td, + dm_block_t begin, dm_block_t end) +{ + int r = -EINVAL; + + down_write(&td->pmd->root_lock); + if (!td->pmd->fail_io) + r = __remove_range(td, begin, end); + up_write(&td->pmd->root_lock); + + return r; +} + int dm_pool_block_is_used(struct dm_pool_metadata *pmd, dm_block_t b, bool *result) { int r; @@ -1631,15 +1777,6 @@ return r; } -int dm_pool_get_data_block_size(struct dm_pool_metadata *pmd, sector_t *result) -{ - down_read(&pmd->root_lock); - *result = pmd->data_block_size; - up_read(&pmd->root_lock); - - return 0; -} - int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result) { int r = -EINVAL; @@ -1744,11 +1881,17 @@ void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd) { down_write(&pmd->root_lock); - pmd->read_only = true; dm_bm_set_read_only(pmd->bm); up_write(&pmd->root_lock); } +void dm_pool_metadata_read_write(struct dm_pool_metadata *pmd) +{ + down_write(&pmd->root_lock); + dm_bm_set_read_write(pmd->bm); + up_write(&pmd->root_lock); +} + int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd, dm_block_t threshold, dm_sm_threshold_fn fn, @@ -1762,3 +1905,46 @@ return r; } + +int dm_pool_metadata_set_needs_check(struct dm_pool_metadata *pmd) +{ + int r; + struct dm_block *sblock; + struct thin_disk_superblock *disk_super; + + down_write(&pmd->root_lock); + pmd->flags |= THIN_METADATA_NEEDS_CHECK_FLAG; + + r = superblock_lock(pmd, &sblock); + if (r) { + DMERR("couldn't read superblock"); + goto out; + } + + disk_super = dm_block_data(sblock); + disk_super->flags = cpu_to_le32(pmd->flags); + + dm_bm_unlock(sblock); +out: + up_write(&pmd->root_lock); + return r; +} + +bool dm_pool_metadata_needs_check(struct dm_pool_metadata *pmd) +{ + bool needs_check; + + down_read(&pmd->root_lock); + needs_check = pmd->flags & THIN_METADATA_NEEDS_CHECK_FLAG; + up_read(&pmd->root_lock); + + return needs_check; +} + +void dm_pool_issue_prefetches(struct dm_pool_metadata *pmd) +{ + down_read(&pmd->root_lock); + if (!pmd->fail_io) + dm_tm_issue_prefetches(pmd->tm); + up_read(&pmd->root_lock); +}