--- zzzz-none-000/linux-3.10.107/fs/btrfs/backref.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/fs/btrfs/backref.c 2021-02-04 17:41:59.000000000 +0000 @@ -25,6 +25,9 @@ #include "delayed-ref.h" #include "locking.h" +/* Just an arbitrary number so we can be sure this happened */ +#define BACKREF_FOUND_SHARED 6 + struct extent_inode_elem { u64 inum; u64 offset; @@ -36,16 +39,23 @@ u64 extent_item_pos, struct extent_inode_elem **eie) { - u64 data_offset; - u64 data_len; + u64 offset = 0; struct extent_inode_elem *e; - data_offset = btrfs_file_extent_offset(eb, fi); - data_len = btrfs_file_extent_num_bytes(eb, fi); + if (!btrfs_file_extent_compression(eb, fi) && + !btrfs_file_extent_encryption(eb, fi) && + !btrfs_file_extent_other_encoding(eb, fi)) { + u64 data_offset; + u64 data_len; - if (extent_item_pos < data_offset || - extent_item_pos >= data_offset + data_len) - return 1; + data_offset = btrfs_file_extent_offset(eb, fi); + data_len = btrfs_file_extent_num_bytes(eb, fi); + + if (extent_item_pos < data_offset || + extent_item_pos >= data_offset + data_len) + return 1; + offset = extent_item_pos - data_offset; + } e = kmalloc(sizeof(*e), GFP_NOFS); if (!e) @@ -53,12 +63,22 @@ e->next = *eie; e->inum = key->objectid; - e->offset = key->offset + (extent_item_pos - data_offset); + e->offset = key->offset + offset; *eie = e; return 0; } +static void free_inode_elem_list(struct extent_inode_elem *eie) +{ + struct extent_inode_elem *eie_next; + + for (; eie; eie = eie_next) { + eie_next = eie->next; + kfree(eie); + } +} + static int find_extent_in_eb(struct extent_buffer *eb, u64 wanted_disk_byte, u64 extent_item_pos, struct extent_inode_elem **eie) @@ -112,6 +132,26 @@ u64 wanted_disk_byte; }; +static struct kmem_cache *btrfs_prelim_ref_cache; + +int __init btrfs_prelim_ref_init(void) +{ + btrfs_prelim_ref_cache = kmem_cache_create("btrfs_prelim_ref", + sizeof(struct __prelim_ref), + 0, + SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, + NULL); + if (!btrfs_prelim_ref_cache) + return -ENOMEM; + return 0; +} + +void btrfs_prelim_ref_exit(void) +{ + if (btrfs_prelim_ref_cache) + kmem_cache_destroy(btrfs_prelim_ref_cache); +} + /* * the rules for all callers of this function are: * - obtaining the parent is the goal @@ -153,20 +193,46 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id, struct btrfs_key *key, int level, - u64 parent, u64 wanted_disk_byte, int count) + u64 parent, u64 wanted_disk_byte, int count, + gfp_t gfp_mask) { struct __prelim_ref *ref; - /* in case we're adding delayed refs, we're holding the refs spinlock */ - ref = kmalloc(sizeof(*ref), GFP_ATOMIC); + if (root_id == BTRFS_DATA_RELOC_TREE_OBJECTID) + return 0; + + ref = kmem_cache_alloc(btrfs_prelim_ref_cache, gfp_mask); if (!ref) return -ENOMEM; ref->root_id = root_id; - if (key) + if (key) { ref->key_for_search = *key; - else + /* + * We can often find data backrefs with an offset that is too + * large (>= LLONG_MAX, maximum allowed file offset) due to + * underflows when subtracting a file's offset with the data + * offset of its corresponding extent data item. This can + * happen for example in the clone ioctl. + * So if we detect such case we set the search key's offset to + * zero to make sure we will find the matching file extent item + * at add_all_parents(), otherwise we will miss it because the + * offset taken form the backref is much larger then the offset + * of the file extent item. This can make us scan a very large + * number of file extent items, but at least it will not make + * us miss any. + * This is an ugly workaround for a behaviour that should have + * never existed, but it does and a fix for the clone ioctl + * would touch a lot of places, cause backwards incompatibility + * and would not fix the problem for extents cloned with older + * kernels. + */ + if (ref->key_for_search.type == BTRFS_EXTENT_DATA_KEY && + ref->key_for_search.offset >= LLONG_MAX) + ref->key_for_search.offset = 0; + } else { memset(&ref->key_for_search, 0, sizeof(ref->key_for_search)); + } ref->inode_list = NULL; ref->level = level; @@ -179,18 +245,20 @@ } static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, - struct ulist *parents, int level, - struct btrfs_key *key_for_search, u64 time_seq, - u64 wanted_disk_byte, - const u64 *extent_item_pos) + struct ulist *parents, struct __prelim_ref *ref, + int level, u64 time_seq, const u64 *extent_item_pos, + u64 total_refs) { int ret = 0; int slot; struct extent_buffer *eb; struct btrfs_key key; + struct btrfs_key *key_for_search = &ref->key_for_search; struct btrfs_file_extent_item *fi; - struct extent_inode_elem *eie = NULL; + struct extent_inode_elem *eie = NULL, *old = NULL; u64 disk_byte; + u64 wanted_disk_byte = ref->wanted_disk_byte; + u64 count = 0; if (level != 0) { eb = path->nodes[level]; @@ -205,10 +273,14 @@ * the first item to check. But sometimes, we may enter it with * slot==nritems. In that case, go to the next leaf before we continue. */ - if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) - ret = btrfs_next_old_leaf(root, path, time_seq); + if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { + if (time_seq == (u64)-1) + ret = btrfs_next_leaf(root, path); + else + ret = btrfs_next_old_leaf(root, path, time_seq); + } - while (!ret) { + while (!ret && count < total_refs) { eb = path->nodes[0]; slot = path->slots[0]; @@ -223,6 +295,8 @@ if (disk_byte == wanted_disk_byte) { eie = NULL; + old = NULL; + count++; if (extent_item_pos) { ret = check_extent_in_eb(&key, eb, fi, *extent_item_pos, @@ -230,23 +304,30 @@ if (ret < 0) break; } - if (!ret) { - ret = ulist_add(parents, eb->start, - (uintptr_t)eie, GFP_NOFS); - if (ret < 0) - break; - if (!extent_item_pos) { - ret = btrfs_next_old_leaf(root, path, - time_seq); - continue; - } + if (ret > 0) + goto next; + ret = ulist_add_merge_ptr(parents, eb->start, + eie, (void **)&old, GFP_NOFS); + if (ret < 0) + break; + if (!ret && extent_item_pos) { + while (old->next) + old = old->next; + old->next = eie; } + eie = NULL; } - ret = btrfs_next_old_item(root, path, time_seq); +next: + if (time_seq == (u64)-1) + ret = btrfs_next_item(root, path); + else + ret = btrfs_next_old_item(root, path, time_seq); } if (ret > 0) ret = 0; + else if (ret < 0) + free_inode_elem_list(eie); return ret; } @@ -255,54 +336,72 @@ * to a logical address */ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, - int search_commit_root, - u64 time_seq, - struct __prelim_ref *ref, - struct ulist *parents, - const u64 *extent_item_pos) + struct btrfs_path *path, u64 time_seq, + struct __prelim_ref *ref, + struct ulist *parents, + const u64 *extent_item_pos, u64 total_refs) { - struct btrfs_path *path; struct btrfs_root *root; struct btrfs_key root_key; struct extent_buffer *eb; int ret = 0; int root_level; int level = ref->level; - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - path->search_commit_root = !!search_commit_root; + int index; root_key.objectid = ref->root_id; root_key.type = BTRFS_ROOT_ITEM_KEY; root_key.offset = (u64)-1; - root = btrfs_read_fs_root_no_name(fs_info, &root_key); + + index = srcu_read_lock(&fs_info->subvol_srcu); + + root = btrfs_get_fs_root(fs_info, &root_key, false); if (IS_ERR(root)) { + srcu_read_unlock(&fs_info->subvol_srcu, index); ret = PTR_ERR(root); goto out; } - root_level = btrfs_old_root_level(root, time_seq); + if (btrfs_test_is_dummy_root(root)) { + srcu_read_unlock(&fs_info->subvol_srcu, index); + ret = -ENOENT; + goto out; + } + + if (path->search_commit_root) + root_level = btrfs_header_level(root->commit_root); + else if (time_seq == (u64)-1) + root_level = btrfs_header_level(root->node); + else + root_level = btrfs_old_root_level(root, time_seq); - if (root_level + 1 == level) + if (root_level + 1 == level) { + srcu_read_unlock(&fs_info->subvol_srcu, index); goto out; + } path->lowest_level = level; - ret = btrfs_search_old_slot(root, &ref->key_for_search, path, time_seq); + if (time_seq == (u64)-1) + ret = btrfs_search_slot(NULL, root, &ref->key_for_search, path, + 0, 0); + else + ret = btrfs_search_old_slot(root, &ref->key_for_search, path, + time_seq); + + /* root node has been locked, we can release @subvol_srcu safely here */ + srcu_read_unlock(&fs_info->subvol_srcu, index); + pr_debug("search slot in root %llu (level %d, ref count %d) returned " "%d for key (%llu %u %llu)\n", - (unsigned long long)ref->root_id, level, ref->count, ret, - (unsigned long long)ref->key_for_search.objectid, - ref->key_for_search.type, - (unsigned long long)ref->key_for_search.offset); + ref->root_id, level, ref->count, ret, + ref->key_for_search.objectid, ref->key_for_search.type, + ref->key_for_search.offset); if (ret < 0) goto out; eb = path->nodes[level]; while (!eb) { - if (!level) { - WARN_ON(1); + if (WARN_ON(!level)) { ret = 1; goto out; } @@ -310,11 +409,11 @@ eb = path->nodes[level]; } - ret = add_all_parents(root, path, parents, level, &ref->key_for_search, - time_seq, ref->wanted_disk_byte, - extent_item_pos); + ret = add_all_parents(root, path, parents, ref, level, time_seq, + extent_item_pos, total_refs); out: - btrfs_free_path(path); + path->lowest_level = 0; + btrfs_release_path(path); return ret; } @@ -322,9 +421,10 @@ * resolve all indirect backrefs from the list */ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, - int search_commit_root, u64 time_seq, + struct btrfs_path *path, u64 time_seq, struct list_head *head, - const u64 *extent_item_pos) + const u64 *extent_item_pos, u64 total_refs, + u64 root_objectid) { int err; int ret = 0; @@ -349,24 +449,35 @@ continue; if (ref->count == 0) continue; - err = __resolve_indirect_ref(fs_info, search_commit_root, - time_seq, ref, parents, - extent_item_pos); - if (err == -ENOMEM) + if (root_objectid && ref->root_id != root_objectid) { + ret = BACKREF_FOUND_SHARED; goto out; - if (err) + } + err = __resolve_indirect_ref(fs_info, path, time_seq, ref, + parents, extent_item_pos, + total_refs); + /* + * we can only tolerate ENOENT,otherwise,we should catch error + * and return directly. + */ + if (err == -ENOENT) { continue; + } else if (err) { + ret = err; + goto out; + } /* we put the first parent into the ref at hand */ ULIST_ITER_INIT(&uiter); node = ulist_next(parents, &uiter); ref->parent = node ? node->val : 0; ref->inode_list = node ? - (struct extent_inode_elem *)(uintptr_t)node->aux : 0; + (struct extent_inode_elem *)(uintptr_t)node->aux : NULL; /* additional parents require new refs being added here */ while ((node = ulist_next(parents, &uiter))) { - new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS); + new_ref = kmem_cache_alloc(btrfs_prelim_ref_cache, + GFP_NOFS); if (!new_ref) { ret = -ENOMEM; goto out; @@ -422,8 +533,10 @@ continue; BUG_ON(!ref->wanted_disk_byte); eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte, - fs_info->tree_root->leafsize, 0); - if (!eb || !extent_buffer_uptodate(eb)) { + 0); + if (IS_ERR(eb)) { + return PTR_ERR(eb); + } else if (!extent_buffer_uptodate(eb)) { free_extent_buffer(eb); return -EIO; } @@ -439,7 +552,7 @@ } /* - * merge two lists of backrefs and adjust counts accordingly + * merge backrefs and adjust counts accordingly * * mode = 1: merge identical keys, if key is set * FIXME: if we add more keys in __add_prelim_ref, we can merge more here. @@ -467,9 +580,9 @@ ref2 = list_entry(pos2, struct __prelim_ref, list); + if (!ref_for_same_block(ref1, ref2)) + continue; if (mode == 1) { - if (!ref_for_same_block(ref1, ref2)) - continue; if (!ref1->parent && ref2->parent) { xchg = ref1; ref1 = ref2; @@ -490,7 +603,7 @@ ref1->count += ref2->count; list_del(&ref2->list); - kfree(ref2); + kmem_cache_free(btrfs_prelim_ref_cache, ref2); } } @@ -501,10 +614,11 @@ * smaller or equal that seq to the list */ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, - struct list_head *prefs) + struct list_head *prefs, u64 *total_refs, + u64 inum) { + struct btrfs_delayed_ref_node *node; struct btrfs_delayed_extent_op *extent_op = head->extent_op; - struct rb_node *n = &head->node.rb_node; struct btrfs_key key; struct btrfs_key op_key = {0}; int sgn; @@ -513,14 +627,8 @@ if (extent_op && extent_op->update_key) btrfs_disk_key_to_cpu(&op_key, &extent_op->key); - while ((n = rb_prev(n))) { - struct btrfs_delayed_ref_node *node; - node = rb_entry(n, struct btrfs_delayed_ref_node, - rb_node); - if (node->bytenr != head->node.bytenr) - break; - WARN_ON(node->is_head); - + spin_lock(&head->lock); + list_for_each_entry(node, &head->ref_list, list) { if (node->seq > seq) continue; @@ -538,6 +646,7 @@ default: BUG_ON(1); } + *total_refs += (node->ref_mod * sgn); switch (node->type) { case BTRFS_TREE_BLOCK_REF_KEY: { struct btrfs_delayed_tree_ref *ref; @@ -545,17 +654,17 @@ ref = btrfs_delayed_node_to_tree_ref(node); ret = __add_prelim_ref(prefs, ref->root, &op_key, ref->level + 1, 0, node->bytenr, - node->ref_mod * sgn); + node->ref_mod * sgn, GFP_ATOMIC); break; } case BTRFS_SHARED_BLOCK_REF_KEY: { struct btrfs_delayed_tree_ref *ref; ref = btrfs_delayed_node_to_tree_ref(node); - ret = __add_prelim_ref(prefs, ref->root, NULL, + ret = __add_prelim_ref(prefs, 0, NULL, ref->level + 1, ref->parent, node->bytenr, - node->ref_mod * sgn); + node->ref_mod * sgn, GFP_ATOMIC); break; } case BTRFS_EXTENT_DATA_REF_KEY: { @@ -565,32 +674,38 @@ key.objectid = ref->objectid; key.type = BTRFS_EXTENT_DATA_KEY; key.offset = ref->offset; + + /* + * Found a inum that doesn't match our known inum, we + * know it's shared. + */ + if (inum && ref->objectid != inum) { + ret = BACKREF_FOUND_SHARED; + break; + } + ret = __add_prelim_ref(prefs, ref->root, &key, 0, 0, node->bytenr, - node->ref_mod * sgn); + node->ref_mod * sgn, GFP_ATOMIC); break; } case BTRFS_SHARED_DATA_REF_KEY: { struct btrfs_delayed_data_ref *ref; ref = btrfs_delayed_node_to_data_ref(node); - - key.objectid = ref->objectid; - key.type = BTRFS_EXTENT_DATA_KEY; - key.offset = ref->offset; - ret = __add_prelim_ref(prefs, ref->root, &key, 0, + ret = __add_prelim_ref(prefs, 0, NULL, 0, ref->parent, node->bytenr, - node->ref_mod * sgn); + node->ref_mod * sgn, GFP_ATOMIC); break; } default: WARN_ON(1); } if (ret) - return ret; + break; } - - return 0; + spin_unlock(&head->lock); + return ret; } /* @@ -598,12 +713,14 @@ */ static int __add_inline_refs(struct btrfs_fs_info *fs_info, struct btrfs_path *path, u64 bytenr, - int *info_level, struct list_head *prefs) + int *info_level, struct list_head *prefs, + u64 *total_refs, u64 inum) { int ret = 0; int slot; struct extent_buffer *leaf; struct btrfs_key key; + struct btrfs_key found_key; unsigned long ptr; unsigned long end; struct btrfs_extent_item *ei; @@ -621,17 +738,22 @@ ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); flags = btrfs_extent_flags(leaf, ei); + *total_refs += btrfs_extent_refs(leaf, ei); + btrfs_item_key_to_cpu(leaf, &found_key, slot); ptr = (unsigned long)(ei + 1); end = (unsigned long)ei + item_size; - if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { + if (found_key.type == BTRFS_EXTENT_ITEM_KEY && + flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { struct btrfs_tree_block_info *info; info = (struct btrfs_tree_block_info *)ptr; *info_level = btrfs_tree_block_level(leaf, info); ptr += sizeof(struct btrfs_tree_block_info); BUG_ON(ptr > end); + } else if (found_key.type == BTRFS_METADATA_ITEM_KEY) { + *info_level = found_key.offset; } else { BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA)); } @@ -649,7 +771,7 @@ case BTRFS_SHARED_BLOCK_REF_KEY: ret = __add_prelim_ref(prefs, 0, NULL, *info_level + 1, offset, - bytenr, 1); + bytenr, 1, GFP_NOFS); break; case BTRFS_SHARED_DATA_REF_KEY: { struct btrfs_shared_data_ref *sdref; @@ -658,13 +780,13 @@ sdref = (struct btrfs_shared_data_ref *)(iref + 1); count = btrfs_shared_data_ref_count(leaf, sdref); ret = __add_prelim_ref(prefs, 0, NULL, 0, offset, - bytenr, count); + bytenr, count, GFP_NOFS); break; } case BTRFS_TREE_BLOCK_REF_KEY: ret = __add_prelim_ref(prefs, offset, NULL, *info_level + 1, 0, - bytenr, 1); + bytenr, 1, GFP_NOFS); break; case BTRFS_EXTENT_DATA_REF_KEY: { struct btrfs_extent_data_ref *dref; @@ -677,9 +799,15 @@ dref); key.type = BTRFS_EXTENT_DATA_KEY; key.offset = btrfs_extent_data_ref_offset(leaf, dref); + + if (inum && key.objectid != inum) { + ret = BACKREF_FOUND_SHARED; + break; + } + root = btrfs_extent_data_ref_root(leaf, dref); ret = __add_prelim_ref(prefs, root, &key, 0, 0, - bytenr, count); + bytenr, count, GFP_NOFS); break; } default: @@ -698,7 +826,7 @@ */ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, struct btrfs_path *path, u64 bytenr, - int info_level, struct list_head *prefs) + int info_level, struct list_head *prefs, u64 inum) { struct btrfs_root *extent_root = fs_info->extent_root; int ret; @@ -730,7 +858,7 @@ case BTRFS_SHARED_BLOCK_REF_KEY: ret = __add_prelim_ref(prefs, 0, NULL, info_level + 1, key.offset, - bytenr, 1); + bytenr, 1, GFP_NOFS); break; case BTRFS_SHARED_DATA_REF_KEY: { struct btrfs_shared_data_ref *sdref; @@ -740,13 +868,13 @@ struct btrfs_shared_data_ref); count = btrfs_shared_data_ref_count(leaf, sdref); ret = __add_prelim_ref(prefs, 0, NULL, 0, key.offset, - bytenr, count); + bytenr, count, GFP_NOFS); break; } case BTRFS_TREE_BLOCK_REF_KEY: ret = __add_prelim_ref(prefs, key.offset, NULL, info_level + 1, 0, - bytenr, 1); + bytenr, 1, GFP_NOFS); break; case BTRFS_EXTENT_DATA_REF_KEY: { struct btrfs_extent_data_ref *dref; @@ -760,9 +888,15 @@ dref); key.type = BTRFS_EXTENT_DATA_KEY; key.offset = btrfs_extent_data_ref_offset(leaf, dref); + + if (inum && key.objectid != inum) { + ret = BACKREF_FOUND_SHARED; + break; + } + root = btrfs_extent_data_ref_root(leaf, dref); ret = __add_prelim_ref(prefs, root, &key, 0, 0, - bytenr, count); + bytenr, count, GFP_NOFS); break; } default: @@ -782,12 +916,20 @@ * indirect refs to their parent bytenr. * When roots are found, they're added to the roots list * + * NOTE: This can return values > 0 + * + * If time_seq is set to (u64)-1, it will not search delayed_refs, and behave + * much like trans == NULL case, the difference only lies in it will not + * commit root. + * The special case is for qgroup to search roots in commit_transaction(). + * * FIXME some caching might speed things up */ static int find_parent_nodes(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, u64 time_seq, struct ulist *refs, - struct ulist *roots, const u64 *extent_item_pos) + struct ulist *roots, const u64 *extent_item_pos, + u64 root_objectid, u64 inum) { struct btrfs_key key; struct btrfs_path *path; @@ -795,22 +937,32 @@ struct btrfs_delayed_ref_head *head; int info_level = 0; int ret; - int search_commit_root = (trans == BTRFS_BACKREF_SEARCH_COMMIT_ROOT); struct list_head prefs_delayed; struct list_head prefs; struct __prelim_ref *ref; + struct extent_inode_elem *eie = NULL; + u64 total_refs = 0; INIT_LIST_HEAD(&prefs); INIT_LIST_HEAD(&prefs_delayed); key.objectid = bytenr; - key.type = BTRFS_EXTENT_ITEM_KEY; key.offset = (u64)-1; + if (btrfs_fs_incompat(fs_info, SKINNY_METADATA)) + key.type = BTRFS_METADATA_ITEM_KEY; + else + key.type = BTRFS_EXTENT_ITEM_KEY; path = btrfs_alloc_path(); if (!path) return -ENOMEM; - path->search_commit_root = !!search_commit_root; + if (!trans) { + path->search_commit_root = 1; + path->skip_locking = 1; + } + + if (time_seq == (u64)-1) + path->skip_locking = 1; /* * grab both a lock on the path and a lock on the delayed ref head. @@ -825,7 +977,12 @@ goto out; BUG_ON(ret == 0); - if (trans != BTRFS_BACKREF_SEARCH_COMMIT_ROOT) { +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS + if (trans && likely(trans->type != __TRANS_DUMMY) && + time_seq != (u64)-1) { +#else + if (trans && time_seq != (u64)-1) { +#endif /* * look if there are updates for this ref queued and lock the * head @@ -849,15 +1006,16 @@ btrfs_put_delayed_ref(&head->node); goto again; } + spin_unlock(&delayed_refs->lock); ret = __add_delayed_refs(head, time_seq, - &prefs_delayed); + &prefs_delayed, &total_refs, + inum); mutex_unlock(&head->mutex); - if (ret) { - spin_unlock(&delayed_refs->lock); + if (ret) goto out; - } + } else { + spin_unlock(&delayed_refs->lock); } - spin_unlock(&delayed_refs->lock); } if (path->slots[0]) { @@ -869,13 +1027,15 @@ slot = path->slots[0]; btrfs_item_key_to_cpu(leaf, &key, slot); if (key.objectid == bytenr && - key.type == BTRFS_EXTENT_ITEM_KEY) { + (key.type == BTRFS_EXTENT_ITEM_KEY || + key.type == BTRFS_METADATA_ITEM_KEY)) { ret = __add_inline_refs(fs_info, path, bytenr, - &info_level, &prefs); + &info_level, &prefs, + &total_refs, inum); if (ret) goto out; ret = __add_keyed_refs(fs_info, path, bytenr, - info_level, &prefs); + info_level, &prefs, inum); if (ret) goto out; } @@ -890,8 +1050,9 @@ __merge_refs(&prefs, 1); - ret = __resolve_indirect_refs(fs_info, search_commit_root, time_seq, - &prefs, extent_item_pos); + ret = __resolve_indirect_refs(fs_info, path, time_seq, &prefs, + extent_item_pos, total_refs, + root_objectid); if (ret) goto out; @@ -899,36 +1060,46 @@ while (!list_empty(&prefs)) { ref = list_first_entry(&prefs, struct __prelim_ref, list); - list_del(&ref->list); WARN_ON(ref->count < 0); - if (ref->count && ref->root_id && ref->parent == 0) { + if (roots && ref->count && ref->root_id && ref->parent == 0) { + if (root_objectid && ref->root_id != root_objectid) { + ret = BACKREF_FOUND_SHARED; + goto out; + } + /* no parent == root of tree */ ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS); if (ret < 0) goto out; } if (ref->count && ref->parent) { - struct extent_inode_elem *eie = NULL; - if (extent_item_pos && !ref->inode_list) { - u32 bsz; + if (extent_item_pos && !ref->inode_list && + ref->level == 0) { struct extent_buffer *eb; - bsz = btrfs_level_size(fs_info->extent_root, - info_level); + eb = read_tree_block(fs_info->extent_root, - ref->parent, bsz, 0); - if (!eb || !extent_buffer_uptodate(eb)) { + ref->parent, 0); + if (IS_ERR(eb)) { + ret = PTR_ERR(eb); + goto out; + } else if (!extent_buffer_uptodate(eb)) { free_extent_buffer(eb); ret = -EIO; goto out; } + btrfs_tree_read_lock(eb); + btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); ret = find_extent_in_eb(eb, bytenr, *extent_item_pos, &eie); - ref->inode_list = eie; + btrfs_tree_read_unlock_blocking(eb); free_extent_buffer(eb); + if (ret < 0) + goto out; + ref->inode_list = eie; } - ret = ulist_add_merge(refs, ref->parent, - (uintptr_t)ref->inode_list, - (u64 *)&eie, GFP_NOFS); + ret = ulist_add_merge_ptr(refs, ref->parent, + ref->inode_list, + (void **)&eie, GFP_NOFS); if (ret < 0) goto out; if (!ret && extent_item_pos) { @@ -941,8 +1112,10 @@ eie = eie->next; eie->next = ref->inode_list; } + eie = NULL; } - kfree(ref); + list_del(&ref->list); + kmem_cache_free(btrfs_prelim_ref_cache, ref); } out: @@ -950,15 +1123,16 @@ while (!list_empty(&prefs)) { ref = list_first_entry(&prefs, struct __prelim_ref, list); list_del(&ref->list); - kfree(ref); + kmem_cache_free(btrfs_prelim_ref_cache, ref); } while (!list_empty(&prefs_delayed)) { ref = list_first_entry(&prefs_delayed, struct __prelim_ref, list); list_del(&ref->list); - kfree(ref); + kmem_cache_free(btrfs_prelim_ref_cache, ref); } - + if (ret < 0) + free_inode_elem_list(eie); return ret; } @@ -966,7 +1140,6 @@ { struct ulist_node *node = NULL; struct extent_inode_elem *eie; - struct extent_inode_elem *eie_next; struct ulist_iterator uiter; ULIST_ITER_INIT(&uiter); @@ -974,10 +1147,7 @@ if (!node->aux) continue; eie = (struct extent_inode_elem *)(uintptr_t)node->aux; - for (; eie; eie = eie_next) { - eie_next = eie->next; - kfree(eie); - } + free_inode_elem_list(eie); node->aux = 0; } @@ -997,22 +1167,14 @@ u64 time_seq, struct ulist **leafs, const u64 *extent_item_pos) { - struct ulist *tmp; int ret; - tmp = ulist_alloc(GFP_NOFS); - if (!tmp) - return -ENOMEM; *leafs = ulist_alloc(GFP_NOFS); - if (!*leafs) { - ulist_free(tmp); + if (!*leafs) return -ENOMEM; - } ret = find_parent_nodes(trans, fs_info, bytenr, - time_seq, *leafs, tmp, extent_item_pos); - ulist_free(tmp); - + time_seq, *leafs, NULL, extent_item_pos, 0, 0); if (ret < 0 && ret != -ENOENT) { free_leaf_list(*leafs); return ret; @@ -1034,9 +1196,9 @@ * * returns 0 on success, < 0 on error. */ -int btrfs_find_all_roots(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 bytenr, - u64 time_seq, struct ulist **roots) +static int __btrfs_find_all_roots(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 bytenr, + u64 time_seq, struct ulist **roots) { struct ulist *tmp; struct ulist_node *node = NULL; @@ -1055,7 +1217,7 @@ ULIST_ITER_INIT(&uiter); while (1) { ret = find_parent_nodes(trans, fs_info, bytenr, - time_seq, tmp, *roots, NULL); + time_seq, tmp, *roots, NULL, 0, 0); if (ret < 0 && ret != -ENOENT) { ulist_free(tmp); ulist_free(*roots); @@ -1065,61 +1227,88 @@ if (!node) break; bytenr = node->val; + cond_resched(); } ulist_free(tmp); return 0; } - -static int __inode_info(u64 inum, u64 ioff, u8 key_type, - struct btrfs_root *fs_root, struct btrfs_path *path, - struct btrfs_key *found_key) +int btrfs_find_all_roots(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 bytenr, + u64 time_seq, struct ulist **roots) { int ret; - struct btrfs_key key; - struct extent_buffer *eb; - - key.type = key_type; - key.objectid = inum; - key.offset = ioff; - - ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0); - if (ret < 0) - return ret; - eb = path->nodes[0]; - if (ret && path->slots[0] >= btrfs_header_nritems(eb)) { - ret = btrfs_next_leaf(fs_root, path); - if (ret) - return ret; - eb = path->nodes[0]; - } - - btrfs_item_key_to_cpu(eb, found_key, path->slots[0]); - if (found_key->type != key.type || found_key->objectid != key.objectid) - return 1; - - return 0; + if (!trans) + down_read(&fs_info->commit_root_sem); + ret = __btrfs_find_all_roots(trans, fs_info, bytenr, time_seq, roots); + if (!trans) + up_read(&fs_info->commit_root_sem); + return ret; } -/* - * this makes the path point to (inum INODE_ITEM ioff) +/** + * btrfs_check_shared - tell us whether an extent is shared + * + * @trans: optional trans handle + * + * btrfs_check_shared uses the backref walking code but will short + * circuit as soon as it finds a root or inode that doesn't match the + * one passed in. This provides a significant performance benefit for + * callers (such as fiemap) which want to know whether the extent is + * shared but do not need a ref count. + * + * Return: 0 if extent is not shared, 1 if it is shared, < 0 on error. */ -int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root, - struct btrfs_path *path) +int btrfs_check_shared(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 root_objectid, + u64 inum, u64 bytenr) { - struct btrfs_key key; - return __inode_info(inum, ioff, BTRFS_INODE_ITEM_KEY, fs_root, path, - &key); -} + struct ulist *tmp = NULL; + struct ulist *roots = NULL; + struct ulist_iterator uiter; + struct ulist_node *node; + struct seq_list elem = SEQ_LIST_INIT(elem); + int ret = 0; -static int inode_ref_info(u64 inum, u64 ioff, struct btrfs_root *fs_root, - struct btrfs_path *path, - struct btrfs_key *found_key) -{ - return __inode_info(inum, ioff, BTRFS_INODE_REF_KEY, fs_root, path, - found_key); + tmp = ulist_alloc(GFP_NOFS); + roots = ulist_alloc(GFP_NOFS); + if (!tmp || !roots) { + ulist_free(tmp); + ulist_free(roots); + return -ENOMEM; + } + + if (trans) + btrfs_get_tree_mod_seq(fs_info, &elem); + else + down_read(&fs_info->commit_root_sem); + ULIST_ITER_INIT(&uiter); + while (1) { + ret = find_parent_nodes(trans, fs_info, bytenr, elem.seq, tmp, + roots, NULL, root_objectid, inum); + if (ret == BACKREF_FOUND_SHARED) { + /* this is the only condition under which we return 1 */ + ret = 1; + break; + } + if (ret < 0 && ret != -ENOENT) + break; + ret = 0; + node = ulist_next(tmp, &uiter); + if (!node) + break; + bytenr = node->val; + cond_resched(); + } + if (trans) + btrfs_put_tree_mod_seq(fs_info, &elem); + else + up_read(&fs_info->commit_root_sem); + ulist_free(tmp); + ulist_free(roots); + return ret; } int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid, @@ -1135,7 +1324,7 @@ unsigned long ptr; key.objectid = inode_objectid; - btrfs_set_key_type(&key, BTRFS_INODE_EXTREF_KEY); + key.type = BTRFS_INODE_EXTREF_KEY; key.offset = start_off; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); @@ -1175,7 +1364,7 @@ ret = -ENOENT; if (found_key.objectid != inode_objectid) break; - if (btrfs_key_type(&found_key) != BTRFS_INODE_EXTREF_KEY) + if (found_key.type != BTRFS_INODE_EXTREF_KEY) break; ret = 0; @@ -1232,7 +1421,8 @@ btrfs_tree_read_unlock_blocking(eb); free_extent_buffer(eb); } - ret = inode_ref_info(parent, 0, fs_root, path, &found_key); + ret = btrfs_find_item(fs_root, path, parent, 0, + BTRFS_INODE_REF_KEY, &found_key); if (ret > 0) ret = -ENOENT; if (ret) @@ -1285,29 +1475,38 @@ { int ret; u64 flags; + u64 size = 0; u32 item_size; struct extent_buffer *eb; struct btrfs_extent_item *ei; struct btrfs_key key; - key.type = BTRFS_EXTENT_ITEM_KEY; + if (btrfs_fs_incompat(fs_info, SKINNY_METADATA)) + key.type = BTRFS_METADATA_ITEM_KEY; + else + key.type = BTRFS_EXTENT_ITEM_KEY; key.objectid = logical; key.offset = (u64)-1; ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0); if (ret < 0) return ret; - ret = btrfs_previous_item(fs_info->extent_root, path, - 0, BTRFS_EXTENT_ITEM_KEY); - if (ret < 0) - return ret; + ret = btrfs_previous_extent_item(fs_info->extent_root, path, 0); + if (ret) { + if (ret > 0) + ret = -ENOENT; + return ret; + } btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]); - if (found_key->type != BTRFS_EXTENT_ITEM_KEY || - found_key->objectid > logical || - found_key->objectid + found_key->offset <= logical) { - pr_debug("logical %llu is not within any extent\n", - (unsigned long long)logical); + if (found_key->type == BTRFS_METADATA_ITEM_KEY) + size = fs_info->extent_root->nodesize; + else if (found_key->type == BTRFS_EXTENT_ITEM_KEY) + size = found_key->offset; + + if (found_key->objectid > logical || + found_key->objectid + size <= logical) { + pr_debug("logical %llu is not within any extent\n", logical); return -ENOENT; } @@ -1320,11 +1519,8 @@ pr_debug("logical %llu is at position %llu within the extent (%llu " "EXTENT_ITEM %llu) flags %#llx size %u\n", - (unsigned long long)logical, - (unsigned long long)(logical - found_key->objectid), - (unsigned long long)found_key->objectid, - (unsigned long long)found_key->offset, - (unsigned long long)flags, item_size); + logical, logical - found_key->objectid, found_key->objectid, + found_key->offset, flags, item_size); WARN_ON(!flags_ret); if (flags_ret) { @@ -1405,7 +1601,6 @@ { int ret; int type; - struct btrfs_tree_block_info *info; struct btrfs_extent_inline_ref *eiref; if (*ptr == (unsigned long)-1) @@ -1426,9 +1621,17 @@ } /* we can treat both ref types equally here */ - info = (struct btrfs_tree_block_info *)(ei + 1); *out_root = btrfs_extent_inline_ref_offset(eb, eiref); - *out_level = btrfs_tree_block_level(eb, info); + + if (key->type == BTRFS_EXTENT_ITEM_KEY) { + struct btrfs_tree_block_info *info; + + info = (struct btrfs_tree_block_info *)(ei + 1); + *out_level = btrfs_tree_block_level(eb, info); + } else { + ASSERT(key->type == BTRFS_METADATA_ITEM_KEY); + *out_level = (u8)key->offset; + } if (ret == 1) *ptr = (unsigned long)-1; @@ -1469,25 +1672,25 @@ iterate_extent_inodes_t *iterate, void *ctx) { int ret; - struct btrfs_trans_handle *trans; + struct btrfs_trans_handle *trans = NULL; struct ulist *refs = NULL; struct ulist *roots = NULL; struct ulist_node *ref_node = NULL; struct ulist_node *root_node = NULL; - struct seq_list tree_mod_seq_elem = {}; + struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem); struct ulist_iterator ref_uiter; struct ulist_iterator root_uiter; pr_debug("resolving all inodes for extent %llu\n", extent_item_objectid); - if (search_commit_root) { - trans = BTRFS_BACKREF_SEARCH_COMMIT_ROOT; - } else { + if (!search_commit_root) { trans = btrfs_join_transaction(fs_info->extent_root); if (IS_ERR(trans)) return PTR_ERR(trans); btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem); + } else { + down_read(&fs_info->commit_root_sem); } ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid, @@ -1498,15 +1701,15 @@ ULIST_ITER_INIT(&ref_uiter); while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) { - ret = btrfs_find_all_roots(trans, fs_info, ref_node->val, - tree_mod_seq_elem.seq, &roots); + ret = __btrfs_find_all_roots(trans, fs_info, ref_node->val, + tree_mod_seq_elem.seq, &roots); if (ret) break; ULIST_ITER_INIT(&root_uiter); while (!ret && (root_node = ulist_next(roots, &root_uiter))) { pr_debug("root %llu references leaf %llu, data list " "%#llx\n", root_node->val, ref_node->val, - (long long)ref_node->aux); + ref_node->aux); ret = iterate_leaf_refs((struct extent_inode_elem *) (uintptr_t)ref_node->aux, root_node->val, @@ -1521,6 +1724,8 @@ if (!search_commit_root) { btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); btrfs_end_transaction(trans, fs_info->extent_root); + } else { + up_read(&fs_info->commit_root_sem); } return ret; @@ -1571,9 +1776,10 @@ struct btrfs_key found_key; while (!ret) { - path->leave_spinning = 1; - ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path, - &found_key); + ret = btrfs_find_item(fs_root, path, inum, + parent ? parent + 1 : 0, BTRFS_INODE_REF_KEY, + &found_key); + if (ret < 0) break; if (ret) { @@ -1584,23 +1790,25 @@ parent = found_key.offset; slot = path->slots[0]; - eb = path->nodes[0]; - /* make sure we can use eb after releasing the path */ - atomic_inc(&eb->refs); + eb = btrfs_clone_extent_buffer(path->nodes[0]); + if (!eb) { + ret = -ENOMEM; + break; + } + extent_buffer_get(eb); btrfs_tree_read_lock(eb); btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); btrfs_release_path(path); - item = btrfs_item_nr(eb, slot); + item = btrfs_item_nr(slot); iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref); for (cur = 0; cur < btrfs_item_size(eb, item); cur += len) { name_len = btrfs_inode_ref_name_len(eb, iref); /* path must be released before calling iterate()! */ pr_debug("following ref at offset %u for inode %llu in " - "tree %llu\n", cur, - (unsigned long long)found_key.objectid, - (unsigned long long)fs_root->objectid); + "tree %llu\n", cur, found_key.objectid, + fs_root->objectid); ret = iterate(parent, name_len, (unsigned long)(iref + 1), eb, ctx); if (ret) @@ -1628,7 +1836,6 @@ int found = 0; struct extent_buffer *eb; struct btrfs_inode_extref *extref; - struct extent_buffer *leaf; u32 item_size; u32 cur_offset; unsigned long ptr; @@ -1645,17 +1852,19 @@ ++found; slot = path->slots[0]; - eb = path->nodes[0]; - /* make sure we can use eb after releasing the path */ - atomic_inc(&eb->refs); + eb = btrfs_clone_extent_buffer(path->nodes[0]); + if (!eb) { + ret = -ENOMEM; + break; + } + extent_buffer_get(eb); btrfs_tree_read_lock(eb); btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); btrfs_release_path(path); - leaf = path->nodes[0]; - item_size = btrfs_item_size_nr(leaf, path->slots[0]); - ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); + item_size = btrfs_item_size_nr(eb, slot); + ptr = btrfs_item_ptr_offset(eb, slot); cur_offset = 0; while (cur_offset < item_size) { @@ -1669,7 +1878,7 @@ if (ret) break; - cur_offset += btrfs_inode_extref_name_len(leaf, extref); + cur_offset += btrfs_inode_extref_name_len(eb, extref); cur_offset += sizeof(*extref); } btrfs_tree_read_unlock_blocking(eb);