--- zzzz-none-000/linux-3.10.107/fs/btrfs/qgroup.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/fs/btrfs/qgroup.c 2021-02-04 17:41:59.000000000 +0000 @@ -32,6 +32,8 @@ #include "ulist.h" #include "backref.h" #include "extent_io.h" +#include "qgroup.h" + /* TODO XXX FIXME * - subvol delete -> delete when ref goes to 0? delete limits also? @@ -83,11 +85,42 @@ /* * temp variables for accounting operations + * Refer to qgroup_shared_accouting() for details. */ - u64 tag; - u64 refcnt; + u64 old_refcnt; + u64 new_refcnt; }; +static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq, + int mod) +{ + if (qg->old_refcnt < seq) + qg->old_refcnt = seq; + qg->old_refcnt += mod; +} + +static void btrfs_qgroup_update_new_refcnt(struct btrfs_qgroup *qg, u64 seq, + int mod) +{ + if (qg->new_refcnt < seq) + qg->new_refcnt = seq; + qg->new_refcnt += mod; +} + +static inline u64 btrfs_qgroup_get_old_refcnt(struct btrfs_qgroup *qg, u64 seq) +{ + if (qg->old_refcnt < seq) + return 0; + return qg->old_refcnt - seq; +} + +static inline u64 btrfs_qgroup_get_new_refcnt(struct btrfs_qgroup *qg, u64 seq) +{ + if (qg->new_refcnt < seq) + return 0; + return qg->new_refcnt - seq; +} + /* * glue structure to represent the relations between qgroups. */ @@ -98,13 +131,13 @@ struct btrfs_qgroup *member; }; -struct qgroup_rescan { - struct btrfs_work work; - struct btrfs_fs_info *fs_info; -}; +#define ptr_to_u64(x) ((u64)(uintptr_t)x) +#define u64_to_ptr(x) ((struct btrfs_qgroup *)(uintptr_t)x) -static void qgroup_rescan_start(struct btrfs_fs_info *fs_info, - struct qgroup_rescan *qscan); +static int +qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, + int init_flags); +static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info); /* must be called with qgroup_ioctl_lock held */ static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info, @@ -160,18 +193,11 @@ return qgroup; } -/* must be called with qgroup_lock held */ -static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid) +static void __del_qgroup_rb(struct btrfs_qgroup *qgroup) { - struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid); struct btrfs_qgroup_list *list; - if (!qgroup) - return -ENOENT; - - rb_erase(&qgroup->node, &fs_info->qgroup_tree); list_del(&qgroup->dirty); - while (!list_empty(&qgroup->groups)) { list = list_first_entry(&qgroup->groups, struct btrfs_qgroup_list, next_group); @@ -188,7 +214,18 @@ kfree(list); } kfree(qgroup); +} +/* must be called with qgroup_lock held */ +static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid) +{ + struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid); + + if (!qgroup) + return -ENOENT; + + rb_erase(&qgroup->node, &fs_info->qgroup_tree); + __del_qgroup_rb(qgroup); return 0; } @@ -241,6 +278,21 @@ return -ENOENT; } +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS +int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid, + u64 rfer, u64 excl) +{ + struct btrfs_qgroup *qgroup; + + qgroup = find_qgroup_rb(fs_info, qgroupid); + if (!qgroup) + return -EINVAL; + if (qgroup->rfer != rfer || qgroup->excl != excl) + return -EINVAL; + return 0; +} +#endif + /* * The full config is read in one go, only called from open_ctree() * It doesn't use any locking, as at this point we're still single-threaded @@ -255,10 +307,17 @@ int slot; int ret = 0; u64 flags = 0; + u64 rescan_progress = 0; if (!fs_info->quota_enabled) return 0; + fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS); + if 
(!fs_info->qgroup_ulist) { + ret = -ENOMEM; + goto out; + } + path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; @@ -293,33 +352,20 @@ if (btrfs_qgroup_status_version(l, ptr) != BTRFS_QGROUP_STATUS_VERSION) { - printk(KERN_ERR - "btrfs: old qgroup version, quota disabled\n"); + btrfs_err(fs_info, + "old qgroup version, quota disabled"); goto out; } if (btrfs_qgroup_status_generation(l, ptr) != fs_info->generation) { flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; - printk(KERN_ERR - "btrfs: qgroup generation mismatch, " - "marked as inconsistent\n"); + btrfs_err(fs_info, + "qgroup generation mismatch, " + "marked as inconsistent"); } fs_info->qgroup_flags = btrfs_qgroup_status_flags(l, ptr); - fs_info->qgroup_rescan_progress.objectid = - btrfs_qgroup_status_rescan(l, ptr); - if (fs_info->qgroup_flags & - BTRFS_QGROUP_STATUS_FLAG_RESCAN) { - struct qgroup_rescan *qscan = - kmalloc(sizeof(*qscan), GFP_NOFS); - if (!qscan) { - ret = -ENOMEM; - goto out; - } - fs_info->qgroup_rescan_progress.type = 0; - fs_info->qgroup_rescan_progress.offset = 0; - qgroup_rescan_start(fs_info, qscan); - } + rescan_progress = btrfs_qgroup_status_rescan(l, ptr); goto next1; } @@ -330,7 +376,7 @@ qgroup = find_qgroup_rb(fs_info, found_key.offset); if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) || (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) { - printk(KERN_ERR "btrfs: inconsitent qgroup config\n"); + btrfs_err(fs_info, "inconsistent qgroup config"); flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; } if (!qgroup) { @@ -401,10 +447,9 @@ ret = add_relation_rb(fs_info, found_key.objectid, found_key.offset); if (ret == -ENOENT) { - printk(KERN_WARNING - "btrfs: orphan qgroup relation 0x%llx->0x%llx\n", - (unsigned long long)found_key.objectid, - (unsigned long long)found_key.offset); + btrfs_warn(fs_info, + "orphan qgroup relation 0x%llx->0x%llx", + found_key.objectid, found_key.offset); ret = 0; /* ignore the error */ } if (ret) @@ -421,45 +466,44 @@ if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) { fs_info->quota_enabled = 0; fs_info->pending_quota_state = 0; + } else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN && + ret >= 0) { + ret = qgroup_rescan_init(fs_info, rescan_progress, 0); } btrfs_free_path(path); + if (ret < 0) { + ulist_free(fs_info->qgroup_ulist); + fs_info->qgroup_ulist = NULL; + fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; + } + return ret < 0 ? ret : 0; } /* - * This is only called from close_ctree() or open_ctree(), both in single- - * treaded paths. Clean up the in-memory structures. No locking needed. + * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(); + * the first two are in single-threaded paths. And for the third one, we have set + * quota_root to be NULL with qgroup_lock held before, so it is safe to clean + * up the in-memory structures without qgroup_lock held.
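+ *
+ * As a sketch, the quota-disable caller below does:
+ *
+ *	spin_lock(&fs_info->qgroup_lock);
+ *	quota_root = fs_info->quota_root;
+ *	fs_info->quota_root = NULL;
+ *	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
+ *	spin_unlock(&fs_info->qgroup_lock);
+ *	btrfs_free_qgroup_config(fs_info);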
*/ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info) { struct rb_node *n; struct btrfs_qgroup *qgroup; - struct btrfs_qgroup_list *list; while ((n = rb_first(&fs_info->qgroup_tree))) { qgroup = rb_entry(n, struct btrfs_qgroup, node); rb_erase(n, &fs_info->qgroup_tree); - - while (!list_empty(&qgroup->groups)) { - list = list_first_entry(&qgroup->groups, - struct btrfs_qgroup_list, - next_group); - list_del(&list->next_group); - list_del(&list->next_member); - kfree(list); - } - - while (!list_empty(&qgroup->members)) { - list = list_first_entry(&qgroup->members, - struct btrfs_qgroup_list, - next_member); - list_del(&list->next_group); - list_del(&list->next_member); - kfree(list); - } - kfree(qgroup); + __del_qgroup_rb(qgroup); } + /* + * we call btrfs_free_qgroup_config() when unmounting the + * filesystem and when disabling quota, so we set qgroup_ulist + * to NULL here to avoid a double free. + */ + ulist_free(fs_info->qgroup_ulist); + fs_info->qgroup_ulist = NULL; } static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, @@ -527,6 +571,9 @@ struct extent_buffer *leaf; struct btrfs_key key; + if (btrfs_test_is_dummy_root(quota_root)) + return 0; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -535,9 +582,15 @@ key.type = BTRFS_QGROUP_INFO_KEY; key.offset = qgroupid; + /* + * Avoid a transaction abort by catching -EEXIST here. In that + * case, we proceed by re-initializing the existing structure + * on disk. + */ + ret = btrfs_insert_empty_item(trans, quota_root, path, &key, sizeof(*qgroup_info)); - if (ret) + if (ret && ret != -EEXIST) goto out; leaf = path->nodes[0]; @@ -556,7 +609,7 @@ key.type = BTRFS_QGROUP_LIMIT_KEY; ret = btrfs_insert_empty_item(trans, quota_root, path, &key, sizeof(*qgroup_limit)); - if (ret) + if (ret && ret != -EEXIST) goto out; leaf = path->nodes[0]; @@ -623,9 +676,8 @@ } static int update_qgroup_limit_item(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 qgroupid, - u64 flags, u64 max_rfer, u64 max_excl, - u64 rsv_rfer, u64 rsv_excl) + struct btrfs_root *root, + struct btrfs_qgroup *qgroup) { struct btrfs_path *path; struct btrfs_key key; @@ -636,7 +688,7 @@ key.objectid = 0; key.type = BTRFS_QGROUP_LIMIT_KEY; - key.offset = qgroupid; + key.offset = qgroup->qgroupid; path = btrfs_alloc_path(); if (!path) @@ -651,13 +703,12 @@ l = path->nodes[0]; slot = path->slots[0]; - qgroup_limit = btrfs_item_ptr(l, path->slots[0], - struct btrfs_qgroup_limit_item); - btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags); - btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer); - btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl); - btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, rsv_rfer); - btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, rsv_excl); + qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item); + btrfs_set_qgroup_limit_flags(l, qgroup_limit, qgroup->lim_flags); + btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, qgroup->max_rfer); + btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl); + btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer); + btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl); btrfs_mark_buffer_dirty(l); @@ -677,6 +728,9 @@ int ret; int slot; + if (btrfs_test_is_dummy_root(root)) + return 0; + key.objectid = 0; key.type = BTRFS_QGROUP_INFO_KEY; key.offset = qgroup->qgroupid; @@ -694,8 +748,7 @@ l = path->nodes[0]; slot = path->slots[0]; - qgroup_info = btrfs_item_ptr(l, path->slots[0], - struct btrfs_qgroup_info_item); +
qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item); btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid); btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer); btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr); @@ -819,6 +872,12 @@ goto out; } + fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS); + if (!fs_info->qgroup_ulist) { + ret = -ENOMEM; + goto out; + } + /* * initially create the quota tree */ @@ -916,6 +975,10 @@ kfree(quota_root); } out: + if (ret) { + ulist_free(fs_info->qgroup_ulist); + fs_info->qgroup_ulist = NULL; + } mutex_unlock(&fs_info->qgroup_ioctl_lock); return ret; } @@ -930,18 +993,16 @@ mutex_lock(&fs_info->qgroup_ioctl_lock); if (!fs_info->quota_root) goto out; - spin_lock(&fs_info->qgroup_lock); fs_info->quota_enabled = 0; fs_info->pending_quota_state = 0; + btrfs_qgroup_wait_for_completion(fs_info, false); + spin_lock(&fs_info->qgroup_lock); quota_root = fs_info->quota_root; fs_info->quota_root = NULL; - btrfs_free_qgroup_config(fs_info); + fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; spin_unlock(&fs_info->qgroup_lock); - if (!quota_root) { - ret = -EINVAL; - goto out; - } + btrfs_free_qgroup_config(fs_info); ret = btrfs_clean_quota_tree(trans, quota_root); if (ret) @@ -954,7 +1015,7 @@ list_del(&quota_root->dirty_list); btrfs_tree_lock(quota_root->node); - clean_tree_block(trans, tree_root, quota_root->node); + clean_tree_block(trans, tree_root->fs_info, quota_root->node); btrfs_tree_unlock(quota_root->node); btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1); @@ -973,6 +1034,110 @@ list_add(&qgroup->dirty, &fs_info->dirty_qgroups); } +/* + * The easy accounting: if we are adding/removing the only ref for an extent, + * then this qgroup and all of the parent qgroups get their reference and + * exclusive counts adjusted. + * + * Caller should hold fs_info->qgroup_lock.
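+ *
+ * Illustrative use of the sign convention (num_bytes is whatever extent
+ * size the caller is accounting):
+ *
+ *	__qgroup_excl_accounting(fs_info, tmp, ref_root, num_bytes, 1);
+ *		adds num_bytes to the rfer/excl counters of ref_root's
+ *		qgroup and of every parent qgroup (and drops the same
+ *		amount from ->reserved)
+ *	__qgroup_excl_accounting(fs_info, tmp, ref_root, num_bytes, -1);
+ *		removes those bytes again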
+ */ +static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, + struct ulist *tmp, u64 ref_root, + u64 num_bytes, int sign) +{ + struct btrfs_qgroup *qgroup; + struct btrfs_qgroup_list *glist; + struct ulist_node *unode; + struct ulist_iterator uiter; + int ret = 0; + + qgroup = find_qgroup_rb(fs_info, ref_root); + if (!qgroup) + goto out; + + qgroup->rfer += sign * num_bytes; + qgroup->rfer_cmpr += sign * num_bytes; + + WARN_ON(sign < 0 && qgroup->excl < num_bytes); + qgroup->excl += sign * num_bytes; + qgroup->excl_cmpr += sign * num_bytes; + if (sign > 0) + qgroup->reserved -= num_bytes; + + qgroup_dirty(fs_info, qgroup); + + /* Get all of the parent groups that contain this qgroup */ + list_for_each_entry(glist, &qgroup->groups, next_group) { + ret = ulist_add(tmp, glist->group->qgroupid, + ptr_to_u64(glist->group), GFP_ATOMIC); + if (ret < 0) + goto out; + } + + /* Iterate all of the parents and adjust their reference counts */ + ULIST_ITER_INIT(&uiter); + while ((unode = ulist_next(tmp, &uiter))) { + qgroup = u64_to_ptr(unode->aux); + qgroup->rfer += sign * num_bytes; + qgroup->rfer_cmpr += sign * num_bytes; + WARN_ON(sign < 0 && qgroup->excl < num_bytes); + qgroup->excl += sign * num_bytes; + if (sign > 0) + qgroup->reserved -= num_bytes; + qgroup->excl_cmpr += sign * num_bytes; + qgroup_dirty(fs_info, qgroup); + + /* Add any parents of the parents */ + list_for_each_entry(glist, &qgroup->groups, next_group) { + ret = ulist_add(tmp, glist->group->qgroupid, + ptr_to_u64(glist->group), GFP_ATOMIC); + if (ret < 0) + goto out; + } + } + ret = 0; +out: + return ret; +} + + +/* + * Quick path for updating qgroup with only excl refs. + * + * In that case, just updating all parents will be enough. + * Otherwise we need to do a full rescan. + * Caller should also hold fs_info->qgroup_lock. + * + * Return 0 for a quick update, return >0 when a full rescan is needed, + * in which case the INCONSISTENT flag is set. + * Return < 0 for other errors.
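+ *
+ * Illustrative caller pattern (mirroring btrfs_add_qgroup_relation()
+ * below):
+ *
+ *	spin_lock(&fs_info->qgroup_lock);
+ *	ret = quick_update_accounting(fs_info, tmp, src, dst, 1);
+ *	spin_unlock(&fs_info->qgroup_lock);
+ *
+ * where a positive return tells the caller that the counters are now
+ * marked inconsistent and a rescan is required for exact numbers.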
+ */ +static int quick_update_accounting(struct btrfs_fs_info *fs_info, + struct ulist *tmp, u64 src, u64 dst, + int sign) +{ + struct btrfs_qgroup *qgroup; + int ret = 1; + int err = 0; + + qgroup = find_qgroup_rb(fs_info, src); + if (!qgroup) + goto out; + if (qgroup->excl == qgroup->rfer) { + ret = 0; + err = __qgroup_excl_accounting(fs_info, tmp, dst, + qgroup->excl, sign); + if (err < 0) { + ret = err; + goto out; + } + } +out: + if (ret) + fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; + return ret; +} + int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 src, u64 dst) { @@ -980,8 +1145,17 @@ struct btrfs_qgroup *parent; struct btrfs_qgroup *member; struct btrfs_qgroup_list *list; + struct ulist *tmp; int ret = 0; + /* Check the level of src and dst first */ + if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst)) + return -EINVAL; + + tmp = ulist_alloc(GFP_NOFS); + if (!tmp) + return -ENOMEM; + mutex_lock(&fs_info->qgroup_ioctl_lock); quota_root = fs_info->quota_root; if (!quota_root) { @@ -1015,23 +1189,33 @@ spin_lock(&fs_info->qgroup_lock); ret = add_relation_rb(quota_root->fs_info, src, dst); + if (ret < 0) { + spin_unlock(&fs_info->qgroup_lock); + goto out; + } + ret = quick_update_accounting(fs_info, tmp, src, dst, 1); spin_unlock(&fs_info->qgroup_lock); out: mutex_unlock(&fs_info->qgroup_ioctl_lock); + ulist_free(tmp); return ret; } -int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, +int __del_qgroup_relation(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 src, u64 dst) { struct btrfs_root *quota_root; struct btrfs_qgroup *parent; struct btrfs_qgroup *member; struct btrfs_qgroup_list *list; + struct ulist *tmp; int ret = 0; int err; - mutex_lock(&fs_info->qgroup_ioctl_lock); + tmp = ulist_alloc(GFP_NOFS); + if (!tmp) + return -ENOMEM; + quota_root = fs_info->quota_root; if (!quota_root) { ret = -EINVAL; @@ -1060,14 +1244,27 @@ spin_lock(&fs_info->qgroup_lock); del_relation_rb(fs_info, src, dst); + ret = quick_update_accounting(fs_info, tmp, src, dst, -1); spin_unlock(&fs_info->qgroup_lock); out: + ulist_free(tmp); + return ret; +} + +int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 src, u64 dst) +{ + int ret = 0; + + mutex_lock(&fs_info->qgroup_ioctl_lock); + ret = __del_qgroup_relation(trans, fs_info, src, dst); mutex_unlock(&fs_info->qgroup_ioctl_lock); + return ret; } int btrfs_create_qgroup(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 qgroupid, char *name) + struct btrfs_fs_info *fs_info, u64 qgroupid) { struct btrfs_root *quota_root; struct btrfs_qgroup *qgroup; @@ -1105,6 +1302,7 @@ { struct btrfs_root *quota_root; struct btrfs_qgroup *qgroup; + struct btrfs_qgroup_list *list; int ret = 0; mutex_lock(&fs_info->qgroup_ioctl_lock); @@ -1119,15 +1317,24 @@ ret = -ENOENT; goto out; } else { - /* check if there are no relations to this qgroup */ - if (!list_empty(&qgroup->groups) || - !list_empty(&qgroup->members)) { + /* check if there are no children of this qgroup */ + if (!list_empty(&qgroup->members)) { ret = -EBUSY; goto out; } } ret = del_qgroup_item(trans, quota_root, qgroupid); + while (!list_empty(&qgroup->groups)) { + list = list_first_entry(&qgroup->groups, + struct btrfs_qgroup_list, next_group); + ret = __del_qgroup_relation(trans, fs_info, + qgroupid, + list->group->qgroupid); + if (ret) + goto out; + } + spin_lock(&fs_info->qgroup_lock); del_qgroup_rb(quota_root->fs_info, qgroupid); 
spin_unlock(&fs_info->qgroup_lock); @@ -1143,6 +1350,11 @@ struct btrfs_root *quota_root; struct btrfs_qgroup *qgroup; int ret = 0; + /* Sometimes we would want to clear the limit on this qgroup. + * To meet this requirement, we treat the -1 as a special value + * which tell kernel to clear the limit on this qgroup. + */ + const u64 CLEAR_VALUE = -1; mutex_lock(&fs_info->qgroup_ioctl_lock); quota_root = fs_info->quota_root; @@ -1156,62 +1368,137 @@ ret = -ENOENT; goto out; } - ret = update_qgroup_limit_item(trans, quota_root, qgroupid, - limit->flags, limit->max_rfer, - limit->max_excl, limit->rsv_rfer, - limit->rsv_excl); + + spin_lock(&fs_info->qgroup_lock); + if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) { + if (limit->max_rfer == CLEAR_VALUE) { + qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER; + limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER; + qgroup->max_rfer = 0; + } else { + qgroup->max_rfer = limit->max_rfer; + } + } + if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) { + if (limit->max_excl == CLEAR_VALUE) { + qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL; + limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL; + qgroup->max_excl = 0; + } else { + qgroup->max_excl = limit->max_excl; + } + } + if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) { + if (limit->rsv_rfer == CLEAR_VALUE) { + qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER; + limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER; + qgroup->rsv_rfer = 0; + } else { + qgroup->rsv_rfer = limit->rsv_rfer; + } + } + if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) { + if (limit->rsv_excl == CLEAR_VALUE) { + qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL; + limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL; + qgroup->rsv_excl = 0; + } else { + qgroup->rsv_excl = limit->rsv_excl; + } + } + qgroup->lim_flags |= limit->flags; + + spin_unlock(&fs_info->qgroup_lock); + + ret = update_qgroup_limit_item(trans, quota_root, qgroup); if (ret) { fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; - printk(KERN_INFO "unable to update quota limit for %llu\n", - (unsigned long long)qgroupid); + btrfs_info(fs_info, "unable to update quota limit for %llu", + qgroupid); } - spin_lock(&fs_info->qgroup_lock); - qgroup->lim_flags = limit->flags; - qgroup->max_rfer = limit->max_rfer; - qgroup->max_excl = limit->max_excl; - qgroup->rsv_rfer = limit->rsv_rfer; - qgroup->rsv_excl = limit->rsv_excl; - spin_unlock(&fs_info->qgroup_lock); out: mutex_unlock(&fs_info->qgroup_ioctl_lock); return ret; } -/* - * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts - * the modification into a list that's later used by btrfs_end_transaction to - * pass the recorded modifications on to btrfs_qgroup_account_ref. 
- */ -int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans, - struct btrfs_delayed_ref_node *node, - struct btrfs_delayed_extent_op *extent_op) -{ - struct qgroup_update *u; - - BUG_ON(!trans->delayed_ref_elem.seq); - u = kmalloc(sizeof(*u), GFP_NOFS); - if (!u) - return -ENOMEM; +int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) +{ + struct btrfs_qgroup_extent_record *record; + struct btrfs_delayed_ref_root *delayed_refs; + struct rb_node *node; + u64 qgroup_to_skip; + int ret = 0; - u->node = node; - u->extent_op = extent_op; - list_add_tail(&u->list, &trans->qgroup_ref_list); + delayed_refs = &trans->transaction->delayed_refs; + qgroup_to_skip = delayed_refs->qgroup_to_skip; - return 0; + /* + * No need to take the lock, since this function will only be called in + * btrfs_commit_transaction(). + */ + node = rb_first(&delayed_refs->dirty_extent_root); + while (node) { + record = rb_entry(node, struct btrfs_qgroup_extent_record, + node); + ret = btrfs_find_all_roots(NULL, fs_info, record->bytenr, 0, + &record->old_roots); + if (ret < 0) + break; + if (qgroup_to_skip) + ulist_del(record->old_roots, qgroup_to_skip, 0); + node = rb_next(node); + } + return ret; +} + +struct btrfs_qgroup_extent_record +*btrfs_qgroup_insert_dirty_extent(struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_qgroup_extent_record *record) +{ + struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node; + struct rb_node *parent_node = NULL; + struct btrfs_qgroup_extent_record *entry; + u64 bytenr = record->bytenr; + + assert_spin_locked(&delayed_refs->lock); + + while (*p) { + parent_node = *p; + entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record, + node); + if (bytenr < entry->bytenr) + p = &(*p)->rb_left; + else if (bytenr > entry->bytenr) + p = &(*p)->rb_right; + else + return entry; + } + + rb_link_node(&record->node, parent_node, p); + rb_insert_color(&record->node, &delayed_refs->dirty_extent_root); + return NULL; } -static int qgroup_account_ref_step1(struct btrfs_fs_info *fs_info, - struct ulist *roots, struct ulist *tmp, - u64 seq) +#define UPDATE_NEW 0 +#define UPDATE_OLD 1 +/* + * Walk all of the roots that point to the bytenr and adjust their refcnts.
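+ *
+ * Typical usage, as in btrfs_qgroup_account_extent() below (seq is
+ * fs_info->qgroup_seq, sampled under qgroup_lock):
+ *
+ *	qgroup_update_refcnt(fs_info, old_roots, tmp, qgroups, seq, UPDATE_OLD);
+ *	qgroup_update_refcnt(fs_info, new_roots, tmp, qgroups, seq, UPDATE_NEW);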
+ */ +static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info, + struct ulist *roots, struct ulist *tmp, + struct ulist *qgroups, u64 seq, int update_old) { struct ulist_node *unode; struct ulist_iterator uiter; struct ulist_node *tmp_unode; struct ulist_iterator tmp_uiter; struct btrfs_qgroup *qg; - int ret; + int ret = 0; + if (!roots) + return 0; ULIST_ITER_INIT(&uiter); while ((unode = ulist_next(roots, &uiter))) { qg = find_qgroup_rb(fs_info, unode->val); @@ -1219,269 +1506,278 @@ continue; ulist_reinit(tmp); - /* XXX id not needed */ - ret = ulist_add(tmp, qg->qgroupid, - (u64)(uintptr_t)qg, GFP_ATOMIC); + ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg), + GFP_ATOMIC); + if (ret < 0) + return ret; + ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg), GFP_ATOMIC); if (ret < 0) return ret; ULIST_ITER_INIT(&tmp_uiter); while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { struct btrfs_qgroup_list *glist; - qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux; - if (qg->refcnt < seq) - qg->refcnt = seq + 1; + qg = u64_to_ptr(tmp_unode->aux); + if (update_old) + btrfs_qgroup_update_old_refcnt(qg, seq, 1); else - ++qg->refcnt; - + btrfs_qgroup_update_new_refcnt(qg, seq, 1); list_for_each_entry(glist, &qg->groups, next_group) { + ret = ulist_add(qgroups, glist->group->qgroupid, + ptr_to_u64(glist->group), + GFP_ATOMIC); + if (ret < 0) + return ret; ret = ulist_add(tmp, glist->group->qgroupid, - (u64)(uintptr_t)glist->group, + ptr_to_u64(glist->group), GFP_ATOMIC); if (ret < 0) return ret; } } } - return 0; } -static int qgroup_account_ref_step2(struct btrfs_fs_info *fs_info, - struct ulist *roots, struct ulist *tmp, - u64 seq, int sgn, u64 num_bytes, - struct btrfs_qgroup *qgroup) +/* + * Update qgroup rfer/excl counters. + * Rfer update is easy, the code can explain itself. + * + * Excl update is tricky, the update is split into 2 parts. + * Part 1: Possible exclusive <-> sharing detection: + * | A | !A | + * ------------------------------------- + * B | * | - | + * ------------------------------------- + * !B | + | ** | + * ------------------------------------- + * + * Conditions: + * A: cur_old_roots < nr_old_roots (not exclusive before) + * !A: cur_old_roots == nr_old_roots (possible exclusive before) + * B: cur_new_roots < nr_new_roots (not exclusive now) + * !B: cur_new_roots == nr_new_roots (possible exclusive now) + * + * Results: + * +: Possible sharing -> exclusive -: Possible exclusive -> sharing + * *: Definitely not changed. **: Possible unchanged. + * + * For the !A and !B conditions, the exception is the cur_old/new_roots == 0 case. + * + * To make the logic clear, we first use conditions A and B to split the + * combinations into 4 results. + * + * Then, for results "+" and "-", check the old/new_roots == 0 case, as in those + * cases only one variant may be 0. + * + * Lastly, check result **, since there are 2 variants that may be 0, split them + * again (2x2). + * But this time we don't need to consider other things, the code and logic + * are easy to understand now.
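+ *
+ * Worked example with made-up numbers: an extent goes from 1 root to 2
+ * roots, so nr_old_roots == 1 and nr_new_roots == 2. For the qgroup of
+ * the original root, cur_old_count == 1 == nr_old_roots (!A) while
+ * cur_new_count == 1 < nr_new_roots (B), which is the "-" case: the
+ * bytes stop being exclusive and excl is decreased by num_bytes. rfer
+ * stays untouched because both counts are non-zero, i.e. the qgroup
+ * still references the extent.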
+ */ +static int qgroup_update_counters(struct btrfs_fs_info *fs_info, + struct ulist *qgroups, + u64 nr_old_roots, + u64 nr_new_roots, + u64 num_bytes, u64 seq) { struct ulist_node *unode; struct ulist_iterator uiter; struct btrfs_qgroup *qg; - struct btrfs_qgroup_list *glist; - int ret; - - ulist_reinit(tmp); - ret = ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC); - if (ret < 0) - return ret; + u64 cur_new_count, cur_old_count; ULIST_ITER_INIT(&uiter); - while ((unode = ulist_next(tmp, &uiter))) { - qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; - if (qg->refcnt < seq) { - /* not visited by step 1 */ - qg->rfer += sgn * num_bytes; - qg->rfer_cmpr += sgn * num_bytes; - if (roots->nnodes == 0) { - qg->excl += sgn * num_bytes; - qg->excl_cmpr += sgn * num_bytes; - } - qgroup_dirty(fs_info, qg); - } - WARN_ON(qg->tag >= seq); - qg->tag = seq; + while ((unode = ulist_next(qgroups, &uiter))) { + bool dirty = false; - list_for_each_entry(glist, &qg->groups, next_group) { - ret = ulist_add(tmp, glist->group->qgroupid, - (uintptr_t)glist->group, GFP_ATOMIC); - if (ret < 0) - return ret; + qg = u64_to_ptr(unode->aux); + cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq); + cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq); + + /* Rfer update part */ + if (cur_old_count == 0 && cur_new_count > 0) { + qg->rfer += num_bytes; + qg->rfer_cmpr += num_bytes; + dirty = true; + } + if (cur_old_count > 0 && cur_new_count == 0) { + qg->rfer -= num_bytes; + qg->rfer_cmpr -= num_bytes; + dirty = true; + } + + /* Excl update part */ + /* Exclusive/none -> shared case */ + if (cur_old_count == nr_old_roots && + cur_new_count < nr_new_roots) { + /* Exclusive -> shared */ + if (cur_old_count != 0) { + qg->excl -= num_bytes; + qg->excl_cmpr -= num_bytes; + dirty = true; + } } - } - - return 0; -} - -static int qgroup_account_ref_step3(struct btrfs_fs_info *fs_info, - struct ulist *roots, struct ulist *tmp, - u64 seq, int sgn, u64 num_bytes) -{ - struct ulist_node *unode; - struct ulist_iterator uiter; - struct btrfs_qgroup *qg; - struct ulist_node *tmp_unode; - struct ulist_iterator tmp_uiter; - int ret; - - ULIST_ITER_INIT(&uiter); - while ((unode = ulist_next(roots, &uiter))) { - qg = find_qgroup_rb(fs_info, unode->val); - if (!qg) - continue; - - ulist_reinit(tmp); - ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC); - if (ret < 0) - return ret; - - ULIST_ITER_INIT(&tmp_uiter); - while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { - struct btrfs_qgroup_list *glist; - qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux; - if (qg->tag == seq) - continue; - - if (qg->refcnt - seq == roots->nnodes) { - qg->excl -= sgn * num_bytes; - qg->excl_cmpr -= sgn * num_bytes; - qgroup_dirty(fs_info, qg); + /* Shared -> exclusive/none case */ + if (cur_old_count < nr_old_roots && + cur_new_count == nr_new_roots) { + /* Shared->exclusive */ + if (cur_new_count != 0) { + qg->excl += num_bytes; + qg->excl_cmpr += num_bytes; + dirty = true; } + } - list_for_each_entry(glist, &qg->groups, next_group) { - ret = ulist_add(tmp, glist->group->qgroupid, - (uintptr_t)glist->group, - GFP_ATOMIC); - if (ret < 0) - return ret; + /* Exclusive/none -> exclusive/none case */ + if (cur_old_count == nr_old_roots && + cur_new_count == nr_new_roots) { + if (cur_old_count == 0) { + /* None -> exclusive/none */ + + if (cur_new_count != 0) { + /* None -> exclusive */ + qg->excl += num_bytes; + qg->excl_cmpr += num_bytes; + dirty = true; + } + /* None -> none, nothing changed */ + } else { + /* Exclusive -> 
exclusive/none */ + + if (cur_new_count == 0) { + /* Exclusive -> none */ + qg->excl -= num_bytes; + qg->excl_cmpr -= num_bytes; + dirty = true; + } + /* Exclusive -> exclusive, nothing changed */ } } - } + if (dirty) + qgroup_dirty(fs_info, qg); + } return 0; } -/* - * btrfs_qgroup_account_ref is called for every ref that is added to or deleted - * from the fs. First, all roots referencing the extent are searched, and - * then the space is accounted accordingly to the different roots. The - * accounting algorithm works in 3 steps documented inline. - */ -int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, - struct btrfs_delayed_ref_node *node, - struct btrfs_delayed_extent_op *extent_op) +int +btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + u64 bytenr, u64 num_bytes, + struct ulist *old_roots, struct ulist *new_roots) { - struct btrfs_key ins; - struct btrfs_root *quota_root; - u64 ref_root; - struct btrfs_qgroup *qgroup; - struct ulist *roots = NULL; + struct ulist *qgroups = NULL; struct ulist *tmp = NULL; u64 seq; + u64 nr_new_roots = 0; + u64 nr_old_roots = 0; int ret = 0; - int sgn; - if (!fs_info->quota_enabled) - return 0; + if (new_roots) + nr_new_roots = new_roots->nnodes; + if (old_roots) + nr_old_roots = old_roots->nnodes; + if (!fs_info->quota_enabled) + goto out_free; BUG_ON(!fs_info->quota_root); - ins.objectid = node->bytenr; - ins.offset = node->num_bytes; - ins.type = BTRFS_EXTENT_ITEM_KEY; - - if (node->type == BTRFS_TREE_BLOCK_REF_KEY || - node->type == BTRFS_SHARED_BLOCK_REF_KEY) { - struct btrfs_delayed_tree_ref *ref; - ref = btrfs_delayed_node_to_tree_ref(node); - ref_root = ref->root; - } else if (node->type == BTRFS_EXTENT_DATA_REF_KEY || - node->type == BTRFS_SHARED_DATA_REF_KEY) { - struct btrfs_delayed_data_ref *ref; - ref = btrfs_delayed_node_to_data_ref(node); - ref_root = ref->root; - } else { - BUG(); - } - - if (!is_fstree(ref_root)) { - /* - * non-fs-trees are not being accounted - */ - return 0; + qgroups = ulist_alloc(GFP_NOFS); + if (!qgroups) { + ret = -ENOMEM; + goto out_free; } - - switch (node->action) { - case BTRFS_ADD_DELAYED_REF: - case BTRFS_ADD_DELAYED_EXTENT: - sgn = 1; - seq = btrfs_tree_mod_seq_prev(node->seq); - break; - case BTRFS_DROP_DELAYED_REF: - sgn = -1; - seq = node->seq; - break; - case BTRFS_UPDATE_DELAYED_HEAD: - return 0; - default: - BUG(); + tmp = ulist_alloc(GFP_NOFS); + if (!tmp) { + ret = -ENOMEM; + goto out_free; } mutex_lock(&fs_info->qgroup_rescan_lock); if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { - if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) { + if (fs_info->qgroup_rescan_progress.objectid <= bytenr) { mutex_unlock(&fs_info->qgroup_rescan_lock); - return 0; + ret = 0; + goto out_free; } } mutex_unlock(&fs_info->qgroup_rescan_lock); - /* - * the delayed ref sequence number we pass depends on the direction of - * the operation. for add operations, we pass - * tree_mod_log_prev_seq(node->seq) to skip - * the delayed ref's current sequence number, because we need the state - * of the tree before the add operation. for delete operations, we pass - * (node->seq) to include the delayed ref's current sequence number, - * because we need the state of the tree after the delete operation. 
- */ - ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, seq, &roots); - if (ret < 0) - return ret; - - mutex_lock(&fs_info->qgroup_rescan_lock); spin_lock(&fs_info->qgroup_lock); - if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { - if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) { - ret = 0; - goto unlock; - } - } - - quota_root = fs_info->quota_root; - if (!quota_root) - goto unlock; - - qgroup = find_qgroup_rb(fs_info, ref_root); - if (!qgroup) - goto unlock; - - /* - * step 1: for each old ref, visit all nodes once and inc refcnt - */ - tmp = ulist_alloc(GFP_ATOMIC); - if (!tmp) { - ret = -ENOMEM; - goto unlock; - } seq = fs_info->qgroup_seq; - fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */ - ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq); - if (ret) - goto unlock; + /* Update old refcnts using old_roots */ + ret = qgroup_update_refcnt(fs_info, old_roots, tmp, qgroups, seq, + UPDATE_OLD); + if (ret < 0) + goto out; - /* - * step 2: walk from the new root - */ - ret = qgroup_account_ref_step2(fs_info, roots, tmp, seq, sgn, - node->num_bytes, qgroup); - if (ret) - goto unlock; + /* Update new refcnts using new_roots */ + ret = qgroup_update_refcnt(fs_info, new_roots, tmp, qgroups, seq, + UPDATE_NEW); + if (ret < 0) + goto out; + + qgroup_update_counters(fs_info, qgroups, nr_old_roots, nr_new_roots, + num_bytes, seq); /* - * step 3: walk again from old refs + * Bump qgroup_seq to avoid seq overlap */ - ret = qgroup_account_ref_step3(fs_info, roots, tmp, seq, sgn, - node->num_bytes); - if (ret) - goto unlock; - -unlock: + fs_info->qgroup_seq += max(nr_old_roots, nr_new_roots) + 1; +out: spin_unlock(&fs_info->qgroup_lock); - mutex_unlock(&fs_info->qgroup_rescan_lock); - ulist_free(roots); +out_free: ulist_free(tmp); + ulist_free(qgroups); + ulist_free(old_roots); + ulist_free(new_roots); + return ret; +} +int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) +{ + struct btrfs_qgroup_extent_record *record; + struct btrfs_delayed_ref_root *delayed_refs; + struct ulist *new_roots = NULL; + struct rb_node *node; + u64 qgroup_to_skip; + int ret = 0; + + delayed_refs = &trans->transaction->delayed_refs; + qgroup_to_skip = delayed_refs->qgroup_to_skip; + while ((node = rb_first(&delayed_refs->dirty_extent_root))) { + record = rb_entry(node, struct btrfs_qgroup_extent_record, + node); + + if (!ret) { + /* + * Use (u64)-1 as time_seq to do special search, which + * doesn't lock tree or delayed_refs and search current + * root. It's safe inside commit_transaction(). 
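			 *
			 * Sketch of the per-record handling below: find the
			 * current roots with time_seq == (u64)-1, then hand
			 * both ulists to btrfs_qgroup_account_extent(), which
			 * takes ownership and frees them:
			 *
			 *	btrfs_qgroup_account_extent(trans, fs_info,
			 *		record->bytenr, record->num_bytes,
			 *		record->old_roots, new_roots);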
+ */ + ret = btrfs_find_all_roots(trans, fs_info, + record->bytenr, (u64)-1, &new_roots); + if (ret < 0) + goto cleanup; + if (qgroup_to_skip) + ulist_del(new_roots, qgroup_to_skip, 0); + ret = btrfs_qgroup_account_extent(trans, fs_info, + record->bytenr, record->num_bytes, + record->old_roots, new_roots); + record->old_roots = NULL; + new_roots = NULL; + } +cleanup: + ulist_free(record->old_roots); + ulist_free(new_roots); + new_roots = NULL; + rb_erase(node, &delayed_refs->dirty_extent_root); + kfree(record); + + } return ret; } @@ -1514,6 +1810,10 @@ if (ret) fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; + ret = update_qgroup_limit_item(trans, quota_root, qgroup); + if (ret) + fs_info->qgroup_flags |= + BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; spin_lock(&fs_info->qgroup_lock); } if (fs_info->quota_enabled) @@ -1527,9 +1827,12 @@ fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; if (!ret && start_rescan_worker) { - ret = btrfs_qgroup_rescan(fs_info); - if (ret) - pr_err("btrfs: start rescan quota failed: %d\n", ret); + ret = qgroup_rescan_init(fs_info, 0, 1); + if (!ret) { + qgroup_rescan_zero_tracking(fs_info); + btrfs_queue_work(fs_info->qgroup_rescan_workers, + &fs_info->qgroup_rescan_work); + } ret = 0; } @@ -1574,6 +1877,11 @@ ret = -EINVAL; goto out; } + + if ((srcgroup->qgroupid >> 48) <= (objectid >> 48)) { + ret = -EINVAL; + goto out; + } ++i_qgroups; } } @@ -1585,21 +1893,9 @@ if (ret) goto out; - if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) { - ret = update_qgroup_limit_item(trans, quota_root, objectid, - inherit->lim.flags, - inherit->lim.max_rfer, - inherit->lim.max_excl, - inherit->lim.rsv_rfer, - inherit->lim.rsv_excl); - if (ret) - goto out; - } - if (srcid) { struct btrfs_root *srcroot; struct btrfs_key srckey; - int srcroot_level; srckey.objectid = srcid; srckey.type = BTRFS_ROOT_ITEM_KEY; @@ -1611,8 +1907,7 @@ } rcu_read_lock(); - srcroot_level = btrfs_header_level(srcroot->node); - level_size = btrfs_level_size(srcroot, srcroot_level); + level_size = srcroot->nodesize; rcu_read_unlock(); } @@ -1643,14 +1938,46 @@ goto unlock; } + if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) { + dstgroup->lim_flags = inherit->lim.flags; + dstgroup->max_rfer = inherit->lim.max_rfer; + dstgroup->max_excl = inherit->lim.max_excl; + dstgroup->rsv_rfer = inherit->lim.rsv_rfer; + dstgroup->rsv_excl = inherit->lim.rsv_excl; + + ret = update_qgroup_limit_item(trans, quota_root, dstgroup); + if (ret) { + fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; + btrfs_info(fs_info, "unable to update quota limit for %llu", + dstgroup->qgroupid); + goto unlock; + } + } + if (srcid) { srcgroup = find_qgroup_rb(fs_info, srcid); if (!srcgroup) goto unlock; - dstgroup->rfer = srcgroup->rfer - level_size; - dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size; + + /* + * We call inherit after we clone the root in order to make sure + * our counts don't go crazy, so at this point the only + * difference between the two roots should be the root node. 
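+ *
+ * Example with made-up numbers: for a source subvolume whose qgroup has
+ * rfer == 1G, and a 16K nodesize, right after the snapshot both qgroups
+ * report rfer == 1G (everything is shared) and excl == 16K, since each
+ * side exclusively owns only its own root node.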
+ */ + dstgroup->rfer = srcgroup->rfer; + dstgroup->rfer_cmpr = srcgroup->rfer_cmpr; + dstgroup->excl = level_size; + dstgroup->excl_cmpr = level_size; srcgroup->excl = level_size; srcgroup->excl_cmpr = level_size; + + /* inherit the limit info */ + dstgroup->lim_flags = srcgroup->lim_flags; + dstgroup->max_rfer = srcgroup->max_rfer; + dstgroup->max_excl = srcgroup->max_excl; + dstgroup->rsv_rfer = srcgroup->rsv_rfer; + dstgroup->rsv_excl = srcgroup->rsv_excl; + qgroup_dirty(fs_info, dstgroup); qgroup_dirty(fs_info, srcgroup); } @@ -1707,20 +2034,13 @@ return ret; } -/* - * reserve some space for a qgroup and all its parents. The reservation takes - * place with start_transaction or dealloc_reserve, similar to ENOSPC - * accounting. If not enough space is available, EDQUOT is returned. - * We assume that the requested space is new for all qgroups. - */ -int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) +static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes) { struct btrfs_root *quota_root; struct btrfs_qgroup *qgroup; struct btrfs_fs_info *fs_info = root->fs_info; u64 ref_root = root->root_key.objectid; int ret = 0; - struct ulist *ulist = NULL; struct ulist_node *unode; struct ulist_iterator uiter; @@ -1743,21 +2063,17 @@ * in a first step, we check all affected qgroups if any limits would * be exceeded */ - ulist = ulist_alloc(GFP_ATOMIC); - if (!ulist) { - ret = -ENOMEM; - goto out; - } - ret = ulist_add(ulist, qgroup->qgroupid, + ulist_reinit(fs_info->qgroup_ulist); + ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC); if (ret < 0) goto out; ULIST_ITER_INIT(&uiter); - while ((unode = ulist_next(ulist, &uiter))) { + while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { struct btrfs_qgroup *qg; struct btrfs_qgroup_list *glist; - qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; + qg = u64_to_ptr(unode->aux); if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && qg->reserved + (s64)qg->rfer + num_bytes > @@ -1774,7 +2090,8 @@ } list_for_each_entry(glist, &qg->groups, next_group) { - ret = ulist_add(ulist, glist->group->qgroupid, + ret = ulist_add(fs_info->qgroup_ulist, + glist->group->qgroupid, (uintptr_t)glist->group, GFP_ATOMIC); if (ret < 0) goto out; @@ -1785,30 +2102,26 @@ * no limits exceeded, now record the reservation into all qgroups */ ULIST_ITER_INIT(&uiter); - while ((unode = ulist_next(ulist, &uiter))) { + while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { struct btrfs_qgroup *qg; - qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; + qg = u64_to_ptr(unode->aux); qg->reserved += num_bytes; } out: spin_unlock(&fs_info->qgroup_lock); - ulist_free(ulist); - return ret; } -void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes) +void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info, + u64 ref_root, u64 num_bytes) { struct btrfs_root *quota_root; struct btrfs_qgroup *qgroup; - struct btrfs_fs_info *fs_info = root->fs_info; - struct ulist *ulist = NULL; struct ulist_node *unode; struct ulist_iterator uiter; - u64 ref_root = root->root_key.objectid; int ret = 0; if (!is_fstree(ref_root)) @@ -1827,26 +2140,23 @@ if (!qgroup) goto out; - ulist = ulist_alloc(GFP_ATOMIC); - if (!ulist) { - btrfs_std_error(fs_info, -ENOMEM); - goto out; - } - ret = ulist_add(ulist, qgroup->qgroupid, + ulist_reinit(fs_info->qgroup_ulist); + ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC); if (ret < 0) goto out; ULIST_ITER_INIT(&uiter); - while ((unode = ulist_next(ulist, 
&uiter))) { + while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { struct btrfs_qgroup *qg; struct btrfs_qgroup_list *glist; - qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; + qg = u64_to_ptr(unode->aux); qg->reserved -= num_bytes; list_for_each_entry(glist, &qg->groups, next_group) { - ret = ulist_add(ulist, glist->group->qgroupid, + ret = ulist_add(fs_info->qgroup_ulist, + glist->group->qgroupid, (uintptr_t)glist->group, GFP_ATOMIC); if (ret < 0) goto out; @@ -1855,14 +2165,20 @@ out: spin_unlock(&fs_info->qgroup_lock); - ulist_free(ulist); } +static inline void qgroup_free(struct btrfs_root *root, u64 num_bytes) +{ + return btrfs_qgroup_free_refroot(root->fs_info, root->objectid, + num_bytes); +} void assert_qgroups_uptodate(struct btrfs_trans_handle *trans) { if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq) return; - pr_err("btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %#x.%x\n", + btrfs_err(trans->root->fs_info, + "qgroups not uptodate in trans handle %p: list is%s empty, " + "seq is %#x.%x", trans, list_empty(&trans->qgroup_ref_list) ? "" : " not", (u32)(trans->delayed_ref_elem.seq >> 32), (u32)trans->delayed_ref_elem.seq); @@ -1871,34 +2187,29 @@ /* * returns < 0 on error, 0 when more leafs are to be scanned. - * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared. + * returns 1 when done. */ static int -qgroup_rescan_leaf(struct qgroup_rescan *qscan, struct btrfs_path *path, - struct btrfs_trans_handle *trans, struct ulist *tmp, - struct extent_buffer *scratch_leaf) +qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, + struct btrfs_trans_handle *trans) { struct btrfs_key found; - struct btrfs_fs_info *fs_info = qscan->fs_info; + struct extent_buffer *scratch_leaf = NULL; struct ulist *roots = NULL; - struct ulist_node *unode; - struct ulist_iterator uiter; - struct seq_list tree_mod_seq_elem = {}; - u64 seq; + struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem); + u64 num_bytes; int slot; int ret; - path->leave_spinning = 1; mutex_lock(&fs_info->qgroup_rescan_lock); ret = btrfs_search_slot_for_read(fs_info->extent_root, &fs_info->qgroup_rescan_progress, path, 1, 0); pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n", - (unsigned long long)fs_info->qgroup_rescan_progress.objectid, + fs_info->qgroup_rescan_progress.objectid, fs_info->qgroup_rescan_progress.type, - (unsigned long long)fs_info->qgroup_rescan_progress.offset, - ret); + fs_info->qgroup_rescan_progress.offset, ret); if (ret) { /* @@ -1920,86 +2231,44 @@ fs_info->qgroup_rescan_progress.objectid = found.objectid + 1; btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem); - memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf)); + scratch_leaf = btrfs_clone_extent_buffer(path->nodes[0]); + if (!scratch_leaf) { + ret = -ENOMEM; + mutex_unlock(&fs_info->qgroup_rescan_lock); + goto out; + } + extent_buffer_get(scratch_leaf); + btrfs_tree_read_lock(scratch_leaf); + btrfs_set_lock_blocking_rw(scratch_leaf, BTRFS_READ_LOCK); slot = path->slots[0]; btrfs_release_path(path); mutex_unlock(&fs_info->qgroup_rescan_lock); for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) { btrfs_item_key_to_cpu(scratch_leaf, &found, slot); - if (found.type != BTRFS_EXTENT_ITEM_KEY) + if (found.type != BTRFS_EXTENT_ITEM_KEY && + found.type != BTRFS_METADATA_ITEM_KEY) continue; - ret = btrfs_find_all_roots(trans, fs_info, found.objectid, - tree_mod_seq_elem.seq, &roots); + if (found.type == 
BTRFS_METADATA_ITEM_KEY) + num_bytes = fs_info->extent_root->nodesize; + else + num_bytes = found.offset; + + ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0, + &roots); if (ret < 0) goto out; - spin_lock(&fs_info->qgroup_lock); - seq = fs_info->qgroup_seq; - fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */ - - ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq); - if (ret) { - spin_unlock(&fs_info->qgroup_lock); - ulist_free(roots); + /* For rescan, just pass old_roots as NULL */ + ret = btrfs_qgroup_account_extent(trans, fs_info, + found.objectid, num_bytes, NULL, roots); + if (ret < 0) goto out; - } - - /* - * step2 of btrfs_qgroup_account_ref works from a single root, - * we're doing all at once here. - */ - ulist_reinit(tmp); - ULIST_ITER_INIT(&uiter); - while ((unode = ulist_next(roots, &uiter))) { - struct btrfs_qgroup *qg; - - qg = find_qgroup_rb(fs_info, unode->val); - if (!qg) - continue; - - ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, - GFP_ATOMIC); - if (ret < 0) { - spin_unlock(&fs_info->qgroup_lock); - ulist_free(roots); - goto out; - } - } - - /* this loop is similar to step 2 of btrfs_qgroup_account_ref */ - ULIST_ITER_INIT(&uiter); - while ((unode = ulist_next(tmp, &uiter))) { - struct btrfs_qgroup *qg; - struct btrfs_qgroup_list *glist; - - qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux; - qg->rfer += found.offset; - qg->rfer_cmpr += found.offset; - WARN_ON(qg->tag >= seq); - if (qg->refcnt - seq == roots->nnodes) { - qg->excl += found.offset; - qg->excl_cmpr += found.offset; - } - qgroup_dirty(fs_info, qg); - - list_for_each_entry(glist, &qg->groups, next_group) { - ret = ulist_add(tmp, glist->group->qgroupid, - (uintptr_t)glist->group, - GFP_ATOMIC); - if (ret < 0) { - spin_unlock(&fs_info->qgroup_lock); - ulist_free(roots); - goto out; - } - } - } - - spin_unlock(&fs_info->qgroup_lock); - ulist_free(roots); - ret = 0; } - out: + if (scratch_leaf) { + btrfs_tree_read_unlock_blocking(scratch_leaf); + free_extent_buffer(scratch_leaf); + } btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); return ret; @@ -2007,27 +2276,19 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) { - struct qgroup_rescan *qscan = container_of(work, struct qgroup_rescan, - work); + struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info, + qgroup_rescan_work); struct btrfs_path *path; struct btrfs_trans_handle *trans = NULL; - struct btrfs_fs_info *fs_info = qscan->fs_info; - struct ulist *tmp = NULL; - struct extent_buffer *scratch_leaf = NULL; int err = -ENOMEM; + int ret = 0; path = btrfs_alloc_path(); if (!path) goto out; - tmp = ulist_alloc(GFP_NOFS); - if (!tmp) - goto out; - scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS); - if (!scratch_leaf) - goto out; err = 0; - while (!err) { + while (!err && !btrfs_fs_closing(fs_info)) { trans = btrfs_start_transaction(fs_info->fs_root, 0); if (IS_ERR(trans)) { err = PTR_ERR(trans); @@ -2036,8 +2297,7 @@ if (!fs_info->quota_enabled) { err = -EINTR; } else { - err = qgroup_rescan_leaf(qscan, path, trans, - tmp, scratch_leaf); + err = qgroup_rescan_leaf(fs_info, path, trans); } if (err > 0) btrfs_commit_transaction(trans, fs_info->fs_root); @@ -2046,15 +2306,13 @@ } out: - kfree(scratch_leaf); - ulist_free(tmp); btrfs_free_path(path); - kfree(qscan); mutex_lock(&fs_info->qgroup_rescan_lock); - fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; + if (!btrfs_fs_closing(fs_info)) + fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; - if (err == 2 && + if (err > 0 
&& fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) { fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; } else if (err < 0) { @@ -2062,53 +2320,106 @@ } mutex_unlock(&fs_info->qgroup_rescan_lock); - if (err >= 0) { - pr_info("btrfs: qgroup scan completed%s\n", - err == 2 ? " (inconsistency flag cleared)" : ""); + /* + * Only update the status item, since the previous part has already + * updated the qgroup info. + */ + trans = btrfs_start_transaction(fs_info->quota_root, 1); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + btrfs_err(fs_info, + "fail to start transaction for status update: %d\n", + err); + goto done; + } + ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root); + if (ret < 0) { + err = ret; + btrfs_err(fs_info, "fail to update qgroup status: %d\n", err); + } + btrfs_end_transaction(trans, fs_info->quota_root); + + if (btrfs_fs_closing(fs_info)) { + btrfs_info(fs_info, "qgroup scan paused"); + } else if (err >= 0) { + btrfs_info(fs_info, "qgroup scan completed%s", + err > 0 ? " (inconsistency flag cleared)" : ""); } else { - pr_err("btrfs: qgroup scan failed with %d\n", err); + btrfs_err(fs_info, "qgroup scan failed with %d", err); } -} -static void -qgroup_rescan_start(struct btrfs_fs_info *fs_info, struct qgroup_rescan *qscan) -{ - memset(&qscan->work, 0, sizeof(qscan->work)); - qscan->work.func = btrfs_qgroup_rescan_worker; - qscan->fs_info = fs_info; - - pr_info("btrfs: qgroup scan started\n"); - btrfs_queue_worker(&fs_info->qgroup_rescan_workers, &qscan->work); +done: + mutex_lock(&fs_info->qgroup_rescan_lock); + fs_info->qgroup_rescan_running = false; + mutex_unlock(&fs_info->qgroup_rescan_lock); + complete_all(&fs_info->qgroup_rescan_completion); } -int -btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) +/* + * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all + * memory required for the rescan context.
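+ *
+ * Usage sketch, mirroring the two call sites: pass the on-disk progress
+ * with init_flags == 0 when resuming at mount time, or (0, 1) to force a
+ * fresh rescan:
+ *
+ *	qgroup_rescan_init(fs_info, rescan_progress, 0);
+ *	qgroup_rescan_init(fs_info, 0, 1);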
+ */ +static int +qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, + int init_flags) { int ret = 0; - struct rb_node *n; - struct btrfs_qgroup *qgroup; - struct qgroup_rescan *qscan = kmalloc(sizeof(*qscan), GFP_NOFS); - if (!qscan) - return -ENOMEM; + if (!init_flags && + (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) || + !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) { + ret = -EINVAL; + goto err; + } mutex_lock(&fs_info->qgroup_rescan_lock); spin_lock(&fs_info->qgroup_lock); - if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) - ret = -EINPROGRESS; - else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) - ret = -EINVAL; - if (ret) { - spin_unlock(&fs_info->qgroup_lock); - mutex_unlock(&fs_info->qgroup_rescan_lock); - kfree(qscan); - return ret; + + if (init_flags) { + if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) + ret = -EINPROGRESS; + else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) + ret = -EINVAL; + + if (ret) { + spin_unlock(&fs_info->qgroup_lock); + mutex_unlock(&fs_info->qgroup_rescan_lock); + goto err; + } + fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN; } - fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN; memset(&fs_info->qgroup_rescan_progress, 0, sizeof(fs_info->qgroup_rescan_progress)); + fs_info->qgroup_rescan_progress.objectid = progress_objectid; + init_completion(&fs_info->qgroup_rescan_completion); + fs_info->qgroup_rescan_running = true; + spin_unlock(&fs_info->qgroup_lock); + mutex_unlock(&fs_info->qgroup_rescan_lock); + + memset(&fs_info->qgroup_rescan_work, 0, + sizeof(fs_info->qgroup_rescan_work)); + btrfs_init_work(&fs_info->qgroup_rescan_work, + btrfs_qgroup_rescan_helper, + btrfs_qgroup_rescan_worker, NULL, NULL); + + if (ret) { +err: + btrfs_info(fs_info, "qgroup_rescan_init failed with %d", ret); + return ret; + } + + return 0; +} + +static void +qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info) +{ + struct rb_node *n; + struct btrfs_qgroup *qgroup; + + spin_lock(&fs_info->qgroup_lock); /* clear all current qgroup tracking information */ for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) { qgroup = rb_entry(n, struct btrfs_qgroup, node); @@ -2118,9 +2429,267 @@ qgroup->excl_cmpr = 0; } spin_unlock(&fs_info->qgroup_lock); - mutex_unlock(&fs_info->qgroup_rescan_lock); +} - qgroup_rescan_start(fs_info, qscan); +int +btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) +{ + int ret = 0; + struct btrfs_trans_handle *trans; + + ret = qgroup_rescan_init(fs_info, 0, 1); + if (ret) + return ret; + + /* + * We have set the rescan_progress to 0, which means no more + * delayed refs will be accounted by btrfs_qgroup_account_ref. + * However, btrfs_qgroup_account_ref may be right after its call + * to btrfs_find_all_roots, in which case it would still do the + * accounting. + * To solve this, we're committing the transaction, which will + * ensure we run all delayed refs and only after that, we are + * going to clear all tracking information for a clean start. 
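+ *
+ * The resulting order, as implemented below:
+ *
+ *	qgroup_rescan_init(fs_info, 0, 1);
+ *	btrfs_commit_transaction(trans, fs_info->fs_root);
+ *	qgroup_rescan_zero_tracking(fs_info);
+ *	btrfs_queue_work(fs_info->qgroup_rescan_workers,
+ *			 &fs_info->qgroup_rescan_work);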
+ */ + + trans = btrfs_join_transaction(fs_info->fs_root); + if (IS_ERR(trans)) { + fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; + return PTR_ERR(trans); + } + ret = btrfs_commit_transaction(trans, fs_info->fs_root); + if (ret) { + fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; + return ret; + } + + qgroup_rescan_zero_tracking(fs_info); + + btrfs_queue_work(fs_info->qgroup_rescan_workers, + &fs_info->qgroup_rescan_work); return 0; } + +int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info, + bool interruptible) +{ + int running; + int ret = 0; + + mutex_lock(&fs_info->qgroup_rescan_lock); + spin_lock(&fs_info->qgroup_lock); + running = fs_info->qgroup_rescan_running; + spin_unlock(&fs_info->qgroup_lock); + mutex_unlock(&fs_info->qgroup_rescan_lock); + + if (!running) + return 0; + + if (interruptible) + ret = wait_for_completion_interruptible( + &fs_info->qgroup_rescan_completion); + else + wait_for_completion(&fs_info->qgroup_rescan_completion); + + return ret; +} + +/* + * this is only called from open_ctree where we're still single threaded, thus + * locking is omitted here. + */ +void +btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info) +{ + if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) + btrfs_queue_work(fs_info->qgroup_rescan_workers, + &fs_info->qgroup_rescan_work); +} + +/* + * Reserve qgroup space for range [start, start + len). + * + * This function will either reserve space from related qgroups or do + * nothing if the range is already reserved. + * + * Return 0 for a successful reservation + * Return <0 for error (including -EDQUOT) + * + * NOTE: this function may sleep for memory allocation. + */ +int btrfs_qgroup_reserve_data(struct inode *inode, u64 start, u64 len) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + struct extent_changeset changeset; + struct ulist_node *unode; + struct ulist_iterator uiter; + int ret; + + if (!root->fs_info->quota_enabled || !is_fstree(root->objectid) || + len == 0) + return 0; + + changeset.bytes_changed = 0; + changeset.range_changed = ulist_alloc(GFP_NOFS); + ret = set_record_extent_bits(&BTRFS_I(inode)->io_tree, start, + start + len -1, EXTENT_QGROUP_RESERVED, GFP_NOFS, + &changeset); + trace_btrfs_qgroup_reserve_data(inode, start, len, + changeset.bytes_changed, + QGROUP_RESERVE); + if (ret < 0) + goto cleanup; + ret = qgroup_reserve(root, changeset.bytes_changed); + if (ret < 0) + goto cleanup; + + ulist_free(changeset.range_changed); + return ret; + +cleanup: + /* cleanup already reserved ranges */ + ULIST_ITER_INIT(&uiter); + while ((unode = ulist_next(changeset.range_changed, &uiter))) + clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val, + unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL, + GFP_NOFS); + ulist_free(changeset.range_changed); + return ret; +} + +static int __btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len, + int free) +{ + struct extent_changeset changeset; + int trace_op = QGROUP_RELEASE; + int ret; + + changeset.bytes_changed = 0; + changeset.range_changed = ulist_alloc(GFP_NOFS); + if (!changeset.range_changed) + return -ENOMEM; + + ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, start, + start + len -1, EXTENT_QGROUP_RESERVED, GFP_NOFS, + &changeset); + if (ret < 0) + goto out; + + if (free) { + qgroup_free(BTRFS_I(inode)->root, changeset.bytes_changed); + trace_op = QGROUP_FREE; + } + trace_btrfs_qgroup_release_data(inode, start, len, + changeset.bytes_changed, trace_op); +out: + ulist_free(changeset.range_changed); + return
ret; +} + +/* + * Free a reserved space range from io_tree and related qgroups + * + * Should be called when a range of pages gets invalidated before reaching disk. + * Or for the error cleanup case. + * + * For data written to disk, use btrfs_qgroup_release_data(). + * + * NOTE: This function may sleep for memory allocation. + */ +int btrfs_qgroup_free_data(struct inode *inode, u64 start, u64 len) +{ + return __btrfs_qgroup_release_data(inode, start, len, 1); +} + +/* + * Release a reserved space range from io_tree only. + * + * Should be called when a range of pages gets written to disk and the + * corresponding FILE_EXTENT is inserted into the corresponding root. + * + * Since the new qgroup accounting framework will only update qgroup numbers at + * commit_transaction() time, its reserved space shouldn't be freed from + * related qgroups. + * + * But we should release the range from io_tree, to allow further writes to be + * COWed. + * + * NOTE: This function may sleep for memory allocation. + */ +int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len) +{ + return __btrfs_qgroup_release_data(inode, start, len, 0); +} + +int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes) +{ + int ret; + + if (!root->fs_info->quota_enabled || !is_fstree(root->objectid) || + num_bytes == 0) + return 0; + + BUG_ON(num_bytes != round_down(num_bytes, root->nodesize)); + ret = qgroup_reserve(root, num_bytes); + if (ret < 0) + return ret; + atomic_add(num_bytes, &root->qgroup_meta_rsv); + return ret; +} + +void btrfs_qgroup_free_meta_all(struct btrfs_root *root) +{ + int reserved; + + if (!root->fs_info->quota_enabled || !is_fstree(root->objectid)) + return; + + reserved = atomic_xchg(&root->qgroup_meta_rsv, 0); + if (reserved == 0) + return; + qgroup_free(root, reserved); +} + +void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes) +{ + if (!root->fs_info->quota_enabled || !is_fstree(root->objectid)) + return; + + BUG_ON(num_bytes != round_down(num_bytes, root->nodesize)); + WARN_ON(atomic_read(&root->qgroup_meta_rsv) < num_bytes); + atomic_sub(num_bytes, &root->qgroup_meta_rsv); + qgroup_free(root, num_bytes); +} + +/* + * Check for leaked qgroup reserved space, normally at inode destroy + * time + */ +void btrfs_qgroup_check_reserved_leak(struct inode *inode) +{ + struct extent_changeset changeset; + struct ulist_node *unode; + struct ulist_iterator iter; + int ret; + + changeset.bytes_changed = 0; + changeset.range_changed = ulist_alloc(GFP_NOFS); + if (WARN_ON(!changeset.range_changed)) + return; + + ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1, + EXTENT_QGROUP_RESERVED, GFP_NOFS, &changeset); + + WARN_ON(ret < 0); + if (WARN_ON(changeset.bytes_changed)) { + ULIST_ITER_INIT(&iter); + while ((unode = ulist_next(changeset.range_changed, &iter))) { + btrfs_warn(BTRFS_I(inode)->root->fs_info, + "leaking qgroup reserved space, ino: %lu, start: %llu, end: %llu", + inode->i_ino, unode->val, unode->aux); + } + qgroup_free(BTRFS_I(inode)->root, changeset.bytes_changed); + } + ulist_free(changeset.range_changed); +}
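+
+/*
+ * Illustrative lifecycle of the data reservation API above (a sketch,
+ * not a call sequence taken verbatim from any caller in this file):
+ *
+ *	btrfs_qgroup_reserve_data(inode, pos, len);	before dirtying pages
+ *	btrfs_qgroup_release_data(inode, pos, len);	once the file extent
+ *							hits disk
+ *	btrfs_qgroup_free_data(inode, pos, len);	on error or page
+ *							invalidation
+ */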