--- zzzz-none-000/linux-3.10.107/fs/ceph/xattr.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/fs/ceph/xattr.c 2021-02-04 17:41:59.000000000 +0000 @@ -1,4 +1,5 @@ #include +#include #include "super.h" #include "mds_client.h" @@ -6,16 +7,33 @@ #include #include +#include #include #define XATTR_CEPH_PREFIX "ceph." #define XATTR_CEPH_PREFIX_LEN (sizeof (XATTR_CEPH_PREFIX) - 1) +static int __remove_xattr(struct ceph_inode_info *ci, + struct ceph_inode_xattr *xattr); + +/* + * List of handlers for synthetic system.* attributes. Other + * attributes are handled directly. + */ +const struct xattr_handler *ceph_xattr_handlers[] = { +#ifdef CONFIG_CEPH_FS_POSIX_ACL + &posix_acl_access_xattr_handler, + &posix_acl_default_xattr_handler, +#endif + NULL, +}; + static bool ceph_is_valid_xattr(const char *name) { return !strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) || !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) || + !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) || !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) || !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); } @@ -47,32 +65,48 @@ } static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, - size_t size) + size_t size) { int ret; struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb); struct ceph_osd_client *osdc = &fsc->client->osdc; s64 pool = ceph_file_layout_pg_pool(ci->i_layout); const char *pool_name; + char buf[128]; dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode); down_read(&osdc->map_sem); pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool); - if (pool_name) - ret = snprintf(val, size, - "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%s", + if (pool_name) { + size_t len = strlen(pool_name); + ret = snprintf(buf, sizeof(buf), + "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=", (unsigned long long)ceph_file_layout_su(ci->i_layout), (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), - (unsigned long long)ceph_file_layout_object_size(ci->i_layout), - pool_name); - else - ret = snprintf(val, size, + (unsigned long long)ceph_file_layout_object_size(ci->i_layout)); + if (!size) { + ret += len; + } else if (ret + len > size) { + ret = -ERANGE; + } else { + memcpy(val, buf, ret); + memcpy(val + ret, pool_name, len); + ret += len; + } + } else { + ret = snprintf(buf, sizeof(buf), "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld", (unsigned long long)ceph_file_layout_su(ci->i_layout), (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), (unsigned long long)ceph_file_layout_object_size(ci->i_layout), (unsigned long long)pool); - + if (size) { + if (ret <= size) + memcpy(val, buf, ret); + else + ret = -ERANGE; + } + } up_read(&osdc->map_sem); return ret; } @@ -198,7 +232,7 @@ .name_size = sizeof("ceph.dir.layout"), .getxattr_cb = ceph_vxattrcb_layout, .readonly = false, - .hidden = false, + .hidden = true, .exists_cb = ceph_vxattrcb_layout_exists, }, XATTR_LAYOUT_FIELD(dir, layout, stripe_unit), @@ -225,7 +259,7 @@ .name_size = sizeof("ceph.file.layout"), .getxattr_cb = ceph_vxattrcb_layout, .readonly = false, - .hidden = false, + .hidden = true, .exists_cb = ceph_vxattrcb_layout_exists, }, XATTR_LAYOUT_FIELD(file, layout, stripe_unit), @@ -251,8 +285,7 @@ return ceph_dir_vxattrs_name_size; if (vxattrs == ceph_file_vxattrs) return ceph_file_vxattrs_name_size; - BUG(); - + BUG_ON(vxattrs); return 0; } @@ -305,8 +338,7 @@ static int __set_xattr(struct ceph_inode_info *ci, const char *name, int name_len, const char *val, int val_len, - int dirty, - int should_free_name, int should_free_val, + int flags, int update_xattr, struct ceph_inode_xattr **newxattr) { struct rb_node **p; @@ -335,12 +367,31 @@ xattr = NULL; } + if (update_xattr) { + int err = 0; + if (xattr && (flags & XATTR_CREATE)) + err = -EEXIST; + else if (!xattr && (flags & XATTR_REPLACE)) + err = -ENODATA; + if (err) { + kfree(name); + kfree(val); + return err; + } + if (update_xattr < 0) { + if (xattr) + __remove_xattr(ci, xattr); + kfree(name); + return 0; + } + } + if (!xattr) { new = 1; xattr = *newxattr; xattr->name = name; xattr->name_len = name_len; - xattr->should_free_name = should_free_name; + xattr->should_free_name = update_xattr; ci->i_xattrs.count++; dout("__set_xattr count=%d\n", ci->i_xattrs.count); @@ -350,7 +401,7 @@ if (xattr->should_free_val) kfree((void *)xattr->val); - if (should_free_name) { + if (update_xattr) { kfree((void *)name); name = xattr->name; } @@ -365,8 +416,8 @@ xattr->val = ""; xattr->val_len = val_len; - xattr->dirty = dirty; - xattr->should_free_val = (val && should_free_val); + xattr->dirty = update_xattr; + xattr->should_free_val = (val && update_xattr); if (new) { rb_link_node(&xattr->node, parent, p); @@ -428,7 +479,7 @@ struct ceph_inode_xattr *xattr) { if (!xattr) - return -EOPNOTSUPP; + return -ENODATA; rb_erase(&xattr->node, &ci->i_xattrs.index); @@ -541,12 +592,12 @@ xattr_version = ci->i_xattrs.version; spin_unlock(&ci->i_ceph_lock); - xattrs = kcalloc(numattr, sizeof(struct ceph_xattr *), + xattrs = kcalloc(numattr, sizeof(struct ceph_inode_xattr *), GFP_NOFS); err = -ENOMEM; if (!xattrs) goto bad_lock; - memset(xattrs, 0, numattr*sizeof(struct ceph_xattr *)); + for (i = 0; i < numattr; i++) { xattrs[i] = kmalloc(sizeof(struct ceph_inode_xattr), GFP_NOFS); @@ -574,7 +625,7 @@ p += len; err = __set_xattr(ci, name, namelen, val, len, - 0, 0, 0, &xattrs[numattr]); + 0, 0, &xattrs[numattr]); if (err < 0) goto bad; @@ -663,10 +714,9 @@ } } -ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value, +ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value, size_t size) { - struct inode *inode = dentry->d_inode; struct ceph_inode_info *ci = ceph_inode(inode); int err; struct ceph_inode_xattr *xattr; @@ -675,7 +725,6 @@ if (!ceph_is_valid_xattr(name)) return -ENODATA; - /* let's see if a virtual xattr was requested */ vxattr = ceph_match_vxattr(inode, name); if (vxattr && !(vxattr->exists_cb && !vxattr->exists_cb(ci))) { @@ -687,24 +736,20 @@ dout("getxattr %p ver=%lld index_ver=%lld\n", inode, ci->i_xattrs.version, ci->i_xattrs.index_version); - if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) && - (ci->i_xattrs.index_version >= ci->i_xattrs.version)) { - goto get_xattr; - } else { + if (ci->i_xattrs.version == 0 || + !__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1)) { spin_unlock(&ci->i_ceph_lock); /* get xattrs from mds (if we don't already have them) */ - err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR); + err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true); if (err) return err; + spin_lock(&ci->i_ceph_lock); } - spin_lock(&ci->i_ceph_lock); - err = __build_xattrs(inode); if (err < 0) goto out; -get_xattr: err = -ENODATA; /* == ENOATTR */ xattr = __get_xattr(ci, name); if (!xattr) @@ -725,9 +770,18 @@ return err; } +ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value, + size_t size) +{ + if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) + return generic_getxattr(dentry, name, value, size); + + return __ceph_getxattr(d_inode(dentry), name, value, size); +} + ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size) { - struct inode *inode = dentry->d_inode; + struct inode *inode = d_inode(dentry); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_vxattr *vxattrs = ceph_inode_vxattrs(inode); u32 vir_namelen = 0; @@ -740,23 +794,18 @@ dout("listxattr %p ver=%lld index_ver=%lld\n", inode, ci->i_xattrs.version, ci->i_xattrs.index_version); - if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) && - (ci->i_xattrs.index_version >= ci->i_xattrs.version)) { - goto list_xattr; - } else { + if (ci->i_xattrs.version == 0 || + !__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1)) { spin_unlock(&ci->i_ceph_lock); - err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR); + err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true); if (err) return err; + spin_lock(&ci->i_ceph_lock); } - spin_lock(&ci->i_ceph_lock); - err = __build_xattrs(inode); if (err < 0) goto out; - -list_xattr: /* * Start with virtual dir xattr names (if any) (including * terminating '\0' characters for each). @@ -798,33 +847,25 @@ const char *value, size_t size, int flags) { struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); - struct inode *inode = dentry->d_inode; + struct inode *inode = d_inode(dentry); struct ceph_inode_info *ci = ceph_inode(inode); - struct inode *parent_inode; struct ceph_mds_request *req; struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_pagelist *pagelist = NULL; int err; - int i, nr_pages; - struct page **pages = NULL; - void *kaddr; - - /* copy value into some pages */ - nr_pages = calc_pages_for(0, size); - if (nr_pages) { - pages = kmalloc(sizeof(pages[0])*nr_pages, GFP_NOFS); - if (!pages) + + if (size > 0) { + /* copy value into pagelist */ + pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS); + if (!pagelist) return -ENOMEM; - err = -ENOMEM; - for (i = 0; i < nr_pages; i++) { - pages[i] = __page_cache_alloc(GFP_NOFS); - if (!pages[i]) { - nr_pages = i; - goto out; - } - kaddr = kmap(pages[i]); - memcpy(kaddr, value + i*PAGE_CACHE_SIZE, - min(PAGE_CACHE_SIZE, size-i*PAGE_CACHE_SIZE)); - } + + ceph_pagelist_init(pagelist); + err = ceph_pagelist_append(pagelist, value, size); + if (err) + goto out; + } else if (!value) { + flags |= CEPH_XATTR_REMOVE; } dout("setxattr value=%.*s\n", (int)size, value); @@ -836,51 +877,52 @@ err = PTR_ERR(req); goto out; } - req->r_inode = inode; - ihold(inode); - req->r_inode_drop = CEPH_CAP_XATTR_SHARED; - req->r_num_caps = 1; + req->r_args.setxattr.flags = cpu_to_le32(flags); req->r_path2 = kstrdup(name, GFP_NOFS); + if (!req->r_path2) { + ceph_mdsc_put_request(req); + err = -ENOMEM; + goto out; + } - req->r_pages = pages; - req->r_num_pages = nr_pages; - req->r_data_len = size; + req->r_pagelist = pagelist; + pagelist = NULL; + + req->r_inode = inode; + ihold(inode); + req->r_num_caps = 1; + req->r_inode_drop = CEPH_CAP_XATTR_SHARED; dout("xattr.ver (before): %lld\n", ci->i_xattrs.version); - parent_inode = ceph_get_dentry_parent_inode(dentry); - err = ceph_mdsc_do_request(mdsc, parent_inode, req); - iput(parent_inode); + err = ceph_mdsc_do_request(mdsc, NULL, req); ceph_mdsc_put_request(req); dout("xattr.ver (after): %lld\n", ci->i_xattrs.version); out: - if (pages) { - for (i = 0; i < nr_pages; i++) - __free_page(pages[i]); - kfree(pages); - } + if (pagelist) + ceph_pagelist_release(pagelist); return err; } -int ceph_setxattr(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) +int __ceph_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { - struct inode *inode = dentry->d_inode; + struct inode *inode = d_inode(dentry); struct ceph_vxattr *vxattr; struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; + struct ceph_cap_flush *prealloc_cf = NULL; int issued; int err; - int dirty; + int dirty = 0; int name_len = strlen(name); int val_len = size; char *newname = NULL; char *newval = NULL; struct ceph_inode_xattr *xattr = NULL; int required_blob_size; - - if (ceph_snap(inode) != CEPH_NOSNAP) - return -EROFS; + bool lock_snap_rwsem = false; if (!ceph_is_valid_xattr(name)) return -EOPNOTSUPP; @@ -909,12 +951,27 @@ if (!xattr) goto out; + prealloc_cf = ceph_alloc_cap_flush(); + if (!prealloc_cf) + goto out; + spin_lock(&ci->i_ceph_lock); retry: issued = __ceph_caps_issued(ci, NULL); - dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued)); - if (!(issued & CEPH_CAP_XATTR_EXCL)) + if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL)) goto do_sync; + + if (!lock_snap_rwsem && !ci->i_head_snapc) { + lock_snap_rwsem = true; + if (!down_read_trylock(&mdsc->snap_rwsem)) { + spin_unlock(&ci->i_ceph_lock); + down_read(&mdsc->snap_rwsem); + spin_lock(&ci->i_ceph_lock); + goto retry; + } + } + + dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued)); __build_xattrs(inode); required_blob_size = __get_required_blob_size(ci, name_len, val_len); @@ -927,7 +984,7 @@ dout(" preaallocating new blob size=%d\n", required_blob_size); blob = ceph_buffer_new(required_blob_size, GFP_NOFS); if (!blob) - goto out; + goto do_sync_unlocked; spin_lock(&ci->i_ceph_lock); if (ci->i_xattrs.prealloc_blob) ceph_buffer_put(ci->i_xattrs.prealloc_blob); @@ -935,35 +992,58 @@ goto retry; } - err = __set_xattr(ci, newname, name_len, newval, - val_len, 1, 1, 1, &xattr); + err = __set_xattr(ci, newname, name_len, newval, val_len, + flags, value ? 1 : -1, &xattr); - dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); - ci->i_xattrs.dirty = true; - inode->i_ctime = CURRENT_TIME; + if (!err) { + dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL, + &prealloc_cf); + ci->i_xattrs.dirty = true; + inode->i_ctime = CURRENT_TIME; + } spin_unlock(&ci->i_ceph_lock); + if (lock_snap_rwsem) + up_read(&mdsc->snap_rwsem); if (dirty) __mark_inode_dirty(inode, dirty); + ceph_free_cap_flush(prealloc_cf); return err; do_sync: spin_unlock(&ci->i_ceph_lock); do_sync_unlocked: + if (lock_snap_rwsem) + up_read(&mdsc->snap_rwsem); err = ceph_sync_setxattr(dentry, name, value, size, flags); out: + ceph_free_cap_flush(prealloc_cf); kfree(newname); kfree(newval); kfree(xattr); return err; } +int ceph_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + if (ceph_snap(d_inode(dentry)) != CEPH_NOSNAP) + return -EROFS; + + if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) + return generic_setxattr(dentry, name, value, size, flags); + + if (size == 0) + value = ""; /* empty EA, do not remove */ + + return __ceph_setxattr(dentry, name, value, size, flags); +} + static int ceph_send_removexattr(struct dentry *dentry, const char *name) { struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); struct ceph_mds_client *mdsc = fsc->mdsc; - struct inode *inode = dentry->d_inode; - struct inode *parent_inode; + struct inode *inode = d_inode(dentry); struct ceph_mds_request *req; int err; @@ -971,31 +1051,31 @@ USE_AUTH_MDS); if (IS_ERR(req)) return PTR_ERR(req); + req->r_path2 = kstrdup(name, GFP_NOFS); + if (!req->r_path2) + return -ENOMEM; + req->r_inode = inode; ihold(inode); - req->r_inode_drop = CEPH_CAP_XATTR_SHARED; req->r_num_caps = 1; - req->r_path2 = kstrdup(name, GFP_NOFS); - - parent_inode = ceph_get_dentry_parent_inode(dentry); - err = ceph_mdsc_do_request(mdsc, parent_inode, req); - iput(parent_inode); + req->r_inode_drop = CEPH_CAP_XATTR_SHARED; + err = ceph_mdsc_do_request(mdsc, NULL, req); ceph_mdsc_put_request(req); return err; } -int ceph_removexattr(struct dentry *dentry, const char *name) +int __ceph_removexattr(struct dentry *dentry, const char *name) { - struct inode *inode = dentry->d_inode; + struct inode *inode = d_inode(dentry); struct ceph_vxattr *vxattr; struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; + struct ceph_cap_flush *prealloc_cf = NULL; int issued; int err; int required_blob_size; int dirty; - - if (ceph_snap(inode) != CEPH_NOSNAP) - return -EROFS; + bool lock_snap_rwsem = false; if (!ceph_is_valid_xattr(name)) return -EOPNOTSUPP; @@ -1008,14 +1088,29 @@ if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN)) goto do_sync_unlocked; + prealloc_cf = ceph_alloc_cap_flush(); + if (!prealloc_cf) + return -ENOMEM; + err = -ENOMEM; spin_lock(&ci->i_ceph_lock); retry: issued = __ceph_caps_issued(ci, NULL); + if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL)) + goto do_sync; + + if (!lock_snap_rwsem && !ci->i_head_snapc) { + lock_snap_rwsem = true; + if (!down_read_trylock(&mdsc->snap_rwsem)) { + spin_unlock(&ci->i_ceph_lock); + down_read(&mdsc->snap_rwsem); + spin_lock(&ci->i_ceph_lock); + goto retry; + } + } + dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued)); - if (!(issued & CEPH_CAP_XATTR_EXCL)) - goto do_sync; __build_xattrs(inode); required_blob_size = __get_required_blob_size(ci, 0, 0); @@ -1028,7 +1123,7 @@ dout(" preaallocating new blob size=%d\n", required_blob_size); blob = ceph_buffer_new(required_blob_size, GFP_NOFS); if (!blob) - goto out; + goto do_sync_unlocked; spin_lock(&ci->i_ceph_lock); if (ci->i_xattrs.prealloc_blob) ceph_buffer_put(ci->i_xattrs.prealloc_blob); @@ -1038,18 +1133,34 @@ err = __remove_xattr_by_name(ceph_inode(inode), name); - dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); + dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL, + &prealloc_cf); ci->i_xattrs.dirty = true; inode->i_ctime = CURRENT_TIME; spin_unlock(&ci->i_ceph_lock); + if (lock_snap_rwsem) + up_read(&mdsc->snap_rwsem); if (dirty) __mark_inode_dirty(inode, dirty); + ceph_free_cap_flush(prealloc_cf); return err; do_sync: spin_unlock(&ci->i_ceph_lock); do_sync_unlocked: + if (lock_snap_rwsem) + up_read(&mdsc->snap_rwsem); + ceph_free_cap_flush(prealloc_cf); err = ceph_send_removexattr(dentry, name); -out: return err; } +int ceph_removexattr(struct dentry *dentry, const char *name) +{ + if (ceph_snap(d_inode(dentry)) != CEPH_NOSNAP) + return -EROFS; + + if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) + return generic_removexattr(dentry, name); + + return __ceph_removexattr(dentry, name); +}