--- zzzz-none-000/linux-3.10.107/fs/xfs/xfs_iops.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/fs/xfs/xfs_iops.c 2021-02-04 17:41:59.000000000 +0000 @@ -17,37 +17,47 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_acl.h" -#include "xfs_log.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h" +#include "xfs_da_format.h" #include "xfs_inode.h" #include "xfs_bmap.h" -#include "xfs_rtalloc.h" +#include "xfs_bmap_util.h" +#include "xfs_acl.h" +#include "xfs_quota.h" #include "xfs_error.h" -#include "xfs_itable.h" #include "xfs_attr.h" -#include "xfs_buf_item.h" -#include "xfs_utils.h" -#include "xfs_vnodeops.h" -#include "xfs_inode_item.h" +#include "xfs_trans.h" #include "xfs_trace.h" #include "xfs_icache.h" +#include "xfs_symlink.h" +#include "xfs_da_btree.h" +#include "xfs_dir2.h" +#include "xfs_trans_space.h" +#include "xfs_pnfs.h" #include #include -#include #include #include #include #include +/* + * Directories have different lock order w.r.t. mmap_sem compared to regular + * files. This is due to readdir potentially triggering page faults on a user + * buffer inside filldir(), and this happens with the ilock on the directory + * held. For regular files, the lock order is the other way around - the + * mmap_sem is taken during the page fault, and then we lock the ilock to do + * block mapping. Hence we need a different class for the directory ilock so + * that lockdep can tell them apart. + */ +static struct lock_class_key xfs_nondir_ilock_class; +static struct lock_class_key xfs_dir_ilock_class; + static int xfs_initxattrs( struct inode *inode, @@ -60,7 +70,7 @@ for (xattr = xattr_array; xattr->name != NULL; xattr++) { error = xfs_attr_set(ip, xattr->name, xattr->value, - xattr->value_len, ATTR_SECURE); + xattr->value_len, ATTR_SECURE); if (error < 0) break; } @@ -81,16 +91,18 @@ const struct qstr *qstr) { return security_inode_init_security(inode, dir, qstr, - &xfs_initxattrs, NULL); + &xfs_initxattrs, NULL); } static void xfs_dentry_to_name( struct xfs_name *namep, - struct dentry *dentry) + struct dentry *dentry, + int mode) { namep->name = dentry->d_name.name; namep->len = dentry->d_name.len; + namep->type = xfs_mode_to_ftype[(mode & S_IFMT) >> S_SHIFT]; } STATIC void @@ -106,22 +118,22 @@ * xfs_init_security we must back out. * ENOSPC can hit here, among other things. */ - xfs_dentry_to_name(&teardown, dentry); + xfs_dentry_to_name(&teardown, dentry, 0); xfs_remove(XFS_I(dir), &teardown, XFS_I(inode)); - iput(inode); } STATIC int -xfs_vn_mknod( +xfs_generic_create( struct inode *dir, struct dentry *dentry, umode_t mode, - dev_t rdev) + dev_t rdev, + bool tmpfile) /* unnamed file */ { struct inode *inode; struct xfs_inode *ip = NULL; - struct posix_acl *default_acl = NULL; + struct posix_acl *default_acl, *acl; struct xfs_name name; int error; @@ -137,17 +149,16 @@ rdev = 0; } - if (IS_POSIXACL(dir)) { - default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT); - if (IS_ERR(default_acl)) - return PTR_ERR(default_acl); + error = posix_acl_create(dir, &mode, &default_acl, &acl); + if (error) + return error; - if (!default_acl) - mode &= ~current_umask(); + if (!tmpfile) { + xfs_dentry_to_name(&name, dentry, mode); + error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip); + } else { + error = xfs_create_tmpfile(XFS_I(dir), dentry, mode, &ip); } - - xfs_dentry_to_name(&name, dentry); - error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip); if (unlikely(error)) goto out_free_acl; @@ -157,22 +168,49 @@ if (unlikely(error)) goto out_cleanup_inode; +#ifdef CONFIG_XFS_POSIX_ACL if (default_acl) { - error = -xfs_inherit_acl(inode, default_acl); - default_acl = NULL; - if (unlikely(error)) + error = xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); + if (error) + goto out_cleanup_inode; + } + if (acl) { + error = xfs_set_acl(inode, acl, ACL_TYPE_ACCESS); + if (error) goto out_cleanup_inode; } +#endif + if (tmpfile) + d_tmpfile(dentry, inode); + else + d_instantiate(dentry, inode); - d_instantiate(dentry, inode); - return -error; + xfs_finish_inode_setup(ip); - out_cleanup_inode: - xfs_cleanup_inode(dir, inode, dentry); out_free_acl: - posix_acl_release(default_acl); - return -error; + if (default_acl) + posix_acl_release(default_acl); + if (acl) + posix_acl_release(acl); + return error; + + out_cleanup_inode: + xfs_finish_inode_setup(ip); + if (!tmpfile) + xfs_cleanup_inode(dir, inode, dentry); + iput(inode); + goto out_free_acl; +} + +STATIC int +xfs_vn_mknod( + struct inode *dir, + struct dentry *dentry, + umode_t mode, + dev_t rdev) +{ + return xfs_generic_create(dir, dentry, mode, rdev, false); } STATIC int @@ -207,11 +245,11 @@ if (dentry->d_name.len >= MAXNAMELEN) return ERR_PTR(-ENAMETOOLONG); - xfs_dentry_to_name(&name, dentry); + xfs_dentry_to_name(&name, dentry, 0); error = xfs_lookup(XFS_I(dir), &name, &cip, NULL); if (unlikely(error)) { - if (unlikely(error != ENOENT)) - return ERR_PTR(-error); + if (unlikely(error != -ENOENT)) + return ERR_PTR(error); d_add(dentry, NULL); return NULL; } @@ -234,11 +272,11 @@ if (dentry->d_name.len >= MAXNAMELEN) return ERR_PTR(-ENAMETOOLONG); - xfs_dentry_to_name(&xname, dentry); + xfs_dentry_to_name(&xname, dentry, 0); error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name); if (unlikely(error)) { - if (unlikely(error != ENOENT)) - return ERR_PTR(-error); + if (unlikely(error != -ENOENT)) + return ERR_PTR(error); /* * call d_add(dentry, NULL) here when d_drop_negative_children * is called in xfs_vn_mknod (ie. allow negative dentries @@ -265,15 +303,15 @@ struct inode *dir, struct dentry *dentry) { - struct inode *inode = old_dentry->d_inode; + struct inode *inode = d_inode(old_dentry); struct xfs_name name; int error; - xfs_dentry_to_name(&name, dentry); + xfs_dentry_to_name(&name, dentry, inode->i_mode); error = xfs_link(XFS_I(dir), XFS_I(inode), &name); if (unlikely(error)) - return -error; + return error; ihold(inode); d_instantiate(dentry, inode); @@ -288,9 +326,9 @@ struct xfs_name name; int error; - xfs_dentry_to_name(&name, dentry); + xfs_dentry_to_name(&name, dentry, 0); - error = -xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode)); + error = xfs_remove(XFS_I(dir), &name, XFS_I(d_inode(dentry))); if (error) return error; @@ -318,7 +356,7 @@ mode = S_IFLNK | (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO); - xfs_dentry_to_name(&name, dentry); + xfs_dentry_to_name(&name, dentry, mode); error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip); if (unlikely(error)) @@ -331,12 +369,15 @@ goto out_cleanup_inode; d_instantiate(dentry, inode); + xfs_finish_inode_setup(cip); return 0; out_cleanup_inode: + xfs_finish_inode_setup(cip); xfs_cleanup_inode(dir, inode, dentry); + iput(inode); out: - return -error; + return error; } STATIC int @@ -344,18 +385,27 @@ struct inode *odir, struct dentry *odentry, struct inode *ndir, - struct dentry *ndentry) + struct dentry *ndentry, + unsigned int flags) { - struct inode *new_inode = ndentry->d_inode; + struct inode *new_inode = d_inode(ndentry); + int omode = 0; struct xfs_name oname; struct xfs_name nname; - xfs_dentry_to_name(&oname, odentry); - xfs_dentry_to_name(&nname, ndentry); + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) + return -EINVAL; - return -xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode), - XFS_I(ndir), &nname, new_inode ? - XFS_I(new_inode) : NULL); + /* if we are exchanging files, we need to set i_mode of both files */ + if (flags & RENAME_EXCHANGE) + omode = d_inode(ndentry)->i_mode; + + xfs_dentry_to_name(&oname, odentry, omode); + xfs_dentry_to_name(&nname, ndentry, d_inode(odentry)->i_mode); + + return xfs_rename(XFS_I(odir), &oname, XFS_I(d_inode(odentry)), + XFS_I(ndir), &nname, + new_inode ? XFS_I(new_inode) : NULL, flags); } /* @@ -363,10 +413,10 @@ * we need to be very careful about how much stack we use. * uio is kmalloced for this reason... */ -STATIC void * +STATIC const char * xfs_vn_follow_link( struct dentry *dentry, - struct nameidata *nd) + void **cookie) { char *link; int error = -ENOMEM; @@ -375,30 +425,16 @@ if (!link) goto out_err; - error = -xfs_readlink(XFS_I(dentry->d_inode), link); + error = xfs_readlink(XFS_I(d_inode(dentry)), link); if (unlikely(error)) goto out_kfree; - nd_set_link(nd, link); - return NULL; + return *cookie = link; out_kfree: kfree(link); out_err: - nd_set_link(nd, ERR_PTR(error)); - return NULL; -} - -STATIC void -xfs_vn_put_link( - struct dentry *dentry, - struct nameidata *nd, - void *p) -{ - char *s = nd_get_link(nd); - - if (!IS_ERR(s)) - kfree(s); + return ERR_PTR(error); } STATIC int @@ -407,21 +443,21 @@ struct dentry *dentry, struct kstat *stat) { - struct inode *inode = dentry->d_inode; + struct inode *inode = d_inode(dentry); struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; trace_xfs_getattr(ip); if (XFS_FORCED_SHUTDOWN(mp)) - return -XFS_ERROR(EIO); + return -EIO; stat->size = XFS_ISIZE(ip); stat->dev = inode->i_sb->s_dev; stat->mode = ip->i_d.di_mode; stat->nlink = ip->i_d.di_nlink; - stat->uid = ip->i_d.di_uid; - stat->gid = ip->i_d.di_gid; + stat->uid = inode->i_uid; + stat->gid = inode->i_gid; stat->ino = ip->i_ino; stat->atime = inode->i_atime; stat->mtime = inode->i_mtime; @@ -457,19 +493,14 @@ static void xfs_setattr_mode( - struct xfs_trans *tp, struct xfs_inode *ip, struct iattr *iattr) { - struct inode *inode = VFS_I(ip); - umode_t mode = iattr->ia_mode; + struct inode *inode = VFS_I(ip); + umode_t mode = iattr->ia_mode; - ASSERT(tp); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) - mode &= ~S_ISGID; - ip->i_d.di_mode &= S_IFMT; ip->i_d.di_mode |= mode & ~S_IFMT; @@ -477,6 +508,32 @@ inode->i_mode |= mode & ~S_IFMT; } +void +xfs_setattr_time( + struct xfs_inode *ip, + struct iattr *iattr) +{ + struct inode *inode = VFS_I(ip); + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + + if (iattr->ia_valid & ATTR_ATIME) { + inode->i_atime = iattr->ia_atime; + ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; + ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; + } + if (iattr->ia_valid & ATTR_CTIME) { + inode->i_ctime = iattr->ia_ctime; + ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; + ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; + } + if (iattr->ia_valid & ATTR_MTIME) { + inode->i_mtime = iattr->ia_mtime; + ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; + ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; + } +} + int xfs_setattr_nonsize( struct xfs_inode *ip, @@ -488,22 +545,25 @@ int mask = iattr->ia_valid; xfs_trans_t *tp; int error; - uid_t uid = 0, iuid = 0; - gid_t gid = 0, igid = 0; + kuid_t uid = GLOBAL_ROOT_UID, iuid = GLOBAL_ROOT_UID; + kgid_t gid = GLOBAL_ROOT_GID, igid = GLOBAL_ROOT_GID; struct xfs_dquot *udqp = NULL, *gdqp = NULL; struct xfs_dquot *olddquot1 = NULL, *olddquot2 = NULL; trace_xfs_setattr(ip); - if (mp->m_flags & XFS_MOUNT_RDONLY) - return XFS_ERROR(EROFS); + /* If acls are being inherited, we already have this checked */ + if (!(flags & XFS_ATTR_NOACL)) { + if (mp->m_flags & XFS_MOUNT_RDONLY) + return -EROFS; - if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; - error = -inode_change_ok(inode, iattr); - if (error) - return XFS_ERROR(error); + error = inode_change_ok(inode, iattr); + if (error) + return error; + } ASSERT((mask & ATTR_SIZE) == 0); @@ -522,13 +582,13 @@ uid = iattr->ia_uid; qflags |= XFS_QMOPT_UQUOTA; } else { - uid = ip->i_d.di_uid; + uid = inode->i_uid; } if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) { gid = iattr->ia_gid; qflags |= XFS_QMOPT_GQUOTA; } else { - gid = ip->i_d.di_gid; + gid = inode->i_gid; } /* @@ -538,16 +598,18 @@ */ ASSERT(udqp == NULL); ASSERT(gdqp == NULL); - error = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip), - qflags, &udqp, &gdqp); + error = xfs_qm_vop_dqalloc(ip, xfs_kuid_to_uid(uid), + xfs_kgid_to_gid(gid), + xfs_get_projid(ip), + qflags, &udqp, &gdqp, NULL); if (error) return error; } tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); - error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); if (error) - goto out_dqrele; + goto out_trans_cancel; xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -561,8 +623,8 @@ * while we didn't have the inode locked, inode's dquot(s) * would have changed also. */ - iuid = ip->i_d.di_uid; - igid = ip->i_d.di_gid; + iuid = inode->i_uid; + igid = inode->i_gid; gid = (mask & ATTR_GID) ? iattr->ia_gid : igid; uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid; @@ -571,14 +633,14 @@ * going to change. */ if (XFS_IS_QUOTA_RUNNING(mp) && - ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) || - (XFS_IS_GQUOTA_ON(mp) && igid != gid))) { + ((XFS_IS_UQUOTA_ON(mp) && !uid_eq(iuid, uid)) || + (XFS_IS_GQUOTA_ON(mp) && !gid_eq(igid, gid)))) { ASSERT(tp); error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, - capable(CAP_FOWNER) ? + NULL, capable(CAP_FOWNER) ? XFS_QMOPT_FORCE_RES : 0); if (error) /* out of quota */ - goto out_trans_cancel; + goto out_unlock; } } @@ -602,61 +664,42 @@ * Change the ownerships and register quota modifications * in the transaction. */ - if (iuid != uid) { + if (!uid_eq(iuid, uid)) { if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) { ASSERT(mask & ATTR_UID); ASSERT(udqp); olddquot1 = xfs_qm_vop_chown(tp, ip, &ip->i_udquot, udqp); } - ip->i_d.di_uid = uid; + ip->i_d.di_uid = xfs_kuid_to_uid(uid); inode->i_uid = uid; } - if (igid != gid) { + if (!gid_eq(igid, gid)) { if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) { - ASSERT(!XFS_IS_PQUOTA_ON(mp)); + ASSERT(xfs_sb_version_has_pquotino(&mp->m_sb) || + !XFS_IS_PQUOTA_ON(mp)); ASSERT(mask & ATTR_GID); ASSERT(gdqp); olddquot2 = xfs_qm_vop_chown(tp, ip, &ip->i_gdquot, gdqp); } - ip->i_d.di_gid = gid; + ip->i_d.di_gid = xfs_kgid_to_gid(gid); inode->i_gid = gid; } } - /* - * Change file access modes. - */ if (mask & ATTR_MODE) - xfs_setattr_mode(tp, ip, iattr); - - /* - * Change file access or modified times. - */ - if (mask & ATTR_ATIME) { - inode->i_atime = iattr->ia_atime; - ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; - ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; - } - if (mask & ATTR_CTIME) { - inode->i_ctime = iattr->ia_ctime; - ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; - ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; - } - if (mask & ATTR_MTIME) { - inode->i_mtime = iattr->ia_mtime; - ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; - ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; - } + xfs_setattr_mode(ip, iattr); + if (mask & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME)) + xfs_setattr_time(ip, iattr); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - XFS_STATS_INC(xs_ig_attrchg); + XFS_STATS_INC(mp, xs_ig_attrchg); if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0); + error = xfs_trans_commit(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); @@ -669,7 +712,7 @@ xfs_qm_dqrele(gdqp); if (error) - return XFS_ERROR(error); + return error; /* * XXX(hch): Updating the ACL entries is not atomic vs the i_mode @@ -679,17 +722,17 @@ * Posix ACL code seems to care about this issue either. */ if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) { - error = -xfs_acl_chmod(inode); + error = posix_acl_chmod(inode, inode->i_mode); if (error) - return XFS_ERROR(error); + return error; } return 0; -out_trans_cancel: - xfs_trans_cancel(tp, 0); +out_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL); -out_dqrele: +out_trans_cancel: + xfs_trans_cancel(tp); xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); return error; @@ -701,38 +744,33 @@ int xfs_setattr_size( struct xfs_inode *ip, - struct iattr *iattr, - int flags) + struct iattr *iattr) { struct xfs_mount *mp = ip->i_mount; struct inode *inode = VFS_I(ip); - int mask = iattr->ia_valid; xfs_off_t oldsize, newsize; struct xfs_trans *tp; int error; uint lock_flags = 0; - uint commit_flags = 0; + bool did_zeroing = false; trace_xfs_setattr(ip); if (mp->m_flags & XFS_MOUNT_RDONLY) - return XFS_ERROR(EROFS); + return -EROFS; if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + return -EIO; - error = -inode_change_ok(inode, iattr); + error = inode_change_ok(inode, iattr); if (error) - return XFS_ERROR(error); + return error; + ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); + ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL)); ASSERT(S_ISREG(ip->i_d.di_mode)); - ASSERT((mask & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| - ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); - - if (!(flags & XFS_ATTR_NOLOCK)) { - lock_flags |= XFS_IOLOCK_EXCL; - xfs_ilock(ip, lock_flags); - } + ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| + ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); oldsize = inode->i_size; newsize = iattr->ia_size; @@ -741,13 +779,12 @@ * Short circuit the truncate case for zero length files. */ if (newsize == 0 && oldsize == 0 && ip->i_d.di_nextents == 0) { - if (!(mask & (ATTR_CTIME|ATTR_MTIME))) - goto out_unlock; + if (!(iattr->ia_valid & (ATTR_CTIME|ATTR_MTIME))) + return 0; /* * Use the regular setattr path to update the timestamps. */ - xfs_iunlock(ip, lock_flags); iattr->ia_valid &= ~ATTR_SIZE; return xfs_setattr_nonsize(ip, iattr, 0); } @@ -757,25 +794,21 @@ */ error = xfs_qm_dqattach(ip, 0); if (error) - goto out_unlock; + return error; /* - * Now we can make the changes. Before we join the inode to the - * transaction, take care of the part of the truncation that must be - * done without the inode lock. This needs to be done before joining - * the inode to the transaction, because the inode cannot be unlocked - * once it is a part of the transaction. + * File data changes must be complete before we start the transaction to + * modify the inode. This needs to be done before joining the inode to + * the transaction because the inode cannot be unlocked once it is a + * part of the transaction. + * + * Start with zeroing any data block beyond EOF that we may expose on + * file extension. */ if (newsize > oldsize) { - /* - * Do the first part of growing a file: zero any data in the - * last block that is beyond the old EOF. We need to do this - * before the inode is joined to the transaction to modify - * i_size. - */ - error = xfs_zero_eof(ip, newsize, oldsize); + error = xfs_zero_eof(ip, newsize, oldsize, &did_zeroing); if (error) - goto out_unlock; + return error; } /* @@ -783,43 +816,53 @@ * any previous writes that are beyond the on disk EOF and the new * EOF that have not been written out need to be written here. If we * do not write the data out, we expose ourselves to the null files - * problem. - * - * Only flush from the on disk size to the smaller of the in memory - * file size or the new size as that's the range we really care about - * here and prevents waiting for other data not within the range we - * care about here. + * problem. Note that this includes any block zeroing we did above; + * otherwise those blocks may not be zeroed after a crash. */ - if (oldsize != ip->i_d.di_size && newsize > ip->i_d.di_size) { - error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping, + if (newsize > ip->i_d.di_size && + (oldsize != ip->i_d.di_size || did_zeroing)) { + error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, ip->i_d.di_size, newsize); if (error) - goto out_unlock; + return error; } - /* - * Wait for all direct I/O to complete. - */ + /* Now wait for all direct I/O to complete. */ inode_dio_wait(inode); - error = -block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks); + /* + * We've already locked out new page faults, so now we can safely remove + * pages from the page cache knowing they won't get refaulted until we + * drop the XFS_MMAP_EXCL lock after the extent manipulations are + * complete. The truncate_setsize() call also cleans partial EOF page + * PTEs on extending truncates and hence ensures sub-page block size + * filesystems are correctly handled, too. + * + * We have to do all the page cache truncate work outside the + * transaction context as the "lock" order is page lock->log space + * reservation as defined by extent allocation in the writeback path. + * Hence a truncate can fail with ENOMEM from xfs_trans_reserve(), but + * having already truncated the in-memory version of the file (i.e. made + * user visible changes). There's not much we can do about this, except + * to hope that the caller sees ENOMEM and retries the truncate + * operation. + */ + if (IS_DAX(inode)) + error = dax_truncate_page(inode, newsize, xfs_get_blocks_direct); + else + error = block_truncate_page(inode->i_mapping, newsize, + xfs_get_blocks); if (error) - goto out_unlock; + return error; + truncate_setsize(inode, newsize); tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); - error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, - XFS_TRANS_PERM_LOG_RES, - XFS_ITRUNCATE_LOG_COUNT); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); if (error) goto out_trans_cancel; - truncate_setsize(inode, newsize); - - commit_flags = XFS_TRANS_RELEASE_LOG_RES; lock_flags |= XFS_ILOCK_EXCL; - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, 0); /* @@ -832,10 +875,11 @@ * these flags set. For all other operations the VFS set these flags * explicitly if it wants a timestamp update. */ - if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME)))) { + if (newsize != oldsize && + !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) { iattr->ia_ctime = iattr->ia_mtime = current_fs_time(inode->i_sb); - mask |= ATTR_CTIME | ATTR_MTIME; + iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME; } /* @@ -856,7 +900,7 @@ if (newsize <= oldsize) { error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, newsize); if (error) - goto out_trans_abort; + goto out_trans_cancel; /* * Truncated "down", so we're removing references to old data @@ -871,51 +915,54 @@ xfs_inode_clear_eofblocks_tag(ip); } - /* - * Change file access modes. - */ - if (mask & ATTR_MODE) - xfs_setattr_mode(tp, ip, iattr); - - if (mask & ATTR_CTIME) { - inode->i_ctime = iattr->ia_ctime; - ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; - ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; - } - if (mask & ATTR_MTIME) { - inode->i_mtime = iattr->ia_mtime; - ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; - ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; - } + if (iattr->ia_valid & ATTR_MODE) + xfs_setattr_mode(ip, iattr); + if (iattr->ia_valid & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME)) + xfs_setattr_time(ip, iattr); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - XFS_STATS_INC(xs_ig_attrchg); + XFS_STATS_INC(mp, xs_ig_attrchg); if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + error = xfs_trans_commit(tp); out_unlock: if (lock_flags) xfs_iunlock(ip, lock_flags); return error; -out_trans_abort: - commit_flags |= XFS_TRANS_ABORT; out_trans_cancel: - xfs_trans_cancel(tp, commit_flags); + xfs_trans_cancel(tp); goto out_unlock; } STATIC int xfs_vn_setattr( - struct dentry *dentry, - struct iattr *iattr) + struct dentry *dentry, + struct iattr *iattr) { - if (iattr->ia_valid & ATTR_SIZE) - return -xfs_setattr_size(XFS_I(dentry->d_inode), iattr, 0); - return -xfs_setattr_nonsize(XFS_I(dentry->d_inode), iattr, 0); + struct xfs_inode *ip = XFS_I(d_inode(dentry)); + int error; + + if (iattr->ia_valid & ATTR_SIZE) { + uint iolock = XFS_IOLOCK_EXCL; + + xfs_ilock(ip, iolock); + error = xfs_break_layouts(d_inode(dentry), &iolock, true); + if (!error) { + xfs_ilock(ip, XFS_MMAPLOCK_EXCL); + iolock |= XFS_MMAPLOCK_EXCL; + + error = xfs_setattr_size(ip, iattr); + } + xfs_iunlock(ip, iolock); + } else { + error = xfs_setattr_nonsize(ip, iattr, 0); + } + + return error; } STATIC int @@ -932,10 +979,10 @@ trace_xfs_update_time(ip); tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); - error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0); if (error) { - xfs_trans_cancel(tp, 0); - return -error; + xfs_trans_cancel(tp); + return error; } xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -956,7 +1003,7 @@ } xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP); - return -xfs_trans_commit(tp, 0); + return xfs_trans_commit(tp); } #define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) @@ -987,7 +1034,8 @@ if (bmv->bmv_oflags & BMV_OF_PREALLOC) fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN; else if (bmv->bmv_oflags & BMV_OF_DELALLOC) { - fiemap_flags |= FIEMAP_EXTENT_DELALLOC; + fiemap_flags |= (FIEMAP_EXTENT_DELALLOC | + FIEMAP_EXTENT_UNKNOWN); physical = 0; /* no block yet */ } if (bmv->bmv_oflags & BMV_OF_LAST) @@ -1000,7 +1048,7 @@ *full = 1; /* user array now full */ } - return -error; + return error; } STATIC int @@ -1019,12 +1067,12 @@ return error; /* Set up bmap header for xfs internal routine */ - bm.bmv_offset = BTOBB(start); + bm.bmv_offset = BTOBBT(start); /* Special case for whole file */ if (length == FIEMAP_MAX_OFFSET) bm.bmv_length = -1LL; else - bm.bmv_length = BTOBB(length); + bm.bmv_length = BTOBB(start + length) - bm.bmv_offset; /* We add one because in getbmap world count includes the header */ bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM : @@ -1039,13 +1087,23 @@ error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo); if (error) - return -error; + return error; return 0; } +STATIC int +xfs_vn_tmpfile( + struct inode *dir, + struct dentry *dentry, + umode_t mode) +{ + return xfs_generic_create(dir, dentry, mode, 0, true); +} + static const struct inode_operations xfs_inode_operations = { .get_acl = xfs_get_acl, + .set_acl = xfs_set_acl, .getattr = xfs_vn_getattr, .setattr = xfs_vn_setattr, .setxattr = generic_setxattr, @@ -1071,8 +1129,9 @@ */ .rmdir = xfs_vn_unlink, .mknod = xfs_vn_mknod, - .rename = xfs_vn_rename, + .rename2 = xfs_vn_rename, .get_acl = xfs_get_acl, + .set_acl = xfs_set_acl, .getattr = xfs_vn_getattr, .setattr = xfs_vn_setattr, .setxattr = generic_setxattr, @@ -1080,6 +1139,7 @@ .removexattr = generic_removexattr, .listxattr = xfs_vn_listxattr, .update_time = xfs_vn_update_time, + .tmpfile = xfs_vn_tmpfile, }; static const struct inode_operations xfs_dir_ci_inode_operations = { @@ -1097,8 +1157,9 @@ */ .rmdir = xfs_vn_unlink, .mknod = xfs_vn_mknod, - .rename = xfs_vn_rename, + .rename2 = xfs_vn_rename, .get_acl = xfs_get_acl, + .set_acl = xfs_set_acl, .getattr = xfs_vn_getattr, .setattr = xfs_vn_setattr, .setxattr = generic_setxattr, @@ -1106,13 +1167,13 @@ .removexattr = generic_removexattr, .listxattr = xfs_vn_listxattr, .update_time = xfs_vn_update_time, + .tmpfile = xfs_vn_tmpfile, }; static const struct inode_operations xfs_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = xfs_vn_follow_link, - .put_link = xfs_vn_put_link, - .get_acl = xfs_get_acl, + .put_link = kfree_put_link, .getattr = xfs_vn_getattr, .setattr = xfs_vn_setattr, .setxattr = generic_setxattr, @@ -1127,41 +1188,38 @@ struct inode *inode, struct xfs_inode *ip) { - if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE) + uint16_t flags = ip->i_d.di_flags; + + inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | S_SYNC | + S_NOATIME | S_DAX); + + if (flags & XFS_DIFLAG_IMMUTABLE) inode->i_flags |= S_IMMUTABLE; - else - inode->i_flags &= ~S_IMMUTABLE; - if (ip->i_d.di_flags & XFS_DIFLAG_APPEND) + if (flags & XFS_DIFLAG_APPEND) inode->i_flags |= S_APPEND; - else - inode->i_flags &= ~S_APPEND; - if (ip->i_d.di_flags & XFS_DIFLAG_SYNC) + if (flags & XFS_DIFLAG_SYNC) inode->i_flags |= S_SYNC; - else - inode->i_flags &= ~S_SYNC; - if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME) + if (flags & XFS_DIFLAG_NOATIME) inode->i_flags |= S_NOATIME; - else - inode->i_flags &= ~S_NOATIME; + /* XXX: Also needs an on-disk per inode flag! */ + if (ip->i_mount->m_flags & XFS_MOUNT_DAX) + inode->i_flags |= S_DAX; } /* - * Initialize the Linux inode, set up the operation vectors and - * unlock the inode. - * - * When reading existing inodes from disk this is called directly - * from xfs_iget, when creating a new inode it is called from - * xfs_ialloc after setting up the inode. + * Initialize the Linux inode and set up the operation vectors. * - * We are always called with an uninitialised linux inode here. - * We need to initialise the necessary fields and take a reference - * on it. + * When reading existing inodes from disk this is called directly from xfs_iget, + * when creating a new inode it is called from xfs_ialloc after setting up the + * inode. These callers have different criteria for clearing XFS_INEW, so leave + * it up to the caller to deal with unlocking the inode appropriately. */ void xfs_setup_inode( struct xfs_inode *ip) { struct inode *inode = &ip->i_vnode; + gfp_t gfp_mask; inode->i_ino = ip->i_ino; inode->i_state = I_NEW; @@ -1172,8 +1230,8 @@ inode->i_mode = ip->i_d.di_mode; set_nlink(inode, ip->i_d.di_nlink); - inode->i_uid = ip->i_d.di_uid; - inode->i_gid = ip->i_d.di_gid; + inode->i_uid = xfs_uid_to_kuid(ip->i_d.di_uid); + inode->i_gid = xfs_gid_to_kgid(ip->i_d.di_gid); switch (inode->i_mode & S_IFMT) { case S_IFBLK: @@ -1197,6 +1255,8 @@ inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; xfs_diflags_to_iflags(inode, ip); + ip->d_ops = ip->i_mount->m_nondir_inode_ops; + lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class); switch (inode->i_mode & S_IFMT) { case S_IFREG: inode->i_op = &xfs_inode_operations; @@ -1204,11 +1264,13 @@ inode->i_mapping->a_ops = &xfs_address_space_operations; break; case S_IFDIR: + lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class); if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb)) inode->i_op = &xfs_dir_ci_inode_operations; else inode->i_op = &xfs_dir_inode_operations; inode->i_fop = &xfs_dir_file_operations; + ip->d_ops = ip->i_mount->m_dir_inode_ops; break; case S_IFLNK: inode->i_op = &xfs_symlink_inode_operations; @@ -1222,6 +1284,14 @@ } /* + * Ensure all page cache allocations are done from GFP_NOFS context to + * prevent direct reclaim recursion back into the filesystem and blowing + * stacks or deadlocking. + */ + gfp_mask = mapping_gfp_mask(inode->i_mapping); + mapping_set_gfp_mask(inode->i_mapping, (gfp_mask & ~(__GFP_FS))); + + /* * If there is no attribute fork no ACL can exist on this inode, * and it can't have any file capabilities attached to it either. */ @@ -1229,9 +1299,4 @@ inode_has_no_xattr(inode); cache_no_acl(inode); } - - xfs_iflags_clear(ip, XFS_INEW); - barrier(); - - unlock_new_inode(inode); }