--- zzzz-none-000/linux-3.10.107/fs/exportfs/expfs.c	2017-06-27 09:49:32.000000000 +0000
+++ scorpion-7490-727/linux-3.10.107/fs/exportfs/expfs.c	2021-02-04 17:41:59.000000000 +0000
@@ -69,152 +69,187 @@
 	return NULL;
 }
 
-/*
- * Find root of a disconnected subtree and return a reference to it.
- */
-static struct dentry *
-find_disconnected_root(struct dentry *dentry)
+/*
+ * Walk up from @dentry for as long as DCACHE_DISCONNECTED is set.
+ *
+ * Returns true once a dentry without the flag is reached, and false if
+ * the walk arrives at a dentry that is its own parent with the flag
+ * still set.
+ */
+static bool dentry_connected(struct dentry *dentry)
 {
 	dget(dentry);
-	while (!IS_ROOT(dentry)) {
+	while (dentry->d_flags & DCACHE_DISCONNECTED) {
 		struct dentry *parent = dget_parent(dentry);
 
-		if (!(parent->d_flags & DCACHE_DISCONNECTED)) {
+		dput(dentry);
+		/* our reference is gone: compare pointers, not IS_ROOT() */
+		if (dentry == parent) {
 			dput(parent);
-			break;
+			return false;
 		}
+		dentry = parent;
+	}
+	dput(dentry);
+	return true;
+}
+
+/*
+ * Clear DCACHE_DISCONNECTED on @dentry and on every ancestor that still
+ * has it set, stopping at the first dentry where it is already clear.
+ */
+static void clear_disconnected(struct dentry *dentry)
+{
+	dget(dentry);
+	while (dentry->d_flags & DCACHE_DISCONNECTED) {
+		struct dentry *parent = dget_parent(dentry);
+
+		WARN_ON_ONCE(IS_ROOT(dentry));
+
+		spin_lock(&dentry->d_lock);
+		dentry->d_flags &= ~DCACHE_DISCONNECTED;
+		spin_unlock(&dentry->d_lock);
 
 		dput(dentry);
 		dentry = parent;
 	}
-	return dentry;
+	dput(dentry);
+}
+
+/*
+ * Reconnect a directory dentry with its parent.
+ *
+ * This can return a dentry, or NULL, or an error.
+ *
+ * In the first case the returned dentry is the parent of the given
+ * dentry, and may itself need to be reconnected to its parent.
+ *
+ * In the NULL case, a concurrent VFS operation has either renamed or
+ * removed this directory.  The concurrent operation has reconnected our
+ * dentry, so we no longer need to.
+ */
+static struct dentry *reconnect_one(struct vfsmount *mnt,
+		struct dentry *dentry, char *nbuf)
+{
+	struct dentry *parent;
+	struct dentry *tmp;
+	int err;
+
+	parent = ERR_PTR(-EACCES);
+	mutex_lock(&dentry->d_inode->i_mutex);
+	if (mnt->mnt_sb->s_export_op->get_parent)
+		parent = mnt->mnt_sb->s_export_op->get_parent(dentry);
+	mutex_unlock(&dentry->d_inode->i_mutex);
+
+	if (IS_ERR(parent)) {
+		dprintk("%s: get_parent of %lu failed, err %ld\n",
+			__func__, dentry->d_inode->i_ino, PTR_ERR(parent));
+		return parent;
+	}
+
+	dprintk("%s: find name of %lu in %lu\n", __func__,
+		dentry->d_inode->i_ino, parent->d_inode->i_ino);
+	err = exportfs_get_name(mnt, parent, nbuf, dentry);
+	if (err == -ENOENT)
+		goto out_reconnected;
+	if (err)
+		goto out_err;
+	dprintk("%s: found name: %s\n", __func__, nbuf);
+	mutex_lock(&parent->d_inode->i_mutex);
+	tmp = lookup_one_len(nbuf, parent, strlen(nbuf));
+	mutex_unlock(&parent->d_inode->i_mutex);
+	if (IS_ERR(tmp)) {
+		/*
+		 * err is 0 here; set it, or out_err would return
+		 * ERR_PTR(0), i.e. the "already reconnected" NULL.
+		 */
+		err = PTR_ERR(tmp);
+		dprintk("%s: lookup failed: %d\n", __func__, err);
+		goto out_err;
+	}
+	if (tmp != dentry) {
+		dput(tmp);
+		goto out_reconnected;
+	}
+	dput(tmp);
+	if (IS_ROOT(dentry)) {
+		err = -ESTALE;
+		goto out_err;
+	}
+	return parent;
+
+out_err:
+	dput(parent);
+	return ERR_PTR(err);
+out_reconnected:
+	dput(parent);
+	/*
+	 * Someone must have renamed our entry into another parent, in
+	 * which case it has been reconnected by the rename.
+	 *
+	 * Or someone removed it entirely, in which case filehandle
+	 * lookup will succeed but the directory is now S_DEAD and
+	 * subsequent operations on it will fail.
+	 *
+	 * Alternatively, maybe there was no race at all, and the
+	 * filesystem is just corrupt and gave us a parent that doesn't
+	 * actually contain any entry pointing to this inode.  So,
+	 * double check that this worked and return -ESTALE if not:
+	 */
+	if (!dentry_connected(dentry))
+		return ERR_PTR(-ESTALE);
+	return NULL;
 }
 
 /*
  * Make sure target_dir is fully connected to the dentry tree.
  *
- * It may already be, as the flag isn't always updated when connection happens.
+ * On successful return, DCACHE_DISCONNECTED will be cleared on
+ * target_dir, and target_dir->d_parent->...->d_parent will reach the
+ * root of the filesystem.
+ *
+ * Whenever DCACHE_DISCONNECTED is unset, target_dir is fully connected.
+ * But the converse is not true: target_dir may have DCACHE_DISCONNECTED
+ * set but already be connected.  In that case we'll verify the
+ * connection to root and then clear the flag.
+ *
+ * Note that target_dir could be removed by a concurrent operation.  In
+ * that case reconnect_path may still succeed with target_dir fully
+ * connected, but further operations using the filehandle will fail when
+ * necessary (due to S_DEAD being set on the directory).
  */
 static int
 reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf)
 {
-	int noprogress = 0;
-	int err = -ESTALE;
+	struct dentry *dentry, *parent;
 
-	/*
-	 * It is possible that a confused file system might not let us complete
-	 * the path to the root.  For example, if get_parent returns a directory
-	 * in which we cannot find a name for the child.  While this implies a
-	 * very sick filesystem we don't want it to cause knfsd to spin.  Hence
-	 * the noprogress counter.  If we go through the loop 10 times (2 is
-	 * probably enough) without getting anywhere, we just give up
-	 */
-	while (target_dir->d_flags & DCACHE_DISCONNECTED && noprogress++ < 10) {
-		struct dentry *pd = find_disconnected_root(target_dir);
+	dentry = dget(target_dir);
 
-		if (!IS_ROOT(pd)) {
-			/* must have found a connected parent - great */
-			spin_lock(&pd->d_lock);
-			pd->d_flags &= ~DCACHE_DISCONNECTED;
-			spin_unlock(&pd->d_lock);
-			noprogress = 0;
-		} else if (pd == mnt->mnt_sb->s_root) {
-			printk(KERN_ERR "export: Eeek filesystem root is not connected, impossible\n");
-			spin_lock(&pd->d_lock);
-			pd->d_flags &= ~DCACHE_DISCONNECTED;
-			spin_unlock(&pd->d_lock);
-			noprogress = 0;
-		} else {
-			/*
-			 * We have hit the top of a disconnected path, try to
-			 * find parent and connect.
-			 *
-			 * Racing with some other process renaming a directory
-			 * isn't much of a problem here.  If someone renames
-			 * the directory, it will end up properly connected,
-			 * which is what we want
-			 *
-			 * Getting the parent can't be supported generically,
-			 * the locking is too icky.
-			 *
-			 * Instead we just return EACCES.  If server reboots
-			 * or inodes get flushed, you lose
-			 */
-			struct dentry *ppd = ERR_PTR(-EACCES);
-			struct dentry *npd;
-
-			mutex_lock(&pd->d_inode->i_mutex);
-			if (mnt->mnt_sb->s_export_op->get_parent)
-				ppd = mnt->mnt_sb->s_export_op->get_parent(pd);
-			mutex_unlock(&pd->d_inode->i_mutex);
-
-			if (IS_ERR(ppd)) {
-				err = PTR_ERR(ppd);
-				dprintk("%s: get_parent of %ld failed, err %d\n",
-					__func__, pd->d_inode->i_ino, err);
-				dput(pd);
-				break;
-			}
-
-			dprintk("%s: find name of %lu in %lu\n", __func__,
-				pd->d_inode->i_ino, ppd->d_inode->i_ino);
-			err = exportfs_get_name(mnt, ppd, nbuf, pd);
-			if (err) {
-				dput(ppd);
-				dput(pd);
-				if (err == -ENOENT)
-					/* some race between get_parent and
-					 * get_name?  just try again
-					 */
-					continue;
-				break;
-			}
-			dprintk("%s: found name: %s\n", __func__, nbuf);
-			mutex_lock(&ppd->d_inode->i_mutex);
-			npd = lookup_one_len(nbuf, ppd, strlen(nbuf));
-			mutex_unlock(&ppd->d_inode->i_mutex);
-			if (IS_ERR(npd)) {
-				err = PTR_ERR(npd);
-				dprintk("%s: lookup failed: %d\n",
-					__func__, err);
-				dput(ppd);
-				dput(pd);
-				break;
-			}
-			/* we didn't really want npd, we really wanted
-			 * a side-effect of the lookup.
-			 * hopefully, npd == pd, though it isn't really
-			 * a problem if it isn't
-			 */
-			if (npd == pd)
-				noprogress = 0;
-			else
-				printk("%s: npd != pd\n", __func__);
-			dput(npd);
-			dput(ppd);
-			if (IS_ROOT(pd)) {
-				/* something went wrong, we have to give up */
-				dput(pd);
-				break;
-			}
-		}
-		dput(pd);
-	}
+	while (dentry->d_flags & DCACHE_DISCONNECTED) {
+		BUG_ON(dentry == mnt->mnt_sb->s_root);
 
-	if (target_dir->d_flags & DCACHE_DISCONNECTED) {
-		/* something went wrong - oh-well */
-		if (!err)
-			err = -ESTALE;
-		return err;
-	}
+		if (IS_ROOT(dentry))
+			parent = reconnect_one(mnt, dentry, nbuf);
+		else
+			parent = dget_parent(dentry);
 
+		if (!parent)
+			break;
+		dput(dentry);
+		if (IS_ERR(parent))
+			return PTR_ERR(parent);
+		dentry = parent;
+	}
+	dput(dentry);
+	clear_disconnected(target_dir);
 	return 0;
 }
 
 struct getdents_callback {
+	struct dir_context ctx;
 	char *name;		/* name that was found. It already points to a
 				   buffer NAME_MAX+1 is size */
-	unsigned long ino;	/* the inum we are looking for */
+	u64 ino;		/* the inum we are looking for */
 	int found;		/* inode matched? */
 	int sequence;		/* sequence counter */
 };
@@ -223,14 +258,15 @@
  * A rather strange filldir function to capture
  * the name matching the specified inode number.
  */
-static int filldir_one(void * __buf, const char * name, int len,
+static int filldir_one(struct dir_context *ctx, const char *name, int len,
 			loff_t pos, u64 ino, unsigned int d_type)
 {
-	struct getdents_callback *buf = __buf;
+	struct getdents_callback *buf =
+		container_of(ctx, struct getdents_callback, ctx);
 	int result = 0;
 
 	buf->sequence++;
-	if (buf->ino == ino) {
+	if (buf->ino == ino && len <= NAME_MAX) {
 		memcpy(buf->name, name, len);
 		buf->name[len] = '\0';
 		buf->found = 1;
@@ -241,7 +277,7 @@
 
 /**
  * get_name - default export_operations->get_name function
- * @dentry: the directory in which to find a name
+ * @path:   the directory in which to find a name
  * @name:   a pointer to a %NAME_MAX+1 char buffer to store the name
  * @child:  the dentry for the child directory.
  *
@@ -254,7 +290,15 @@
 	struct inode *dir = path->dentry->d_inode;
 	int error;
 	struct file *file;
-	struct getdents_callback buffer;
+	struct kstat stat;
+	struct path child_path = {
+		.mnt = path->mnt,
+		.dentry = child,
+	};
+	struct getdents_callback buffer = {
+		.ctx.actor = filldir_one,
+		.name = name,
+	};
 
 	error = -ENOTDIR;
 	if (!dir || !S_ISDIR(dir->i_mode))
@@ -263,6 +307,16 @@
 	if (!dir->i_fop)
 		goto out;
 	/*
+	 * inode->i_ino is unsigned long, kstat->ino is u64, so the
+	 * former would be insufficient on 32-bit hosts when the
+	 * filesystem supports 64-bit inode numbers.  So we need to
+	 * actually call ->getattr, not just read i_ino:
+	 */
+	error = vfs_getattr_nosec(&child_path, &stat);
+	if (error)
+		return error;
+	buffer.ino = stat.ino;
+	/*
 	 * Open the directory ...
 	 */
 	file = dentry_open(path, O_RDONLY, cred);
@@ -271,17 +325,14 @@
 		goto out;
 
 	error = -EINVAL;
-	if (!file->f_op->readdir)
+	if (!file->f_op->iterate)
 		goto out_close;
 
-	buffer.name = name;
-	buffer.ino = child->d_inode->i_ino;
-	buffer.found = 0;
 	buffer.sequence = 0;
 	while (1) {
 		int old_seq = buffer.sequence;
 
-		error = vfs_readdir(file, filldir_one, &buffer);
+		error = iterate_dir(file, &buffer.ctx);
 		if (buffer.found) {
 			error = 0;
 			break;
@@ -304,7 +355,7 @@
 /**
  * export_encode_fh - default export_operations->encode_fh function
  * @inode:   the object to encode
- * @fh:      where to store the file handle fragment
+ * @fid:     where to store the file handle fragment
  * @max_len: maximum length to store there
  * @parent:  parent directory inode, if wanted
  *
@@ -395,7 +446,7 @@
 	if (IS_ERR(result))
 		return result;
 
-	if (S_ISDIR(result->d_inode->i_mode)) {
+	if (d_is_dir(result)) {
 		/*
 		 * This request is for a directory.
 		 *