--- zzzz-none-000/linux-3.10.107/fs/nfsd/nfs4proc.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/fs/nfsd/nfs4proc.c 2021-02-04 17:41:59.000000000 +0000 @@ -33,6 +33,7 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include +#include #include #include "idmap.h" @@ -41,6 +42,39 @@ #include "vfs.h" #include "current_stateid.h" #include "netns.h" +#include "acl.h" +#include "pnfs.h" +#include "trace.h" + +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +#include + +static inline void +nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval) +{ + struct inode *inode = d_inode(resfh->fh_dentry); + int status; + + mutex_lock(&inode->i_mutex); + status = security_inode_setsecctx(resfh->fh_dentry, + label->data, label->len); + mutex_unlock(&inode->i_mutex); + + if (status) + /* + * XXX: We should really fail the whole open, but we may + * already have created a new file, so it may be too + * late. For now this seems the least of evils: + */ + bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL; + + return; +} +#else +static inline void +nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval) +{ } +#endif #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -76,7 +110,7 @@ * in current environment or not. */ if (bmval[0] & FATTR4_WORD0_ACL) { - if (!IS_POSIXACL(dentry->d_inode)) + if (!IS_POSIXACL(d_inode(dentry))) return nfserr_attrnotsupp; } @@ -146,7 +180,7 @@ fh_put(dst); dget(src->fh_dentry); if (src->fh_export) - cache_get(&src->fh_export->h); + exp_get(src->fh_export); *dst = *src; } @@ -175,7 +209,7 @@ static __be32 nfsd_check_obj_isreg(struct svc_fh *fh) { - umode_t mode = fh->fh_dentry->d_inode->i_mode; + umode_t mode = d_inode(fh->fh_dentry)->i_mode; if (S_ISREG(mode)) return nfs_ok; @@ -200,17 +234,16 @@ } static __be32 -do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open) +do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open, struct svc_fh **resfh) { struct svc_fh *current_fh = &cstate->current_fh; - struct svc_fh *resfh; int accmode; __be32 status; - resfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL); - if (!resfh) + *resfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL); + if (!*resfh) return nfserr_jukebox; - fh_init(resfh, NFS4_FHSIZE); + fh_init(*resfh, NFS4_FHSIZE); open->op_truncate = 0; if (open->op_create) { @@ -235,42 +268,47 @@ */ status = do_nfsd_create(rqstp, current_fh, open->op_fname.data, open->op_fname.len, &open->op_iattr, - resfh, open->op_createmode, + *resfh, open->op_createmode, (u32 *)open->op_verf.data, &open->op_truncate, &open->op_created); + if (!status && open->op_label.len) + nfsd4_security_inode_setsecctx(*resfh, &open->op_label, open->op_bmval); + /* - * Following rfc 3530 14.2.16, use the returned bitmask - * to indicate which attributes we used to store the - * verifier: + * Following rfc 3530 14.2.16, and rfc 5661 18.16.4 + * use the returned bitmask to indicate which attributes + * we used to store the verifier: + */ + if (nfsd_create_is_exclusive(open->op_createmode) && status == 0) + open->op_bmval[1] |= (FATTR4_WORD1_TIME_ACCESS | + FATTR4_WORD1_TIME_MODIFY); + } else + /* + * Note this may exit with the parent still locked. + * We will hold the lock until nfsd4_open's final + * lookup, to prevent renames or unlinks until we've had + * a chance to an acquire a delegation if appropriate. */ - if (open->op_createmode == NFS4_CREATE_EXCLUSIVE && status == 0) - open->op_bmval[1] = (FATTR4_WORD1_TIME_ACCESS | - FATTR4_WORD1_TIME_MODIFY); - } else { status = nfsd_lookup(rqstp, current_fh, - open->op_fname.data, open->op_fname.len, resfh); - fh_unlock(current_fh); - } + open->op_fname.data, open->op_fname.len, *resfh); if (status) goto out; - status = nfsd_check_obj_isreg(resfh); + status = nfsd_check_obj_isreg(*resfh); if (status) goto out; if (is_create_with_attrs(open) && open->op_acl != NULL) - do_set_nfs4_acl(rqstp, resfh, open->op_acl, open->op_bmval); + do_set_nfs4_acl(rqstp, *resfh, open->op_acl, open->op_bmval); - nfsd4_set_open_owner_reply_cache(cstate, open, resfh); + nfsd4_set_open_owner_reply_cache(cstate, open, *resfh); accmode = NFSD_MAY_NOP; - if (open->op_created) + if (open->op_created || + open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR) accmode |= NFSD_MAY_OWNER_OVERRIDE; - status = do_open_permission(rqstp, resfh, open, accmode); + status = do_open_permission(rqstp, *resfh, open, accmode); set_change_info(&open->op_cinfo, current_fh); - fh_dup2(current_fh, resfh); out: - fh_put(resfh); - kfree(resfh); return status; } @@ -323,7 +361,7 @@ struct nfsd4_open *open) { __be32 status; - struct nfsd4_compoundres *resp; + struct svc_fh *resfh = NULL; struct net *net = SVC_NET(rqstp); struct nfsd_net *nn = net_generic(net, nfsd_net_id); @@ -349,11 +387,8 @@ if (nfsd4_has_session(cstate)) copy_clientid(&open->op_clientid, cstate->session); - nfs4_lock_state(); - /* check seqid for replay. set nfs4_owner */ - resp = rqstp->rq_resp; - status = nfsd4_process_open1(&resp->cstate, open, nn); + status = nfsd4_process_open1(cstate, open, nn); if (status == nfserr_replay_me) { struct nfs4_replay *rp = &open->op_openowner->oo_owner.so_replay; fh_put(&cstate->current_fh); @@ -380,35 +415,34 @@ /* Openowner is now set, so sequence id will get bumped. Now we need * these checks before we do any creates: */ status = nfserr_grace; - if (locks_in_grace(net) && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) + if (opens_in_grace(net) && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) goto out; status = nfserr_no_grace; - if (!locks_in_grace(net) && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) + if (!opens_in_grace(net) && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) goto out; switch (open->op_claim_type) { case NFS4_OPEN_CLAIM_DELEGATE_CUR: case NFS4_OPEN_CLAIM_NULL: - status = do_open_lookup(rqstp, cstate, open); + status = do_open_lookup(rqstp, cstate, open, &resfh); if (status) goto out; break; case NFS4_OPEN_CLAIM_PREVIOUS: - open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; status = nfs4_check_open_reclaim(&open->op_clientid, - cstate->minorversion, - nn); + cstate, nn); if (status) goto out; + open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; case NFS4_OPEN_CLAIM_FH: case NFS4_OPEN_CLAIM_DELEG_CUR_FH: status = do_open_fhandle(rqstp, cstate, open); if (status) goto out; + resfh = &cstate->current_fh; break; case NFS4_OPEN_CLAIM_DELEG_PREV_FH: case NFS4_OPEN_CLAIM_DELEGATE_PREV: - open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; dprintk("NFSD: unsupported OPEN claim type %d\n", open->op_claim_type); status = nfserr_notsupp; @@ -424,15 +458,18 @@ * successful, it (1) truncates the file if open->op_truncate was * set, (2) sets open->op_stateid, (3) sets open->op_delegation. */ - status = nfsd4_process_open2(rqstp, &cstate->current_fh, open); - WARN_ON(status && open->op_created); + status = nfsd4_process_open2(rqstp, resfh, open); + WARN(status && open->op_created, + "nfsd4_process_open2 failed to open newly-created file! status=%u\n", + be32_to_cpu(status)); out: - nfsd4_cleanup_open_state(open, status); - if (open->op_openowner && !nfsd4_has_session(cstate)) - cstate->replay_owner = &open->op_openowner->oo_owner; + if (resfh && resfh != &cstate->current_fh) { + fh_dup2(&cstate->current_fh, resfh); + fh_put(resfh); + kfree(resfh); + } + nfsd4_cleanup_open_state(cstate, open); nfsd4_bump_seqid(cstate, status); - if (!cstate->replay_owner) - nfs4_unlock_state(); return status; } @@ -540,8 +577,12 @@ __be32 verf[2]; struct nfsd_net *nn = net_generic(net, nfsd_net_id); - verf[0] = (__be32)nn->nfssvc_boot.tv_sec; - verf[1] = (__be32)nn->nfssvc_boot.tv_usec; + /* + * This is opaque to client, so no need to byte-swap. Use + * __force to keep sparse happy + */ + verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec; + verf[1] = (__force __be32)nn->nfssvc_boot.tv_usec; memcpy(verifier->data, verf, sizeof(verifier->data)); } @@ -578,8 +619,7 @@ case NF4LNK: status = nfsd_symlink(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - create->cr_linkname, create->cr_linklen, - &resfh, &create->cr_iattr); + create->cr_data, &resfh); break; case NF4BLK: @@ -628,6 +668,9 @@ if (status) goto out; + if (create->cr_label.len) + nfsd4_security_inode_setsecctx(&resfh, &create->cr_label, create->cr_bmval); + if (create->cr_acl != NULL) do_set_nfs4_acl(rqstp, &resfh, create->cr_acl, create->cr_bmval); @@ -715,8 +758,6 @@ { __be32 status; - /* no need to check permission - this will be done in nfsd_read() */ - read->rd_filp = NULL; if (read->rd_offset >= OFFSET_MAX) return nfserr_inval; @@ -730,21 +771,17 @@ * the client wants us to do more in this compound: */ if (!nfsd4_last_compound_op(rqstp)) - rqstp->rq_splice_ok = false; + clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); - nfs4_lock_state(); /* check stateid */ - if ((status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), - cstate, &read->rd_stateid, - RD_STATE, &read->rd_filp))) { + status = nfs4_preprocess_stateid_op(rqstp, cstate, &read->rd_stateid, + RD_STATE, &read->rd_filp, &read->rd_tmp_file); + if (status) { dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); goto out; } - if (read->rd_filp) - get_file(read->rd_filp); status = nfs_ok; out: - nfs4_unlock_state(); read->rd_rqstp = rqstp; read->rd_fhp = &cstate->current_fh; return status; @@ -790,7 +827,7 @@ { __be32 status; - if (locks_in_grace(SVC_NET(rqstp))) + if (opens_in_grace(SVC_NET(rqstp))) return nfserr_grace; status = nfsd_unlink(rqstp, &cstate->current_fh, 0, remove->rm_name, remove->rm_namelen); @@ -809,7 +846,7 @@ if (!cstate->save_fh.fh_dentry) return status; - if (locks_in_grace(SVC_NET(rqstp)) && + if (opens_in_grace(SVC_NET(rqstp)) && !(cstate->save_fh.fh_export->ex_flags & NFSEXP_NOSUBTREECHECK)) return nfserr_grace; status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname, @@ -840,7 +877,8 @@ &exp, &dentry); if (err) return err; - if (dentry->d_inode == NULL) { + fh_unlock(&cstate->current_fh); + if (d_really_is_negative(dentry)) { exp_put(exp); err = nfserr_noent; } else @@ -869,8 +907,8 @@ default: return nfserr_inval; } - exp_get(cstate->current_fh.fh_export); - sin->sin_exp = cstate->current_fh.fh_export; + + sin->sin_exp = exp_get(cstate->current_fh.fh_export); fh_put(&cstate->current_fh); return nfs_ok; } @@ -883,10 +921,8 @@ int err; if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { - nfs4_lock_state(); - status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate, - &setattr->sa_stateid, WR_STATE, NULL); - nfs4_unlock_state(); + status = nfs4_preprocess_stateid_op(rqstp, cstate, + &setattr->sa_stateid, WR_STATE, NULL, NULL); if (status) { dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); return status; @@ -907,6 +943,11 @@ setattr->sa_acl); if (status) goto out; + if (setattr->sa_label.len) + status = nfsd4_set_nfs4_label(rqstp, &cstate->current_fh, + &setattr->sa_label); + if (status) + goto out; status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr, 0, (time_t)0); out: @@ -942,22 +983,15 @@ unsigned long cnt; int nvecs; - /* no need to check permission - this will be done in nfsd_write() */ - if (write->wr_offset >= OFFSET_MAX) return nfserr_inval; - nfs4_lock_state(); - status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), - cstate, stateid, WR_STATE, &filp); + status = nfs4_preprocess_stateid_op(rqstp, cstate, stateid, WR_STATE, + &filp, NULL); if (status) { - nfs4_unlock_state(); dprintk("NFSD: nfsd4_write: couldn't process stateid!\n"); return status; } - if (filp) - get_file(filp); - nfs4_unlock_state(); cnt = write->wr_buflen; write->wr_how_written = write->wr_stable_how; @@ -966,17 +1000,97 @@ nvecs = fill_in_write_vector(rqstp->rq_vec, write); WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec)); - status = nfsd_write(rqstp, &cstate->current_fh, filp, - write->wr_offset, rqstp->rq_vec, nvecs, - &cnt, &write->wr_how_written); - if (filp) - fput(filp); + status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp, + write->wr_offset, rqstp->rq_vec, nvecs, &cnt, + &write->wr_how_written); + fput(filp); write->wr_bytes_written = cnt; return status; } +static __be32 +nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_fallocate *fallocate, int flags) +{ + __be32 status = nfserr_notsupp; + struct file *file; + + status = nfs4_preprocess_stateid_op(rqstp, cstate, + &fallocate->falloc_stateid, + WR_STATE, &file, NULL); + if (status != nfs_ok) { + dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n"); + return status; + } + + status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, file, + fallocate->falloc_offset, + fallocate->falloc_length, + flags); + fput(file); + return status; +} + +static __be32 +nfsd4_allocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_fallocate *fallocate) +{ + return nfsd4_fallocate(rqstp, cstate, fallocate, 0); +} + +static __be32 +nfsd4_deallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_fallocate *fallocate) +{ + return nfsd4_fallocate(rqstp, cstate, fallocate, + FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE); +} + +static __be32 +nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_seek *seek) +{ + int whence; + __be32 status; + struct file *file; + + status = nfs4_preprocess_stateid_op(rqstp, cstate, + &seek->seek_stateid, + RD_STATE, &file, NULL); + if (status) { + dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n"); + return status; + } + + switch (seek->seek_whence) { + case NFS4_CONTENT_DATA: + whence = SEEK_DATA; + break; + case NFS4_CONTENT_HOLE: + whence = SEEK_HOLE; + break; + default: + status = nfserr_union_notsupp; + goto out; + } + + /* + * Note: This call does change file->f_pos, but nothing in NFSD + * should ever file->f_pos. + */ + seek->seek_pos = vfs_llseek(file, seek->seek_offset, whence); + if (seek->seek_pos < 0) + status = nfserrno(seek->seek_pos); + else if (seek->seek_pos >= i_size_read(file_inode(file))) + seek->seek_eof = true; + +out: + fput(file); + return status; +} + /* This routine never returns NFS_OK! If there are no other errors, it * will return NFSERR_SAME or NFSERR_NOT_SAME depending on whether the * attributes matched. VERIFY is implemented by mapping NFSERR_SAME @@ -1013,13 +1127,15 @@ return nfserr_jukebox; p = buf; - status = nfsd4_encode_fattr(&cstate->current_fh, + status = nfsd4_encode_fattr_to_buf(&p, count, &cstate->current_fh, cstate->current_fh.fh_export, - cstate->current_fh.fh_dentry, &p, - count, verify->ve_bmval, + cstate->current_fh.fh_dentry, + verify->ve_bmval, rqstp, 0); - - /* this means that nfsd4_encode_fattr() ran out of space */ + /* + * If nfsd4_encode_fattr() ran out of space, assume that's because + * the attributes are longer (hence different) than those given: + */ if (status == nfserr_resource) status = nfserr_not_same; if (status) @@ -1058,6 +1174,259 @@ return status == nfserr_same ? nfs_ok : status; } +#ifdef CONFIG_NFSD_PNFS +static const struct nfsd4_layout_ops * +nfsd4_layout_verify(struct svc_export *exp, unsigned int layout_type) +{ + if (!exp->ex_layout_type) { + dprintk("%s: export does not support pNFS\n", __func__); + return NULL; + } + + if (exp->ex_layout_type != layout_type) { + dprintk("%s: layout type %d not supported\n", + __func__, layout_type); + return NULL; + } + + return nfsd4_layout_ops[layout_type]; +} + +static __be32 +nfsd4_getdeviceinfo(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, + struct nfsd4_getdeviceinfo *gdp) +{ + const struct nfsd4_layout_ops *ops; + struct nfsd4_deviceid_map *map; + struct svc_export *exp; + __be32 nfserr; + + dprintk("%s: layout_type %u dev_id [0x%llx:0x%x] maxcnt %u\n", + __func__, + gdp->gd_layout_type, + gdp->gd_devid.fsid_idx, gdp->gd_devid.generation, + gdp->gd_maxcount); + + map = nfsd4_find_devid_map(gdp->gd_devid.fsid_idx); + if (!map) { + dprintk("%s: couldn't find device ID to export mapping!\n", + __func__); + return nfserr_noent; + } + + exp = rqst_exp_find(rqstp, map->fsid_type, map->fsid); + if (IS_ERR(exp)) { + dprintk("%s: could not find device id\n", __func__); + return nfserr_noent; + } + + nfserr = nfserr_layoutunavailable; + ops = nfsd4_layout_verify(exp, gdp->gd_layout_type); + if (!ops) + goto out; + + nfserr = nfs_ok; + if (gdp->gd_maxcount != 0) + nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, gdp); + + gdp->gd_notify_types &= ops->notify_types; +out: + exp_put(exp); + return nfserr; +} + +static __be32 +nfsd4_layoutget(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, + struct nfsd4_layoutget *lgp) +{ + struct svc_fh *current_fh = &cstate->current_fh; + const struct nfsd4_layout_ops *ops; + struct nfs4_layout_stateid *ls; + __be32 nfserr; + int accmode; + + switch (lgp->lg_seg.iomode) { + case IOMODE_READ: + accmode = NFSD_MAY_READ; + break; + case IOMODE_RW: + accmode = NFSD_MAY_READ | NFSD_MAY_WRITE; + break; + default: + dprintk("%s: invalid iomode %d\n", + __func__, lgp->lg_seg.iomode); + nfserr = nfserr_badiomode; + goto out; + } + + nfserr = fh_verify(rqstp, current_fh, 0, accmode); + if (nfserr) + goto out; + + nfserr = nfserr_layoutunavailable; + ops = nfsd4_layout_verify(current_fh->fh_export, lgp->lg_layout_type); + if (!ops) + goto out; + + /* + * Verify minlength and range as per RFC5661: + * o If loga_length is less than loga_minlength, + * the metadata server MUST return NFS4ERR_INVAL. + * o If the sum of loga_offset and loga_minlength exceeds + * NFS4_UINT64_MAX, and loga_minlength is not + * NFS4_UINT64_MAX, the error NFS4ERR_INVAL MUST result. + * o If the sum of loga_offset and loga_length exceeds + * NFS4_UINT64_MAX, and loga_length is not NFS4_UINT64_MAX, + * the error NFS4ERR_INVAL MUST result. + */ + nfserr = nfserr_inval; + if (lgp->lg_seg.length < lgp->lg_minlength || + (lgp->lg_minlength != NFS4_MAX_UINT64 && + lgp->lg_minlength > NFS4_MAX_UINT64 - lgp->lg_seg.offset) || + (lgp->lg_seg.length != NFS4_MAX_UINT64 && + lgp->lg_seg.length > NFS4_MAX_UINT64 - lgp->lg_seg.offset)) + goto out; + if (lgp->lg_seg.length == 0) + goto out; + + nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lgp->lg_sid, + true, lgp->lg_layout_type, &ls); + if (nfserr) { + trace_layout_get_lookup_fail(&lgp->lg_sid); + goto out; + } + + nfserr = nfserr_recallconflict; + if (atomic_read(&ls->ls_stid.sc_file->fi_lo_recalls)) + goto out_put_stid; + + nfserr = ops->proc_layoutget(d_inode(current_fh->fh_dentry), + current_fh, lgp); + if (nfserr) + goto out_put_stid; + + nfserr = nfsd4_insert_layout(lgp, ls); + +out_put_stid: + mutex_unlock(&ls->ls_mutex); + nfs4_put_stid(&ls->ls_stid); +out: + return nfserr; +} + +static __be32 +nfsd4_layoutcommit(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, + struct nfsd4_layoutcommit *lcp) +{ + const struct nfsd4_layout_seg *seg = &lcp->lc_seg; + struct svc_fh *current_fh = &cstate->current_fh; + const struct nfsd4_layout_ops *ops; + loff_t new_size = lcp->lc_last_wr + 1; + struct inode *inode; + struct nfs4_layout_stateid *ls; + __be32 nfserr; + + nfserr = fh_verify(rqstp, current_fh, 0, NFSD_MAY_WRITE); + if (nfserr) + goto out; + + nfserr = nfserr_layoutunavailable; + ops = nfsd4_layout_verify(current_fh->fh_export, lcp->lc_layout_type); + if (!ops) + goto out; + inode = d_inode(current_fh->fh_dentry); + + nfserr = nfserr_inval; + if (new_size <= seg->offset) { + dprintk("pnfsd: last write before layout segment\n"); + goto out; + } + if (new_size > seg->offset + seg->length) { + dprintk("pnfsd: last write beyond layout segment\n"); + goto out; + } + if (!lcp->lc_newoffset && new_size > i_size_read(inode)) { + dprintk("pnfsd: layoutcommit beyond EOF\n"); + goto out; + } + + nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lcp->lc_sid, + false, lcp->lc_layout_type, + &ls); + if (nfserr) { + trace_layout_commit_lookup_fail(&lcp->lc_sid); + /* fixup error code as per RFC5661 */ + if (nfserr == nfserr_bad_stateid) + nfserr = nfserr_badlayout; + goto out; + } + + /* LAYOUTCOMMIT does not require any serialization */ + mutex_unlock(&ls->ls_mutex); + + if (new_size > i_size_read(inode)) { + lcp->lc_size_chg = 1; + lcp->lc_newsize = new_size; + } else { + lcp->lc_size_chg = 0; + } + + nfserr = ops->proc_layoutcommit(inode, lcp); + nfs4_put_stid(&ls->ls_stid); +out: + return nfserr; +} + +static __be32 +nfsd4_layoutreturn(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, + struct nfsd4_layoutreturn *lrp) +{ + struct svc_fh *current_fh = &cstate->current_fh; + __be32 nfserr; + + nfserr = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP); + if (nfserr) + goto out; + + nfserr = nfserr_layoutunavailable; + if (!nfsd4_layout_verify(current_fh->fh_export, lrp->lr_layout_type)) + goto out; + + switch (lrp->lr_seg.iomode) { + case IOMODE_READ: + case IOMODE_RW: + case IOMODE_ANY: + break; + default: + dprintk("%s: invalid iomode %d\n", __func__, + lrp->lr_seg.iomode); + nfserr = nfserr_inval; + goto out; + } + + switch (lrp->lr_return_type) { + case RETURN_FILE: + nfserr = nfsd4_return_file_layouts(rqstp, cstate, lrp); + break; + case RETURN_FSID: + case RETURN_ALL: + nfserr = nfsd4_return_client_layouts(rqstp, cstate, lrp); + break; + default: + dprintk("%s: invalid return_type %d\n", __func__, + lrp->lr_return_type); + nfserr = nfserr_inval; + break; + } +out: + return nfserr; +} +#endif /* CONFIG_NFSD_PNFS */ + /* * NULL call. */ @@ -1121,9 +1490,7 @@ static struct nfsd4_operation nfsd4_ops[]; -#ifdef NFSD_DEBUG static const char *nfsd4_op_name(unsigned opnum); -#endif /* * Enforce NFSv4.1 COMPOUND ordering rules: @@ -1165,6 +1532,8 @@ bool nfsd4_cache_this_op(struct nfsd4_op *op) { + if (op->opnum == OP_ILLEGAL) + return false; return OPDESC(op)->op_flags & OP_CACHEME; } @@ -1202,6 +1571,25 @@ return !(nextd->op_flags & OP_HANDLES_WRONGSEC); } +static void svcxdr_init_encode(struct svc_rqst *rqstp, + struct nfsd4_compoundres *resp) +{ + struct xdr_stream *xdr = &resp->xdr; + struct xdr_buf *buf = &rqstp->rq_res; + struct kvec *head = buf->head; + + xdr->buf = buf; + xdr->iov = head; + xdr->p = head->iov_base + head->iov_len; + xdr->end = head->iov_base + PAGE_SIZE - rqstp->rq_auth_slack; + /* Tail and page_len should be zero at this point: */ + buf->len = buf->head[0].iov_len; + xdr->scratch.iov_len = 0; + xdr->page_ptr = buf->pages - 1; + buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages) + - rqstp->rq_auth_slack; +} + /* * COMPOUND call. */ @@ -1213,37 +1601,31 @@ struct nfsd4_op *op; struct nfsd4_operation *opdesc; struct nfsd4_compound_state *cstate = &resp->cstate; - int slack_bytes; - u32 plen = 0; + struct svc_fh *current_fh = &cstate->current_fh; + struct svc_fh *save_fh = &cstate->save_fh; __be32 status; - resp->xbuf = &rqstp->rq_res; - resp->p = rqstp->rq_res.head[0].iov_base + - rqstp->rq_res.head[0].iov_len; - resp->tagp = resp->p; + svcxdr_init_encode(rqstp, resp); + resp->tagp = resp->xdr.p; /* reserve space for: taglen, tag, and opcnt */ - resp->p += 2 + XDR_QUADLEN(args->taglen); - resp->end = rqstp->rq_res.head[0].iov_base + PAGE_SIZE; + xdr_reserve_space(&resp->xdr, 8 + args->taglen); resp->taglen = args->taglen; resp->tag = args->tag; - resp->opcnt = 0; resp->rqstp = rqstp; - resp->cstate.minorversion = args->minorversion; - resp->cstate.replay_owner = NULL; - resp->cstate.session = NULL; - fh_init(&resp->cstate.current_fh, NFS4_FHSIZE); - fh_init(&resp->cstate.save_fh, NFS4_FHSIZE); + cstate->minorversion = args->minorversion; + fh_init(current_fh, NFS4_FHSIZE); + fh_init(save_fh, NFS4_FHSIZE); /* * Don't use the deferral mechanism for NFSv4; compounds make it * too hard to avoid non-idempotency problems. */ - rqstp->rq_usedeferral = 0; + clear_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); /* * According to RFC3010, this takes precedence over all other errors. */ status = nfserr_minor_vers_mismatch; - if (args->minorversion > nfsd_supported_minorversion) + if (nfsd_minorversion(args->minorversion, NFSD_TEST) <= 0) goto out; status = nfs41_check_op_ordering(args); @@ -1270,37 +1652,30 @@ goto encode_op; } - /* We must be able to encode a successful response to - * this operation, with enough room left over to encode a - * failed response to the next operation. If we don't - * have enough room, fail with ERR_RESOURCE. - */ - slack_bytes = (char *)resp->end - (char *)resp->p; - if (slack_bytes < COMPOUND_SLACK_SPACE - + COMPOUND_ERR_SLACK_SPACE) { - BUG_ON(slack_bytes < COMPOUND_ERR_SLACK_SPACE); - op->status = nfserr_resource; - goto encode_op; - } - opdesc = OPDESC(op); - if (!cstate->current_fh.fh_dentry) { + if (!current_fh->fh_dentry) { if (!(opdesc->op_flags & ALLOWED_WITHOUT_FH)) { op->status = nfserr_nofilehandle; goto encode_op; } - } else if (cstate->current_fh.fh_export->ex_fslocs.migrated && + } else if (current_fh->fh_export->ex_fslocs.migrated && !(opdesc->op_flags & ALLOWED_ON_ABSENT_FS)) { op->status = nfserr_moved; goto encode_op; } + fh_clear_wcc(current_fh); + /* If op is non-idempotent */ if (opdesc->op_flags & OP_MODIFIES_SOMETHING) { - plen = opdesc->op_rsize_bop(rqstp, op); /* - * If there's still another operation, make sure + * Don't execute this op if we couldn't encode a + * succesful reply: + */ + u32 plen = opdesc->op_rsize_bop(rqstp, op); + /* + * Plus if there's another operation, make sure * we'll have space to at least encode an error: */ if (resp->opcnt < args->opcnt) @@ -1323,19 +1698,19 @@ clear_current_stateid(cstate); if (need_wrongsec_check(rqstp)) - op->status = check_nfsd_access(cstate->current_fh.fh_export, rqstp); + op->status = check_nfsd_access(current_fh->fh_export, rqstp); } encode_op: /* Only from SEQUENCE */ - if (resp->cstate.status == nfserr_replay_cache) { + if (cstate->status == nfserr_replay_cache) { dprintk("%s NFS4.1 replay from cache\n", __func__); status = op->status; goto out; } if (op->status == nfserr_replay_me) { op->replay = &cstate->replay_owner->so_replay; - nfsd4_encode_replay(resp, op); + nfsd4_encode_replay(&resp->xdr, op); status = op->status = op->replay->rp_status; } else { nfsd4_encode_operation(resp, op); @@ -1346,24 +1721,17 @@ args->ops, args->opcnt, resp->opcnt, op->opnum, be32_to_cpu(status)); - if (cstate->replay_owner) { - nfs4_unlock_state(); - cstate->replay_owner = NULL; - } - /* XXX Ugh, we need to get rid of this kind of special case: */ - if (op->opnum == OP_READ && op->u.read.rd_filp) - fput(op->u.read.rd_filp); - + nfsd4_cstate_clear_replay(cstate); nfsd4_increment_op_stats(op->opnum); } - resp->cstate.status = status; - fh_put(&resp->cstate.current_fh); - fh_put(&resp->cstate.save_fh); - BUG_ON(resp->cstate.replay_owner); + cstate->status = status; + fh_put(current_fh); + fh_put(save_fh); + BUG_ON(cstate->replay_owner); out: /* Reset deferral mechanism for RPC deferrals */ - rqstp->rq_usedeferral = 1; + set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); dprintk("nfsv4 compound returned %d\n", ntohl(status)); return status; } @@ -1374,7 +1742,8 @@ #define op_encode_change_info_maxsz (5) #define nfs4_fattr_bitmap_maxsz (4) -#define op_encode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) +/* We'll fall back on returning no lockowner if run out of space: */ +#define op_encode_lockowner_maxsz (0) #define op_encode_lock_denied_maxsz (8 + op_encode_lockowner_maxsz) #define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) @@ -1406,6 +1775,49 @@ + nfs4_fattr_bitmap_maxsz) * sizeof(__be32); } +/* + * Note since this is an idempotent operation we won't insist on failing + * the op prematurely if the estimate is too large. We may turn off splice + * reads unnecessarily. + */ +static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp, + struct nfsd4_op *op) +{ + u32 *bmap = op->u.getattr.ga_bmval; + u32 bmap0 = bmap[0], bmap1 = bmap[1], bmap2 = bmap[2]; + u32 ret = 0; + + if (bmap0 & FATTR4_WORD0_ACL) + return svc_max_payload(rqstp); + if (bmap0 & FATTR4_WORD0_FS_LOCATIONS) + return svc_max_payload(rqstp); + + if (bmap1 & FATTR4_WORD1_OWNER) { + ret += IDMAP_NAMESZ + 4; + bmap1 &= ~FATTR4_WORD1_OWNER; + } + if (bmap1 & FATTR4_WORD1_OWNER_GROUP) { + ret += IDMAP_NAMESZ + 4; + bmap1 &= ~FATTR4_WORD1_OWNER_GROUP; + } + if (bmap0 & FATTR4_WORD0_FILEHANDLE) { + ret += NFS4_FHSIZE + 4; + bmap0 &= ~FATTR4_WORD0_FILEHANDLE; + } + if (bmap2 & FATTR4_WORD2_SECURITY_LABEL) { + ret += NFS4_MAXLABELLEN + 12; + bmap2 &= ~FATTR4_WORD2_SECURITY_LABEL; + } + /* + * Largest of remaining attributes are 16 bytes (e.g., + * supported_attributes) + */ + ret += 16 * (hweight32(bmap0) + hweight32(bmap1) + hweight32(bmap2)); + /* bitmask, length */ + ret += 20; + return ret; +} + static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_change_info_maxsz) @@ -1431,23 +1843,20 @@ u32 maxcount = 0, rlen = 0; maxcount = svc_max_payload(rqstp); - rlen = op->u.read.rd_length; - - if (rlen > maxcount) - rlen = maxcount; + rlen = min(op->u.read.rd_length, maxcount); - return (op_encode_hdr_size + 2) * sizeof(__be32) + rlen; + return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32); } static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { - u32 rlen = op->u.readdir.rd_maxcount; + u32 maxcount = 0, rlen = 0; - if (rlen > PAGE_SIZE) - rlen = PAGE_SIZE; + maxcount = svc_max_payload(rqstp); + rlen = min(op->u.readdir.rd_maxcount, maxcount); - return (op_encode_hdr_size + op_encode_verifier_maxsz) - * sizeof(__be32) + rlen; + return (op_encode_hdr_size + op_encode_verifier_maxsz + + XDR_QUADLEN(rlen)) * sizeof(__be32); } static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) @@ -1462,6 +1871,13 @@ + op_encode_change_info_maxsz) * sizeof(__be32); } +static inline u32 nfsd4_sequence_rsize(struct svc_rqst *rqstp, + struct nfsd4_op *op) +{ + return (op_encode_hdr_size + + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) * sizeof(__be32); +} + static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32); @@ -1475,13 +1891,14 @@ static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { - return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32); + return (op_encode_hdr_size + 2 + op_encode_verifier_maxsz) * sizeof(__be32); } static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + 2 + 1 + /* eir_clientid, eir_sequenceid */\ - 1 + 1 + 0 + /* eir_flags, spr_how, SP4_NONE (for now) */\ + 1 + 1 + /* eir_flags, spr_how */\ + 4 + /* spo_must_enforce & _allow with bitmap */\ 2 + /*eir_server_owner.so_minor_id */\ /* eir_server_owner.so_major_id<> */\ XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 +\ @@ -1507,6 +1924,36 @@ op_encode_channel_attrs_maxsz) * sizeof(__be32); } +#ifdef CONFIG_NFSD_PNFS +/* + * At this stage we don't really know what layout driver will handle the request, + * so we need to define an arbitrary upper bound here. + */ +#define MAX_LAYOUT_SIZE 128 +static inline u32 nfsd4_layoutget_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + + 1 /* logr_return_on_close */ + + op_encode_stateid_maxsz + + 1 /* nr of layouts */ + + MAX_LAYOUT_SIZE) * sizeof(__be32); +} + +static inline u32 nfsd4_layoutcommit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + + 1 /* locr_newsize */ + + 2 /* ns_size */) * sizeof(__be32); +} + +static inline u32 nfsd4_layoutreturn_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + return (op_encode_hdr_size + + 1 /* lrs_stateid */ + + op_encode_stateid_maxsz) * sizeof(__be32); +} +#endif /* CONFIG_NFSD_PNFS */ + static struct nfsd4_operation nfsd4_ops[] = { [OP_ACCESS] = { .op_func = (nfsd4op_func)nfsd4_access, @@ -1542,6 +1989,7 @@ [OP_GETATTR] = { .op_func = (nfsd4op_func)nfsd4_getattr, .op_flags = ALLOWED_ON_ABSENT_FS, + .op_rsize_bop = nfsd4_getattr_rsize, .op_name = "OP_GETATTR", }, [OP_GETFH] = { @@ -1611,37 +2059,32 @@ [OP_PUTFH] = { .op_func = (nfsd4op_func)nfsd4_putfh, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS - | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING - | OP_CLEAR_STATEID, + | OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID, .op_name = "OP_PUTFH", .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_PUTPUBFH] = { .op_func = (nfsd4op_func)nfsd4_putrootfh, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS - | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING - | OP_CLEAR_STATEID, + | OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID, .op_name = "OP_PUTPUBFH", .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_PUTROOTFH] = { .op_func = (nfsd4op_func)nfsd4_putrootfh, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS - | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING - | OP_CLEAR_STATEID, + | OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID, .op_name = "OP_PUTROOTFH", .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_READ] = { .op_func = (nfsd4op_func)nfsd4_read, - .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_READ", .op_rsize_bop = (nfsd4op_rsize)nfsd4_read_rsize, .op_get_currentstateid = (stateid_getter)nfsd4_get_readstateid, }, [OP_READDIR] = { .op_func = (nfsd4op_func)nfsd4_readdir, - .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_READDIR", .op_rsize_bop = (nfsd4op_rsize)nfsd4_readdir_rsize, }, @@ -1766,6 +2209,7 @@ .op_func = (nfsd4op_func)nfsd4_sequence, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, .op_name = "OP_SEQUENCE", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_sequence_rsize, }, [OP_DESTROY_CLIENTID] = { .op_func = (nfsd4op_func)nfsd4_destroy_clientid, @@ -1797,16 +2241,78 @@ .op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid, .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, +#ifdef CONFIG_NFSD_PNFS + [OP_GETDEVICEINFO] = { + .op_func = (nfsd4op_func)nfsd4_getdeviceinfo, + .op_flags = ALLOWED_WITHOUT_FH, + .op_name = "OP_GETDEVICEINFO", + }, + [OP_LAYOUTGET] = { + .op_func = (nfsd4op_func)nfsd4_layoutget, + .op_flags = OP_MODIFIES_SOMETHING, + .op_name = "OP_LAYOUTGET", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_layoutget_rsize, + }, + [OP_LAYOUTCOMMIT] = { + .op_func = (nfsd4op_func)nfsd4_layoutcommit, + .op_flags = OP_MODIFIES_SOMETHING, + .op_name = "OP_LAYOUTCOMMIT", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_layoutcommit_rsize, + }, + [OP_LAYOUTRETURN] = { + .op_func = (nfsd4op_func)nfsd4_layoutreturn, + .op_flags = OP_MODIFIES_SOMETHING, + .op_name = "OP_LAYOUTRETURN", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_layoutreturn_rsize, + }, +#endif /* CONFIG_NFSD_PNFS */ + + /* NFSv4.2 operations */ + [OP_ALLOCATE] = { + .op_func = (nfsd4op_func)nfsd4_allocate, + .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, + .op_name = "OP_ALLOCATE", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, + }, + [OP_DEALLOCATE] = { + .op_func = (nfsd4op_func)nfsd4_deallocate, + .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, + .op_name = "OP_DEALLOCATE", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, + }, + [OP_SEEK] = { + .op_func = (nfsd4op_func)nfsd4_seek, + .op_name = "OP_SEEK", + }, }; -#ifdef NFSD_DEBUG +int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op) +{ + struct nfsd4_operation *opdesc; + nfsd4op_rsize estimator; + + if (op->opnum == OP_ILLEGAL) + return op_encode_hdr_size * sizeof(__be32); + opdesc = OPDESC(op); + estimator = opdesc->op_rsize_bop; + return estimator ? estimator(rqstp, op) : PAGE_SIZE; +} + +void warn_on_nonidempotent_op(struct nfsd4_op *op) +{ + if (OPDESC(op)->op_flags & OP_MODIFIES_SOMETHING) { + pr_err("unable to encode reply to nonidempotent op %d (%s)\n", + op->opnum, nfsd4_op_name(op->opnum)); + WARN_ON_ONCE(1); + } +} + static const char *nfsd4_op_name(unsigned opnum) { if (opnum < ARRAY_SIZE(nfsd4_ops)) return nfsd4_ops[opnum].op_name; return "unknown_operation"; } -#endif #define nfsd4_voidres nfsd4_voidargs struct nfsd4_voidargs { int dummy; }; @@ -1838,6 +2344,7 @@ .vs_proc = nfsd_procedures4, .vs_dispatch = nfsd_dispatch, .vs_xdrsize = NFS4_SVC_XDRSIZE, + .vs_rpcb_optnl = 1, }; /*