--- zzzz-none-000/linux-3.10.107/fs/cifs/file.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/fs/cifs/file.c 2021-02-04 17:41:59.000000000 +0000 @@ -140,8 +140,7 @@ posix_flags = cifs_posix_convert_flags(f_flags); rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data, poplock, full_path, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_remap(cifs_sb)); cifs_put_tlink(tlink); if (rc) @@ -183,6 +182,7 @@ int create_options = CREATE_NOT_DIR; FILE_ALL_INFO *buf; struct TCP_Server_Info *server = tcon->ses->server; + struct cifs_open_parms oparms; if (!server->ops->open) return -ENOSYS; @@ -224,9 +224,16 @@ if (backup_cred(cifs_sb)) create_options |= CREATE_OPEN_BACKUP_INTENT; - rc = server->ops->open(xid, tcon, full_path, disposition, - desired_access, create_options, fid, oplock, buf, - cifs_sb); + oparms.tcon = tcon; + oparms.cifs_sb = cifs_sb; + oparms.desired_access = desired_access; + oparms.create_options = create_options; + oparms.disposition = disposition; + oparms.path = full_path; + oparms.fid = fid; + oparms.reconnect = false; + + rc = server->ops->open(xid, &oparms, oplock, buf); if (rc) goto out; @@ -236,7 +243,7 @@ xid); else rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb, - xid, &fid->netfid); + xid, fid); out: kfree(buf); @@ -265,7 +272,7 @@ struct tcon_link *tlink, __u32 oplock) { struct dentry *dentry = file->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = d_inode(dentry); struct cifsInodeInfo *cinode = CIFS_I(inode); struct cifsFileInfo *cfile; struct cifs_fid_locks *fdlocks; @@ -298,6 +305,7 @@ cfile->tlink = cifs_get_tlink(tlink); INIT_WORK(&cfile->oplock_break, cifs_oplock_break); mutex_init(&cfile->fh_mutex); + spin_lock_init(&cfile->file_info_lock); cifs_sb_active(inode->i_sb); @@ -305,26 +313,30 @@ * If the server returned a read oplock and we have mandatory brlocks, * set oplock level to None. */ - if (oplock == server->vals->oplock_read && - cifs_has_mand_locks(cinode)) { + if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) { cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n"); oplock = 0; } - spin_lock(&cifs_file_list_lock); + spin_lock(&tcon->open_file_lock); if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock) oplock = fid->pending_open->oplock; list_del(&fid->pending_open->olist); + fid->purge_cache = false; server->ops->set_fid(cfile, fid, oplock); list_add(&cfile->tlist, &tcon->openFileList); + /* if readable file instance put first in list*/ if (file->f_mode & FMODE_READ) list_add(&cfile->flist, &cinode->openFileList); else list_add_tail(&cfile->flist, &cinode->openFileList); - spin_unlock(&cifs_file_list_lock); + spin_unlock(&tcon->open_file_lock); + + if (fid->purge_cache) + cifs_zap_mapping(inode); file->private_data = cfile; return cfile; @@ -333,20 +345,20 @@ struct cifsFileInfo * cifsFileInfo_get(struct cifsFileInfo *cifs_file) { - spin_lock(&cifs_file_list_lock); + spin_lock(&cifs_file->file_info_lock); cifsFileInfo_get_locked(cifs_file); - spin_unlock(&cifs_file_list_lock); + spin_unlock(&cifs_file->file_info_lock); return cifs_file; } /* * Release a reference on the file private data. This may involve closing * the filehandle out on the server. Must be called without holding - * cifs_file_list_lock. + * tcon->open_file_lock and cifs_file->file_info_lock. 
*/ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) { - struct inode *inode = cifs_file->dentry->d_inode; + struct inode *inode = d_inode(cifs_file->dentry); struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink); struct TCP_Server_Info *server = tcon->ses->server; struct cifsInodeInfo *cifsi = CIFS_I(inode); @@ -355,12 +367,17 @@ struct cifsLockInfo *li, *tmp; struct cifs_fid fid; struct cifs_pending_open open; + bool oplock_break_cancelled; - spin_lock(&cifs_file_list_lock); + spin_lock(&tcon->open_file_lock); + + spin_lock(&cifs_file->file_info_lock); if (--cifs_file->count > 0) { - spin_unlock(&cifs_file_list_lock); + spin_unlock(&cifs_file->file_info_lock); + spin_unlock(&tcon->open_file_lock); return; } + spin_unlock(&cifs_file->file_info_lock); if (server->ops->get_lease_key) server->ops->get_lease_key(inode, &fid); @@ -374,19 +391,20 @@ if (list_empty(&cifsi->openFileList)) { cifs_dbg(FYI, "closing last open instance for inode %p\n", - cifs_file->dentry->d_inode); + d_inode(cifs_file->dentry)); /* * In strict cache mode we need invalidate mapping on the last * close because it may cause a error when we open this file * again and get at least level II oplock. */ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) - CIFS_I(inode)->invalid_mapping = true; + set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags); cifs_set_oplock_level(cifsi, 0); } - spin_unlock(&cifs_file_list_lock); - cancel_work_sync(&cifs_file->oplock_break); + spin_unlock(&tcon->open_file_lock); + + oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break); if (!tcon->need_reconnect && !cifs_file->invalidHandle) { struct TCP_Server_Info *server = tcon->ses->server; @@ -398,6 +416,9 @@ _free_xid(xid); } + if (oplock_break_cancelled) + cifs_done_oplock_break(cifsi); + cifs_del_pending_open(&open); /* @@ -456,6 +477,14 @@ cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n", inode, file->f_flags, full_path); + if (file->f_flags & O_DIRECT && + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) { + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) + file->f_op = &cifs_file_direct_nobrl_ops; + else + file->f_op = &cifs_file_direct_ops; + } + if (server->oplocks) oplock = REQ_OPLOCK; else @@ -549,7 +578,7 @@ cifs_relock_file(struct cifsFileInfo *cfile) { struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); - struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); + struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); int rc = 0; @@ -586,7 +615,7 @@ int desired_access; int disposition = FILE_OPEN; int create_options = CREATE_NOT_DIR; - struct cifs_fid fid; + struct cifs_open_parms oparms; xid = get_xid(); mutex_lock(&cfile->fh_mutex); @@ -597,7 +626,7 @@ return rc; } - inode = cfile->dentry->d_inode; + inode = d_inode(cfile->dentry); cifs_sb = CIFS_SB(inode->i_sb); tcon = tlink_tcon(cfile->tlink); server = tcon->ses->server; @@ -636,9 +665,10 @@ rc = cifs_posix_open(full_path, NULL, inode->i_sb, cifs_sb->mnt_file_mode /* ignored */, - oflags, &oplock, &fid.netfid, xid); + oflags, &oplock, &cfile->fid.netfid, xid); if (rc == 0) { cifs_dbg(FYI, "posix reopen succeeded\n"); + oparms.reconnect = true; goto reopen_success; } /* @@ -653,18 +683,32 @@ create_options |= CREATE_OPEN_BACKUP_INTENT; if (server->ops->get_lease_key) - server->ops->get_lease_key(inode, &fid); + server->ops->get_lease_key(inode, &cfile->fid); + + oparms.tcon = tcon; + oparms.cifs_sb = cifs_sb; + oparms.desired_access = desired_access; + oparms.create_options = 
create_options; + oparms.disposition = disposition; + oparms.path = full_path; + oparms.fid = &cfile->fid; + oparms.reconnect = true; /* * Can not refresh inode by passing in file_info buf to be returned by - * CIFSSMBOpen and then calling get_inode_info with returned buf since + * ops->open and then calling get_inode_info with returned buf since * file might have write behind data that needs to be flushed and server * version of file size can be stale. If we knew for sure that inode was * not dirty locally we could do this. */ - rc = server->ops->open(xid, tcon, full_path, disposition, - desired_access, create_options, &fid, &oplock, - NULL, cifs_sb); + rc = server->ops->open(xid, &oparms, &oplock, NULL); + if (rc == -ENOENT && oparms.reconnect == false) { + /* durable handle timeout is expired - open the file again */ + rc = server->ops->open(xid, &oparms, &oplock, NULL); + /* indicate that we need to relock the file */ + oparms.reconnect = true; + } + if (rc) { mutex_unlock(&cfile->fh_mutex); cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc); @@ -695,8 +739,9 @@ * to the server to get the new inode info. */ - server->ops->set_fid(cfile, &fid, oplock); - cifs_relock_file(cfile); + server->ops->set_fid(cfile, &cfile->fid, oplock); + if (oparms.reconnect) + cifs_relock_file(cfile); reopen_error_exit: kfree(full_path); @@ -734,10 +779,10 @@ server = tcon->ses->server; cifs_dbg(FYI, "Freeing private data in close dir\n"); - spin_lock(&cifs_file_list_lock); + spin_lock(&cfile->file_info_lock); if (server->ops->dir_needs_close(cfile)) { cfile->invalidHandle = true; - spin_unlock(&cifs_file_list_lock); + spin_unlock(&cfile->file_info_lock); if (server->ops->close_dir) rc = server->ops->close_dir(xid, tcon, &cfile->fid); else @@ -746,7 +791,7 @@ /* not much we can do if it fails anyway, ignore rc */ rc = 0; } else - spin_unlock(&cifs_file_list_lock); + spin_unlock(&cfile->file_info_lock); buf = cfile->srch_inf.ntwrk_buf_start; if (buf) { @@ -835,7 +880,7 @@ { bool rc = false; struct cifs_fid_locks *cur; - struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); + struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); list_for_each_entry(cur, &cinode->llist, llist) { rc = cifs_find_fid_lock_conflict(cur, offset, length, type, @@ -860,7 +905,7 @@ { int rc = 0; struct cifsLockInfo *conf_lock; - struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); + struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; bool exist; @@ -888,7 +933,7 @@ static void cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock) { - struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); + struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); down_write(&cinode->lock_sem); list_add_tail(&lock->llist, &cfile->llist->locks); up_write(&cinode->lock_sem); @@ -905,7 +950,7 @@ bool wait) { struct cifsLockInfo *conf_lock; - struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); + struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); bool exist; int rc = 0; @@ -998,7 +1043,7 @@ rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next); if (!rc) goto try_again; - locks_delete_block(flock); + posix_unblock_lock(flock); } return rc; } @@ -1031,7 +1076,7 @@ max_num = (max_buf - sizeof(struct smb_hdr)) / sizeof(LOCKING_ANDX_RANGE); - buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); + buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); if (!buf) { free_xid(xid); 
return -ENOMEM; @@ -1074,11 +1119,6 @@ return rc; } -/* copied from fs/locks.c with a name change */ -#define cifs_for_each_lock(inode, lockp) \ - for (lockp = &inode->i_flock; *lockp != NULL; \ - lockp = &(*lockp)->fl_next) - struct lock_to_push { struct list_head llist; __u64 offset; @@ -1091,9 +1131,11 @@ static int cifs_push_posix_locks(struct cifsFileInfo *cfile) { + struct inode *inode = d_inode(cfile->dentry); struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); - struct file_lock *flock, **before; - unsigned int count = 0, i = 0; + struct file_lock *flock; + struct file_lock_context *flctx = inode->i_flctx; + unsigned int count = 0, i; int rc = 0, xid, type; struct list_head locks_to_send, *el; struct lock_to_push *lck, *tmp; @@ -1101,12 +1143,14 @@ xid = get_xid(); - lock_flocks(); - cifs_for_each_lock(cfile->dentry->d_inode, before) { - if ((*before)->fl_flags & FL_POSIX) - count++; + if (!flctx) + goto out; + + spin_lock(&flctx->flc_lock); + list_for_each(el, &flctx->flc_posix) { + count++; } - unlock_flocks(); + spin_unlock(&flctx->flc_lock); INIT_LIST_HEAD(&locks_to_send); @@ -1115,7 +1159,7 @@ * added to the list while we are holding cinode->lock_sem that * protects locking operations of this inode. */ - for (; i < count; i++) { + for (i = 0; i < count; i++) { lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL); if (!lck) { rc = -ENOMEM; @@ -1125,11 +1169,8 @@ } el = locks_to_send.next; - lock_flocks(); - cifs_for_each_lock(cfile->dentry->d_inode, before) { - flock = *before; - if ((flock->fl_flags & FL_POSIX) == 0) - continue; + spin_lock(&flctx->flc_lock); + list_for_each_entry(flock, &flctx->flc_posix, fl_list) { if (el == &locks_to_send) { /* * The list ended. We don't have enough allocated @@ -1149,9 +1190,8 @@ lck->length = length; lck->type = type; lck->offset = flock->fl_start; - el = el->next; } - unlock_flocks(); + spin_unlock(&flctx->flc_lock); list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { int stored_rc; @@ -1180,7 +1220,7 @@ cifs_push_locks(struct cifsFileInfo *cfile) { struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); - struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); + struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); int rc = 0; @@ -1348,7 +1388,7 @@ unsigned int max_num, num, max_buf; LOCKING_ANDX_RANGE *buf, *cur; struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); - struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); + struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); struct cifsLockInfo *li, *tmp; __u64 length = 1 + flock->fl_end - flock->fl_start; struct list_head tmp_llist; @@ -1365,7 +1405,7 @@ max_num = (max_buf - sizeof(struct smb_hdr)) / sizeof(LOCKING_ANDX_RANGE); - buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); + buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); if (!buf) return -ENOMEM; @@ -1454,7 +1494,7 @@ struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); struct TCP_Server_Info *server = tcon->ses->server; - struct inode *inode = cfile->dentry->d_inode; + struct inode *inode = d_inode(cfile->dentry); if (posix_lck) { int posix_lock_type; @@ -1499,12 +1539,12 @@ * read won't conflict with non-overlapted locks due to * pagereading. 
*/ - if (!CIFS_I(inode)->clientCanCacheAll && - CIFS_I(inode)->clientCanCacheRead) { - cifs_invalidate_mapping(inode); + if (!CIFS_CACHE_WRITE(CIFS_I(inode)) && + CIFS_CACHE_READ(CIFS_I(inode))) { + cifs_zap_mapping(inode); cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n", inode); - CIFS_I(inode)->clientCanCacheRead = false; + CIFS_I(inode)->oplock = 0; } rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, @@ -1519,8 +1559,8 @@ rc = server->ops->mand_unlock_range(cfile, flock, xid); out: - if (flock->fl_flags & FL_POSIX) - posix_lock_file_wait(file, flock); + if (flock->fl_flags & FL_POSIX && !rc) + rc = locks_lock_file_wait(file, flock); return rc; } @@ -1550,7 +1590,7 @@ cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag, tcon->ses->server); - cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); + cifs_sb = CIFS_FILE_SB(file); netfid = cfile->fid.netfid; cinode = CIFS_I(file_inode(file)); @@ -1609,13 +1649,13 @@ struct TCP_Server_Info *server; unsigned int xid; struct dentry *dentry = open_file->dentry; - struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode); + struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry)); struct cifs_io_parms io_parms; cifs_sb = CIFS_SB(dentry->d_sb); - cifs_dbg(FYI, "write %zd bytes to offset %lld of %s\n", - write_size, *offset, dentry->d_name.name); + cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n", + write_size, *offset, dentry); tcon = tlink_tcon(open_file->tlink); server = tcon->ses->server; @@ -1642,8 +1682,8 @@ break; } - len = min((size_t)cifs_sb->wsize, - write_size - total_written); + len = min(server->ops->wp_retry_size(d_inode(dentry)), + (unsigned int)write_size - total_written); /* iov[0] is reserved for smb header */ iov[1].iov_base = (char *)write_data + total_written; iov[1].iov_len = len; @@ -1651,8 +1691,8 @@ io_parms.tcon = tcon; io_parms.offset = *offset; io_parms.length = len; - rc = server->ops->sync_write(xid, open_file, &io_parms, - &bytes_written, iov, 1); + rc = server->ops->sync_write(xid, &open_file->fid, + &io_parms, &bytes_written, iov, 1); } if (rc || (bytes_written == 0)) { if (total_written) @@ -1662,9 +1702,9 @@ return rc; } } else { - spin_lock(&dentry->d_inode->i_lock); + spin_lock(&d_inode(dentry)->i_lock); cifs_update_eof(cifsi, *offset, bytes_written); - spin_unlock(&dentry->d_inode->i_lock); + spin_unlock(&d_inode(dentry)->i_lock); *offset += bytes_written; } } @@ -1672,12 +1712,12 @@ cifs_stats_bytes_written(tcon, total_written); if (total_written > 0) { - spin_lock(&dentry->d_inode->i_lock); - if (*offset > dentry->d_inode->i_size) - i_size_write(dentry->d_inode, *offset); - spin_unlock(&dentry->d_inode->i_lock); + spin_lock(&d_inode(dentry)->i_lock); + if (*offset > d_inode(dentry)->i_size) + i_size_write(d_inode(dentry), *offset); + spin_unlock(&d_inode(dentry)->i_lock); } - mark_inode_dirty_sync(dentry->d_inode); + mark_inode_dirty_sync(d_inode(dentry)); free_xid(xid); return total_written; } @@ -1687,12 +1727,13 @@ { struct cifsFileInfo *open_file = NULL; struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb); + struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); /* only filter by fsuid on multiuser mounts */ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) fsuid_only = false; - spin_lock(&cifs_file_list_lock); + spin_lock(&tcon->open_file_lock); /* we could simply get the first_list_entry since write-only entries are always at the end of the list but since the first entry might have a close pending, we go through the whole list */ @@ -1703,8 +1744,8 @@ if 
(!open_file->invalidHandle) { /* found a good file */ /* lock it so it will not be closed on us */ - cifsFileInfo_get_locked(open_file); - spin_unlock(&cifs_file_list_lock); + cifsFileInfo_get(open_file); + spin_unlock(&tcon->open_file_lock); return open_file; } /* else might as well continue, and look for another, or simply have the caller reopen it @@ -1712,7 +1753,7 @@ } else /* write only file */ break; /* write only files are last so must be done */ } - spin_unlock(&cifs_file_list_lock); + spin_unlock(&tcon->open_file_lock); return NULL; } @@ -1721,6 +1762,7 @@ { struct cifsFileInfo *open_file, *inv_file = NULL; struct cifs_sb_info *cifs_sb; + struct cifs_tcon *tcon; bool any_available = false; int rc; unsigned int refind = 0; @@ -1736,15 +1778,16 @@ } cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb); + tcon = cifs_sb_master_tcon(cifs_sb); /* only filter by fsuid on multiuser mounts */ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) fsuid_only = false; - spin_lock(&cifs_file_list_lock); + spin_lock(&tcon->open_file_lock); refind_writable: if (refind > MAX_REOPEN_ATT) { - spin_unlock(&cifs_file_list_lock); + spin_unlock(&tcon->open_file_lock); return NULL; } list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { @@ -1755,8 +1798,8 @@ if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { if (!open_file->invalidHandle) { /* found a good writable file */ - cifsFileInfo_get_locked(open_file); - spin_unlock(&cifs_file_list_lock); + cifsFileInfo_get(open_file); + spin_unlock(&tcon->open_file_lock); return open_file; } else { if (!inv_file) @@ -1772,24 +1815,24 @@ if (inv_file) { any_available = false; - cifsFileInfo_get_locked(inv_file); + cifsFileInfo_get(inv_file); } - spin_unlock(&cifs_file_list_lock); + spin_unlock(&tcon->open_file_lock); if (inv_file) { rc = cifs_reopen_file(inv_file, false); if (!rc) return inv_file; else { - spin_lock(&cifs_file_list_lock); + spin_lock(&tcon->open_file_lock); list_move_tail(&inv_file->flist, &cifs_inode->openFileList); - spin_unlock(&cifs_file_list_lock); + spin_unlock(&tcon->open_file_lock); cifsFileInfo_put(inv_file); - spin_lock(&cifs_file_list_lock); ++refind; inv_file = NULL; + spin_lock(&tcon->open_file_lock); goto refind_writable; } } @@ -1851,15 +1894,163 @@ return rc; } +static struct cifs_writedata * +wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping, + pgoff_t end, pgoff_t *index, + unsigned int *found_pages) +{ + unsigned int nr_pages; + struct page **pages; + struct cifs_writedata *wdata; + + wdata = cifs_writedata_alloc((unsigned int)tofind, + cifs_writev_complete); + if (!wdata) + return NULL; + + /* + * find_get_pages_tag seems to return a max of 256 on each + * iteration, so we must call it several times in order to + * fill the array or the wsize is effectively limited to + * 256 * PAGE_CACHE_SIZE. 
+ */ + *found_pages = 0; + pages = wdata->pages; + do { + nr_pages = find_get_pages_tag(mapping, index, + PAGECACHE_TAG_DIRTY, tofind, + pages); + *found_pages += nr_pages; + tofind -= nr_pages; + pages += nr_pages; + } while (nr_pages && tofind && *index <= end); + + return wdata; +} + +static unsigned int +wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages, + struct address_space *mapping, + struct writeback_control *wbc, + pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done) +{ + unsigned int nr_pages = 0, i; + struct page *page; + + for (i = 0; i < found_pages; i++) { + page = wdata->pages[i]; + /* + * At this point we hold neither mapping->tree_lock nor + * lock on the page itself: the page may be truncated or + * invalidated (changing page->mapping to NULL), or even + * swizzled back from swapper_space to tmpfs file + * mapping + */ + + if (nr_pages == 0) + lock_page(page); + else if (!trylock_page(page)) + break; + + if (unlikely(page->mapping != mapping)) { + unlock_page(page); + break; + } + + if (!wbc->range_cyclic && page->index > end) { + *done = true; + unlock_page(page); + break; + } + + if (*next && (page->index != *next)) { + /* Not next consecutive page */ + unlock_page(page); + break; + } + + if (wbc->sync_mode != WB_SYNC_NONE) + wait_on_page_writeback(page); + + if (PageWriteback(page) || + !clear_page_dirty_for_io(page)) { + unlock_page(page); + break; + } + + /* + * This actually clears the dirty bit in the radix tree. + * See cifs_writepage() for more commentary. + */ + set_page_writeback(page); + if (page_offset(page) >= i_size_read(mapping->host)) { + *done = true; + unlock_page(page); + end_page_writeback(page); + break; + } + + wdata->pages[i] = page; + *next = page->index + 1; + ++nr_pages; + } + + /* reset index to refind any pages skipped */ + if (nr_pages == 0) + *index = wdata->pages[0]->index + 1; + + /* put any pages we aren't going to use */ + for (i = nr_pages; i < found_pages; i++) { + page_cache_release(wdata->pages[i]); + wdata->pages[i] = NULL; + } + + return nr_pages; +} + +static int +wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages, + struct address_space *mapping, struct writeback_control *wbc) +{ + int rc = 0; + struct TCP_Server_Info *server; + unsigned int i; + + wdata->sync_mode = wbc->sync_mode; + wdata->nr_pages = nr_pages; + wdata->offset = page_offset(wdata->pages[0]); + wdata->pagesz = PAGE_CACHE_SIZE; + wdata->tailsz = min(i_size_read(mapping->host) - + page_offset(wdata->pages[nr_pages - 1]), + (loff_t)PAGE_CACHE_SIZE); + wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + wdata->tailsz; + + if (wdata->cfile != NULL) + cifsFileInfo_put(wdata->cfile); + wdata->cfile = find_writable_file(CIFS_I(mapping->host), false); + if (!wdata->cfile) { + cifs_dbg(VFS, "No writable handles for inode\n"); + rc = -EBADF; + } else { + wdata->pid = wdata->cfile->pid; + server = tlink_tcon(wdata->cfile->tlink)->ses->server; + rc = server->ops->async_writev(wdata, cifs_writedata_release); + } + + for (i = 0; i < nr_pages; ++i) + unlock_page(wdata->pages[i]); + + return rc; +} + static int cifs_writepages(struct address_space *mapping, struct writeback_control *wbc) { struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb); + struct TCP_Server_Info *server; bool done = false, scanned = false, range_whole = false; pgoff_t end, index; struct cifs_writedata *wdata; - struct TCP_Server_Info *server; - struct page *page; int rc = 0; /* @@ -1879,151 +2070,50 @@ range_whole = true; scanned = true; } + server = 
cifs_sb_master_tcon(cifs_sb)->ses->server; retry: while (!done && index <= end) { - unsigned int i, nr_pages, found_pages; - pgoff_t next = 0, tofind; - struct page **pages; + unsigned int i, nr_pages, found_pages, wsize, credits; + pgoff_t next = 0, tofind, saved_index = index; + + rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize, + &wsize, &credits); + if (rc) + break; - tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1, - end - index) + 1; + tofind = min((wsize / PAGE_CACHE_SIZE) - 1, end - index) + 1; - wdata = cifs_writedata_alloc((unsigned int)tofind, - cifs_writev_complete); + wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index, + &found_pages); if (!wdata) { rc = -ENOMEM; + add_credits_and_wake_if(server, credits, 0); break; } - /* - * find_get_pages_tag seems to return a max of 256 on each - * iteration, so we must call it several times in order to - * fill the array or the wsize is effectively limited to - * 256 * PAGE_CACHE_SIZE. - */ - found_pages = 0; - pages = wdata->pages; - do { - nr_pages = find_get_pages_tag(mapping, &index, - PAGECACHE_TAG_DIRTY, - tofind, pages); - found_pages += nr_pages; - tofind -= nr_pages; - pages += nr_pages; - } while (nr_pages && tofind && index <= end); - if (found_pages == 0) { kref_put(&wdata->refcount, cifs_writedata_release); + add_credits_and_wake_if(server, credits, 0); break; } - nr_pages = 0; - for (i = 0; i < found_pages; i++) { - page = wdata->pages[i]; - /* - * At this point we hold neither mapping->tree_lock nor - * lock on the page itself: the page may be truncated or - * invalidated (changing page->mapping to NULL), or even - * swizzled back from swapper_space to tmpfs file - * mapping - */ - - if (nr_pages == 0) - lock_page(page); - else if (!trylock_page(page)) - break; - - if (unlikely(page->mapping != mapping)) { - unlock_page(page); - break; - } - - if (!wbc->range_cyclic && page->index > end) { - done = true; - unlock_page(page); - break; - } - - if (next && (page->index != next)) { - /* Not next consecutive page */ - unlock_page(page); - break; - } - - if (wbc->sync_mode != WB_SYNC_NONE) - wait_on_page_writeback(page); - - if (PageWriteback(page) || - !clear_page_dirty_for_io(page)) { - unlock_page(page); - break; - } - - /* - * This actually clears the dirty bit in the radix tree. - * See cifs_writepage() for more commentary. - */ - set_page_writeback(page); - - if (page_offset(page) >= i_size_read(mapping->host)) { - done = true; - unlock_page(page); - end_page_writeback(page); - break; - } - - wdata->pages[i] = page; - next = page->index + 1; - ++nr_pages; - } - - /* reset index to refind any pages skipped */ - if (nr_pages == 0) - index = wdata->pages[0]->index + 1; - - /* put any pages we aren't going to use */ - for (i = nr_pages; i < found_pages; i++) { - page_cache_release(wdata->pages[i]); - wdata->pages[i] = NULL; - } + nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc, + end, &index, &next, &done); /* nothing to write? 
*/ if (nr_pages == 0) { kref_put(&wdata->refcount, cifs_writedata_release); + add_credits_and_wake_if(server, credits, 0); continue; } - wdata->sync_mode = wbc->sync_mode; - wdata->nr_pages = nr_pages; - wdata->offset = page_offset(wdata->pages[0]); - wdata->pagesz = PAGE_CACHE_SIZE; - wdata->tailsz = - min(i_size_read(mapping->host) - - page_offset(wdata->pages[nr_pages - 1]), - (loff_t)PAGE_CACHE_SIZE); - wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + - wdata->tailsz; - - do { - if (wdata->cfile != NULL) - cifsFileInfo_put(wdata->cfile); - wdata->cfile = find_writable_file(CIFS_I(mapping->host), - false); - if (!wdata->cfile) { - cifs_dbg(VFS, "No writable handles for inode\n"); - rc = -EBADF; - break; - } - wdata->pid = wdata->cfile->pid; - server = tlink_tcon(wdata->cfile->tlink)->ses->server; - rc = server->ops->async_writev(wdata); - } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN); + wdata->credits = credits; - for (i = 0; i < nr_pages; ++i) - unlock_page(wdata->pages[i]); + rc = wdata_send_pages(wdata, nr_pages, mapping, wbc); /* send failure -- clean up the mess */ if (rc != 0) { + add_credits_and_wake_if(server, wdata->credits, 0); for (i = 0; i < nr_pages; ++i) { if (rc == -EAGAIN) redirty_page_for_writepage(wbc, @@ -2038,6 +2128,11 @@ } kref_put(&wdata->refcount, cifs_writedata_release); + if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) { + index = saved_index; + continue; + } + wbc->nr_to_write -= nr_pages; if (wbc->nr_to_write <= 0) done = true; @@ -2186,11 +2281,11 @@ xid = get_xid(); - cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n", - file->f_path.dentry->d_name.name, datasync); + cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n", + file, datasync); - if (!CIFS_I(inode)->clientCanCacheRead) { - rc = cifs_invalidate_mapping(inode); + if (!CIFS_CACHE_READ(CIFS_I(inode))) { + rc = cifs_zap_mapping(inode); if (rc) { cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc); rc = 0; /* don't care about it in fsync */ @@ -2218,7 +2313,7 @@ struct cifs_tcon *tcon; struct TCP_Server_Info *server; struct cifsFileInfo *smbfile = file->private_data; - struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); + struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); struct inode *inode = file->f_mapping->host; rc = filemap_write_and_wait_range(inode->i_mapping, start, end); @@ -2228,8 +2323,8 @@ xid = get_xid(); - cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n", - file->f_path.dentry->d_name.name, datasync); + cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n", + file, datasync); tcon = tlink_tcon(smbfile->tlink); if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { @@ -2304,12 +2399,23 @@ } static void -cifs_uncached_writev_complete(struct work_struct *work) +cifs_uncached_writedata_release(struct kref *refcount) { int i; + struct cifs_writedata *wdata = container_of(refcount, + struct cifs_writedata, refcount); + + for (i = 0; i < wdata->nr_pages; i++) + put_page(wdata->pages[i]); + cifs_writedata_release(refcount); +} + +static void +cifs_uncached_writev_complete(struct work_struct *work) +{ struct cifs_writedata *wdata = container_of(work, struct cifs_writedata, work); - struct inode *inode = wdata->cfile->dentry->d_inode; + struct inode *inode = d_inode(wdata->cfile->dentry); struct cifsInodeInfo *cifsi = CIFS_I(inode); spin_lock(&inode->i_lock); @@ -2320,132 +2426,112 @@ complete(&wdata->done); - if (wdata->result != -EAGAIN) { - for (i = 0; i < wdata->nr_pages; i++) - put_page(wdata->pages[i]); - } - - kref_put(&wdata->refcount, 
cifs_writedata_release); + kref_put(&wdata->refcount, cifs_uncached_writedata_release); } -/* attempt to send write to server, retry on any -EAGAIN errors */ static int -cifs_uncached_retry_writev(struct cifs_writedata *wdata) +wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from, + size_t *len, unsigned long *num_pages) { - int rc; - struct TCP_Server_Info *server; + size_t save_len, copied, bytes, cur_len = *len; + unsigned long i, nr_pages = *num_pages; - server = tlink_tcon(wdata->cfile->tlink)->ses->server; + save_len = cur_len; + for (i = 0; i < nr_pages; i++) { + bytes = min_t(const size_t, cur_len, PAGE_SIZE); + copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from); + cur_len -= copied; + /* + * If we didn't copy as much as we expected, then that + * may mean we trod into an unmapped area. Stop copying + * at that point. On the next pass through the big + * loop, we'll likely end up getting a zero-length + * write and bailing out of it. + */ + if (copied < bytes) + break; + } + cur_len = save_len - cur_len; + *len = cur_len; - do { - if (wdata->cfile->invalidHandle) { - rc = cifs_reopen_file(wdata->cfile, false); - if (rc != 0) - continue; - } - rc = server->ops->async_writev(wdata); - } while (rc == -EAGAIN); + /* + * If we have no data to send, then that probably means that + * the copy above failed altogether. That's most likely because + * the address in the iovec was bogus. Return -EFAULT and let + * the caller free anything we allocated and bail out. + */ + if (!cur_len) + return -EFAULT; - return rc; + /* + * i + 1 now represents the number of pages we actually used in + * the copy phase above. + */ + *num_pages = i + 1; + return 0; } -static ssize_t -cifs_iovec_write(struct file *file, const struct iovec *iov, - unsigned long nr_segs, loff_t *poffset) +static int +cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, + struct cifsFileInfo *open_file, + struct cifs_sb_info *cifs_sb, struct list_head *wdata_list) { - unsigned long nr_pages, i; - size_t bytes, copied, len, cur_len; - ssize_t total_written = 0; - loff_t offset; - struct iov_iter it; - struct cifsFileInfo *open_file; - struct cifs_tcon *tcon; - struct cifs_sb_info *cifs_sb; - struct cifs_writedata *wdata, *tmp; - struct list_head wdata_list; - int rc; + int rc = 0; + size_t cur_len; + unsigned long nr_pages, num_pages, i; + struct cifs_writedata *wdata; + struct iov_iter saved_from; + loff_t saved_offset = offset; pid_t pid; - - len = iov_length(iov, nr_segs); - if (!len) - return 0; - - rc = generic_write_checks(file, poffset, &len, 0); - if (rc) - return rc; - - INIT_LIST_HEAD(&wdata_list); - cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); - open_file = file->private_data; - tcon = tlink_tcon(open_file->tlink); - - if (!tcon->ses->server->ops->async_writev) - return -ENOSYS; - - offset = *poffset; + struct TCP_Server_Info *server; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) pid = open_file->pid; else pid = current->tgid; - iov_iter_init(&it, iov, nr_segs, len, 0); + server = tlink_tcon(open_file->tlink)->ses->server; + memcpy(&saved_from, from, sizeof(struct iov_iter)); + do { - size_t save_len; + unsigned int wsize, credits; - nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len); + rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize, + &wsize, &credits); + if (rc) + break; + + nr_pages = get_numpages(wsize, len, &cur_len); wdata = cifs_writedata_alloc(nr_pages, cifs_uncached_writev_complete); if (!wdata) { rc = -ENOMEM; + 
add_credits_and_wake_if(server, credits, 0); break; } rc = cifs_write_allocate_pages(wdata->pages, nr_pages); if (rc) { kfree(wdata); + add_credits_and_wake_if(server, credits, 0); break; } - save_len = cur_len; - for (i = 0; i < nr_pages; i++) { - bytes = min_t(const size_t, cur_len, PAGE_SIZE); - copied = iov_iter_copy_from_user(wdata->pages[i], &it, - 0, bytes); - cur_len -= copied; - iov_iter_advance(&it, copied); - /* - * If we didn't copy as much as we expected, then that - * may mean we trod into an unmapped area. Stop copying - * at that point. On the next pass through the big - * loop, we'll likely end up getting a zero-length - * write and bailing out of it. - */ - if (copied < bytes) - break; - } - cur_len = save_len - cur_len; - - /* - * If we have no data to send, then that probably means that - * the copy above failed altogether. That's most likely because - * the address in the iovec was bogus. Set the rc to -EFAULT, - * free anything we allocated and bail out. - */ - if (!cur_len) { + num_pages = nr_pages; + rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages); + if (rc) { for (i = 0; i < nr_pages; i++) put_page(wdata->pages[i]); kfree(wdata); - rc = -EFAULT; + add_credits_and_wake_if(server, credits, 0); break; } /* - * i + 1 now represents the number of pages we actually used in - * the copy phase above. Bring nr_pages down to that, and free - * any pages that we didn't use. + * Bring nr_pages down to the number of pages we actually used, + * and free any pages that we didn't use. */ - for ( ; nr_pages > i + 1; nr_pages--) + for ( ; nr_pages > num_pages; nr_pages--) put_page(wdata->pages[nr_pages - 1]); wdata->sync_mode = WB_SYNC_ALL; @@ -2456,17 +2542,68 @@ wdata->bytes = cur_len; wdata->pagesz = PAGE_SIZE; wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE); - rc = cifs_uncached_retry_writev(wdata); + wdata->credits = credits; + + if (!wdata->cfile->invalidHandle || + !cifs_reopen_file(wdata->cfile, false)) + rc = server->ops->async_writev(wdata, + cifs_uncached_writedata_release); if (rc) { - kref_put(&wdata->refcount, cifs_writedata_release); + add_credits_and_wake_if(server, wdata->credits, 0); + kref_put(&wdata->refcount, + cifs_uncached_writedata_release); + if (rc == -EAGAIN) { + memcpy(from, &saved_from, + sizeof(struct iov_iter)); + iov_iter_advance(from, offset - saved_offset); + continue; + } break; } - list_add_tail(&wdata->list, &wdata_list); + list_add_tail(&wdata->list, wdata_list); offset += cur_len; len -= cur_len; } while (len > 0); + return rc; +} + +ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) +{ + struct file *file = iocb->ki_filp; + ssize_t total_written = 0; + struct cifsFileInfo *open_file; + struct cifs_tcon *tcon; + struct cifs_sb_info *cifs_sb; + struct cifs_writedata *wdata, *tmp; + struct list_head wdata_list; + struct iov_iter saved_from; + int rc; + + /* + * BB - optimize the way when signing is disabled. We can drop this + * extra memory-to-memory copying and use iovec buffers for constructing + * write request. 
+ */ + + rc = generic_write_checks(iocb, from); + if (rc <= 0) + return rc; + + INIT_LIST_HEAD(&wdata_list); + cifs_sb = CIFS_FILE_SB(file); + open_file = file->private_data; + tcon = tlink_tcon(open_file->tlink); + + if (!tcon->ses->server->ops->async_writev) + return -ENOSYS; + + memcpy(&saved_from, from, sizeof(struct iov_iter)); + + rc = cifs_write_from_iter(iocb->ki_pos, iov_iter_count(from), from, + open_file, cifs_sb, &wdata_list); + /* * If at least one write was successfully sent, then discard any rc * value from the later writes. If the other write succeeds, then @@ -2495,86 +2632,82 @@ /* resend call if it's a retryable error */ if (rc == -EAGAIN) { - rc = cifs_uncached_retry_writev(wdata); + struct list_head tmp_list; + struct iov_iter tmp_from; + + INIT_LIST_HEAD(&tmp_list); + list_del_init(&wdata->list); + + memcpy(&tmp_from, &saved_from, + sizeof(struct iov_iter)); + iov_iter_advance(&tmp_from, + wdata->offset - iocb->ki_pos); + + rc = cifs_write_from_iter(wdata->offset, + wdata->bytes, &tmp_from, + open_file, cifs_sb, &tmp_list); + + list_splice(&tmp_list, &wdata_list); + + kref_put(&wdata->refcount, + cifs_uncached_writedata_release); goto restart_loop; } } list_del_init(&wdata->list); - kref_put(&wdata->refcount, cifs_writedata_release); + kref_put(&wdata->refcount, cifs_uncached_writedata_release); } - if (total_written > 0) - *poffset += total_written; + if (unlikely(!total_written)) + return rc; + iocb->ki_pos += total_written; + set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(file_inode(file))->flags); cifs_stats_bytes_written(tcon, total_written); - return total_written ? total_written : (ssize_t)rc; -} - -ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - ssize_t written; - struct inode *inode; - - inode = file_inode(iocb->ki_filp); - - /* - * BB - optimize the way when signing is disabled. We can drop this - * extra memory-to-memory copying and use iovec buffers for constructing - * write request. - */ - - written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos); - if (written > 0) { - CIFS_I(inode)->invalid_mapping = true; - iocb->ki_pos = pos; - } - - return written; + return total_written; } static ssize_t -cifs_writev(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +cifs_writev(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; struct inode *inode = file->f_mapping->host; struct cifsInodeInfo *cinode = CIFS_I(inode); struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; - ssize_t rc = -EACCES; - - BUG_ON(iocb->ki_pos != pos); + ssize_t rc; /* * We need to hold the sem to be sure nobody modifies lock list * with a brlock that prevents writing. 
*/ down_read(&cinode->lock_sem); - if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs), - server->vals->exclusive_lock_type, NULL, - CIFS_WRITE_OP)) { - mutex_lock(&inode->i_mutex); - rc = __generic_file_aio_write(iocb, iov, nr_segs, - &iocb->ki_pos); - mutex_unlock(&inode->i_mutex); - } + mutex_lock(&inode->i_mutex); - if (rc > 0 || rc == -EIOCBQUEUED) { - ssize_t err; + rc = generic_write_checks(iocb, from); + if (rc <= 0) + goto out; - err = generic_write_sync(file, pos, rc); - if (err < 0 && rc > 0) + if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from), + server->vals->exclusive_lock_type, NULL, + CIFS_WRITE_OP)) + rc = __generic_file_write_iter(iocb, from); + else + rc = -EACCES; +out: + mutex_unlock(&inode->i_mutex); + + if (rc > 0) { + ssize_t err = generic_write_sync(file, iocb->ki_pos - rc, rc); + if (err < 0) rc = err; } - up_read(&cinode->lock_sem); return rc; } ssize_t -cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from) { struct inode *inode = file_inode(iocb->ki_filp); struct cifsInodeInfo *cinode = CIFS_I(inode); @@ -2584,12 +2717,19 @@ struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); ssize_t written; - if (cinode->clientCanCacheAll) { + written = cifs_get_writer(cinode); + if (written) + return written; + + if (CIFS_CACHE_WRITE(cinode)) { if (cap_unix(tcon->ses) && (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) - && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) - return generic_file_aio_write(iocb, iov, nr_segs, pos); - return cifs_writev(iocb, iov, nr_segs, pos); + && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) { + written = generic_file_write_iter(iocb, from); + goto out; + } + written = cifs_writev(iocb, from); + goto out; } /* * For non-oplocked files in strict cache mode we need to write the data @@ -2597,18 +2737,20 @@ * affected pages because it may cause a error with mandatory locks on * these pages but not on the region from pos to ppos+len-1. */ - written = cifs_user_writev(iocb, iov, nr_segs, pos); - if (written > 0 && cinode->clientCanCacheRead) { + written = cifs_user_writev(iocb, from); + if (written > 0 && CIFS_CACHE_READ(cinode)) { /* * Windows 7 server can delay breaking level2 oplock if a write * request comes - break it on the client to prevent reading * an old data. 
*/ - cifs_invalidate_mapping(inode); + cifs_zap_mapping(inode); cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n", inode); - cinode->clientCanCacheRead = false; + cinode->oplock = 0; } +out: + cifs_put_writer(cinode); return written; } @@ -2680,79 +2822,30 @@ cifs_readdata_release(refcount); } -static int -cifs_retry_async_readv(struct cifs_readdata *rdata) -{ - int rc; - struct TCP_Server_Info *server; - - server = tlink_tcon(rdata->cfile->tlink)->ses->server; - - do { - if (rdata->cfile->invalidHandle) { - rc = cifs_reopen_file(rdata->cfile, true); - if (rc != 0) - continue; - } - rc = server->ops->async_readv(rdata); - } while (rc == -EAGAIN); - - return rc; -} - /** * cifs_readdata_to_iov - copy data from pages in response to an iovec * @rdata: the readdata response with list of pages holding data - * @iov: vector in which we should copy the data - * @nr_segs: number of segments in vector - * @offset: offset into file of the first iovec - * @copied: used to return the amount of data copied to the iov + * @iter: destination for our data * * This function copies data from a list of pages in a readdata response into * an array of iovecs. It will first calculate where the data should go * based on the info in the readdata and then copy the data into that spot. */ -static ssize_t -cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov, - unsigned long nr_segs, loff_t offset, ssize_t *copied) +static int +cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter) { - int rc = 0; - struct iov_iter ii; - size_t pos = rdata->offset - offset; - ssize_t remaining = rdata->bytes; - unsigned char *pdata; + size_t remaining = rdata->got_bytes; unsigned int i; - /* set up iov_iter and advance to the correct offset */ - iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0); - iov_iter_advance(&ii, pos); - - *copied = 0; for (i = 0; i < rdata->nr_pages; i++) { - ssize_t copy; struct page *page = rdata->pages[i]; - - /* copy a whole page or whatever's left */ - copy = min_t(ssize_t, remaining, PAGE_SIZE); - - /* ...but limit it to whatever space is left in the iov */ - copy = min_t(ssize_t, copy, iov_iter_count(&ii)); - - /* go while there's data to be copied and no errors */ - if (copy && !rc) { - pdata = kmap(page); - rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset, - (int)copy); - kunmap(page); - if (!rc) { - *copied += copy; - remaining -= copy; - iov_iter_advance(&ii, copy); - } - } + size_t copy = min_t(size_t, remaining, PAGE_SIZE); + size_t written = copy_page_to_iter(page, 0, copy, iter); + remaining -= written; + if (written < copy && iov_iter_count(iter) > 0) + break; } - - return rc; + return remaining ? -EFAULT : 0; } static void @@ -2769,11 +2862,12 @@ cifs_uncached_read_into_pages(struct TCP_Server_Info *server, struct cifs_readdata *rdata, unsigned int len) { - int total_read = 0, result = 0; + int result = 0; unsigned int i; unsigned int nr_pages = rdata->nr_pages; struct kvec iov; + rdata->got_bytes = 0; rdata->tailsz = PAGE_SIZE; for (i = 0; i < nr_pages; i++) { struct page *page = rdata->pages[i]; @@ -2807,61 +2901,47 @@ if (result < 0) break; - total_read += result; + rdata->got_bytes += result; } - return total_read > 0 && result != -EAGAIN ? total_read : result; + return rdata->got_bytes > 0 && result != -ECONNABORTED ? 
+ rdata->got_bytes : result; } -static ssize_t -cifs_iovec_read(struct file *file, const struct iovec *iov, - unsigned long nr_segs, loff_t *poffset) +static int +cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file, + struct cifs_sb_info *cifs_sb, struct list_head *rdata_list) { - ssize_t rc; - size_t len, cur_len; - ssize_t total_read = 0; - loff_t offset = *poffset; - unsigned int npages; - struct cifs_sb_info *cifs_sb; - struct cifs_tcon *tcon; - struct cifsFileInfo *open_file; - struct cifs_readdata *rdata, *tmp; - struct list_head rdata_list; + struct cifs_readdata *rdata; + unsigned int npages, rsize, credits; + size_t cur_len; + int rc; pid_t pid; + struct TCP_Server_Info *server; - if (!nr_segs) - return 0; - - len = iov_length(iov, nr_segs); - if (!len) - return 0; - - INIT_LIST_HEAD(&rdata_list); - cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); - open_file = file->private_data; - tcon = tlink_tcon(open_file->tlink); - - if (!tcon->ses->server->ops->async_readv) - return -ENOSYS; + server = tlink_tcon(open_file->tlink)->ses->server; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) pid = open_file->pid; else pid = current->tgid; - if ((file->f_flags & O_ACCMODE) == O_WRONLY) - cifs_dbg(FYI, "attempting read on write only file instance\n"); - do { - cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize); + rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize, + &rsize, &credits); + if (rc) + break; + + cur_len = min_t(const size_t, len, rsize); npages = DIV_ROUND_UP(cur_len, PAGE_SIZE); /* allocate a readdata struct */ rdata = cifs_readdata_alloc(npages, cifs_uncached_readv_complete); if (!rdata) { + add_credits_and_wake_if(server, credits, 0); rc = -ENOMEM; - goto error; + break; } rc = cifs_read_allocate_pages(rdata, npages); @@ -2875,78 +2955,135 @@ rdata->pid = pid; rdata->pagesz = PAGE_SIZE; rdata->read_into_pages = cifs_uncached_read_into_pages; + rdata->credits = credits; - rc = cifs_retry_async_readv(rdata); + if (!rdata->cfile->invalidHandle || + !cifs_reopen_file(rdata->cfile, true)) + rc = server->ops->async_readv(rdata); error: if (rc) { + add_credits_and_wake_if(server, rdata->credits, 0); kref_put(&rdata->refcount, cifs_uncached_readdata_release); + if (rc == -EAGAIN) + continue; break; } - list_add_tail(&rdata->list, &rdata_list); + list_add_tail(&rdata->list, rdata_list); offset += cur_len; len -= cur_len; } while (len > 0); + return rc; +} + +ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) +{ + struct file *file = iocb->ki_filp; + ssize_t rc; + size_t len; + ssize_t total_read = 0; + loff_t offset = iocb->ki_pos; + struct cifs_sb_info *cifs_sb; + struct cifs_tcon *tcon; + struct cifsFileInfo *open_file; + struct cifs_readdata *rdata, *tmp; + struct list_head rdata_list; + + len = iov_iter_count(to); + if (!len) + return 0; + + INIT_LIST_HEAD(&rdata_list); + cifs_sb = CIFS_FILE_SB(file); + open_file = file->private_data; + tcon = tlink_tcon(open_file->tlink); + + if (!tcon->ses->server->ops->async_readv) + return -ENOSYS; + + if ((file->f_flags & O_ACCMODE) == O_WRONLY) + cifs_dbg(FYI, "attempting read on write only file instance\n"); + + rc = cifs_send_async_read(offset, len, open_file, cifs_sb, &rdata_list); + /* if at least one read request send succeeded, then reset rc */ if (!list_empty(&rdata_list)) rc = 0; + len = iov_iter_count(to); /* the loop below should proceed in the order of increasing offsets */ -restart_loop: +again: list_for_each_entry_safe(rdata, tmp, &rdata_list, list) { if (!rc) { - 
ssize_t copied; - /* FIXME: freezable sleep too? */ rc = wait_for_completion_killable(&rdata->done); if (rc) rc = -EINTR; - else if (rdata->result) + else if (rdata->result == -EAGAIN) { + /* resend call if it's a retryable error */ + struct list_head tmp_list; + unsigned int got_bytes = rdata->got_bytes; + + list_del_init(&rdata->list); + INIT_LIST_HEAD(&tmp_list); + + /* + * Got a part of data and then reconnect has + * happened -- fill the buffer and continue + * reading. + */ + if (got_bytes && got_bytes < rdata->bytes) { + rc = cifs_readdata_to_iov(rdata, to); + if (rc) { + kref_put(&rdata->refcount, + cifs_uncached_readdata_release); + continue; + } + } + + rc = cifs_send_async_read( + rdata->offset + got_bytes, + rdata->bytes - got_bytes, + rdata->cfile, cifs_sb, + &tmp_list); + + list_splice(&tmp_list, &rdata_list); + + kref_put(&rdata->refcount, + cifs_uncached_readdata_release); + goto again; + } else if (rdata->result) rc = rdata->result; - else { - rc = cifs_readdata_to_iov(rdata, iov, - nr_segs, *poffset, - &copied); - total_read += copied; - } + else + rc = cifs_readdata_to_iov(rdata, to); - /* resend call if it's a retryable error */ - if (rc == -EAGAIN) { - rc = cifs_retry_async_readv(rdata); - goto restart_loop; - } + /* if there was a short read -- discard anything left */ + if (rdata->got_bytes && rdata->got_bytes < rdata->bytes) + rc = -ENODATA; } list_del_init(&rdata->list); kref_put(&rdata->refcount, cifs_uncached_readdata_release); } + total_read = len - iov_iter_count(to); + cifs_stats_bytes_read(tcon, total_read); - *poffset += total_read; /* mask nodata case */ if (rc == -ENODATA) rc = 0; - return total_read ? total_read : rc; -} - -ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - ssize_t read; - - read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos); - if (read > 0) - iocb->ki_pos = pos; - - return read; + if (total_read) { + iocb->ki_pos += total_read; + return total_read; + } + return rc; } ssize_t -cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to) { struct inode *inode = file_inode(iocb->ki_filp); struct cifsInodeInfo *cinode = CIFS_I(inode); @@ -2964,23 +3101,23 @@ * on pages affected by this read but not on the region from pos to * pos+len-1. */ - if (!cinode->clientCanCacheRead) - return cifs_user_readv(iocb, iov, nr_segs, pos); + if (!CIFS_CACHE_READ(cinode)) + return cifs_user_readv(iocb, to); if (cap_unix(tcon->ses) && (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) - return generic_file_aio_read(iocb, iov, nr_segs, pos); + return generic_file_read_iter(iocb, to); /* * We need to hold the sem to be sure nobody modifies lock list * with a brlock that prevents reading. 
*/ down_read(&cinode->lock_sem); - if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs), + if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to), tcon->ses->server->vals->shared_lock_type, NULL, CIFS_READ_OP)) - rc = generic_file_aio_read(iocb, iov, nr_segs, pos); + rc = generic_file_read_iter(iocb, to); up_read(&cinode->lock_sem); return rc; } @@ -3004,7 +3141,7 @@ __u32 pid; xid = get_xid(); - cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); + cifs_sb = CIFS_FILE_SB(file); /* FIXME: set up handlers for larger reads and/or convert to async */ rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize); @@ -3033,18 +3170,19 @@ for (total_read = 0, cur_offset = read_data; read_size > total_read; total_read += bytes_read, cur_offset += bytes_read) { - current_read_size = min_t(uint, read_size - total_read, rsize); - /* - * For windows me and 9x we do not want to request more than it - * negotiated since it will refuse the read then. - */ - if ((tcon->ses) && !(tcon->ses->capabilities & + do { + current_read_size = min_t(uint, read_size - total_read, + rsize); + /* + * For windows me and 9x we do not want to request more + * than it negotiated since it will refuse the read + * then. + */ + if ((tcon->ses) && !(tcon->ses->capabilities & tcon->ses->server->vals->cap_large_files)) { - current_read_size = min_t(uint, current_read_size, - CIFSMaxBufSize); - } - rc = -EAGAIN; - while (rc == -EAGAIN) { + current_read_size = min_t(uint, + current_read_size, CIFSMaxBufSize); + } if (open_file->invalidHandle) { rc = cifs_reopen_file(open_file, true); if (rc != 0) @@ -3054,10 +3192,11 @@ io_parms.tcon = tcon; io_parms.offset = *offset; io_parms.length = current_read_size; - rc = server->ops->sync_read(xid, open_file, &io_parms, + rc = server->ops->sync_read(xid, &open_file->fid, &io_parms, &bytes_read, &cur_offset, &buf_type); - } + } while (rc == -EAGAIN); + if (rc || (bytes_read == 0)) { if (total_read) { break; @@ -3087,10 +3226,10 @@ return VM_FAULT_LOCKED; } -static struct vm_operations_struct cifs_file_vm_ops = { +static const struct vm_operations_struct cifs_file_vm_ops = { .fault = filemap_fault, + .map_pages = filemap_map_pages, .page_mkwrite = cifs_page_mkwrite, - .remap_pages = generic_file_remap_pages, }; int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) @@ -3100,8 +3239,8 @@ xid = get_xid(); - if (!CIFS_I(inode)->clientCanCacheRead) { - rc = cifs_invalidate_mapping(inode); + if (!CIFS_CACHE_READ(CIFS_I(inode))) { + rc = cifs_zap_mapping(inode); if (rc) return rc; } @@ -3135,25 +3274,30 @@ static void cifs_readv_complete(struct work_struct *work) { - unsigned int i; + unsigned int i, got_bytes; struct cifs_readdata *rdata = container_of(work, struct cifs_readdata, work); + got_bytes = rdata->got_bytes; for (i = 0; i < rdata->nr_pages; i++) { struct page *page = rdata->pages[i]; lru_cache_add_file(page); - if (rdata->result == 0) { + if (rdata->result == 0 || + (rdata->result == -EAGAIN && got_bytes)) { flush_dcache_page(page); SetPageUptodate(page); } unlock_page(page); - if (rdata->result == 0) + if (rdata->result == 0 || + (rdata->result == -EAGAIN && got_bytes)) cifs_readpage_to_fscache(rdata->mapping->host, page); + got_bytes -= min_t(unsigned int, PAGE_CACHE_SIZE, got_bytes); + page_cache_release(page); rdata->pages[i] = NULL; } @@ -3164,7 +3308,7 @@ cifs_readpages_read_into_pages(struct TCP_Server_Info *server, struct cifs_readdata *rdata, unsigned int len) { - int total_read = 0, result = 0; + int result = 0; unsigned int i; u64 eof; 
pgoff_t eof_index; @@ -3176,6 +3320,7 @@ eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0; cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index); + rdata->got_bytes = 0; rdata->tailsz = PAGE_CACHE_SIZE; for (i = 0; i < nr_pages; i++) { struct page *page = rdata->pages[i]; @@ -3230,10 +3375,70 @@ if (result < 0) break; - total_read += result; + rdata->got_bytes += result; } - return total_read > 0 && result != -EAGAIN ? total_read : result; + return rdata->got_bytes > 0 && result != -ECONNABORTED ? + rdata->got_bytes : result; +} + +static int +readpages_get_pages(struct address_space *mapping, struct list_head *page_list, + unsigned int rsize, struct list_head *tmplist, + unsigned int *nr_pages, loff_t *offset, unsigned int *bytes) +{ + struct page *page, *tpage; + unsigned int expected_index; + int rc; + gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL); + + INIT_LIST_HEAD(tmplist); + + page = list_entry(page_list->prev, struct page, lru); + + /* + * Lock the page and put it in the cache. Since no one else + * should have access to this page, we're safe to simply set + * PG_locked without checking it first. + */ + __set_page_locked(page); + rc = add_to_page_cache_locked(page, mapping, + page->index, gfp); + + /* give up if we can't stick it in the cache */ + if (rc) { + __clear_page_locked(page); + return rc; + } + + /* move first page to the tmplist */ + *offset = (loff_t)page->index << PAGE_CACHE_SHIFT; + *bytes = PAGE_CACHE_SIZE; + *nr_pages = 1; + list_move_tail(&page->lru, tmplist); + + /* now try and add more pages onto the request */ + expected_index = page->index + 1; + list_for_each_entry_safe_reverse(page, tpage, page_list, lru) { + /* discontinuity ? */ + if (page->index != expected_index) + break; + + /* would this page push the read over the rsize? */ + if (*bytes + PAGE_CACHE_SIZE > rsize) + break; + + __set_page_locked(page); + if (add_to_page_cache_locked(page, mapping, page->index, gfp)) { + __clear_page_locked(page); + break; + } + list_move_tail(&page->lru, tmplist); + (*bytes) += PAGE_CACHE_SIZE; + expected_index++; + (*nr_pages)++; + } + return rc; } static int cifs_readpages(struct file *file, struct address_space *mapping, @@ -3242,22 +3447,16 @@ int rc; struct list_head tmplist; struct cifsFileInfo *open_file = file->private_data; - struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); - unsigned int rsize = cifs_sb->rsize; + struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); + struct TCP_Server_Info *server; pid_t pid; /* - * Give up immediately if rsize is too small to read an entire page. - * The VFS will fall back to readpage. We should never reach this - * point however since we set ra_pages to 0 when the rsize is smaller - * than a cache page. - */ - if (unlikely(rsize < PAGE_CACHE_SIZE)) - return 0; - - /* * Reads as many pages as possible from fscache. Returns -ENOBUFS * immediately if the cookie is negative + * + * After this point, every page in the list might have PG_fscache set, + * so we will need to clean that up off of every page we don't use. */ rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list, &num_pages); @@ -3270,7 +3469,7 @@ pid = current->tgid; rc = 0; - INIT_LIST_HEAD(&tmplist); + server = tlink_tcon(open_file->tlink)->ses->server; cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n", __func__, file, mapping, num_pages); @@ -3287,58 +3486,35 @@ * the rdata->pages, then we want them in increasing order. 
@@ -3242,22 +3447,16 @@
 	int rc;
 	struct list_head tmplist;
 	struct cifsFileInfo *open_file = file->private_data;
-	struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
-	unsigned int rsize = cifs_sb->rsize;
+	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
+	struct TCP_Server_Info *server;
 	pid_t pid;
 
 	/*
-	 * Give up immediately if rsize is too small to read an entire page.
-	 * The VFS will fall back to readpage. We should never reach this
-	 * point however since we set ra_pages to 0 when the rsize is smaller
-	 * than a cache page.
-	 */
-	if (unlikely(rsize < PAGE_CACHE_SIZE))
-		return 0;
-
-	/*
 	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
 	 * immediately if the cookie is negative
+	 *
+	 * After this point, every page in the list might have PG_fscache set,
+	 * so we will need to clean that up off of every page we don't use.
 	 */
 	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
 					 &num_pages);
@@ -3270,7 +3469,7 @@
 	pid = current->tgid;
 
 	rc = 0;
-	INIT_LIST_HEAD(&tmplist);
+	server = tlink_tcon(open_file->tlink)->ses->server;
 
 	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
 		 __func__, file, mapping, num_pages);
@@ -3287,58 +3486,35 @@
 	 * the rdata->pages, then we want them in increasing order.
 	 */
 	while (!list_empty(page_list)) {
-		unsigned int i;
-		unsigned int bytes = PAGE_CACHE_SIZE;
-		unsigned int expected_index;
-		unsigned int nr_pages = 1;
+		unsigned int i, nr_pages, bytes, rsize;
 		loff_t offset;
 		struct page *page, *tpage;
 		struct cifs_readdata *rdata;
+		unsigned credits;
 
-		page = list_entry(page_list->prev, struct page, lru);
+		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
+						   &rsize, &credits);
+		if (rc)
+			break;
 
 		/*
-		 * Lock the page and put it in the cache. Since no one else
-		 * should have access to this page, we're safe to simply set
-		 * PG_locked without checking it first.
+		 * Give up immediately if rsize is too small to read an entire
+		 * page. The VFS will fall back to readpage. We should never
+		 * reach this point however since we set ra_pages to 0 when the
+		 * rsize is smaller than a cache page.
 		 */
-		__set_page_locked(page);
-		rc = add_to_page_cache_locked(page, mapping,
-					      page->index, GFP_KERNEL);
+		if (unlikely(rsize < PAGE_CACHE_SIZE)) {
+			add_credits_and_wake_if(server, credits, 0);
+			return 0;
+		}
 
-		/* give up if we can't stick it in the cache */
+		rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
+					 &nr_pages, &offset, &bytes);
 		if (rc) {
-			__clear_page_locked(page);
+			add_credits_and_wake_if(server, credits, 0);
 			break;
 		}
 
-		/* move first page to the tmplist */
-		offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
-		list_move_tail(&page->lru, &tmplist);
-
-		/* now try and add more pages onto the request */
-		expected_index = page->index + 1;
-		list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
-			/* discontinuity ? */
-			if (page->index != expected_index)
-				break;
-
-			/* would this page push the read over the rsize? */
-			if (bytes + PAGE_CACHE_SIZE > rsize)
-				break;
-
-			__set_page_locked(page);
-			if (add_to_page_cache_locked(page, mapping,
-						page->index, GFP_KERNEL)) {
-				__clear_page_locked(page);
-				break;
-			}
-			list_move_tail(&page->lru, &tmplist);
-			bytes += PAGE_CACHE_SIZE;
-			expected_index++;
-			nr_pages++;
-		}
-
 		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
 		if (!rdata) {
 			/* best to give up if we're out of mem */
@@ -3349,6 +3525,7 @@
 			page_cache_release(page);
 		}
 		rc = -ENOMEM;
+		add_credits_and_wake_if(server, credits, 0);
 		break;
 	}
 
@@ -3359,20 +3536,25 @@
 		rdata->pid = pid;
 		rdata->pagesz = PAGE_CACHE_SIZE;
 		rdata->read_into_pages = cifs_readpages_read_into_pages;
+		rdata->credits = credits;
 
 		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
 			list_del(&page->lru);
 			rdata->pages[rdata->nr_pages++] = page;
 		}
 
-		rc = cifs_retry_async_readv(rdata);
-		if (rc != 0) {
+		if (!rdata->cfile->invalidHandle ||
+		    !cifs_reopen_file(rdata->cfile, true))
+			rc = server->ops->async_readv(rdata);
+		if (rc) {
+			add_credits_and_wake_if(server, rdata->credits, 0);
 			for (i = 0; i < rdata->nr_pages; i++) {
 				page = rdata->pages[i];
 				lru_cache_add_file(page);
 				unlock_page(page);
 				page_cache_release(page);
 			}
+			/* Fall back to the readpage in error/reconnect cases */
 			kref_put(&rdata->refcount, cifs_readdata_release);
 			break;
 		}
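The credit handling threaded through the loop above follows one invariant: every credit taken by wait_mtu_credits() is either returned through add_credits_and_wake_if() on a failure path or handed to the rdata so the response path can return it later. A toy model of that discipline (hypothetical names, userspace, not the kernel API):

#include <stdio.h>

static int credits = 8;	/* stand-in for the server's sender credit pool */

static int take_credit(void)
{
	if (credits == 0)
		return -1;	/* would block in wait_mtu_credits() */
	credits--;
	return 0;
}

static void give_back(void)	/* stand-in for add_credits_and_wake_if() */
{
	credits++;
}

static int send_request(int simulate_error)
{
	if (take_credit())
		return -1;
	if (simulate_error) {
		give_back();	/* every error path returns its credit */
		return -1;
	}
	return 0;	/* on success the response path returns it later */
}

int main(void)
{
	send_request(1);
	printf("credits after failed send: %d\n", credits);	/* still 8 */
	return 0;
}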
@@ -3380,9 +3562,17 @@
 		kref_put(&rdata->refcount, cifs_readdata_release);
 	}
 
+	/* Any pages that have been shown to fscache but didn't get added to
+	 * the pagecache must be uncached before they get returned to the
+	 * allocator.
+	 */
+	cifs_fscache_readpages_cancel(mapping->host, page_list);
 	return rc;
 }
 
+/*
+ * cifs_readpage_worker must be called with the page pinned
+ */
 static int cifs_readpage_worker(struct file *file, struct page *page,
 	loff_t *poffset)
 {
@@ -3394,7 +3584,6 @@
 	if (rc == 0)
 		goto read_complete;
 
-	page_cache_get(page);
 	read_data = kmap(page);
 	/* for reads over a certain size could initiate async read ahead */
@@ -3421,7 +3610,7 @@
 
 io_error:
 	kunmap(page);
-	page_cache_release(page);
+	unlock_page(page);
 
 read_complete:
 	return rc;
@@ -3446,8 +3635,6 @@
 
 	rc = cifs_readpage_worker(file, page, &offset);
 
-	unlock_page(page);
-
 	free_xid(xid);
 	return rc;
 }
@@ -3455,15 +3642,17 @@
 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
 {
 	struct cifsFileInfo *open_file;
+	struct cifs_tcon *tcon =
+		cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));
 
-	spin_lock(&cifs_file_list_lock);
+	spin_lock(&tcon->open_file_lock);
 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
 		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
-			spin_unlock(&cifs_file_list_lock);
+			spin_unlock(&tcon->open_file_lock);
 			return 1;
 		}
 	}
-	spin_unlock(&cifs_file_list_lock);
+	spin_unlock(&tcon->open_file_lock);
 	return 0;
 }
 
@@ -3501,6 +3690,7 @@
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
+	int oncethru = 0;
 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
 	loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
 	loff_t page_start = pos & PAGE_MASK;
@@ -3510,6 +3700,7 @@
 	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
 
+start:
 	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page) {
 		rc = -ENOMEM;
@@ -3533,7 +3724,7 @@
 	 * is, when the page lies beyond the EOF, or straddles the EOF
 	 * and the write will cover all of the existing data.
 	 */
-	if (CIFS_I(mapping->host)->clientCanCacheRead) {
+	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
 		i_size = i_size_read(mapping->host);
 		if (page_start >= i_size ||
 		    (offset == 0 && (pos + len) >= i_size)) {
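The oncethru flag and start: label added above pair with the goto in the hunk that follows: the priming read of an existing page happens at most once, after which the page is released and looked up again. Reduced to a runnable skeleton (hypothetical helpers, not the kernel code):

#include <stdbool.h>
#include <stdio.h>

static bool page_uptodate = false;

/* stands in for cifs_readpage_worker() priming the page (hypothetical) */
static void prime_page(void)
{
	page_uptodate = true;
}

static void write_begin(bool may_read)
{
	int oncethru = 0;
start:
	/* grab_cache_page_write_begin() would run here on each pass */
	if (!page_uptodate && may_read && !oncethru) {
		prime_page();	/* read existing data exactly once */
		oncethru = 1;	/* guarantees the goto cannot loop */
		goto start;	/* drop the page and look it up again */
	}
	printf("proceed with write, uptodate=%d\n", page_uptodate);
}

int main(void)
{
	write_begin(true);
	return 0;
}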
@@ -3551,13 +3742,16 @@
 		}
 	}
 
-	if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
+	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
 		/*
 		 * might as well read a page, it is fast enough. If we get
 		 * an error, we don't need to return it. cifs_write_end will
 		 * do a sync write instead since PG_uptodate isn't set.
 		 */
 		cifs_readpage_worker(file, page, &page_start);
+		page_cache_release(page);
+		oncethru = 1;
+		goto start;
 	} else {
 		/* we could try using another file handle if there is one -
 		   but how would we lock it to prevent close of that handle
@@ -3577,11 +3771,12 @@
 	return cifs_fscache_release_page(page, gfp);
 }
 
-static void cifs_invalidate_page(struct page *page, unsigned long offset)
+static void cifs_invalidate_page(struct page *page, unsigned int offset,
+				 unsigned int length)
 {
 	struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
 
-	if (offset == 0)
+	if (offset == 0 && length == PAGE_CACHE_SIZE)
 		cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
 }
 
@@ -3610,28 +3805,35 @@
 {
 	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
 						  oplock_break);
-	struct inode *inode = cfile->dentry->d_inode;
+	struct inode *inode = d_inode(cfile->dentry);
 	struct cifsInodeInfo *cinode = CIFS_I(inode);
 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
+	struct TCP_Server_Info *server = tcon->ses->server;
 	int rc = 0;
 
-	if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead &&
+	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
+			TASK_UNINTERRUPTIBLE);
+
+	server->ops->downgrade_oplock(server, cinode,
+		test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
+
+	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
 	    cifs_has_mand_locks(cinode)) {
 		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
 			 inode);
-		cinode->clientCanCacheRead = false;
+		cinode->oplock = 0;
 	}
 
 	if (inode && S_ISREG(inode->i_mode)) {
-		if (cinode->clientCanCacheRead)
+		if (CIFS_CACHE_READ(cinode))
 			break_lease(inode, O_RDONLY);
 		else
 			break_lease(inode, O_WRONLY);
 		rc = filemap_fdatawrite(inode->i_mapping);
-		if (cinode->clientCanCacheRead == 0) {
+		if (!CIFS_CACHE_READ(cinode)) {
 			rc = filemap_fdatawait(inode->i_mapping);
 			mapping_set_error(inode->i_mapping, rc);
-			cifs_invalidate_mapping(inode);
+			cifs_zap_mapping(inode);
 		}
 		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
 	}
@@ -3651,8 +3853,29 @@
 						  cinode);
 		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
 	}
+	cifs_done_oplock_break(cinode);
+}
+
+/*
+ * The presence of cifs_direct_io() in the address space ops vector
+ * allows open() O_DIRECT flags which would have failed otherwise.
+ *
+ * In the non-cached mode (mount with cache=none), we shunt off direct
+ * read and write requests so this method should never be called.
+ *
+ * Direct IO is not yet supported in the cached mode.
+ */
+static ssize_t
+cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
+{
+	/*
+	 * FIXME
+	 * Eventually need to support direct IO for non forcedirectio mounts
+	 */
+	return -EINVAL;
 }
 
 const struct address_space_operations cifs_addr_ops = {
 	.readpage = cifs_readpage,
 	.readpages = cifs_readpages,
@@ -3662,6 +3885,7 @@
 	.write_end = cifs_write_end,
 	.set_page_dirty = __set_page_dirty_nobuffers,
 	.releasepage = cifs_release_page,
+	.direct_IO = cifs_direct_io,
 	.invalidatepage = cifs_invalidate_page,
 	.launder_page = cifs_launder_page,
 };
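As the comment above says, merely registering a .direct_IO method, even one that only returns -EINVAL, lets the VFS accept O_DIRECT opens; with cache=none the read and write paths are shunted elsewhere and never reach the stub. The observable effect from userspace (illustrative program, hypothetical mount path):

#define _GNU_SOURCE	/* for O_DIRECT */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* hypothetical mount point; any cifs mount will do */
	int fd = open("/mnt/cifs/file.dat", O_RDONLY | O_DIRECT);

	if (fd < 0) {
		perror("open(O_DIRECT)");
		return 1;
	}
	puts("O_DIRECT open accepted");
	close(fd);
	return 0;
}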