--- zzzz-none-000/linux-3.10.107/fs/block_dev.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/fs/block_dev.c 2021-02-04 17:41:59.000000000 +0000 @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -27,7 +28,7 @@ #include #include #include -#include +#include #include #include "internal.h" @@ -43,39 +44,30 @@ return container_of(inode, struct bdev_inode, vfs_inode); } -inline struct block_device *I_BDEV(struct inode *inode) +struct block_device *I_BDEV(struct inode *inode) { return &BDEV_I(inode)->bdev; } EXPORT_SYMBOL(I_BDEV); -/* - * Move the inode from its current bdi to a new bdi. If the inode is dirty we - * need to move it onto the dirty list of @dst so that the inode is always on - * the right list. - */ -static void bdev_inode_switch_bdi(struct inode *inode, - struct backing_dev_info *dst) +static void bdev_write_inode(struct block_device *bdev) { - struct backing_dev_info *old = inode->i_data.backing_dev_info; - bool wakeup_bdi = false; + struct inode *inode = bdev->bd_inode; + int ret; - if (unlikely(dst == old)) /* deadlock avoidance */ - return; - bdi_lock_two(&old->wb, &dst->wb); spin_lock(&inode->i_lock); - inode->i_data.backing_dev_info = dst; - if (inode->i_state & I_DIRTY) { - if (bdi_cap_writeback_dirty(dst) && !wb_has_dirty_io(&dst->wb)) - wakeup_bdi = true; - list_move(&inode->i_wb_list, &dst->wb.b_dirty); + while (inode->i_state & I_DIRTY) { + spin_unlock(&inode->i_lock); + ret = write_inode_now(inode, true); + if (ret) { + char name[BDEVNAME_SIZE]; + pr_warn_ratelimited("VFS: Dirty inode writeback failed " + "for block device %s (err=%d).\n", + bdevname(bdev, name), ret); + } + spin_lock(&inode->i_lock); } spin_unlock(&inode->i_lock); - spin_unlock(&old->wb.list_lock); - spin_unlock(&dst->wb.list_lock); - - if (wakeup_bdi) - bdi_wakeup_thread_delayed(dst); } /* Kill _all_ buffers and pagecache , dirty or not.. */ @@ -83,7 +75,7 @@ { struct address_space *mapping = bdev->bd_inode->i_mapping; - if (mapping->nrpages == 0) + if (mapping->nrpages == 0 && mapping->nrshadows == 0) return; invalidate_bh_lrus(); @@ -165,14 +157,17 @@ } static ssize_t -blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, - loff_t offset, unsigned long nr_segs) +blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; - return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset, - nr_segs, blkdev_get_block, NULL, NULL, 0); + if (IS_DAX(inode)) + return dax_do_io(iocb, inode, iter, offset, blkdev_get_block, + NULL, DIO_SKIP_DIO_COUNT); + return __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter, offset, + blkdev_get_block, NULL, NULL, + DIO_SKIP_DIO_COUNT); } int __sync_blockdev(struct block_device *bdev, int wait) @@ -244,7 +239,10 @@ sb = get_active_super(bdev); if (!sb) goto out; - error = freeze_super(sb); + if (sb->s_op->freeze_super) + error = sb->s_op->freeze_super(sb); + else + error = freeze_super(sb); if (error) { deactivate_super(sb); bdev->bd_fsfreeze_count--; @@ -281,7 +279,10 @@ if (!sb) goto out; - error = thaw_super(sb); + if (sb->s_op->thaw_super) + error = sb->s_op->thaw_super(sb); + else + error = thaw_super(sb); if (error) { bdev->bd_fsfreeze_count++; mutex_unlock(&bdev->bd_fsfreeze_mutex); @@ -303,6 +304,12 @@ return block_read_full_page(page, blkdev_get_block); } +static int blkdev_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + return mpage_readpages(mapping, pages, nr_pages, blkdev_get_block); +} + static int blkdev_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) @@ -332,31 +339,10 @@ static loff_t block_llseek(struct file *file, loff_t offset, int whence) { struct inode *bd_inode = file->f_mapping->host; - loff_t size; loff_t retval; mutex_lock(&bd_inode->i_mutex); - size = i_size_read(bd_inode); - - retval = -EINVAL; - switch (whence) { - case SEEK_END: - offset += size; - break; - case SEEK_CUR: - offset += file->f_pos; - case SEEK_SET: - break; - default: - goto out; - } - if (offset >= 0 && offset <= size) { - if (offset != file->f_pos) { - file->f_pos = offset; - } - retval = offset; - } -out: + retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode)); mutex_unlock(&bd_inode->i_mutex); return retval; } @@ -384,6 +370,129 @@ } EXPORT_SYMBOL(blkdev_fsync); +/** + * bdev_read_page() - Start reading a page from a block device + * @bdev: The device to read the page from + * @sector: The offset on the device to read the page to (need not be aligned) + * @page: The page to read + * + * On entry, the page should be locked. It will be unlocked when the page + * has been read. If the block driver implements rw_page synchronously, + * that will be true on exit from this function, but it need not be. + * + * Errors returned by this function are usually "soft", eg out of memory, or + * queue full; callers should try a different route to read this page rather + * than propagate an error back up the stack. + * + * Return: negative errno if an error occurs, 0 if submission was successful. + */ +int bdev_read_page(struct block_device *bdev, sector_t sector, + struct page *page) +{ + const struct block_device_operations *ops = bdev->bd_disk->fops; + int result = -EOPNOTSUPP; + + if (!ops->rw_page || bdev_get_integrity(bdev)) + return result; + + result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL); + if (result) + return result; + result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ); + blk_queue_exit(bdev->bd_queue); + return result; +} +EXPORT_SYMBOL_GPL(bdev_read_page); + +/** + * bdev_write_page() - Start writing a page to a block device + * @bdev: The device to write the page to + * @sector: The offset on the device to write the page to (need not be aligned) + * @page: The page to write + * @wbc: The writeback_control for the write + * + * On entry, the page should be locked and not currently under writeback. + * On exit, if the write started successfully, the page will be unlocked and + * under writeback. If the write failed already (eg the driver failed to + * queue the page to the device), the page will still be locked. If the + * caller is a ->writepage implementation, it will need to unlock the page. + * + * Errors returned by this function are usually "soft", eg out of memory, or + * queue full; callers should try a different route to write this page rather + * than propagate an error back up the stack. + * + * Return: negative errno if an error occurs, 0 if submission was successful. + */ +int bdev_write_page(struct block_device *bdev, sector_t sector, + struct page *page, struct writeback_control *wbc) +{ + int result; + int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE; + const struct block_device_operations *ops = bdev->bd_disk->fops; + + if (!ops->rw_page || bdev_get_integrity(bdev)) + return -EOPNOTSUPP; + result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL); + if (result) + return result; + + set_page_writeback(page); + result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, rw); + if (result) + end_page_writeback(page); + else + unlock_page(page); + blk_queue_exit(bdev->bd_queue); + return result; +} +EXPORT_SYMBOL_GPL(bdev_write_page); + +/** + * bdev_direct_access() - Get the address for directly-accessibly memory + * @bdev: The device containing the memory + * @sector: The offset within the device + * @addr: Where to put the address of the memory + * @pfn: The Page Frame Number for the memory + * @size: The number of bytes requested + * + * If a block device is made up of directly addressable memory, this function + * will tell the caller the PFN and the address of the memory. The address + * may be directly dereferenced within the kernel without the need to call + * ioremap(), kmap() or similar. The PFN is suitable for inserting into + * page tables. + * + * Return: negative errno if an error occurs, otherwise the number of bytes + * accessible at this address. + */ +long bdev_direct_access(struct block_device *bdev, sector_t sector, + void __pmem **addr, unsigned long *pfn, long size) +{ + long avail; + const struct block_device_operations *ops = bdev->bd_disk->fops; + + /* + * The device driver is allowed to sleep, in order to make the + * memory directly accessible. + */ + might_sleep(); + + if (size < 0) + return size; + if (!ops->direct_access) + return -EOPNOTSUPP; + if ((sector + DIV_ROUND_UP(size, 512)) > + part_nr_sects_read(bdev->bd_part)) + return -ERANGE; + sector += get_start_sect(bdev); + if (sector % (PAGE_SIZE / 512)) + return -EINVAL; + avail = ops->direct_access(bdev, sector, addr, pfn); + if (!avail) + return -ERANGE; + return min(avail, size); +} +EXPORT_SYMBOL_GPL(bdev_direct_access); + /* * pseudo-fs */ @@ -440,7 +549,7 @@ { struct block_device *bdev = &BDEV_I(inode)->bdev; struct list_head *p; - truncate_inode_pages(&inode->i_data, 0); + truncate_inode_pages_final(&inode->i_data); invalidate_inode_buffers(inode); /* is it needed here? */ clear_inode(inode); spin_lock(&bdev_lock); @@ -471,7 +580,8 @@ .kill_sb = kill_anon_super, }; -static struct super_block *blockdev_superblock __read_mostly; +struct super_block *blockdev_superblock __read_mostly; +EXPORT_SYMBOL_GPL(blockdev_superblock); void __init bdev_cache_init(void) { @@ -539,7 +649,6 @@ inode->i_bdev = bdev; inode->i_data.a_ops = &def_blk_aops; mapping_set_gfp_mask(&inode->i_data, GFP_USER); - inode->i_data.backing_dev_info = &default_backing_dev_info; spin_lock(&bdev_lock); list_add(&bdev->bd_list, &all_bdevs); spin_unlock(&bdev_lock); @@ -613,11 +722,6 @@ return bdev; } -static inline int sb_is_blkdev_sb(struct super_block *sb) -{ - return sb == blockdev_superblock; -} - /* Call when you free inode */ void bd_forget(struct inode *inode) @@ -994,7 +1098,7 @@ if (disk->fops->revalidate_disk) ret = disk->fops->revalidate_disk(disk); - + blk_integrity_revalidate(disk); bdev = bdget_disk(disk, 0); if (!bdev) return ret; @@ -1099,9 +1203,8 @@ bdev->bd_disk = disk; bdev->bd_queue = disk->queue; bdev->bd_contains = bdev; + bdev->bd_inode->i_flags = disk->fops->direct_access ? S_DAX : 0; if (!partno) { - struct backing_dev_info *bdi; - ret = -ENXIO; bdev->bd_part = disk_get_part(disk, partno); if (!bdev->bd_part) @@ -1127,13 +1230,8 @@ } } - if (!ret) { + if (!ret) bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); - bdi = blk_get_backing_dev_info(bdev); - if (bdi == NULL) - bdi = &default_backing_dev_info; - bdev_inode_switch_bdi(bdev->bd_inode, bdi); - } /* * If the device is invalidated, rescan partition @@ -1160,8 +1258,6 @@ if (ret) goto out_clear; bdev->bd_contains = whole; - bdev_inode_switch_bdi(bdev->bd_inode, - whole->bd_inode->i_data.backing_dev_info); bdev->bd_part = disk_get_part(disk, partno); if (!(disk->flags & GENHD_FL_UP) || !bdev->bd_part || !bdev->bd_part->nr_sects) { @@ -1169,6 +1265,13 @@ goto out_clear; } bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); + /* + * If the partition is not aligned on a page + * boundary, we can't do dax I/O to it. + */ + if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512)) || + (bdev->bd_part->nr_sects % (PAGE_SIZE / 512))) + bdev->bd_inode->i_flags &= ~S_DAX; } } else { if (bdev->bd_contains == bdev) { @@ -1201,7 +1304,6 @@ bdev->bd_disk = NULL; bdev->bd_part = NULL; bdev->bd_queue = NULL; - bdev_inode_switch_bdi(bdev->bd_inode, &default_backing_dev_info); if (bdev != bdev->bd_contains) __blkdev_put(bdev->bd_contains, mode, 1); bdev->bd_contains = NULL; @@ -1421,11 +1523,14 @@ WARN_ON_ONCE(bdev->bd_holders); sync_blockdev(bdev); kill_bdev(bdev); - /* ->release can cause the old bdi to disappear, - * so must switch it out first + + bdev_write_inode(bdev); + /* + * Detaching bdev inode from its wb in __destroy_inode() + * is too late: the queue which embeds its bdi (along with + * root wb) can be gone as soon as we put_disk() below. */ - bdev_inode_switch_bdi(bdev->bd_inode, - &default_backing_dev_info); + inode_detach_wb(bdev->bd_inode); } if (bdev->bd_contains == bdev) { if (disk->fops->release) @@ -1529,44 +1634,53 @@ * Does not take i_mutex for the write and thus is not for general purpose * use. */ -ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; + struct inode *bd_inode = file->f_mapping->host; + loff_t size = i_size_read(bd_inode); struct blk_plug plug; ssize_t ret; - BUG_ON(iocb->ki_pos != pos); + if (bdev_read_only(I_BDEV(bd_inode))) + return -EPERM; + + if (!iov_iter_count(from)) + return 0; + + if (iocb->ki_pos >= size) + return -ENOSPC; + + iov_iter_truncate(from, size - iocb->ki_pos); blk_start_plug(&plug); - ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); - if (ret > 0 || ret == -EIOCBQUEUED) { + ret = __generic_file_write_iter(iocb, from); + if (ret > 0) { ssize_t err; - - err = generic_write_sync(file, pos, ret); - if (err < 0 && ret > 0) + err = generic_write_sync(file, iocb->ki_pos - ret, ret); + if (err < 0) ret = err; } blk_finish_plug(&plug); return ret; } -EXPORT_SYMBOL_GPL(blkdev_aio_write); +EXPORT_SYMBOL_GPL(blkdev_write_iter); -static ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; struct inode *bd_inode = file->f_mapping->host; loff_t size = i_size_read(bd_inode); + loff_t pos = iocb->ki_pos; if (pos >= size) return 0; size -= pos; - if (size < iocb->ki_left) - nr_segs = iov_shorten((struct iovec *)iov, nr_segs, size); - return generic_file_aio_read(iocb, iov, nr_segs, pos); + iov_iter_truncate(to, size); + return generic_file_read_iter(iocb, to); } +EXPORT_SYMBOL_GPL(blkdev_read_iter); /* * Try to release a page associated with block device when the system @@ -1584,22 +1698,22 @@ static const struct address_space_operations def_blk_aops = { .readpage = blkdev_readpage, + .readpages = blkdev_readpages, .writepage = blkdev_writepage, .write_begin = blkdev_write_begin, .write_end = blkdev_write_end, .writepages = generic_writepages, .releasepage = blkdev_releasepage, .direct_IO = blkdev_direct_IO, + .is_dirty_writeback = buffer_check_dirty_writeback, }; const struct file_operations def_blk_fops = { .open = blkdev_open, .release = blkdev_close, .llseek = block_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = blkdev_aio_read, - .aio_write = blkdev_aio_write, + .read_iter = blkdev_read_iter, + .write_iter = blkdev_write_iter, .mmap = generic_file_mmap, .fsync = blkdev_fsync, .unlocked_ioctl = block_ioctl, @@ -1607,7 +1721,7 @@ .compat_ioctl = compat_blkdev_ioctl, #endif .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, }; int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg) @@ -1644,7 +1758,7 @@ if (error) return ERR_PTR(error); - inode = path.dentry->d_inode; + inode = d_backing_inode(path.dentry); error = -ENOTBLK; if (!S_ISBLK(inode->i_mode)) goto fail; @@ -1689,7 +1803,7 @@ { struct inode *inode, *old_inode = NULL; - spin_lock(&inode_sb_list_lock); + spin_lock(&blockdev_superblock->s_inode_list_lock); list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) { struct address_space *mapping = inode->i_mapping; struct block_device *bdev; @@ -1702,13 +1816,13 @@ } __iget(inode); spin_unlock(&inode->i_lock); - spin_unlock(&inode_sb_list_lock); + spin_unlock(&blockdev_superblock->s_inode_list_lock); /* * We hold a reference to 'inode' so it couldn't have been * removed from s_inodes list while we dropped the - * inode_sb_list_lock. We cannot iput the inode now as we can + * s_inode_list_lock We cannot iput the inode now as we can * be holding the last reference and we cannot iput it under - * inode_sb_list_lock. So we keep the reference and iput it + * s_inode_list_lock. So we keep the reference and iput it * later. */ iput(old_inode); @@ -1720,8 +1834,8 @@ func(bdev, arg); mutex_unlock(&bdev->bd_mutex); - spin_lock(&inode_sb_list_lock); + spin_lock(&blockdev_superblock->s_inode_list_lock); } - spin_unlock(&inode_sb_list_lock); + spin_unlock(&blockdev_superblock->s_inode_list_lock); iput(old_inode); }