--- zzzz-none-000/linux-3.10.107/fs/ext4/super.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/fs/ext4/super.c 2021-02-04 17:41:59.000000000 +0000 @@ -21,10 +21,10 @@ #include #include #include -#include #include #include #include +#include #include #include #include @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include @@ -54,11 +53,10 @@ #define CREATE_TRACE_POINTS #include -static struct proc_dir_entry *ext4_proc_root; -static struct kset *ext4_kset; static struct ext4_lazy_init *ext4_li_info; static struct mutex ext4_li_mtx; -static struct ext4_features *ext4_feat; +static int ext4_mballoc_ready; +static struct ratelimit_state ext4_mount_msg_ratelimit; static int ext4_load_journal(struct super_block *, struct ext4_super_block *, unsigned long journal_devnum); @@ -81,9 +79,8 @@ static void ext4_destroy_lazyinit_thread(void); static void ext4_unregister_li_request(struct super_block *sb); static void ext4_clear_request_list(void); -static int ext4_reserve_clusters(struct ext4_sb_info *, ext4_fsblk_t); -#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) +#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2) static struct file_system_type ext2_fs_type = { .owner = THIS_MODULE, .name = "ext2", @@ -99,7 +96,6 @@ #endif -#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) static struct file_system_type ext3_fs_type = { .owner = THIS_MODULE, .name = "ext3", @@ -110,15 +106,11 @@ MODULE_ALIAS_FS("ext3"); MODULE_ALIAS("ext3"); #define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type) -#else -#define IS_EXT3_SB(sb) (0) -#endif static int ext4_verify_csum_type(struct super_block *sb, struct ext4_super_block *es) { - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_feature_metadata_csum(sb)) return 1; return es->s_checksum_type == EXT4_CRC32C_CHKSUM; @@ -136,11 +128,10 @@ return cpu_to_le32(csum); } -int ext4_superblock_csum_verify(struct super_block *sb, - struct ext4_super_block *es) +static int ext4_superblock_csum_verify(struct super_block *sb, + struct ext4_super_block *es) { - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(sb)) return 1; return es->s_checksum == ext4_superblock_csum(sb, es); @@ -150,8 +141,7 @@ { struct ext4_super_block *es = EXT4_SB(sb)->s_es; - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_metadata_csum(sb)) return; es->s_checksum = ext4_superblock_csum(sb, es); @@ -161,7 +151,7 @@ { void *ret; - ret = kmalloc(size, flags); + ret = kmalloc(size, flags | __GFP_NOWARN); if (!ret) ret = __vmalloc(size, flags, PAGE_KERNEL); return ret; @@ -171,21 +161,12 @@ { void *ret; - ret = kzalloc(size, flags); + ret = kzalloc(size, flags | __GFP_NOWARN); if (!ret) ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL); return ret; } -void ext4_kvfree(void *ptr) -{ - if (is_vmalloc_addr(ptr)) - vfree(ptr); - else - kfree(ptr); - -} - ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, struct ext4_group_desc *bg) { @@ -305,6 +286,8 @@ struct ext4_super_block *es = EXT4_SB(sb)->s_es; EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; + if (bdev_read_only(sb->s_bdev)) + return; es->s_state |= cpu_to_le16(EXT4_ERROR_FS); es->s_last_error_time = cpu_to_le32(get_seconds()); strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func)); @@ -344,7 +327,7 @@ static int block_device_ejected(struct super_block *sb) { struct inode *bd_inode = sb->s_bdev->bd_inode; - struct backing_dev_info *bdi = bd_inode->i_mapping->backing_dev_info; + struct backing_dev_info *bdi = inode_to_bdi(bd_inode); return bdi->dev == NULL; } @@ -398,6 +381,11 @@ } if (test_opt(sb, ERRORS_RO)) { ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); + /* + * Make sure updated value of ->s_mount_flags will be visible + * before ->s_flags update + */ + smp_wmb(); sb->s_flags |= MS_RDONLY; } if (test_opt(sb, ERRORS_PANIC)) { @@ -409,26 +397,32 @@ } } +#define ext4_error_ratelimit(sb) \ + ___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state), \ + "EXT4-fs error") + void __ext4_error(struct super_block *sb, const char *function, unsigned int line, const char *fmt, ...) { struct va_format vaf; va_list args; - va_start(args, fmt); - vaf.fmt = fmt; - vaf.va = &args; - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n", - sb->s_id, function, line, current->comm, &vaf); - va_end(args); + if (ext4_error_ratelimit(sb)) { + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + printk(KERN_CRIT + "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n", + sb->s_id, function, line, current->comm, &vaf); + va_end(args); + } save_error_info(sb, function, line); - ext4_handle_error(sb); } -void ext4_error_inode(struct inode *inode, const char *function, - unsigned int line, ext4_fsblk_t block, - const char *fmt, ...) +void __ext4_error_inode(struct inode *inode, const char *function, + unsigned int line, ext4_fsblk_t block, + const char *fmt, ...) { va_list args; struct va_format vaf; @@ -436,28 +430,29 @@ es->s_last_error_ino = cpu_to_le32(inode->i_ino); es->s_last_error_block = cpu_to_le64(block); + if (ext4_error_ratelimit(inode->i_sb)) { + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + if (block) + printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " + "inode #%lu: block %llu: comm %s: %pV\n", + inode->i_sb->s_id, function, line, inode->i_ino, + block, current->comm, &vaf); + else + printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " + "inode #%lu: comm %s: %pV\n", + inode->i_sb->s_id, function, line, inode->i_ino, + current->comm, &vaf); + va_end(args); + } save_error_info(inode->i_sb, function, line); - va_start(args, fmt); - vaf.fmt = fmt; - vaf.va = &args; - if (block) - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " - "inode #%lu: block %llu: comm %s: %pV\n", - inode->i_sb->s_id, function, line, inode->i_ino, - block, current->comm, &vaf); - else - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " - "inode #%lu: comm %s: %pV\n", - inode->i_sb->s_id, function, line, inode->i_ino, - current->comm, &vaf); - va_end(args); - ext4_handle_error(inode->i_sb); } -void ext4_error_file(struct file *file, const char *function, - unsigned int line, ext4_fsblk_t block, - const char *fmt, ...) +void __ext4_error_file(struct file *file, const char *function, + unsigned int line, ext4_fsblk_t block, + const char *fmt, ...) { va_list args; struct va_format vaf; @@ -467,27 +462,28 @@ es = EXT4_SB(inode->i_sb)->s_es; es->s_last_error_ino = cpu_to_le32(inode->i_ino); + if (ext4_error_ratelimit(inode->i_sb)) { + path = file_path(file, pathname, sizeof(pathname)); + if (IS_ERR(path)) + path = "(unknown)"; + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + if (block) + printk(KERN_CRIT + "EXT4-fs error (device %s): %s:%d: inode #%lu: " + "block %llu: comm %s: path %s: %pV\n", + inode->i_sb->s_id, function, line, inode->i_ino, + block, current->comm, path, &vaf); + else + printk(KERN_CRIT + "EXT4-fs error (device %s): %s:%d: inode #%lu: " + "comm %s: path %s: %pV\n", + inode->i_sb->s_id, function, line, inode->i_ino, + current->comm, path, &vaf); + va_end(args); + } save_error_info(inode->i_sb, function, line); - path = d_path(&(file->f_path), pathname, sizeof(pathname)); - if (IS_ERR(path)) - path = "(unknown)"; - va_start(args, fmt); - vaf.fmt = fmt; - vaf.va = &args; - if (block) - printk(KERN_CRIT - "EXT4-fs error (device %s): %s:%d: inode #%lu: " - "block %llu: comm %s: path %s: %pV\n", - inode->i_sb->s_id, function, line, inode->i_ino, - block, current->comm, path, &vaf); - else - printk(KERN_CRIT - "EXT4-fs error (device %s): %s:%d: inode #%lu: " - "comm %s: path %s: %pV\n", - inode->i_sb->s_id, function, line, inode->i_ino, - current->comm, path, &vaf); - va_end(args); - ext4_handle_error(inode->i_sb); } @@ -497,6 +493,12 @@ char *errstr = NULL; switch (errno) { + case -EFSCORRUPTED: + errstr = "Corrupt filesystem"; + break; + case -EFSBADCRC: + errstr = "Filesystem failed CRC"; + break; case -EIO: errstr = "IO failure"; break; @@ -541,11 +543,13 @@ (sb->s_flags & MS_RDONLY)) return; - errstr = ext4_decode_error(sb, errno, nbuf); - printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n", - sb->s_id, function, line, errstr); - save_error_info(sb, function, line); + if (ext4_error_ratelimit(sb)) { + errstr = ext4_decode_error(sb, errno, nbuf); + printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n", + sb->s_id, function, line, errstr); + } + save_error_info(sb, function, line); ext4_handle_error(sb); } @@ -574,8 +578,13 @@ if ((sb->s_flags & MS_RDONLY) == 0) { ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); - sb->s_flags |= MS_RDONLY; EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; + /* + * Make sure updated value of ->s_mount_flags will be visible + * before ->s_flags update + */ + smp_wmb(); + sb->s_flags |= MS_RDONLY; if (EXT4_SB(sb)->s_journal) jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); save_error_info(sb, function, line); @@ -588,11 +597,15 @@ } } -void ext4_msg(struct super_block *sb, const char *prefix, const char *fmt, ...) +void __ext4_msg(struct super_block *sb, + const char *prefix, const char *fmt, ...) { struct va_format vaf; va_list args; + if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs")) + return; + va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; @@ -600,12 +613,19 @@ va_end(args); } +#define ext4_warning_ratelimit(sb) \ + ___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state), \ + "EXT4-fs warning") + void __ext4_warning(struct super_block *sb, const char *function, unsigned int line, const char *fmt, ...) { struct va_format vaf; va_list args; + if (!ext4_warning_ratelimit(sb)) + return; + va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; @@ -614,6 +634,24 @@ va_end(args); } +void __ext4_warning_inode(const struct inode *inode, const char *function, + unsigned int line, const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + if (!ext4_warning_ratelimit(inode->i_sb)) + return; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: " + "inode #%lu: comm %s: %pV\n", inode->i_sb->s_id, + function, line, inode->i_ino, current->comm, &vaf); + va_end(args); +} + void __ext4_grp_locked_error(const char *function, unsigned int line, struct super_block *sb, ext4_group_t grp, unsigned long ino, ext4_fsblk_t block, @@ -629,18 +667,20 @@ es->s_last_error_block = cpu_to_le64(block); __save_error_info(sb, function, line); - va_start(args, fmt); - - vaf.fmt = fmt; - vaf.va = &args; - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ", - sb->s_id, function, line, grp); - if (ino) - printk(KERN_CONT "inode %lu: ", ino); - if (block) - printk(KERN_CONT "block %llu:", (unsigned long long) block); - printk(KERN_CONT "%pV\n", &vaf); - va_end(args); + if (ext4_error_ratelimit(sb)) { + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ", + sb->s_id, function, line, grp); + if (ino) + printk(KERN_CONT "inode %lu: ", ino); + if (block) + printk(KERN_CONT "block %llu:", + (unsigned long long) block); + printk(KERN_CONT "%pV\n", &vaf); + va_end(args); + } if (test_opt(sb, ERRORS_CONT)) { ext4_commit_super(sb, 0); @@ -759,8 +799,8 @@ ext4_unregister_li_request(sb); dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); - flush_workqueue(sbi->dio_unwritten_wq); - destroy_workqueue(sbi->dio_unwritten_wq); + flush_workqueue(sbi->rsv_conversion_wq); + destroy_workqueue(sbi->rsv_conversion_wq); if (sbi->s_journal) { aborted = is_journal_aborted(sbi->s_journal); @@ -770,38 +810,32 @@ ext4_abort(sb, "Couldn't clean up the journal"); } - ext4_es_unregister_shrinker(sb); - del_timer(&sbi->s_err_report); + ext4_unregister_sysfs(sb); + ext4_es_unregister_shrinker(sbi); + del_timer_sync(&sbi->s_err_report); ext4_release_system_zone(sb); ext4_mb_release(sb); ext4_ext_release(sb); ext4_xattr_put_super(sb); if (!(sb->s_flags & MS_RDONLY) && !aborted) { - EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + ext4_clear_feature_journal_needs_recovery(sb); es->s_state = cpu_to_le16(sbi->s_mount_state); } if (!(sb->s_flags & MS_RDONLY)) ext4_commit_super(sb, 1); - if (sbi->s_proc) { - remove_proc_entry("options", sbi->s_proc); - remove_proc_entry(sb->s_id, ext4_proc_root); - } - kobject_del(&sbi->s_kobj); - for (i = 0; i < sbi->s_gdb_count; i++) brelse(sbi->s_group_desc[i]); - ext4_kvfree(sbi->s_group_desc); - ext4_kvfree(sbi->s_flex_groups); + kvfree(sbi->s_group_desc); + kvfree(sbi->s_flex_groups); percpu_counter_destroy(&sbi->s_freeclusters_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); percpu_counter_destroy(&sbi->s_dirtyclusters_counter); - percpu_counter_destroy(&sbi->s_extent_cache_cnt); brelse(sbi->s_sbh); #ifdef CONFIG_QUOTA - for (i = 0; i < MAXQUOTAS; i++) + for (i = 0; i < EXT4_MAXQUOTAS; i++) kfree(sbi->s_qf_names[i]); #endif @@ -825,6 +859,10 @@ invalidate_bdev(sbi->journal_bdev); ext4_blkdev_remove(sbi); } + if (sbi->s_mb_cache) { + ext4_xattr_destroy_cache(sbi->s_mb_cache); + sbi->s_mb_cache = NULL; + } if (sbi->s_mmp_tsk) kthread_stop(sbi->s_mmp_tsk); sb->s_fs_info = NULL; @@ -854,12 +892,15 @@ return NULL; ei->vfs_inode.i_version = 1; + spin_lock_init(&ei->i_raw_lock); INIT_LIST_HEAD(&ei->i_prealloc_list); spin_lock_init(&ei->i_prealloc_lock); ext4_es_init_tree(&ei->i_es_tree); rwlock_init(&ei->i_es_lock); - INIT_LIST_HEAD(&ei->i_es_lru); - ei->i_es_lru_nr = 0; + INIT_LIST_HEAD(&ei->i_es_list); + ei->i_es_all_nr = 0; + ei->i_es_shk_nr = 0; + ei->i_es_shrink_lblk = 0; ei->i_reserved_data_blocks = 0; ei->i_reserved_meta_blocks = 0; ei->i_allocated_meta_blocks = 0; @@ -868,16 +909,19 @@ spin_lock_init(&(ei->i_block_reservation_lock)); #ifdef CONFIG_QUOTA ei->i_reserved_quota = 0; + memset(&ei->i_dquot, 0, sizeof(ei->i_dquot)); #endif ei->jinode = NULL; - INIT_LIST_HEAD(&ei->i_completed_io_list); + INIT_LIST_HEAD(&ei->i_rsv_conversion_list); spin_lock_init(&ei->i_completed_io_lock); ei->i_sync_tid = 0; ei->i_datasync_tid = 0; atomic_set(&ei->i_ioend_count, 0); atomic_set(&ei->i_unwritten, 0); - INIT_WORK(&ei->i_unwritten_work, ext4_end_io_work); - + INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work); +#ifdef CONFIG_EXT4_FS_ENCRYPTION + ei->i_crypt_info = NULL; +#endif return &ei->vfs_inode; } @@ -916,10 +960,11 @@ INIT_LIST_HEAD(&ei->i_orphan); init_rwsem(&ei->xattr_sem); init_rwsem(&ei->i_data_sem); + init_rwsem(&ei->i_mmap_sem); inode_init_once(&ei->vfs_inode); } -static int init_inodecache(void) +static int __init init_inodecache(void) { ext4_inode_cachep = kmem_cache_create("ext4_inode_cache", sizeof(struct ext4_inode_info), @@ -948,13 +993,16 @@ dquot_drop(inode); ext4_discard_preallocations(inode); ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); - ext4_es_lru_del(inode); if (EXT4_I(inode)->jinode) { jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode), EXT4_I(inode)->jinode); jbd2_free_inode(EXT4_I(inode)->jinode); EXT4_I(inode)->jinode = NULL; } +#ifdef CONFIG_EXT4_FS_ENCRYPTION + if (EXT4_I(inode)->i_crypt_info) + ext4_free_encryption_info(inode, EXT4_I(inode)->i_crypt_info); +#endif } static struct inode *ext4_nfs_get_inode(struct super_block *sb, @@ -1016,7 +1064,7 @@ return 0; if (journal) return jbd2_journal_try_to_free_buffers(journal, page, - wait & ~__GFP_WAIT); + wait & ~__GFP_DIRECT_RECLAIM); return try_to_free_buffers(page); } @@ -1031,10 +1079,7 @@ static int ext4_write_info(struct super_block *sb, int type); static int ext4_quota_on(struct super_block *sb, int type, int format_id, struct path *path); -static int ext4_quota_on_sysfile(struct super_block *sb, int type, - int format_id); static int ext4_quota_off(struct super_block *sb, int type); -static int ext4_quota_off_sysfile(struct super_block *sb, int type); static int ext4_quota_on_mount(struct super_block *sb, int type); static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off); @@ -1044,6 +1089,11 @@ unsigned int flags); static int ext4_enable_quotas(struct super_block *sb); +static struct dquot **ext4_get_dquots(struct inode *inode) +{ + return EXT4_I(inode)->i_dquot; +} + static const struct dquot_operations ext4_quota_operations = { .get_reserved_space = ext4_get_reserved_space, .write_dquot = ext4_write_dquot, @@ -1059,17 +1109,7 @@ .quota_on = ext4_quota_on, .quota_off = ext4_quota_off, .quota_sync = dquot_quota_sync, - .get_info = dquot_get_dqinfo, - .set_info = dquot_set_dqinfo, - .get_dqblk = dquot_get_dqblk, - .set_dqblk = dquot_set_dqblk -}; - -static const struct quotactl_ops ext4_qctl_sysfile_operations = { - .quota_on_meta = ext4_quota_on_sysfile, - .quota_off = ext4_quota_off_sysfile, - .quota_sync = dquot_quota_sync, - .get_info = dquot_get_dqinfo, + .get_state = dquot_get_state, .set_info = dquot_set_dqinfo, .get_dqblk = dquot_get_dqblk, .set_dqblk = dquot_set_dqblk @@ -1093,24 +1133,7 @@ #ifdef CONFIG_QUOTA .quota_read = ext4_quota_read, .quota_write = ext4_quota_write, -#endif - .bdev_try_to_free_page = bdev_try_to_free_page, -}; - -static const struct super_operations ext4_nojournal_sops = { - .alloc_inode = ext4_alloc_inode, - .destroy_inode = ext4_destroy_inode, - .write_inode = ext4_write_inode, - .dirty_inode = ext4_dirty_inode, - .drop_inode = ext4_drop_inode, - .evict_inode = ext4_evict_inode, - .put_super = ext4_put_super, - .statfs = ext4_statfs, - .remount_fs = ext4_remount, - .show_options = ext4_show_options, -#ifdef CONFIG_QUOTA - .quota_read = ext4_quota_read, - .quota_write = ext4_quota_write, + .get_dquots = ext4_get_dquots, #endif .bdev_try_to_free_page = bdev_try_to_free_page, }; @@ -1127,20 +1150,21 @@ Opt_nouid32, Opt_debug, Opt_removed, Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, - Opt_commit, Opt_min_batch_time, Opt_max_batch_time, - Opt_journal_dev, Opt_journal_checksum, Opt_journal_async_commit, + Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev, + Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit, Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, - Opt_data_err_abort, Opt_data_err_ignore, + Opt_data_err_abort, Opt_data_err_ignore, Opt_test_dummy_encryption, Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, - Opt_usrquota, Opt_grpquota, Opt_i_version, + Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_dax, Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit, + Opt_lazytime, Opt_nolazytime, Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity, Opt_inode_readahead_blks, Opt_journal_ioprio, Opt_dioread_nolock, Opt_dioread_lock, Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, - Opt_max_dir_size_kb, + Opt_max_dir_size_kb, Opt_nojournal_checksum, }; static const match_table_t tokens = { @@ -1172,7 +1196,9 @@ {Opt_min_batch_time, "min_batch_time=%u"}, {Opt_max_batch_time, "max_batch_time=%u"}, {Opt_journal_dev, "journal_dev=%u"}, + {Opt_journal_path, "journal_path=%s"}, {Opt_journal_checksum, "journal_checksum"}, + {Opt_nojournal_checksum, "nojournal_checksum"}, {Opt_journal_async_commit, "journal_async_commit"}, {Opt_abort, "abort"}, {Opt_data_journal, "data=journal"}, @@ -1195,8 +1221,11 @@ {Opt_barrier, "barrier"}, {Opt_nobarrier, "nobarrier"}, {Opt_i_version, "i_version"}, + {Opt_dax, "dax"}, {Opt_stripe, "stripe=%u"}, {Opt_delalloc, "delalloc"}, + {Opt_lazytime, "lazytime"}, + {Opt_nolazytime, "nolazytime"}, {Opt_nodelalloc, "nodelalloc"}, {Opt_removed, "mblk_io_submit"}, {Opt_removed, "nomblk_io_submit"}, @@ -1215,6 +1244,7 @@ {Opt_init_itable, "init_itable"}, {Opt_noinit_itable, "noinit_itable"}, {Opt_max_dir_size_kb, "max_dir_size_kb=%u"}, + {Opt_test_dummy_encryption, "test_dummy_encryption"}, {Opt_removed, "check=none"}, /* mount option from ext2/3 */ {Opt_removed, "nocheck"}, /* mount option from ext2/3 */ {Opt_removed, "reservation"}, /* mount option from ext2/3 */ @@ -1264,10 +1294,10 @@ "quota options when quota turned on"); return -1; } - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) { - ext4_msg(sb, KERN_ERR, "Cannot set journaled quota options " - "when QUOTA feature is enabled"); - return -1; + if (ext4_has_feature_quota(sb)) { + ext4_msg(sb, KERN_INFO, "Journaled quota options " + "ignored when QUOTA feature is enabled"); + return 1; } qname = match_strdup(args); if (!qname) { @@ -1331,6 +1361,7 @@ #define MOPT_NO_EXT2 0x0100 #define MOPT_NO_EXT3 0x0200 #define MOPT_EXT4_ONLY (MOPT_NO_EXT2 | MOPT_NO_EXT3) +#define MOPT_STRING 0x0400 static const struct mount_opts { int token; @@ -1353,11 +1384,13 @@ MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT}, {Opt_nodelalloc, EXT4_MOUNT_DELALLOC, MOPT_EXT4_ONLY | MOPT_CLEAR}, + {Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, + MOPT_EXT4_ONLY | MOPT_CLEAR}, {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, - MOPT_EXT4_ONLY | MOPT_SET}, + MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT}, {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT | EXT4_MOUNT_JOURNAL_CHECKSUM), - MOPT_EXT4_ONLY | MOPT_SET}, + MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT}, {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET}, {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR}, {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR}, @@ -1376,11 +1409,13 @@ {Opt_min_batch_time, 0, MOPT_GTE0}, {Opt_inode_readahead_blks, 0, MOPT_GTE0}, {Opt_init_itable, 0, MOPT_GTE0}, + {Opt_dax, EXT4_MOUNT_DAX, MOPT_SET}, {Opt_stripe, 0, MOPT_GTE0}, {Opt_resuid, 0, MOPT_GTE0}, {Opt_resgid, 0, MOPT_GTE0}, - {Opt_journal_dev, 0, MOPT_GTE0}, - {Opt_journal_ioprio, 0, MOPT_GTE0}, + {Opt_journal_dev, 0, MOPT_NO_EXT2 | MOPT_GTE0}, + {Opt_journal_path, 0, MOPT_NO_EXT2 | MOPT_STRING}, + {Opt_journal_ioprio, 0, MOPT_NO_EXT2 | MOPT_GTE0}, {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ}, {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ}, {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA, @@ -1411,6 +1446,7 @@ {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT}, {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT}, {Opt_max_dir_size_kb, 0, MOPT_GTE0}, + {Opt_test_dummy_encryption, 0, MOPT_GTE0}, {Opt_err, 0, 0} }; @@ -1450,6 +1486,12 @@ case Opt_i_version: sb->s_flags |= MS_I_VERSION; return 1; + case Opt_lazytime: + sb->s_flags |= MS_LAZYTIME; + return 1; + case Opt_nolazytime: + sb->s_flags &= ~MS_LAZYTIME; + return 1; } for (m = ext4_mount_opts; m->token != Opt_err; m++) @@ -1473,12 +1515,18 @@ return -1; } - if (args->from && match_int(args, &arg)) + if (args->from && !(m->flags & MOPT_STRING) && match_int(args, &arg)) return -1; if (args->from && (m->flags & MOPT_GTE0) && (arg < 0)) return -1; - if (m->flags & MOPT_EXPLICIT) - set_opt2(sb, EXPLICIT_DELALLOC); + if (m->flags & MOPT_EXPLICIT) { + if (m->mount_opt & EXT4_MOUNT_DELALLOC) { + set_opt2(sb, EXPLICIT_DELALLOC); + } else if (m->mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) { + set_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM); + } else + return -1; + } if (m->flags & MOPT_CLEAR_ERR) clear_opt(sb, ERRORS_MASK); if (token == Opt_noquota && sb_any_quota_loaded(sb)) { @@ -1535,6 +1583,44 @@ return -1; } *journal_devnum = arg; + } else if (token == Opt_journal_path) { + char *journal_path; + struct inode *journal_inode; + struct path path; + int error; + + if (is_remount) { + ext4_msg(sb, KERN_ERR, + "Cannot specify journal on remount"); + return -1; + } + journal_path = match_strdup(&args[0]); + if (!journal_path) { + ext4_msg(sb, KERN_ERR, "error: could not dup " + "journal device string"); + return -1; + } + + error = kern_path(journal_path, LOOKUP_FOLLOW, &path); + if (error) { + ext4_msg(sb, KERN_ERR, "error: could not find " + "journal device path: error %d", error); + kfree(journal_path); + return -1; + } + + journal_inode = d_inode(path.dentry); + if (!S_ISBLK(journal_inode->i_mode)) { + ext4_msg(sb, KERN_ERR, "error: journal path %s " + "is not a block device", journal_path); + path_put(&path); + kfree(journal_path); + return -1; + } + + *journal_devnum = new_encode_dev(journal_inode->i_rdev); + path_put(&path); + kfree(journal_path); } else if (token == Opt_journal_ioprio) { if (arg > 7) { ext4_msg(sb, KERN_ERR, "Invalid journal IO priority" @@ -1543,6 +1629,15 @@ } *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg); + } else if (token == Opt_test_dummy_encryption) { +#ifdef CONFIG_EXT4_FS_ENCRYPTION + sbi->s_mount_flags |= EXT4_MF_TEST_DUMMY_ENCRYPTION; + ext4_msg(sb, KERN_WARNING, + "Test dummy encryption mode enabled"); +#else + ext4_msg(sb, KERN_WARNING, + "Test dummy encryption mount option ignored"); +#endif } else if (m->flags & MOPT_DATAJ) { if (is_remount) { if (!sbi->s_journal) @@ -1564,15 +1659,23 @@ "quota options when quota turned on"); return -1; } - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_QUOTA)) { - ext4_msg(sb, KERN_ERR, - "Cannot set journaled quota options " + if (ext4_has_feature_quota(sb)) { + ext4_msg(sb, KERN_INFO, + "Quota format mount options ignored " "when QUOTA feature is enabled"); - return -1; + return 1; } sbi->s_jquota_fmt = m->mount_opt; #endif + } else if (token == Opt_dax) { +#ifdef CONFIG_FS_DAX + ext4_msg(sb, KERN_WARNING, + "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); + sbi->s_mount_opt |= m->mount_opt; +#else + ext4_msg(sb, KERN_INFO, "dax option not supported"); + return -1; +#endif } else { if (!args->from) arg = 1; @@ -1619,13 +1722,13 @@ return 0; } #ifdef CONFIG_QUOTA - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && + if (ext4_has_feature_quota(sb) && (test_opt(sb, USRQUOTA) || test_opt(sb, GRPQUOTA))) { - ext4_msg(sb, KERN_ERR, "Cannot set quota options when QUOTA " - "feature is enabled"); - return 0; - } - if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { + ext4_msg(sb, KERN_INFO, "Quota feature enabled, usrquota and grpquota " + "mount options ignored."); + clear_opt(sb, USRQUOTA); + clear_opt(sb, GRPQUOTA); + } else if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) clear_opt(sb, USRQUOTA); @@ -1655,6 +1758,12 @@ return 0; } } + if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA && + test_opt(sb, JOURNAL_ASYNC_COMMIT)) { + ext4_msg(sb, KERN_ERR, "can't mount with journal_async_commit " + "in data=ordered mode"); + return 0; + } return 1; } @@ -1682,10 +1791,10 @@ } if (sbi->s_qf_names[USRQUOTA]) - seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); + seq_show_option(seq, "usrjquota", sbi->s_qf_names[USRQUOTA]); if (sbi->s_qf_names[GRPQUOTA]) - seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); + seq_show_option(seq, "grpjquota", sbi->s_qf_names[GRPQUOTA]); #endif } @@ -1786,7 +1895,7 @@ return _ext4_show_options(seq, root->d_sb, 0); } -static int options_seq_show(struct seq_file *seq, void *offset) +int ext4_seq_options_show(struct seq_file *seq, void *offset) { struct super_block *sb = seq->private; int rc; @@ -1797,19 +1906,6 @@ return rc; } -static int options_open_fs(struct inode *inode, struct file *file) -{ - return single_open(file, options_seq_show, PDE_DATA(inode)); -} - -static const struct file_operations ext4_seq_options_fops = { - .owner = THIS_MODULE, - .open = options_open_fs, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, int read_only) { @@ -1826,7 +1922,7 @@ if (!(sbi->s_mount_state & EXT4_VALID_FS)) ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, " "running e2fsck is recommended"); - else if ((sbi->s_mount_state & EXT4_ERROR_FS)) + else if (sbi->s_mount_state & EXT4_ERROR_FS) ext4_msg(sb, KERN_WARNING, "warning: mounting fs with errors, " "running e2fsck is recommended"); @@ -1850,7 +1946,7 @@ es->s_mtime = cpu_to_le32(get_seconds()); ext4_update_dynamic_rev(sb); if (sbi->s_journal) - EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + ext4_set_feature_journal_needs_recovery(sb); ext4_commit_super(sb, 1); done: @@ -1892,7 +1988,7 @@ memcpy(new_groups, sbi->s_flex_groups, (sbi->s_flex_groups_allocated * sizeof(struct flex_groups))); - ext4_kvfree(sbi->s_flex_groups); + kvfree(sbi->s_flex_groups); } sbi->s_flex_groups = new_groups; sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups); @@ -1904,7 +2000,6 @@ struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_group_desc *gdp = NULL; ext4_group_t flex_group; - unsigned int groups_per_flex = 0; int i, err; sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; @@ -1912,7 +2007,6 @@ sbi->s_log_groups_per_flex = 0; return 1; } - groups_per_flex = 1U << sbi->s_log_groups_per_flex; err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count); if (err) @@ -1935,15 +2029,15 @@ return 0; } -static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, +static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group, struct ext4_group_desc *gdp) { int offset = offsetof(struct ext4_group_desc, bg_checksum); __u16 crc = 0; __le32 le_group = cpu_to_le32(block_group); + struct ext4_sb_info *sbi = EXT4_SB(sb); - if ((sbi->s_es->s_feature_ro_compat & - cpu_to_le32(EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))) { + if (ext4_has_metadata_csum(sbi->s_sb)) { /* Use new metadata_csum algorithm */ __u32 csum32; __u16 dummy_csum = 0; @@ -1963,8 +2057,7 @@ } /* old crc16 code */ - if (!(sbi->s_es->s_feature_ro_compat & - cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM))) + if (!ext4_has_feature_gdt_csum(sb)) return 0; crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); @@ -1972,8 +2065,7 @@ crc = crc16(crc, (__u8 *)gdp, offset); offset += sizeof(gdp->bg_checksum); /* skip checksum */ /* for checksum of struct ext4_group_desc do the rest...*/ - if ((sbi->s_es->s_feature_incompat & - cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) && + if (ext4_has_feature_64bit(sb) && offset < le16_to_cpu(sbi->s_es->s_desc_size)) crc = crc16(crc, (__u8 *)gdp + offset, le16_to_cpu(sbi->s_es->s_desc_size) - @@ -1987,8 +2079,7 @@ struct ext4_group_desc *gdp) { if (ext4_has_group_desc_csum(sb) && - (gdp->bg_checksum != ext4_group_desc_csum(EXT4_SB(sb), - block_group, gdp))) + (gdp->bg_checksum != ext4_group_desc_csum(sb, block_group, gdp))) return 0; return 1; @@ -1999,7 +2090,7 @@ { if (!ext4_has_group_desc_csum(sb)) return; - gdp->bg_checksum = ext4_group_desc_csum(EXT4_SB(sb), block_group, gdp); + gdp->bg_checksum = ext4_group_desc_csum(sb, block_group, gdp); } /* Called at mount-time, super-block is locked */ @@ -2016,7 +2107,7 @@ int flexbg_flag = 0; ext4_group_t i, grp = sbi->s_groups_count; - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) + if (ext4_has_feature_flex_bg(sb)) flexbg_flag = 1; ext4_debug("Checking group descriptors"); @@ -2075,7 +2166,7 @@ if (!ext4_group_desc_csum_verify(sb, i, gdp)) { ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Checksum for group %u failed (%u!=%u)", - i, le16_to_cpu(ext4_group_desc_csum(sbi, i, + i, le16_to_cpu(ext4_group_desc_csum(sb, i, gdp)), le16_to_cpu(gdp->bg_checksum)); if (!(sb->s_flags & MS_RDONLY)) { ext4_unlock_group(sb, i); @@ -2088,10 +2179,6 @@ } if (NULL != first_not_zeroed) *first_not_zeroed = grp; - - ext4_free_blocks_count_set(sbi->s_es, - EXT4_C2B(sbi, ext4_count_free_clusters(sb))); - sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb)); return 1; } @@ -2141,7 +2228,7 @@ if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { /* don't clear list on RO mount w/ errors */ if (es->s_last_orphan && !(s_flags & MS_RDONLY)) { - jbd_debug(1, "Errors on filesystem, " + ext4_msg(sb, KERN_INFO, "Errors on filesystem, " "clearing orphan list.\n"); es->s_last_orphan = 0; } @@ -2157,7 +2244,7 @@ /* Needed for iput() to work correctly and not trash data */ sb->s_flags |= MS_ACTIVE; /* Turn on quotas so that they are updated correctly */ - for (i = 0; i < MAXQUOTAS; i++) { + for (i = 0; i < EXT4_MAXQUOTAS; i++) { if (EXT4_SB(sb)->s_qf_names[i]) { int ret = ext4_quota_on_mount(sb, i); if (ret < 0) @@ -2190,19 +2277,22 @@ list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); dquot_initialize(inode); if (inode->i_nlink) { - ext4_msg(sb, KERN_DEBUG, - "%s: truncating inode %lu to %lld bytes", - __func__, inode->i_ino, inode->i_size); + if (test_opt(sb, DEBUG)) + ext4_msg(sb, KERN_DEBUG, + "%s: truncating inode %lu to %lld bytes", + __func__, inode->i_ino, inode->i_size); jbd_debug(2, "truncating inode %lu to %lld bytes\n", inode->i_ino, inode->i_size); mutex_lock(&inode->i_mutex); + truncate_inode_pages(inode->i_mapping, inode->i_size); ext4_truncate(inode); mutex_unlock(&inode->i_mutex); nr_truncates++; } else { - ext4_msg(sb, KERN_DEBUG, - "%s: deleting unreferenced inode %lu", - __func__, inode->i_ino); + if (test_opt(sb, DEBUG)) + ext4_msg(sb, KERN_DEBUG, + "%s: deleting unreferenced inode %lu", + __func__, inode->i_ino); jbd_debug(2, "deleting unreferenced inode %lu\n", inode->i_ino); nr_orphans++; @@ -2220,7 +2310,7 @@ PLURAL(nr_truncates)); #ifdef CONFIG_QUOTA /* Turn quotas off */ - for (i = 0; i < MAXQUOTAS; i++) { + for (i = 0; i < EXT4_MAXQUOTAS; i++) { if (sb_dqopt(sb)->files[i]) dquot_quota_off(sb, i); } @@ -2349,13 +2439,22 @@ first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); - if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) || - nr < first_meta_bg) + if (!ext4_has_feature_meta_bg(sb) || nr < first_meta_bg) return logical_sb_block + nr + 1; bg = sbi->s_desc_per_block * nr; if (ext4_bg_has_super(sb, bg)) has_super = 1; + /* + * If we have a meta_bg fs with 1k blocks, group 0's GDT is at + * block 2, not 1. If s_first_data_block == 0 (bigalloc is enabled + * on modern mke2fs or blksize > 1k on older mke2fs) then we must + * compensate. + */ + if (sb->s_blocksize == 1024 && nr == 0 && + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) == 0) + has_super++; + return (has_super + ext4_group_first_block_no(sb, bg)); } @@ -2396,255 +2495,6 @@ return ret; } -/* sysfs supprt */ - -struct ext4_attr { - struct attribute attr; - ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *); - ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, - const char *, size_t); - int offset; -}; - -static int parse_strtoull(const char *buf, - unsigned long long max, unsigned long long *value) -{ - int ret; - - ret = kstrtoull(skip_spaces(buf), 0, value); - if (!ret && *value > max) - ret = -EINVAL; - return ret; -} - -static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a, - struct ext4_sb_info *sbi, - char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%llu\n", - (s64) EXT4_C2B(sbi, - percpu_counter_sum(&sbi->s_dirtyclusters_counter))); -} - -static ssize_t session_write_kbytes_show(struct ext4_attr *a, - struct ext4_sb_info *sbi, char *buf) -{ - struct super_block *sb = sbi->s_buddy_cache->i_sb; - - if (!sb->s_bdev->bd_part) - return snprintf(buf, PAGE_SIZE, "0\n"); - return snprintf(buf, PAGE_SIZE, "%lu\n", - (part_stat_read(sb->s_bdev->bd_part, sectors[1]) - - sbi->s_sectors_written_start) >> 1); -} - -static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a, - struct ext4_sb_info *sbi, char *buf) -{ - struct super_block *sb = sbi->s_buddy_cache->i_sb; - - if (!sb->s_bdev->bd_part) - return snprintf(buf, PAGE_SIZE, "0\n"); - return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)(sbi->s_kbytes_written + - ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - - EXT4_SB(sb)->s_sectors_written_start) >> 1))); -} - -static ssize_t inode_readahead_blks_store(struct ext4_attr *a, - struct ext4_sb_info *sbi, - const char *buf, size_t count) -{ - unsigned long t; - int ret; - - ret = kstrtoul(skip_spaces(buf), 0, &t); - if (ret) - return ret; - - if (t && (!is_power_of_2(t) || t > 0x40000000)) - return -EINVAL; - - sbi->s_inode_readahead_blks = t; - return count; -} - -static ssize_t sbi_ui_show(struct ext4_attr *a, - struct ext4_sb_info *sbi, char *buf) -{ - unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); - - return snprintf(buf, PAGE_SIZE, "%u\n", *ui); -} - -static ssize_t sbi_ui_store(struct ext4_attr *a, - struct ext4_sb_info *sbi, - const char *buf, size_t count) -{ - unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); - unsigned long t; - int ret; - - ret = kstrtoul(skip_spaces(buf), 0, &t); - if (ret) - return ret; - *ui = t; - return count; -} - -static ssize_t reserved_clusters_show(struct ext4_attr *a, - struct ext4_sb_info *sbi, char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long) atomic64_read(&sbi->s_resv_clusters)); -} - -static ssize_t reserved_clusters_store(struct ext4_attr *a, - struct ext4_sb_info *sbi, - const char *buf, size_t count) -{ - unsigned long long val; - int ret; - - if (parse_strtoull(buf, -1ULL, &val)) - return -EINVAL; - ret = ext4_reserve_clusters(sbi, val); - - return ret ? ret : count; -} - -static ssize_t trigger_test_error(struct ext4_attr *a, - struct ext4_sb_info *sbi, - const char *buf, size_t count) -{ - int len = count; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (len && buf[len-1] == '\n') - len--; - - if (len) - ext4_error(sbi->s_sb, "%.*s", len, buf); - return count; -} - -#define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \ -static struct ext4_attr ext4_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ - .show = _show, \ - .store = _store, \ - .offset = offsetof(struct ext4_sb_info, _elname), \ -} -#define EXT4_ATTR(name, mode, show, store) \ -static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) - -#define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL) -#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) -#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) -#define EXT4_RW_ATTR_SBI_UI(name, elname) \ - EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname) -#define ATTR_LIST(name) &ext4_attr_##name.attr - -EXT4_RO_ATTR(delayed_allocation_blocks); -EXT4_RO_ATTR(session_write_kbytes); -EXT4_RO_ATTR(lifetime_write_kbytes); -EXT4_RW_ATTR(reserved_clusters); -EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, - inode_readahead_blks_store, s_inode_readahead_blks); -EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); -EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); -EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); -EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); -EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); -EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); -EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); -EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump); -EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); -EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); - -static struct attribute *ext4_attrs[] = { - ATTR_LIST(delayed_allocation_blocks), - ATTR_LIST(session_write_kbytes), - ATTR_LIST(lifetime_write_kbytes), - ATTR_LIST(reserved_clusters), - ATTR_LIST(inode_readahead_blks), - ATTR_LIST(inode_goal), - ATTR_LIST(mb_stats), - ATTR_LIST(mb_max_to_scan), - ATTR_LIST(mb_min_to_scan), - ATTR_LIST(mb_order2_req), - ATTR_LIST(mb_stream_req), - ATTR_LIST(mb_group_prealloc), - ATTR_LIST(max_writeback_mb_bump), - ATTR_LIST(extent_max_zeroout_kb), - ATTR_LIST(trigger_fs_error), - NULL, -}; - -/* Features this copy of ext4 supports */ -EXT4_INFO_ATTR(lazy_itable_init); -EXT4_INFO_ATTR(batched_discard); -EXT4_INFO_ATTR(meta_bg_resize); - -static struct attribute *ext4_feat_attrs[] = { - ATTR_LIST(lazy_itable_init), - ATTR_LIST(batched_discard), - ATTR_LIST(meta_bg_resize), - NULL, -}; - -static ssize_t ext4_attr_show(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, - s_kobj); - struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); - - return a->show ? a->show(a, sbi, buf) : 0; -} - -static ssize_t ext4_attr_store(struct kobject *kobj, - struct attribute *attr, - const char *buf, size_t len) -{ - struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, - s_kobj); - struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); - - return a->store ? a->store(a, sbi, buf, len) : 0; -} - -static void ext4_sb_release(struct kobject *kobj) -{ - struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info, - s_kobj); - complete(&sbi->s_kobj_unregister); -} - -static const struct sysfs_ops ext4_attr_ops = { - .show = ext4_attr_show, - .store = ext4_attr_store, -}; - -static struct kobj_type ext4_ktype = { - .default_attrs = ext4_attrs, - .sysfs_ops = &ext4_attr_ops, - .release = ext4_sb_release, -}; - -static void ext4_feat_release(struct kobject *kobj) -{ - complete(&ext4_feat->f_kobj_unregister); -} - -static struct kobj_type ext4_feat_ktype = { - .default_attrs = ext4_feat_attrs, - .sysfs_ops = &ext4_attr_ops, - .release = ext4_feat_release, -}; - /* * Check whether this filesystem can be mounted based on * the features present and the RDONLY/RDWR mount requested. @@ -2653,7 +2503,7 @@ */ static int ext4_feature_set_ok(struct super_block *sb, int readonly) { - if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP)) { + if (ext4_has_unknown_ext4_incompat_features(sb)) { ext4_msg(sb, KERN_ERR, "Couldn't mount because of " "unsupported optional features (%x)", @@ -2665,8 +2515,14 @@ if (readonly) return 1; + if (ext4_has_feature_readonly(sb)) { + ext4_msg(sb, KERN_INFO, "filesystem is read-only"); + sb->s_flags |= MS_RDONLY; + return 1; + } + /* Check that feature set is OK for a read-write mount */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) { + if (ext4_has_unknown_ext4_ro_compat_features(sb)) { ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of " "unsupported optional features (%x)", (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & @@ -2677,7 +2533,7 @@ * Large file size enabled file system can only be mounted * read-write on 32-bit systems if kernel is built with CONFIG_LBDAF */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { + if (ext4_has_feature_huge_file(sb)) { if (sizeof(blkcnt_t) < sizeof(u64)) { ext4_msg(sb, KERN_ERR, "Filesystem with huge files " "cannot be mounted RDWR without " @@ -2685,8 +2541,7 @@ return 0; } } - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC) && - !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { + if (ext4_has_feature_bigalloc(sb) && !ext4_has_feature_extents(sb)) { ext4_msg(sb, KERN_ERR, "Can't support bigalloc feature without " "extents feature\n"); @@ -2694,8 +2549,7 @@ } #ifndef CONFIG_QUOTA - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && - !readonly) { + if (ext4_has_feature_quota(sb) && !readonly) { ext4_msg(sb, KERN_ERR, "Filesystem with quota feature cannot be mounted RDWR " "without CONFIG_QUOTA"); @@ -2960,13 +2814,22 @@ ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count; struct ext4_group_desc *gdp = NULL; + if (!ext4_has_group_desc_csum(sb)) + return ngroups; + for (group = 0; group < ngroups; group++) { gdp = ext4_get_group_desc(sb, group, NULL); if (!gdp) continue; - if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) + if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)) + continue; + if (group != 0) break; + ext4_error(sb, "Inode table for bg 0 marked as " + "needing zeroing"); + if (sb->s_flags & MS_RDONLY) + return ngroups; } return group; @@ -2995,7 +2858,6 @@ { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_li_request *elr; - unsigned long rnd; elr = kzalloc(sizeof(*elr), GFP_KERNEL); if (!elr) @@ -3010,10 +2872,8 @@ * spread the inode table initialization requests * better. */ - get_random_bytes(&rnd, sizeof(rnd)); - elr->lr_next_sched = jiffies + (unsigned long)rnd % - (EXT4_DEF_LI_MAX_START_DELAY * HZ); - + elr->lr_next_sched = jiffies + (prandom_u32() % + (EXT4_DEF_LI_MAX_START_DELAY * HZ)); return elr; } @@ -3098,17 +2958,20 @@ int compat, incompat; struct ext4_sb_info *sbi = EXT4_SB(sb); - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { - /* journal checksum v2 */ + if (ext4_has_metadata_csum(sb)) { + /* journal checksum v3 */ compat = 0; - incompat = JBD2_FEATURE_INCOMPAT_CSUM_V2; + incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3; } else { /* journal checksum v1 */ compat = JBD2_FEATURE_COMPAT_CHECKSUM; incompat = 0; } + jbd2_journal_clear_features(sbi->s_journal, + JBD2_FEATURE_COMPAT_CHECKSUM, 0, + JBD2_FEATURE_INCOMPAT_CSUM_V3 | + JBD2_FEATURE_INCOMPAT_CSUM_V2); if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { ret = jbd2_journal_set_features(sbi->s_journal, compat, 0, @@ -3121,10 +2984,8 @@ jbd2_journal_clear_features(sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); } else { - jbd2_journal_clear_features(sbi->s_journal, - JBD2_FEATURE_COMPAT_CHECKSUM, 0, - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | - JBD2_FEATURE_INCOMPAT_CSUM_V2); + jbd2_journal_clear_features(sbi->s_journal, 0, 0, + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); } return ret; @@ -3154,7 +3015,7 @@ ext4_group_t i, ngroups = ext4_get_groups_count(sb); int s, j, count = 0; - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC)) + if (!ext4_has_feature_bigalloc(sb)) return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) + sbi->s_itb_per_group + 2); @@ -3212,7 +3073,7 @@ struct ext4_super_block *es = sbi->s_es; ext4_group_t i, ngroups = ext4_get_groups_count(sb); ext4_fsblk_t overhead = 0; - char *buf = (char *) get_zeroed_page(GFP_KERNEL); + char *buf = (char *) get_zeroed_page(GFP_NOFS); if (!buf) return -ENOMEM; @@ -3240,8 +3101,8 @@ memset(buf, 0, PAGE_SIZE); cond_resched(); } - /* Add the journal blocks as well */ - if (sbi->s_journal) + /* Add the internal journal blocks as well */ + if (sbi->s_journal && !sbi->journal_bdev) overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen); sbi->s_overhead = overhead; @@ -3250,10 +3111,10 @@ return 0; } - -static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb) +static void ext4_set_resv_clusters(struct super_block *sb) { ext4_fsblk_t resv_clusters; + struct ext4_sb_info *sbi = EXT4_SB(sb); /* * There's no need to reserve anything when we aren't using extents. @@ -3261,36 +3122,23 @@ * hole punching doesn't need new metadata... This is needed especially * to keep ext2/3 backward compatibility. */ - if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) - return 0; + if (!ext4_has_feature_extents(sb)) + return; /* * By default we reserve 2% or 4096 clusters, whichever is smaller. * This should cover the situations where we can not afford to run * out of space like for example punch hole, or converting - * uninitialized extents in delalloc path. In most cases such + * unwritten extents in delalloc path. In most cases such * allocation would require 1, or 2 blocks, higher numbers are * very rare. */ - resv_clusters = ext4_blocks_count(EXT4_SB(sb)->s_es) >> - EXT4_SB(sb)->s_cluster_bits; + resv_clusters = (ext4_blocks_count(sbi->s_es) >> + sbi->s_cluster_bits); do_div(resv_clusters, 50); resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096); - return resv_clusters; -} - - -static int ext4_reserve_clusters(struct ext4_sb_info *sbi, ext4_fsblk_t count) -{ - ext4_fsblk_t clusters = ext4_blocks_count(sbi->s_es) >> - sbi->s_cluster_bits; - - if (count >= clusters) - return -EINVAL; - - atomic64_set(&sbi->s_resv_clusters, count); - return 0; + atomic64_set(&sbi->s_resv_clusters, resv_clusters); } static int ext4_fill_super(struct super_block *sb, void *data, int silent) @@ -3306,7 +3154,6 @@ unsigned long journal_devnum = 0; unsigned long def_mount_opts; struct inode *root; - char *cp; const char *descr; int ret = -ENOMEM; int blocksize, clustersize; @@ -3335,8 +3182,7 @@ part_stat_read(sb->s_bdev->bd_part, sectors[1]); /* Cleanup superblock name */ - for (cp = sb->s_id; (cp = strchr(cp, '/'));) - *cp = '!'; + strreplace(sb->s_id, '/', '!'); /* -EINVAL is default */ ret = -EINVAL; @@ -3357,7 +3203,7 @@ logical_sb_block = sb_block; } - if (!(bh = sb_bread(sb, logical_sb_block))) { + if (!(bh = sb_bread_unmovable(sb, logical_sb_block))) { ext4_msg(sb, KERN_ERR, "unable to read superblock"); goto out_fail; } @@ -3373,10 +3219,9 @@ sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written); /* Warn if metadata_csum and gdt_csum are both set. */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && - EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) - ext4_warning(sb, KERN_INFO "metadata_csum and uninit_bg are " + if (ext4_has_feature_metadata_csum(sb) && + ext4_has_feature_gdt_csum(sb)) + ext4_warning(sb, "metadata_csum and uninit_bg are " "redundant flags; please run fsck."); /* Check for a known checksum algorithm */ @@ -3388,8 +3233,7 @@ } /* Load the checksum driver */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { + if (ext4_has_feature_metadata_csum(sb)) { sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); if (IS_ERR(sbi->s_chksum_driver)) { ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver."); @@ -3404,12 +3248,14 @@ ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " "invalid superblock checksum. Run e2fsck?"); silent = 1; + ret = -EFSBADCRC; goto cantfind_ext4; } /* Precompute checksum seed for all metadata */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (ext4_has_feature_csum_seed(sb)) + sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed); + else if (ext4_has_metadata_csum(sb)) sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid, sizeof(es->s_uuid)); @@ -3427,6 +3273,10 @@ #ifdef CONFIG_EXT4_FS_POSIX_ACL set_opt(sb, POSIX_ACL); #endif + /* don't forget to enable journal_csum when metadata_csum is enabled. */ + if (ext4_has_metadata_csum(sb)) + set_opt(sb, JOURNAL_CHECKSUM); + if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) set_opt(sb, JOURNAL_DATA); else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) @@ -3440,8 +3290,8 @@ set_opt(sb, ERRORS_CONT); else set_opt(sb, ERRORS_RO); - if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY) - set_opt(sb, BLOCK_VALIDITY); + /* block_validity enabled by default; disable with noblock_validity */ + set_opt(sb, BLOCK_VALIDITY); if (def_mount_opts & EXT4_DEFM_DISCARD) set_opt(sb, DISCARD); @@ -3501,21 +3351,42 @@ "both data=journal and dioread_nolock"); goto failed_mount; } + if (test_opt(sb, DAX)) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "both data=journal and dax"); + goto failed_mount; + } + if (ext4_has_feature_encrypt(sb)) { + ext4_msg(sb, KERN_WARNING, + "encrypted files will use data=ordered " + "instead of data journaling mode"); + } if (test_opt(sb, DELALLOC)) clear_opt(sb, DELALLOC); + } else { + sb->s_iflags |= SB_I_CGROUPWB; } sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && - (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || - EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) || - EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U))) + (ext4_has_compat_features(sb) || + ext4_has_ro_compat_features(sb) || + ext4_has_incompat_features(sb))) ext4_msg(sb, KERN_WARNING, "feature flags set on rev 0 fs, " "running e2fsck is recommended"); + if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) { + set_opt2(sb, HURD_COMPAT); + if (ext4_has_feature_64bit(sb)) { + ext4_msg(sb, KERN_ERR, + "The Hurd can't support 64-bit file systems"); + goto failed_mount; + } + } + if (IS_EXT2_SB(sb)) { if (ext2_feature_set_ok(sb)) ext4_msg(sb, KERN_INFO, "mounting ext2 file system " @@ -3562,6 +3433,32 @@ goto failed_mount; } + if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) { + ext4_msg(sb, KERN_ERR, + "Number of reserved GDT blocks insanely large: %d", + le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks)); + goto failed_mount; + } + + if (sbi->s_mount_opt & EXT4_MOUNT_DAX) { + if (blocksize != PAGE_SIZE) { + ext4_msg(sb, KERN_ERR, + "error: unsupported blocksize for dax"); + goto failed_mount; + } + if (!sb->s_bdev->bd_disk->fops->direct_access) { + ext4_msg(sb, KERN_ERR, + "error: device does not support dax"); + goto failed_mount; + } + } + + if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) { + ext4_msg(sb, KERN_ERR, "Unsupported encryption level %d", + es->s_encryption_level); + goto failed_mount; + } + if (sb->s_blocksize != blocksize) { /* Validate the filesystem blocksize */ if (!sb_set_blocksize(sb, blocksize)) { @@ -3573,7 +3470,7 @@ brelse(bh); logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; offset = do_div(logical_sb_block, blocksize); - bh = sb_bread(sb, logical_sb_block); + bh = sb_bread_unmovable(sb, logical_sb_block); if (!bh) { ext4_msg(sb, KERN_ERR, "Can't read superblock on 2nd try"); @@ -3588,8 +3485,7 @@ } } - has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_HUGE_FILE); + has_huge_files = ext4_has_feature_huge_file(sb); sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, has_huge_files); sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); @@ -3600,6 +3496,11 @@ } else { sbi->s_inode_size = le16_to_cpu(es->s_inode_size); sbi->s_first_ino = le32_to_cpu(es->s_first_ino); + if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) { + ext4_msg(sb, KERN_ERR, "invalid first ino: %u", + sbi->s_first_ino); + goto failed_mount; + } if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || (!is_power_of_2(sbi->s_inode_size)) || (sbi->s_inode_size > blocksize)) { @@ -3613,7 +3514,7 @@ } sbi->s_desc_size = le16_to_cpu(es->s_desc_size); - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { + if (ext4_has_feature_64bit(sb)) { if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || sbi->s_desc_size > EXT4_MAX_DESC_SIZE || !is_power_of_2(sbi->s_desc_size)) { @@ -3648,7 +3549,7 @@ for (i = 0; i < 4; i++) sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); sbi->s_def_hash_version = es->s_def_hash_version; - if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) { + if (ext4_has_feature_dir_index(sb)) { i = le32_to_cpu(es->s_flags); if (i & EXT2_FLAGS_UNSIGNED_HASH) sbi->s_hash_unsigned = 3; @@ -3668,8 +3569,7 @@ /* Handle clustersize */ clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size); - has_bigalloc = EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_BIGALLOC); + has_bigalloc = ext4_has_feature_bigalloc(sb); if (has_bigalloc) { if (clustersize < blocksize) { ext4_msg(sb, KERN_ERR, @@ -3779,7 +3679,7 @@ (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / EXT4_DESC_PER_BLOCK(sb); - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG)) { + if (ext4_has_feature_meta_bg(sb)) { if (le32_to_cpu(es->s_first_meta_bg) > db_count) { ext4_msg(sb, KERN_WARNING, "first meta block group too large: %u " @@ -3797,18 +3697,11 @@ goto failed_mount; } - if (ext4_proc_root) - sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); - - if (sbi->s_proc) - proc_create_data("options", S_IRUGO, sbi->s_proc, - &ext4_seq_options_fops, sb); - bgl_lock_init(sbi->s_blockgroup_lock); for (i = 0; i < db_count; i++) { block = descriptor_loc(sb, logical_sb_block, i); - sbi->s_group_desc[i] = sb_bread(sb, block); + sbi->s_group_desc[i] = sb_bread_unmovable(sb, block); if (!sbi->s_group_desc[i]) { ext4_msg(sb, KERN_ERR, "can't read group descriptor %d", i); @@ -3818,68 +3711,37 @@ } if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) { ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); + ret = -EFSCORRUPTED; goto failed_mount2; } - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) - if (!ext4_fill_flex_info(sb)) { - ext4_msg(sb, KERN_ERR, - "unable to initialize " - "flex_bg meta info!"); - goto failed_mount2; - } sbi->s_gdb_count = db_count; get_random_bytes(&sbi->s_next_generation, sizeof(u32)); spin_lock_init(&sbi->s_next_gen_lock); - init_timer(&sbi->s_err_report); - sbi->s_err_report.function = print_daily_error_info; - sbi->s_err_report.data = (unsigned long) sb; + setup_timer(&sbi->s_err_report, print_daily_error_info, + (unsigned long) sb); /* Register extent status tree shrinker */ - ext4_es_register_shrinker(sb); - - err = percpu_counter_init(&sbi->s_freeclusters_counter, - ext4_count_free_clusters(sb)); - if (!err) { - err = percpu_counter_init(&sbi->s_freeinodes_counter, - ext4_count_free_inodes(sb)); - } - if (!err) { - err = percpu_counter_init(&sbi->s_dirs_counter, - ext4_count_dirs(sb)); - } - if (!err) { - err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0); - } - if (!err) { - err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0); - } - if (err) { - ext4_msg(sb, KERN_ERR, "insufficient memory"); + if (ext4_es_register_shrinker(sbi)) goto failed_mount3; - } sbi->s_stripe = ext4_get_stripe_size(sbi); - sbi->s_max_writeback_mb_bump = 128; sbi->s_extent_max_zeroout_kb = 32; /* * set up enough so that it can read an inode */ - if (!test_opt(sb, NOLOAD) && - EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) - sb->s_op = &ext4_sops; - else - sb->s_op = &ext4_nojournal_sops; + sb->s_op = &ext4_sops; sb->s_export_op = &ext4_export_ops; sb->s_xattr = ext4_xattr_handlers; #ifdef CONFIG_QUOTA sb->dq_op = &ext4_quota_operations; - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) - sb->s_qcop = &ext4_qctl_sysfile_operations; + if (ext4_has_feature_quota(sb)) + sb->s_qcop = &dquot_quotactl_sysfile_ops; else sb->s_qcop = &ext4_qctl_operations; + sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; #endif memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid)); @@ -3889,36 +3751,58 @@ sb->s_root = NULL; needs_recovery = (es->s_last_orphan != 0 || - EXT4_HAS_INCOMPAT_FEATURE(sb, - EXT4_FEATURE_INCOMPAT_RECOVER)); + ext4_has_feature_journal_needs_recovery(sb)); - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) && - !(sb->s_flags & MS_RDONLY)) + if (ext4_has_feature_mmp(sb) && !(sb->s_flags & MS_RDONLY)) if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block))) - goto failed_mount3; + goto failed_mount3a; /* * The first inode we look at is the journal inode. Don't try * root first: it may be modified in the journal! */ - if (!test_opt(sb, NOLOAD) && - EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { + if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) { err = ext4_load_journal(sb, es, journal_devnum); if (err) - goto failed_mount3; + goto failed_mount3a; } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && - EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { + ext4_has_feature_journal_needs_recovery(sb)) { ext4_msg(sb, KERN_ERR, "required journal recovery " "suppressed and not mounted read-only"); goto failed_mount_wq; } else { + /* Nojournal mode, all journal mount options are illegal */ + if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "journal_checksum, fs mounted w/o journal"); + goto failed_mount_wq; + } + if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "journal_async_commit, fs mounted w/o journal"); + goto failed_mount_wq; + } + if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "commit=%lu, fs mounted w/o journal", + sbi->s_commit_interval / HZ); + goto failed_mount_wq; + } + if (EXT4_MOUNT_DATA_FLAGS & + (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "data=, fs mounted w/o journal"); + goto failed_mount_wq; + } + sbi->s_def_mount_opt &= EXT4_MOUNT_JOURNAL_CHECKSUM; + clear_opt(sb, JOURNAL_CHECKSUM); clear_opt(sb, DATA_FLAGS); sbi->s_journal = NULL; needs_recovery = 0; goto no_journal; } - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT) && + if (ext4_has_feature_64bit(sb) && !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_64BIT)) { ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); @@ -3961,19 +3845,28 @@ sbi->s_journal->j_commit_callback = ext4_journal_commit_callback; - /* - * The journal may have updated the bg summary counts, so we - * need to update the global counters. - */ - percpu_counter_set(&sbi->s_freeclusters_counter, - ext4_count_free_clusters(sb)); - percpu_counter_set(&sbi->s_freeinodes_counter, - ext4_count_free_inodes(sb)); - percpu_counter_set(&sbi->s_dirs_counter, - ext4_count_dirs(sb)); - percpu_counter_set(&sbi->s_dirtyclusters_counter, 0); - no_journal: + if (ext4_mballoc_ready) { + sbi->s_mb_cache = ext4_xattr_create_cache(sb->s_id); + if (!sbi->s_mb_cache) { + ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache"); + goto failed_mount_wq; + } + } + + if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) && + (blocksize != PAGE_CACHE_SIZE)) { + ext4_msg(sb, KERN_ERR, + "Unsupported blocksize for fs encryption"); + goto failed_mount_wq; + } + + if (DUMMY_ENCRYPTION_ENABLED(sbi) && !(sb->s_flags & MS_RDONLY) && + !ext4_has_feature_encrypt(sb)) { + ext4_set_feature_encrypt(sb); + ext4_commit_super(sb, 1); + } + /* * Get the # of file system overhead blocks from the * superblock if present. @@ -3990,12 +3883,12 @@ * The maximum number of concurrent works can be high and * concurrency isn't really necessary. Limit it to 1. */ - EXT4_SB(sb)->dio_unwritten_wq = - alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM | WQ_UNBOUND, 1); - if (!EXT4_SB(sb)->dio_unwritten_wq) { - printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); + EXT4_SB(sb)->rsv_conversion_wq = + alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1); + if (!EXT4_SB(sb)->rsv_conversion_wq) { + printk(KERN_ERR "EXT4-fs: failed to create workqueue\n"); ret = -ENOMEM; - goto failed_mount_wq; + goto failed_mount4; } /* @@ -4029,8 +3922,7 @@ if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { sbi->s_want_extra_isize = sizeof(struct ext4_inode) - EXT4_GOOD_OLD_INODE_SIZE; - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) { + if (ext4_has_feature_extra_isize(sb)) { if (sbi->s_want_extra_isize < le16_to_cpu(es->s_want_extra_isize)) sbi->s_want_extra_isize = @@ -4050,12 +3942,7 @@ "available"); } - err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sb)); - if (err) { - ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for " - "reserved pool", ext4_calculate_resv_clusters(sb)); - goto failed_mount4a; - } + ext4_set_resv_clusters(sb); err = ext4_setup_system_zone(sb); if (err) { @@ -4072,21 +3959,47 @@ goto failed_mount5; } + block = ext4_count_free_clusters(sb); + ext4_free_blocks_count_set(sbi->s_es, + EXT4_C2B(sbi, block)); + err = percpu_counter_init(&sbi->s_freeclusters_counter, block, + GFP_KERNEL); + if (!err) { + unsigned long freei = ext4_count_free_inodes(sb); + sbi->s_es->s_free_inodes_count = cpu_to_le32(freei); + err = percpu_counter_init(&sbi->s_freeinodes_counter, freei, + GFP_KERNEL); + } + if (!err) + err = percpu_counter_init(&sbi->s_dirs_counter, + ext4_count_dirs(sb), GFP_KERNEL); + if (!err) + err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0, + GFP_KERNEL); + if (err) { + ext4_msg(sb, KERN_ERR, "insufficient memory"); + goto failed_mount6; + } + + if (ext4_has_feature_flex_bg(sb)) + if (!ext4_fill_flex_info(sb)) { + ext4_msg(sb, KERN_ERR, + "unable to initialize " + "flex_bg meta info!"); + goto failed_mount6; + } + err = ext4_register_li_request(sb, first_not_zeroed); if (err) goto failed_mount6; - sbi->s_kobj.kset = ext4_kset; - init_completion(&sbi->s_kobj_unregister); - err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL, - "%s", sb->s_id); + err = ext4_register_sysfs(sb); if (err) goto failed_mount7; #ifdef CONFIG_QUOTA /* Enable quota usage during mount. */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && - !(sb->s_flags & MS_RDONLY)) { + if (ext4_has_feature_quota(sb) && !(sb->s_flags & MS_RDONLY)) { err = ext4_enable_quotas(sb); if (err) goto failed_mount8; @@ -4118,15 +4031,21 @@ "the device does not support discard"); } - ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " - "Opts: %.*s%s%s", descr, - (int) sizeof(sbi->s_es->s_mount_opts), - sbi->s_es->s_mount_opts, - *sbi->s_es->s_mount_opts ? "; " : "", orig_data); + if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount")) + ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " + "Opts: %.*s%s%s", descr, + (int) sizeof(sbi->s_es->s_mount_opts), + sbi->s_es->s_mount_opts, + *sbi->s_es->s_mount_opts ? "; " : "", orig_data); if (es->s_error_count) mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ + /* Enable message ratelimiting. Default is 10 messages per 5 secs. */ + ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10); + ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10); + ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10); + kfree(orig_data); return 0; @@ -4137,12 +4056,18 @@ #ifdef CONFIG_QUOTA failed_mount8: - kobject_del(&sbi->s_kobj); + ext4_unregister_sysfs(sb); #endif failed_mount7: ext4_unregister_li_request(sb); failed_mount6: ext4_mb_release(sb); + if (sbi->s_flex_groups) + kvfree(sbi->s_flex_groups); + percpu_counter_destroy(&sbi->s_freeclusters_counter); + percpu_counter_destroy(&sbi->s_freeinodes_counter); + percpu_counter_destroy(&sbi->s_dirs_counter); + percpu_counter_destroy(&sbi->s_dirtyclusters_counter); failed_mount5: ext4_ext_release(sb); ext4_release_system_zone(sb); @@ -4151,37 +4076,28 @@ sb->s_root = NULL; failed_mount4: ext4_msg(sb, KERN_ERR, "mount failed"); - destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); + if (EXT4_SB(sb)->rsv_conversion_wq) + destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq); failed_mount_wq: if (sbi->s_journal) { jbd2_journal_destroy(sbi->s_journal); sbi->s_journal = NULL; } +failed_mount3a: + ext4_es_unregister_shrinker(sbi); failed_mount3: - ext4_es_unregister_shrinker(sb); - del_timer(&sbi->s_err_report); - if (sbi->s_flex_groups) - ext4_kvfree(sbi->s_flex_groups); - percpu_counter_destroy(&sbi->s_freeclusters_counter); - percpu_counter_destroy(&sbi->s_freeinodes_counter); - percpu_counter_destroy(&sbi->s_dirs_counter); - percpu_counter_destroy(&sbi->s_dirtyclusters_counter); - percpu_counter_destroy(&sbi->s_extent_cache_cnt); + del_timer_sync(&sbi->s_err_report); if (sbi->s_mmp_tsk) kthread_stop(sbi->s_mmp_tsk); failed_mount2: for (i = 0; i < db_count; i++) brelse(sbi->s_group_desc[i]); - ext4_kvfree(sbi->s_group_desc); + kvfree(sbi->s_group_desc); failed_mount: if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); - if (sbi->s_proc) { - remove_proc_entry("options", sbi->s_proc); - remove_proc_entry(sb->s_id, ext4_proc_root); - } #ifdef CONFIG_QUOTA - for (i = 0; i < MAXQUOTAS; i++) + for (i = 0; i < EXT4_MAXQUOTAS; i++) kfree(sbi->s_qf_names[i]); #endif ext4_blkdev_remove(sbi); @@ -4226,7 +4142,7 @@ struct inode *journal_inode; journal_t *journal; - BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); + BUG_ON(!ext4_has_feature_journal(sb)); /* First, test for the existence of a valid inode on disk. Bad * things happen if we iget() an unused inode, as the subsequent @@ -4276,7 +4192,7 @@ struct ext4_super_block *es; struct block_device *bdev; - BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); + BUG_ON(!ext4_has_feature_journal(sb)); bdev = ext4_blkdev_get(j_dev, sb); if (bdev == NULL) @@ -4309,6 +4225,15 @@ goto out_bdev; } + if ((le32_to_cpu(es->s_feature_ro_compat) & + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && + es->s_checksum != ext4_superblock_csum(sb, es)) { + ext4_msg(sb, KERN_ERR, "external journal has " + "corrupt superblock"); + brelse(bh); + goto out_bdev; + } + if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { ext4_msg(sb, KERN_ERR, "journal UUID does not match"); brelse(bh); @@ -4359,7 +4284,7 @@ int err = 0; int really_read_only; - BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); + BUG_ON(!ext4_has_feature_journal(sb)); if (journal_devnum && journal_devnum != le32_to_cpu(es->s_journal_dev)) { @@ -4376,7 +4301,7 @@ * crash? For recovery, we need to check in advance whether we * can get read-write access to the device. */ - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { + if (ext4_has_feature_journal_needs_recovery(sb)) { if (sb->s_flags & MS_RDONLY) { ext4_msg(sb, KERN_INFO, "INFO: recovery " "required on readonly filesystem"); @@ -4407,7 +4332,7 @@ if (!(journal->j_flags & JBD2_BARRIER)) ext4_msg(sb, KERN_INFO, "barriers disabled"); - if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) + if (!ext4_has_feature_journal_needs_recovery(sb)) err = jbd2_journal_wipe(journal, !really_read_only); if (!err) { char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL); @@ -4483,17 +4408,20 @@ else es->s_kbytes_written = cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); - ext4_free_blocks_count_set(es, + if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter)) + ext4_free_blocks_count_set(es, EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive( &EXT4_SB(sb)->s_freeclusters_counter))); - es->s_free_inodes_count = - cpu_to_le32(percpu_counter_sum_positive( + if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter)) + es->s_free_inodes_count = + cpu_to_le32(percpu_counter_sum_positive( &EXT4_SB(sb)->s_freeinodes_counter)); BUFFER_TRACE(sbh, "marking dirty"); ext4_superblock_csum_set(sb); mark_buffer_dirty(sbh); if (sync) { - error = sync_dirty_buffer(sbh); + error = __sync_dirty_buffer(sbh, + test_opt(sb, BARRIER) ? WRITE_FUA : WRITE_SYNC); if (error) return error; @@ -4518,7 +4446,7 @@ { journal_t *journal = EXT4_SB(sb)->s_journal; - if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { + if (!ext4_has_feature_journal(sb)) { BUG_ON(journal != NULL); return; } @@ -4526,9 +4454,9 @@ if (jbd2_journal_flush(journal) < 0) goto out; - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && + if (ext4_has_feature_journal_needs_recovery(sb) && sb->s_flags & MS_RDONLY) { - EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + ext4_clear_feature_journal_needs_recovery(sb); ext4_commit_super(sb, 1); } @@ -4548,7 +4476,7 @@ int j_errno; const char *errstr; - BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); + BUG_ON(!ext4_has_feature_journal(sb)); journal = EXT4_SB(sb)->s_journal; @@ -4594,19 +4522,41 @@ { int ret = 0; tid_t target; + bool needs_barrier = false; struct ext4_sb_info *sbi = EXT4_SB(sb); trace_ext4_sync_fs(sb, wait); - flush_workqueue(sbi->dio_unwritten_wq); + flush_workqueue(sbi->rsv_conversion_wq); /* * Writeback quota in non-journalled quota case - journalled quota has * no dirty dquots */ dquot_writeback_dquots(sb, -1); - if (jbd2_journal_start_commit(sbi->s_journal, &target)) { - if (wait) - jbd2_log_wait_commit(sbi->s_journal, target); + /* + * Data writeback is possible w/o journal transaction, so barrier must + * being sent at the end of the function. But we can skip it if + * transaction_commit will do it for us. + */ + if (sbi->s_journal) { + target = jbd2_get_latest_transaction(sbi->s_journal); + if (wait && sbi->s_journal->j_flags & JBD2_BARRIER && + !jbd2_trans_will_send_data_barrier(sbi->s_journal, target)) + needs_barrier = true; + + if (jbd2_journal_start_commit(sbi->s_journal, &target)) { + if (wait) + ret = jbd2_log_wait_commit(sbi->s_journal, + target); + } + } else if (wait && test_opt(sb, BARRIER)) + needs_barrier = true; + if (needs_barrier) { + int err; + err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL); + if (!ret) + ret = err; } + return ret; } @@ -4628,23 +4578,27 @@ journal = EXT4_SB(sb)->s_journal; - /* Now we set up the journal barrier. */ - jbd2_journal_lock_updates(journal); + if (journal) { + /* Now we set up the journal barrier. */ + jbd2_journal_lock_updates(journal); - /* - * Don't clear the needs_recovery flag if we failed to flush - * the journal. - */ - error = jbd2_journal_flush(journal); - if (error < 0) - goto out; + /* + * Don't clear the needs_recovery flag if we failed to + * flush the journal. + */ + error = jbd2_journal_flush(journal); + if (error < 0) + goto out; + + /* Journal blocked and flushed, clear needs_recovery flag. */ + ext4_clear_feature_journal_needs_recovery(sb); + } - /* Journal blocked and flushed, clear needs_recovery flag. */ - EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); error = ext4_commit_super(sb, 1); out: - /* we rely on upper layer to stop further updates */ - jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); + if (journal) + /* we rely on upper layer to stop further updates */ + jbd2_journal_unlock_updates(journal); return error; } @@ -4657,8 +4611,11 @@ if (sb->s_flags & MS_RDONLY) return 0; - /* Reset the needs_recovery flag before the fs is unlocked. */ - EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + if (EXT4_SB(sb)->s_journal) { + /* Reset the needs_recovery flag before the fs is unlocked. */ + ext4_set_feature_journal_needs_recovery(sb); + } + ext4_commit_super(sb, 1); return 0; } @@ -4675,7 +4632,7 @@ u32 s_min_batch_time, s_max_batch_time; #ifdef CONFIG_QUOTA int s_jquota_fmt; - char *s_qf_names[MAXQUOTAS]; + char *s_qf_names[EXT4_MAXQUOTAS]; #endif }; @@ -4705,7 +4662,7 @@ old_opts.s_max_batch_time = sbi->s_max_batch_time; #ifdef CONFIG_QUOTA old_opts.s_jquota_fmt = sbi->s_jquota_fmt; - for (i = 0; i < MAXQUOTAS; i++) + for (i = 0; i < EXT4_MAXQUOTAS; i++) if (sbi->s_qf_names[i]) { old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i], GFP_KERNEL); @@ -4721,14 +4678,18 @@ if (sbi->s_journal && sbi->s_journal->j_task->io_context) journal_ioprio = sbi->s_journal->j_task->io_context->ioprio; - /* - * Allow the "check" option to be passed as a remount option. - */ if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) { err = -EINVAL; goto restore_opts; } + if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^ + test_opt(sb, JOURNAL_CHECKSUM)) { + ext4_msg(sb, KERN_ERR, "changing journal_checksum " + "during remount not supported; ignoring"); + sbi->s_mount_opt ^= EXT4_MOUNT_JOURNAL_CHECKSUM; + } + if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { if (test_opt2(sb, EXPLICIT_DELALLOC)) { ext4_msg(sb, KERN_ERR, "can't mount with " @@ -4742,6 +4703,18 @@ err = -EINVAL; goto restore_opts; } + if (test_opt(sb, DAX)) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "both data=journal and dax"); + err = -EINVAL; + goto restore_opts; + } + } + + if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) { + ext4_msg(sb, KERN_WARNING, "warning: refusing change of " + "dax flag with busy inodes while remounting"); + sbi->s_mount_opt ^= EXT4_MOUNT_DAX; } if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) @@ -4757,6 +4730,9 @@ set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); } + if (*flags & MS_LAZYTIME) + sb->s_flags |= MS_LAZYTIME; + if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) { err = -EROFS; @@ -4764,6 +4740,9 @@ } if (*flags & MS_RDONLY) { + err = sync_filesystem(sb); + if (err < 0) + goto restore_opts; err = dquot_suspend(sb, -1); if (err < 0) goto restore_opts; @@ -4787,7 +4766,8 @@ ext4_mark_recovery_complete(sb, es); } else { /* Make sure we can mount this feature set readwrite */ - if (!ext4_feature_set_ok(sb, 0)) { + if (ext4_has_feature_readonly(sb) || + !ext4_feature_set_ok(sb, 0)) { err = -EROFS; goto restore_opts; } @@ -4802,9 +4782,9 @@ if (!ext4_group_desc_csum_verify(sb, g, gdp)) { ext4_msg(sb, KERN_ERR, "ext4_remount: Checksum for group %u failed (%u!=%u)", - g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), + g, le16_to_cpu(ext4_group_desc_csum(sb, g, gdp)), le16_to_cpu(gdp->bg_checksum)); - err = -EINVAL; + err = -EFSBADCRC; goto restore_opts; } } @@ -4834,8 +4814,7 @@ sbi->s_mount_state = le16_to_cpu(es->s_state); if (!ext4_setup_super(sb, es, 0)) sb->s_flags &= ~MS_RDONLY; - if (EXT4_HAS_INCOMPAT_FEATURE(sb, - EXT4_FEATURE_INCOMPAT_MMP)) + if (ext4_has_feature_mmp(sb)) if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block))) { err = -EROFS; @@ -4863,13 +4842,12 @@ #ifdef CONFIG_QUOTA /* Release old quota file names */ - for (i = 0; i < MAXQUOTAS; i++) + for (i = 0; i < EXT4_MAXQUOTAS; i++) kfree(old_opts.s_qf_names[i]); if (enable_quota) { if (sb_any_quota_suspended(sb)) dquot_resume(sb, -1); - else if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_QUOTA)) { + else if (ext4_has_feature_quota(sb)) { err = ext4_enable_quotas(sb); if (err) goto restore_opts; @@ -4877,6 +4855,7 @@ } #endif + *flags = (*flags & ~MS_LAZYTIME) | (sb->s_flags & MS_LAZYTIME); ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data); kfree(orig_data); return 0; @@ -4892,7 +4871,7 @@ sbi->s_max_batch_time = old_opts.s_max_batch_time; #ifdef CONFIG_QUOTA sbi->s_jquota_fmt = old_opts.s_jquota_fmt; - for (i = 0; i < MAXQUOTAS; i++) { + for (i = 0; i < EXT4_MAXQUOTAS; i++) { kfree(sbi->s_qf_names[i]); sbi->s_qf_names[i] = old_opts.s_qf_names[i]; } @@ -5012,7 +4991,7 @@ struct ext4_sb_info *sbi = EXT4_SB(sb); /* Are we journaling quotas? */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) || + if (ext4_has_feature_quota(sb) || sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { dquot_mark_dquot_dirty(dquot); return ext4_write_dquot(dquot); @@ -5027,7 +5006,7 @@ handle_t *handle; /* Data block + inode block */ - handle = ext4_journal_start(sb->s_root->d_inode, EXT4_HT_QUOTA, 2); + handle = ext4_journal_start(d_inode(sb->s_root), EXT4_HT_QUOTA, 2); if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_commit_info(sb, type); @@ -5089,7 +5068,7 @@ * all updates to the file when we bypass pagecache... */ if (EXT4_SB(sb)->s_journal && - ext4_should_journal_data(path->dentry->d_inode)) { + ext4_should_journal_data(d_inode(path->dentry))) { /* * We don't need to lock updates but journal_flush() could * otherwise be livelocked... @@ -5113,12 +5092,12 @@ { int err; struct inode *qf_inode; - unsigned long qf_inums[MAXQUOTAS] = { + unsigned long qf_inums[EXT4_MAXQUOTAS] = { le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) }; - BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)); + BUG_ON(!ext4_has_feature_quota(sb)); if (!qf_inums[type]) return -EPERM; @@ -5144,13 +5123,13 @@ static int ext4_enable_quotas(struct super_block *sb) { int type, err = 0; - unsigned long qf_inums[MAXQUOTAS] = { + unsigned long qf_inums[EXT4_MAXQUOTAS] = { le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) }; sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE; - for (type = 0; type < MAXQUOTAS; type++) { + for (type = 0; type < EXT4_MAXQUOTAS; type++) { if (qf_inums[type]) { err = ext4_quota_enable(sb, type, QFMT_VFS_V1, DQUOT_USAGE_ENABLED); @@ -5166,21 +5145,6 @@ return 0; } -/* - * quota_on function that is used when QUOTA feature is set. - */ -static int ext4_quota_on_sysfile(struct super_block *sb, int type, - int format_id) -{ - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) - return -EINVAL; - - /* - * USAGE was enabled at mount time. Only need to enable LIMITS now. - */ - return ext4_quota_enable(sb, type, format_id, DQUOT_LIMITS_ENABLED); -} - static int ext4_quota_off(struct super_block *sb, int type) { struct inode *inode = sb_dqopt(sb)->files[type]; @@ -5207,18 +5171,6 @@ return dquot_quota_off(sb, type); } -/* - * quota_off function that is used when QUOTA feature is set. - */ -static int ext4_quota_off_sysfile(struct super_block *sb, int type) -{ - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) - return -EINVAL; - - /* Disable only the limits. */ - return dquot_disable(sb, type, DQUOT_LIMITS_ENABLED); -} - /* Read data from quotafile - avoid pagecache and such because we cannot afford * acquiring the locks... As quota files are never truncated and quota code * itself serializes the operations (and no one else should touch the files) @@ -5228,7 +5180,6 @@ { struct inode *inode = sb_dqopt(sb)->files[type]; ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); - int err = 0; int offset = off & (sb->s_blocksize - 1); int tocopy; size_t toread; @@ -5243,9 +5194,9 @@ while (toread > 0) { tocopy = sb->s_blocksize - offset < toread ? sb->s_blocksize - offset : toread; - bh = ext4_bread(NULL, inode, blk, 0, &err); - if (err) - return err; + bh = ext4_bread(NULL, inode, blk, 0); + if (IS_ERR(bh)) + return PTR_ERR(bh); if (!bh) /* A hole? */ memset(data, 0, tocopy); else @@ -5266,8 +5217,8 @@ { struct inode *inode = sb_dqopt(sb)->files[type]; ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); - int err = 0; - int offset = off & (sb->s_blocksize - 1); + int err, offset = off & (sb->s_blocksize - 1); + int retries = 0; struct buffer_head *bh; handle_t *handle = journal_current_handle(); @@ -5288,13 +5239,21 @@ return -EIO; } - bh = ext4_bread(handle, inode, blk, 1, &err); + do { + bh = ext4_bread(handle, inode, blk, + EXT4_GET_BLOCKS_CREATE | + EXT4_GET_BLOCKS_METADATA_NOFAIL); + } while (IS_ERR(bh) && (PTR_ERR(bh) == -ENOSPC) && + ext4_should_retry_alloc(inode->i_sb, &retries)); + if (IS_ERR(bh)) + return PTR_ERR(bh); if (!bh) goto out; + BUFFER_TRACE(bh, "get write access"); err = ext4_journal_get_write_access(handle, bh); if (err) { brelse(bh); - goto out; + return err; } lock_buffer(bh); memcpy(bh->b_data+offset, data, len); @@ -5303,8 +5262,6 @@ err = ext4_handle_dirty_metadata(handle, NULL, bh); brelse(bh); out: - if (err) - return err; if (inode->i_size < off + len) { i_size_write(inode, off + len); EXT4_I(inode)->i_disksize = inode->i_size; @@ -5321,7 +5278,7 @@ return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super); } -#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) +#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2) static inline void register_as_ext2(void) { int err = register_filesystem(&ext2_fs_type); @@ -5337,11 +5294,11 @@ static inline int ext2_feature_set_ok(struct super_block *sb) { - if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP)) + if (ext4_has_unknown_ext2_incompat_features(sb)) return 0; if (sb->s_flags & MS_RDONLY) return 1; - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP)) + if (ext4_has_unknown_ext2_ro_compat_features(sb)) return 0; return 1; } @@ -5351,7 +5308,6 @@ static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; } #endif -#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) static inline void register_as_ext3(void) { int err = register_filesystem(&ext3_fs_type); @@ -5367,21 +5323,16 @@ static inline int ext3_feature_set_ok(struct super_block *sb) { - if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP)) + if (ext4_has_unknown_ext3_incompat_features(sb)) return 0; - if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) + if (!ext4_has_feature_journal(sb)) return 0; if (sb->s_flags & MS_RDONLY) return 1; - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP)) + if (ext4_has_unknown_ext3_ro_compat_features(sb)) return 0; return 1; } -#else -static inline void register_as_ext3(void) { } -static inline void unregister_as_ext3(void) { } -static inline int ext3_feature_set_ok(struct super_block *sb) { return 0; } -#endif static struct file_system_type ext4_fs_type = { .owner = THIS_MODULE, @@ -5392,37 +5343,6 @@ }; MODULE_ALIAS_FS("ext4"); -static int __init ext4_init_feat_adverts(void) -{ - struct ext4_features *ef; - int ret = -ENOMEM; - - ef = kzalloc(sizeof(struct ext4_features), GFP_KERNEL); - if (!ef) - goto out; - - ef->f_kobj.kset = ext4_kset; - init_completion(&ef->f_kobj_unregister); - ret = kobject_init_and_add(&ef->f_kobj, &ext4_feat_ktype, NULL, - "features"); - if (ret) { - kfree(ef); - goto out; - } - - ext4_feat = ef; - ret = 0; -out: - return ret; -} - -static void ext4_exit_feat_adverts(void) -{ - kobject_put(&ext4_feat->f_kobj); - wait_for_completion(&ext4_feat->f_kobj_unregister); - kfree(ext4_feat); -} - /* Shared across all ext4 file systems */ wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; @@ -5431,6 +5351,7 @@ { int i, err; + ratelimit_state_init(&ext4_mount_msg_ratelimit, 30 * HZ, 64); ext4_li_info = NULL; mutex_init(&ext4_li_mtx); @@ -5448,29 +5369,21 @@ err = ext4_init_pageio(); if (err) - goto out7; - - err = ext4_init_system_zone(); - if (err) - goto out6; - ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); - if (!ext4_kset) { - err = -ENOMEM; goto out5; - } - ext4_proc_root = proc_mkdir("fs/ext4", NULL); - err = ext4_init_feat_adverts(); + err = ext4_init_system_zone(); if (err) goto out4; - err = ext4_init_mballoc(); + err = ext4_init_sysfs(); if (err) goto out3; - err = ext4_init_xattr(); + err = ext4_init_mballoc(); if (err) goto out2; + else + ext4_mballoc_ready = 1; err = init_inodecache(); if (err) goto out1; @@ -5486,20 +5399,15 @@ unregister_as_ext3(); destroy_inodecache(); out1: - ext4_exit_xattr(); -out2: + ext4_mballoc_ready = 0; ext4_exit_mballoc(); +out2: + ext4_exit_sysfs(); out3: - ext4_exit_feat_adverts(); -out4: - if (ext4_proc_root) - remove_proc_entry("fs/ext4", NULL); - kset_unregister(ext4_kset); -out5: ext4_exit_system_zone(); -out6: +out4: ext4_exit_pageio(); -out7: +out5: ext4_exit_es(); return err; @@ -5507,16 +5415,14 @@ static void __exit ext4_exit_fs(void) { + ext4_exit_crypto(); ext4_destroy_lazyinit_thread(); unregister_as_ext2(); unregister_as_ext3(); unregister_filesystem(&ext4_fs_type); destroy_inodecache(); - ext4_exit_xattr(); ext4_exit_mballoc(); - ext4_exit_feat_adverts(); - remove_proc_entry("fs/ext4", NULL); - kset_unregister(ext4_kset); + ext4_exit_sysfs(); ext4_exit_system_zone(); ext4_exit_pageio(); ext4_exit_es();