--- zzzz-none-000/linux-3.10.107/fs/ext4/ialloc.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/fs/ext4/ialloc.c 2021-02-04 17:41:59.000000000 +0000 @@ -14,7 +14,6 @@ #include #include -#include #include #include #include @@ -64,37 +63,6 @@ memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); } -/* Initializes an uninitialized inode bitmap */ -static unsigned ext4_init_inode_bitmap(struct super_block *sb, - struct buffer_head *bh, - ext4_group_t block_group, - struct ext4_group_desc *gdp) -{ - J_ASSERT_BH(bh, buffer_locked(bh)); - - /* If checksum is bad mark all blocks and inodes use to prevent - * allocation, essentially implementing a per-group read-only flag. */ - if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { - ext4_error(sb, "Checksum bad for group %u", block_group); - ext4_free_group_clusters_set(sb, gdp, 0); - ext4_free_inodes_set(sb, gdp, 0); - ext4_itable_unused_set(sb, gdp, 0); - memset(bh->b_data, 0xff, sb->s_blocksize); - ext4_inode_bitmap_csum_set(sb, block_group, gdp, bh, - EXT4_INODES_PER_GROUP(sb) / 8); - return 0; - } - - memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); - ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, - bh->b_data); - ext4_inode_bitmap_csum_set(sb, block_group, gdp, bh, - EXT4_INODES_PER_GROUP(sb) / 8); - ext4_group_desc_csum_set(sb, block_group, gdp); - - return EXT4_INODES_PER_GROUP(sb); -} - void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate) { if (uptodate) { @@ -105,6 +73,42 @@ put_bh(bh); } +static int ext4_validate_inode_bitmap(struct super_block *sb, + struct ext4_group_desc *desc, + ext4_group_t block_group, + struct buffer_head *bh) +{ + ext4_fsblk_t blk; + struct ext4_group_info *grp = ext4_get_group_info(sb, block_group); + struct ext4_sb_info *sbi = EXT4_SB(sb); + + if (buffer_verified(bh)) + return 0; + if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) + return -EFSCORRUPTED; + + ext4_lock_group(sb, block_group); + blk = ext4_inode_bitmap(sb, 
desc); + if (!ext4_inode_bitmap_csum_verify(sb, block_group, desc, bh, + EXT4_INODES_PER_GROUP(sb) / 8)) { + ext4_unlock_group(sb, block_group); + ext4_error(sb, "Corrupt inode bitmap - block_group = %u, " + "inode_bitmap = %llu", block_group, blk); + grp = ext4_get_group_info(sb, block_group); + if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) { + int count; + count = ext4_free_inodes_count(sb, desc); + percpu_counter_sub(&sbi->s_freeinodes_counter, + count); + } + set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); + return -EFSBADCRC; + } + set_buffer_verified(bh); + ext4_unlock_group(sb, block_group); + return 0; +} + /* * Read the inode allocation bitmap for a given block_group, reading * into the specified slot in the superblock's bitmap cache. @@ -117,10 +121,11 @@ struct ext4_group_desc *desc; struct buffer_head *bh = NULL; ext4_fsblk_t bitmap_blk; + int err; desc = ext4_get_group_desc(sb, block_group, NULL); if (!desc) - return NULL; + return ERR_PTR(-EFSCORRUPTED); bitmap_blk = ext4_inode_bitmap(sb, desc); bh = sb_getblk(sb, bitmap_blk); @@ -128,7 +133,7 @@ ext4_error(sb, "Cannot read inode bitmap - " "block_group = %u, inode_bitmap = %llu", block_group, bitmap_blk); - return NULL; + return ERR_PTR(-EIO); } if (bitmap_uptodate(bh)) goto verify; @@ -140,8 +145,19 @@ } ext4_lock_group(sb, block_group); - if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { - ext4_init_inode_bitmap(sb, bh, block_group, desc); + if (ext4_has_group_desc_csum(sb) && + (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT))) { + if (block_group == 0) { + ext4_unlock_group(sb, block_group); + unlock_buffer(bh); + ext4_error(sb, "Inode bitmap for bg 0 marked " + "uninitialized"); + err = -EFSCORRUPTED; + goto out; + } + memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); + ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), + sb->s_blocksize * 8, bh->b_data); set_bitmap_uptodate(bh); set_buffer_uptodate(bh); set_buffer_verified(bh); @@ -173,23 +189,17 @@ ext4_error(sb, 
"Cannot read inode bitmap - " "block_group = %u, inode_bitmap = %llu", block_group, bitmap_blk); - return NULL; + return ERR_PTR(-EIO); } verify: - ext4_lock_group(sb, block_group); - if (!buffer_verified(bh) && - !ext4_inode_bitmap_csum_verify(sb, block_group, desc, bh, - EXT4_INODES_PER_GROUP(sb) / 8)) { - ext4_unlock_group(sb, block_group); - put_bh(bh); - ext4_error(sb, "Corrupt inode bitmap - block_group = %u, " - "inode_bitmap = %llu", block_group, bitmap_blk); - return NULL; - } - ext4_unlock_group(sb, block_group); - set_buffer_verified(bh); + err = ext4_validate_inode_bitmap(sb, desc, block_group, bh); + if (err) + goto out; return bh; +out: + put_bh(bh); + return ERR_PTR(err); } /* @@ -221,6 +231,7 @@ struct ext4_super_block *es; struct ext4_sb_info *sbi; int fatal = 0, err, count, cleared; + struct ext4_group_info *grp; if (!sb) { printk(KERN_ERR "EXT4-fs: %s:%d: inode on " @@ -266,8 +277,17 @@ block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); bitmap_bh = ext4_read_inode_bitmap(sb, block_group); - if (!bitmap_bh) + /* Don't bother if the inode bitmap is corrupt. 
*/ + grp = ext4_get_group_info(sb, block_group); + if (IS_ERR(bitmap_bh)) { + fatal = PTR_ERR(bitmap_bh); + bitmap_bh = NULL; goto error_return; + } + if (unlikely(EXT4_MB_GRP_IBITMAP_CORRUPT(grp))) { + fatal = -EFSCORRUPTED; + goto error_return; + } BUFFER_TRACE(bitmap_bh, "get_write_access"); fatal = ext4_journal_get_write_access(handle, bitmap_bh); @@ -315,8 +335,16 @@ err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); if (!fatal) fatal = err; - } else + } else { ext4_error(sb, "bit already cleared for inode %lu", ino); + if (gdp && !EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) { + int count; + count = ext4_free_inodes_count(sb, gdp); + percpu_counter_sub(&sbi->s_freeinodes_counter, + count); + } + set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); + } error_return: brelse(bitmap_bh); @@ -415,7 +443,7 @@ ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter); if (S_ISDIR(mode) && - ((parent == sb->s_root->d_inode) || + ((parent == d_inode(sb->s_root)) || (ext4_test_inode_flag(parent, EXT4_INODE_TOPDIR)))) { int best_ndir = inodes_per_group; int ret = -1; @@ -426,7 +454,7 @@ ext4fs_dirhash(qstr->name, qstr->len, &hinfo); grp = hinfo.hash; } else - get_random_bytes(&grp, sizeof(grp)); + grp = prandom_u32(); parent_group = (unsigned)grp % ngroups; for (i = 0; i < ngroups; i++) { g = (parent_group + i) % ngroups; @@ -625,6 +653,51 @@ } /* + * In no journal mode, if an inode has recently been deleted, we want + * to avoid reusing it until we're reasonably sure the inode table + * block has been written back to disk. (Yes, these values are + * somewhat arbitrary...) 
+ */ +#define RECENTCY_MIN 5 +#define RECENTCY_DIRTY 30 + +static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino) +{ + struct ext4_group_desc *gdp; + struct ext4_inode *raw_inode; + struct buffer_head *bh; + unsigned long dtime, now; + int inodes_per_block = EXT4_SB(sb)->s_inodes_per_block; + int offset, ret = 0, recentcy = RECENTCY_MIN; + + gdp = ext4_get_group_desc(sb, group, NULL); + if (unlikely(!gdp)) + return 0; + + bh = sb_getblk(sb, ext4_inode_table(sb, gdp) + + (ino / inodes_per_block)); + if (unlikely(!bh) || !buffer_uptodate(bh)) + /* + * If the block is not in the buffer cache, then it + * must have been written out. + */ + goto out; + + offset = (ino % inodes_per_block) * EXT4_INODE_SIZE(sb); + raw_inode = (struct ext4_inode *) (bh->b_data + offset); + dtime = le32_to_cpu(raw_inode->i_dtime); + now = get_seconds(); + if (buffer_dirty(bh)) + recentcy += RECENTCY_DIRTY; + + if (dtime && (dtime < now) && (now < dtime + recentcy)) + ret = 1; +out: + brelse(bh); + return ret; +} + +/* * There are two policies for allocating an inode. 
If the new inode is * a directory, then a forward search is made for a block group with both * free space and a low directory-to-inode ratio; if that fails, then of @@ -648,15 +721,30 @@ struct ext4_group_desc *gdp = NULL; struct ext4_inode_info *ei; struct ext4_sb_info *sbi; - int ret2, err = 0; + int ret2, err; struct inode *ret; ext4_group_t i; ext4_group_t flex_group; + struct ext4_group_info *grp; + int encrypt = 0; /* Cannot create files in a deleted directory */ if (!dir || !dir->i_nlink) return ERR_PTR(-EPERM); + if ((ext4_encrypted_inode(dir) || + DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb))) && + (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) { + err = ext4_get_encryption_info(dir); + if (err) + return ERR_PTR(err); + if (ext4_encryption_info(dir) == NULL) + return ERR_PTR(-EPERM); + if (!handle) + nblocks += EXT4_DATA_TRANS_BLOCKS(dir->i_sb); + encrypt = 1; + } + sb = dir->i_sb; ngroups = ext4_get_groups_count(sb); trace_ext4_request_inode(dir, mode); @@ -681,7 +769,9 @@ inode->i_gid = dir->i_gid; } else inode_init_owner(inode, dir, mode); - dquot_initialize(inode); + err = dquot_initialize(inode); + if (err) + goto out; if (!goal) goal = sbi->s_inode_goal; @@ -725,10 +815,24 @@ continue; } + grp = ext4_get_group_info(sb, group); + /* Skip groups with already-known suspicious inode tables */ + if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) { + if (++group == ngroups) + group = 0; + continue; + } + brelse(inode_bitmap_bh); inode_bitmap_bh = ext4_read_inode_bitmap(sb, group); - if (!inode_bitmap_bh) - goto out; + /* Skip groups with suspicious inode tables */ + if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp) || + IS_ERR(inode_bitmap_bh)) { + inode_bitmap_bh = NULL; + if (++group == ngroups) + group = 0; + continue; + } repeat_in_this_group: ino = ext4_find_next_zero_bit((unsigned long *) @@ -741,10 +845,16 @@ "inode=%lu", ino + 1); continue; } + if ((EXT4_SB(sb)->s_journal == NULL) && + recently_deleted(sb, group, ino)) { + ino++; + goto next_inode; + } if (!handle) { 
BUG_ON(nblocks <= 0); handle = __ext4_journal_start_sb(dir->i_sb, line_no, - handle_type, nblocks); + handle_type, nblocks, + 0); if (IS_ERR(handle)) { err = PTR_ERR(handle); ext4_std_error(sb, err); @@ -763,6 +873,7 @@ ino++; /* the inode bitmap is zero-based */ if (!ret2) goto got; /* we grabbed the inode! */ +next_inode: if (ino < EXT4_INODES_PER_GROUP(sb)) goto repeat_in_this_group; next_group: @@ -793,8 +904,8 @@ struct buffer_head *block_bitmap_bh; block_bitmap_bh = ext4_read_block_bitmap(sb, group); - if (!block_bitmap_bh) { - err = -EIO; + if (IS_ERR(block_bitmap_bh)) { + err = PTR_ERR(block_bitmap_bh); goto out; } BUFFER_TRACE(block_bitmap_bh, "get block bitmap access"); @@ -810,7 +921,8 @@ /* recheck and clear flag under lock if we still need to */ ext4_lock_group(sb, group); - if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { + if (ext4_has_group_desc_csum(sb) && + (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); ext4_free_group_clusters_set(sb, gdp, ext4_free_clusters_after_init(sb, group, gdp)); @@ -921,8 +1033,7 @@ spin_unlock(&sbi->s_next_gen_lock); /* Precompute checksum seed for inode metadata */ - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { + if (ext4_has_metadata_csum(sb)) { __u32 csum; __le32 inum = cpu_to_le32(inode->i_ino); __le32 gen = cpu_to_le32(inode->i_generation); @@ -936,11 +1047,9 @@ ext4_set_inode_state(inode, EXT4_STATE_NEW); ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; - ei->i_inline_off = 0; - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_INLINE_DATA)) + if (ext4_has_feature_inline_data(sb)) ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); - ret = inode; err = dquot_alloc_inode(inode); if (err) @@ -954,7 +1063,7 @@ if (err) goto fail_free_drop; - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { + if (ext4_has_feature_extents(sb)) { /* set extent flag only for directory, file and normal 
symlink*/ if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) { ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS); @@ -967,6 +1076,12 @@ ei->i_datasync_tid = handle->h_transaction->t_tid; } + if (encrypt) { + err = ext4_inherit_context(dir, inode); + if (err) + goto fail_free_drop; + } + err = ext4_mark_inode_dirty(handle, inode); if (err) { ext4_std_error(sb, err); @@ -997,22 +1112,20 @@ unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count); ext4_group_t block_group; int bit; - struct buffer_head *bitmap_bh; + struct buffer_head *bitmap_bh = NULL; struct inode *inode = NULL; - long err = -EIO; + int err = -EFSCORRUPTED; - /* Error cases - e2fsck has already cleaned up for us */ - if (ino > max_ino) { - ext4_warning(sb, "bad orphan ino %lu! e2fsck was run?", ino); - goto error; - } + if (ino < EXT4_FIRST_INO(sb) || ino > max_ino) + goto bad_orphan; block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); bitmap_bh = ext4_read_inode_bitmap(sb, block_group); - if (!bitmap_bh) { - ext4_warning(sb, "inode bitmap error for orphan %lu", ino); - goto error; + if (IS_ERR(bitmap_bh)) { + ext4_error(sb, "inode bitmap error %ld for orphan %lu", + PTR_ERR(bitmap_bh), ino); + return (struct inode *) bitmap_bh; } /* Having the inode bit set should be a 100% indicator that this @@ -1023,8 +1136,12 @@ goto bad_orphan; inode = ext4_iget(sb, ino); - if (IS_ERR(inode)) - goto iget_failed; + if (IS_ERR(inode)) { + ext4_error(sb, "couldn't read orphan inode %lu (err %ld)", + ino, PTR_ERR(inode)); + brelse(bitmap_bh); /* drop the bitmap buffer reference before bailing out */ + return inode; + } /* * If the orphans has i_nlinks > 0 then it should be able to @@ -1041,29 +1158,25 @@ brelse(bitmap_bh); return inode; -iget_failed: - err = PTR_ERR(inode); - inode = NULL; bad_orphan: - ext4_warning(sb, "bad orphan inode %lu!
e2fsck was run?", ino); - printk(KERN_WARNING "ext4_test_bit(bit=%d, block=%llu) = %d\n", - bit, (unsigned long long)bitmap_bh->b_blocknr, - ext4_test_bit(bit, bitmap_bh->b_data)); - printk(KERN_WARNING "inode=%p\n", inode); + ext4_error(sb, "bad orphan inode %lu", ino); + if (bitmap_bh) + printk(KERN_ERR "ext4_test_bit(bit=%d, block=%llu) = %d\n", + bit, (unsigned long long)bitmap_bh->b_blocknr, + ext4_test_bit(bit, bitmap_bh->b_data)); if (inode) { - printk(KERN_WARNING "is_bad_inode(inode)=%d\n", + printk(KERN_ERR "is_bad_inode(inode)=%d\n", is_bad_inode(inode)); - printk(KERN_WARNING "NEXT_ORPHAN(inode)=%u\n", + printk(KERN_ERR "NEXT_ORPHAN(inode)=%u\n", NEXT_ORPHAN(inode)); - printk(KERN_WARNING "max_ino=%lu\n", max_ino); - printk(KERN_WARNING "i_nlink=%u\n", inode->i_nlink); + printk(KERN_ERR "max_ino=%lu\n", max_ino); + printk(KERN_ERR "i_nlink=%u\n", inode->i_nlink); /* Avoid freeing blocks if we got a bad deleted inode */ if (inode->i_nlink == 0) inode->i_blocks = 0; iput(inode); } brelse(bitmap_bh); -error: return ERR_PTR(err); } @@ -1088,8 +1201,10 @@ desc_count += ext4_free_inodes_count(sb, gdp); brelse(bitmap_bh); bitmap_bh = ext4_read_inode_bitmap(sb, i); - if (!bitmap_bh) + if (IS_ERR(bitmap_bh)) { + bitmap_bh = NULL; continue; + } x = ext4_count_free(bitmap_bh->b_data, EXT4_INODES_PER_GROUP(sb) / 8);