--- zzzz-none-000/linux-3.10.107/fs/jbd2/journal.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/fs/jbd2/journal.c 2021-02-04 17:41:59.000000000 +0000 @@ -103,18 +103,37 @@ static void __journal_abort_soft (journal_t *journal, int errno); static int jbd2_journal_create_slab(size_t slab_size); +#ifdef CONFIG_JBD2_DEBUG +void __jbd2_debug(int level, const char *file, const char *func, + unsigned int line, const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + if (level > jbd2_journal_enable_debug) + return; + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + printk(KERN_DEBUG "%s: (%s, %u): %pV\n", file, func, line, &vaf); + va_end(args); +} +EXPORT_SYMBOL(__jbd2_debug); +#endif + /* Checksumming functions */ -int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) +static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) { - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3_feature(j)) return 1; return sb->s_checksum_type == JBD2_CRC32C_CHKSUM; } -static __u32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb) +static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb) { - __u32 csum, old_csum; + __u32 csum; + __be32 old_csum; old_csum = sb->s_checksum; sb->s_checksum = 0; @@ -124,17 +143,17 @@ return cpu_to_be32(csum); } -int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) +static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) { - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return 1; return sb->s_checksum == jbd2_superblock_csum(j, sb); } -void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb) +static void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb) { - if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (!jbd2_journal_has_csum_v2or3(j)) return; 
sb->s_checksum = jbd2_superblock_csum(j, sb); @@ -283,8 +302,8 @@ journal->j_flags |= JBD2_UNMOUNT; while (journal->j_task) { - wake_up(&journal->j_wait_commit); write_unlock(&journal->j_state_lock); + wake_up(&journal->j_wait_commit); wait_event(journal->j_wait_done_commit, journal->j_task == NULL); write_lock(&journal->j_state_lock); } @@ -310,14 +329,12 @@ * * If the source buffer has already been modified by a new transaction * since we took the last commit snapshot, we use the frozen copy of - * that data for IO. If we end up using the existing buffer_head's data - * for the write, then we *have* to lock the buffer to prevent anyone - * else from using and possibly modifying it while the IO is in - * progress. + * that data for IO. If we end up using the existing buffer_head's data + * for the write, then we have to make sure nobody modifies it while the + * IO is in progress. do_get_write_access() handles this. * - * The function returns a pointer to the buffer_heads to be used for IO. - * - * We assume that the journal has already been locked in this function. + * The function returns a pointer to the buffer_head to be used for IO. + * * * Return value: * <0: Error @@ -330,15 +347,14 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction, struct journal_head *jh_in, - struct journal_head **jh_out, - unsigned long long blocknr) + struct buffer_head **bh_out, + sector_t blocknr) { int need_copy_out = 0; int done_copy_out = 0; int do_escape = 0; char *mapped_data; struct buffer_head *new_bh; - struct journal_head *new_jh; struct page *new_page; unsigned int new_offset; struct buffer_head *bh_in = jh2bh(jh_in); @@ -355,27 +371,17 @@ */ J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in)); -retry_alloc: - new_bh = alloc_buffer_head(GFP_NOFS); - if (!new_bh) { - /* - * Failure is not an option, but __GFP_NOFAIL is going - * away; so we retry ourselves here. 
- */ - congestion_wait(BLK_RW_ASYNC, HZ/50); - goto retry_alloc; - } + new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL); /* keep subsequent assertions sane */ atomic_set(&new_bh->b_count, 1); - new_jh = jbd2_journal_add_journal_head(new_bh); /* This sleeps */ + jbd_lock_bh_state(bh_in); +repeat: /* * If a new transaction has already done a buffer copy-out, then * we use that version of the data for the commit. */ - jbd_lock_bh_state(bh_in); -repeat: if (jh_in->b_frozen_data) { done_copy_out = 1; new_page = virt_to_page(jh_in->b_frozen_data); @@ -415,7 +421,7 @@ jbd_unlock_bh_state(bh_in); tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS); if (!tmp) { - jbd2_journal_put_journal_head(new_jh); + brelse(new_bh); return -ENOMEM; } jbd_lock_bh_state(bh_in); @@ -426,7 +432,7 @@ jh_in->b_frozen_data = tmp; mapped_data = kmap_atomic(new_page); - memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size); + memcpy(tmp, mapped_data + new_offset, bh_in->b_size); kunmap_atomic(mapped_data); new_page = virt_to_page(tmp); @@ -452,14 +458,14 @@ } set_bh_page(new_bh, new_page, new_offset); - new_jh->b_transaction = NULL; - new_bh->b_size = jh2bh(jh_in)->b_size; - new_bh->b_bdev = transaction->t_journal->j_dev; + new_bh->b_size = bh_in->b_size; + new_bh->b_bdev = journal->j_dev; new_bh->b_blocknr = blocknr; + new_bh->b_private = bh_in; set_buffer_mapped(new_bh); set_buffer_dirty(new_bh); - *jh_out = new_jh; + *bh_out = new_bh; /* * The to-be-written buffer needs to get moved to the io queue, @@ -470,11 +476,9 @@ spin_lock(&journal->j_list_lock); __jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow); spin_unlock(&journal->j_list_lock); + set_buffer_shadow(bh_in); jbd_unlock_bh_state(bh_in); - JBUFFER_TRACE(new_jh, "file as BJ_IO"); - jbd2_journal_file_buffer(new_jh, transaction, BJ_IO); - return do_escape | (done_copy_out << 1); } @@ -484,35 +488,6 @@ */ /* - * __jbd2_log_space_left: Return the number of free blocks left in the journal. - * - * Called with the journal already locked. 
- * - * Called under j_state_lock - */ - -int __jbd2_log_space_left(journal_t *journal) -{ - int left = journal->j_free; - - /* assert_spin_locked(&journal->j_state_lock); */ - - /* - * Be pessimistic here about the number of those free blocks which - * might be required for log descriptor control blocks. - */ - -#define MIN_LOG_RESERVED_BLOCKS 32 /* Allow for rounding errors */ - - left -= MIN_LOG_RESERVED_BLOCKS; - - if (left <= 0) - return 0; - left -= (left >> 3); - return left; -} - -/* * Called with j_state_lock locked for writing. * Returns true if a transaction commit was started. */ @@ -564,20 +539,17 @@ } /* - * Force and wait upon a commit if the calling process is not within - * transaction. This is used for forcing out undo-protected data which contains - * bitmaps, when the fs is running out of space. - * - * We can only force the running transaction if we don't have an active handle; - * otherwise, we will deadlock. - * - * Returns true if a transaction was started. + * Force and wait any uncommitted transactions. We can only force the running + * transaction if we don't have an active handle, otherwise, we will deadlock. + * Returns: <0 in case of error, + * 0 if nothing to commit, + * 1 if transaction was successfully committed. 
*/ -int jbd2_journal_force_commit_nested(journal_t *journal) +static int __jbd2_journal_force_commit(journal_t *journal) { transaction_t *transaction = NULL; tid_t tid; - int need_to_start = 0; + int need_to_start = 0, ret = 0; read_lock(&journal->j_state_lock); if (journal->j_running_transaction && !current->journal_info) { @@ -588,16 +560,53 @@ transaction = journal->j_committing_transaction; if (!transaction) { + /* Nothing to commit */ read_unlock(&journal->j_state_lock); - return 0; /* Nothing to retry */ + return 0; } - tid = transaction->t_tid; read_unlock(&journal->j_state_lock); if (need_to_start) jbd2_log_start_commit(journal, tid); - jbd2_log_wait_commit(journal, tid); - return 1; + ret = jbd2_log_wait_commit(journal, tid); + if (!ret) + ret = 1; + + return ret; +} + +/** + * Force and wait upon a commit if the calling process is not within + * transaction. This is used for forcing out undo-protected data which contains + * bitmaps, when the fs is running out of space. + * + * @journal: journal to force + * Returns true if progress was made. + */ +int jbd2_journal_force_commit_nested(journal_t *journal) +{ + int ret; + + ret = __jbd2_journal_force_commit(journal); + return ret > 0; +} + +/** + * jbd2_journal_force_commit() - force any uncommitted transactions + * @journal: journal to force + * + * Caller wants unconditional commit. We can only force the running transaction + * if we don't have an active handle, otherwise, we will deadlock. 
+ */ +int jbd2_journal_force_commit(journal_t *journal) +{ + int ret; + + J_ASSERT(!current->journal_info); + ret = __jbd2_journal_force_commit(journal); + if (ret > 0) + ret = 0; + return ret; } /* @@ -684,7 +693,7 @@ read_lock(&journal->j_state_lock); #ifdef CONFIG_JBD2_DEBUG if (!tid_geq(journal->j_commit_request, tid)) { - printk(KERN_EMERG + printk(KERN_ERR "%s: error: j_commit_request=%d, tid=%d\n", __func__, journal->j_commit_request, tid); } @@ -692,18 +701,16 @@ while (tid_gt(tid, journal->j_commit_sequence)) { jbd_debug(1, "JBD2: want %d, j_commit_sequence=%d\n", tid, journal->j_commit_sequence); - wake_up(&journal->j_wait_commit); read_unlock(&journal->j_state_lock); + wake_up(&journal->j_wait_commit); wait_event(journal->j_wait_done_commit, !tid_gt(tid, journal->j_commit_sequence)); read_lock(&journal->j_state_lock); } read_unlock(&journal->j_state_lock); - if (unlikely(is_journal_aborted(journal))) { - printk(KERN_EMERG "journal commit I/O error\n"); + if (unlikely(is_journal_aborted(journal))) err = -EIO; - } return err; } @@ -798,7 +805,7 @@ * But we don't bother doing that, so there will be coherency problems with * mmaps of blockdevs which hold live JBD-controlled filesystems. 
*/ -struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) +struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) { struct buffer_head *bh; unsigned long long blocknr; @@ -817,7 +824,7 @@ set_buffer_uptodate(bh); unlock_buffer(bh); BUFFER_TRACE(bh, "return this buffer"); - return jbd2_journal_add_journal_head(bh); + return bh; } /* @@ -1069,11 +1076,10 @@ return NULL; init_waitqueue_head(&journal->j_wait_transaction_locked); - init_waitqueue_head(&journal->j_wait_logspace); init_waitqueue_head(&journal->j_wait_done_commit); - init_waitqueue_head(&journal->j_wait_checkpoint); init_waitqueue_head(&journal->j_wait_commit); init_waitqueue_head(&journal->j_wait_updates); + init_waitqueue_head(&journal->j_wait_reserved); mutex_init(&journal->j_barrier); mutex_init(&journal->j_checkpoint_mutex); spin_lock_init(&journal->j_revoke_lock); @@ -1083,6 +1089,7 @@ journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE); journal->j_min_batch_time = 0; journal->j_max_batch_time = 15000; /* 15ms */ + atomic_set(&journal->j_reserved_credits, 0); /* The journal is marked for error until we succeed with recovery! 
*/ journal->j_flags = JBD2_ABORT; @@ -1128,7 +1135,6 @@ { journal_t *journal = journal_init_common(); struct buffer_head *bh; - char *p; int n; if (!journal) @@ -1141,9 +1147,7 @@ journal->j_blk_offset = start; journal->j_maxlen = len; bdevname(journal->j_dev, journal->j_devname); - p = journal->j_devname; - while ((p = strchr(p, '/'))) - *p = '!'; + strreplace(journal->j_devname, '/', '!'); jbd2_stats_proc_init(journal); n = journal->j_blocksize / sizeof(journal_block_tag_t); journal->j_wbufsize = n; @@ -1195,10 +1199,7 @@ journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev; journal->j_inode = inode; bdevname(journal->j_dev, journal->j_devname); - p = journal->j_devname; - while ((p = strchr(p, '/'))) - *p = '!'; - p = journal->j_devname + strlen(journal->j_devname); + p = strreplace(journal->j_devname, '/', '!'); sprintf(p, "-%lu", journal->j_inode->i_ino); jbd_debug(1, "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n", @@ -1228,7 +1229,7 @@ goto out_err; } - bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); + bh = getblk_unmovable(journal->j_dev, blocknr, journal->j_blocksize); if (!bh) { printk(KERN_ERR "%s: Cannot get buffer for journal superblock\n", @@ -1456,7 +1457,7 @@ sb->s_errno = cpu_to_be32(journal->j_errno); read_unlock(&journal->j_state_lock); - jbd2_write_superblock(journal, WRITE_SYNC); + jbd2_write_superblock(journal, WRITE_FUA); } EXPORT_SYMBOL(jbd2_journal_update_sb_errno); @@ -1523,24 +1524,32 @@ goto out; } - if (JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM) && - JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { + if (jbd2_has_feature_csum2(journal) && + jbd2_has_feature_csum3(journal)) { + /* Can't have checksum v2 and v3 at the same time! 
*/ + printk(KERN_ERR "JBD2: Can't enable checksumming v2 and v3 " + "at the same time!\n"); + goto out; + } + + if (jbd2_journal_has_csum_v2or3_feature(journal) && + jbd2_has_feature_checksum(journal)) { /* Can't have checksum v1 and v2 on at the same time! */ - printk(KERN_ERR "JBD: Can't enable checksumming v1 and v2 " + printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 " "at the same time!\n"); goto out; } if (!jbd2_verify_csum_type(journal, sb)) { - printk(KERN_ERR "JBD: Unknown checksum type\n"); + printk(KERN_ERR "JBD2: Unknown checksum type\n"); goto out; } /* Load the checksum driver */ - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { + if (jbd2_journal_has_csum_v2or3_feature(journal)) { journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); if (IS_ERR(journal->j_chksum_driver)) { - printk(KERN_ERR "JBD: Cannot load crc32c driver.\n"); + printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n"); err = PTR_ERR(journal->j_chksum_driver); journal->j_chksum_driver = NULL; goto out; @@ -1549,12 +1558,13 @@ /* Check superblock checksum */ if (!jbd2_superblock_csum_verify(journal, sb)) { - printk(KERN_ERR "JBD: journal checksum error\n"); + printk(KERN_ERR "JBD2: journal checksum error\n"); + err = -EFSBADCRC; goto out; } /* Precompute checksum seed for all metadata */ - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) + if (jbd2_journal_has_csum_v2or3(journal)) journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, sizeof(sb->s_uuid)); @@ -1641,7 +1651,7 @@ printk(KERN_ERR "JBD2: journal transaction %u on %s " "is corrupt.\n", journal->j_failed_commit, journal->j_devname); - return -EIO; + return -EFSCORRUPTED; } /* OK, we've finished with the dynamic journal bits: @@ -1685,8 +1695,17 @@ while (journal->j_checkpoint_transactions != NULL) { spin_unlock(&journal->j_list_lock); mutex_lock(&journal->j_checkpoint_mutex); - jbd2_log_do_checkpoint(journal); + err = jbd2_log_do_checkpoint(journal); 
mutex_unlock(&journal->j_checkpoint_mutex); + /* + * If checkpointing failed, just free the buffers to avoid + * looping forever + */ + if (err) { + jbd2_journal_destroy_checkpoint(journal); + spin_lock(&journal->j_list_lock); + break; + } spin_lock(&journal->j_list_lock); } @@ -1713,8 +1732,7 @@ if (journal->j_proc_entry) jbd2_stats_proc_exit(journal); - if (journal->j_inode) - iput(journal->j_inode); + iput(journal->j_inode); if (journal->j_revoke) jbd2_journal_destroy_revoke(journal); if (journal->j_chksum_driver) @@ -1820,8 +1838,14 @@ if (!jbd2_journal_check_available_features(journal, compat, ro, incompat)) return 0; - /* Asking for checksumming v2 and v1? Only give them v2. */ - if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2 && + /* If enabling v2 checksums, turn on v3 instead */ + if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2) { + incompat &= ~JBD2_FEATURE_INCOMPAT_CSUM_V2; + incompat |= JBD2_FEATURE_INCOMPAT_CSUM_V3; + } + + /* Asking for checksumming v3 and v1? Only give them v3. 
*/ + if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V3 && compat & JBD2_FEATURE_COMPAT_CHECKSUM) compat &= ~JBD2_FEATURE_COMPAT_CHECKSUM; @@ -1830,8 +1854,8 @@ sb = journal->j_superblock; - /* If enabling v2 checksums, update superblock */ - if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V2)) { + /* If enabling v3 checksums, update superblock */ + if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) { sb->s_checksum_type = JBD2_CRC32C_CHKSUM; sb->s_feature_compat &= ~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM); @@ -1841,25 +1865,24 @@ journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); if (IS_ERR(journal->j_chksum_driver)) { - printk(KERN_ERR "JBD: Cannot load crc32c " + printk(KERN_ERR "JBD2: Cannot load crc32c " "driver.\n"); journal->j_chksum_driver = NULL; return 0; } - } - /* Precompute checksum seed for all metadata */ - if (JBD2_HAS_INCOMPAT_FEATURE(journal, - JBD2_FEATURE_INCOMPAT_CSUM_V2)) + /* Precompute checksum seed for all metadata */ journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, sizeof(sb->s_uuid)); + } } /* If enabling v1 checksums, downgrade superblock */ if (COMPAT_FEATURE_ON(JBD2_FEATURE_COMPAT_CHECKSUM)) sb->s_feature_incompat &= - ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2); + ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2 | + JBD2_FEATURE_INCOMPAT_CSUM_V3); sb->s_feature_compat |= cpu_to_be32(compat); sb->s_feature_ro_compat |= cpu_to_be32(ro); @@ -2184,16 +2207,20 @@ */ size_t journal_tag_bytes(journal_t *journal) { - journal_block_tag_t tag; - size_t x = 0; + size_t sz; + + if (jbd2_has_feature_csum3(journal)) + return sizeof(journal_block_tag3_t); + + sz = sizeof(journal_block_tag_t); - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) - x += sizeof(tag.t_checksum); + if (jbd2_has_feature_csum2(journal)) + sz += sizeof(__u16); - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) - return x + JBD2_TAG_SIZE64; + if (jbd2_has_feature_64bit(journal)) + return sz; else - return x + 
JBD2_TAG_SIZE32; + return sz - sizeof(__u32); } /* @@ -2333,7 +2360,7 @@ jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head", sizeof(struct journal_head), 0, /* offset */ - SLAB_TEMPORARY, /* flags */ + SLAB_TEMPORARY | SLAB_DESTROY_BY_RCU, NULL); /* ctor */ retval = 0; if (!jbd2_journal_head_cache) { @@ -2361,14 +2388,12 @@ #ifdef CONFIG_JBD2_DEBUG atomic_inc(&nr_journal_heads); #endif - ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS); + ret = kmem_cache_zalloc(jbd2_journal_head_cache, GFP_NOFS); if (!ret) { jbd_debug(1, "out of memory for journal_head\n"); pr_notice_ratelimited("ENOMEM in %s, retrying.\n", __func__); - while (!ret) { - yield(); - ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS); - } + ret = kmem_cache_zalloc(jbd2_journal_head_cache, + GFP_NOFS | __GFP_NOFAIL); } return ret; } @@ -2429,10 +2454,8 @@ struct journal_head *new_jh = NULL; repeat: - if (!buffer_jbd(bh)) { + if (!buffer_jbd(bh)) new_jh = journal_alloc_journal_head(); - memset(new_jh, 0, sizeof(*new_jh)); - } jbd_lock_bh_journal_head(bh); if (buffer_jbd(bh)) { @@ -2664,7 +2687,7 @@ #ifdef CONFIG_JBD2_DEBUG int n = atomic_read(&nr_journal_heads); if (n) - printk(KERN_EMERG "JBD2: leaked %d journal_heads!\n", n); + printk(KERN_ERR "JBD2: leaked %d journal_heads!\n", n); #endif jbd2_remove_jbd_stats_proc_entry(); jbd2_journal_destroy_caches();