--- zzzz-none-000/linux-3.10.107/fs/reiserfs/inode.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/fs/reiserfs/inode.c 2021-02-04 17:41:59.000000000 +0000 @@ -11,41 +11,48 @@ #include #include #include -#include +#include #include #include #include #include #include #include -#include +#include int reiserfs_commit_write(struct file *f, struct page *page, unsigned from, unsigned to); void reiserfs_evict_inode(struct inode *inode) { - /* We need blocks for transaction + (user+group) quota update (possibly delete) */ + /* + * We need blocks for transaction + (user+group) quota + * update (possibly delete) + */ int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb); struct reiserfs_transaction_handle th; - int depth; int err; if (!inode->i_nlink && !is_bad_inode(inode)) dquot_initialize(inode); - truncate_inode_pages(&inode->i_data, 0); + truncate_inode_pages_final(&inode->i_data); if (inode->i_nlink) goto no_delete; - depth = reiserfs_write_lock_once(inode->i_sb); + /* + * The = 0 happens when we abort creating a new inode + * for some reason like lack of space.. + * also handles bad_inode case + */ + if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { - /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */ - if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ reiserfs_delete_xattrs(inode); + reiserfs_write_lock(inode->i_sb); + if (journal_begin(&th, inode->i_sb, jbegin_count)) goto out; reiserfs_update_inode_transaction(inode); @@ -54,31 +61,43 @@ err = reiserfs_delete_object(&th, inode); - /* Do quota update inside a transaction for journaled quotas. We must do that - * after delete_object so that quota updates go into the same transaction as - * stat data deletion */ - if (!err) + /* + * Do quota update inside a transaction for journaled quotas. + * We must do that after delete_object so that quota updates + * go into the same transaction as stat data deletion + */ + if (!err) { + int depth = reiserfs_write_unlock_nested(inode->i_sb); dquot_free_inode(inode); + reiserfs_write_lock_nested(inode->i_sb, depth); + } - if (journal_end(&th, inode->i_sb, jbegin_count)) + if (journal_end(&th)) goto out; - /* check return value from reiserfs_delete_object after + /* + * check return value from reiserfs_delete_object after * ending the transaction */ if (err) goto out; - /* all items of file are deleted, so we can remove "save" link */ - remove_save_link(inode, 0 /* not truncate */ ); /* we can't do anything - * about an error here */ + /* + * all items of file are deleted, so we can remove + * "save" link + * we can't do anything about an error here + */ + remove_save_link(inode, 0 /* not truncate */); +out: + reiserfs_write_unlock(inode->i_sb); } else { /* no object items are in the tree */ ; } - out: - reiserfs_write_unlock_once(inode->i_sb, depth); - clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */ + + /* note this must go after the journal_end to prevent deadlock */ + clear_inode(inode); + dquot_drop(inode); inode->i_blocks = 0; return; @@ -100,8 +119,10 @@ key->key_length = length; } -/* take base of inode_key (it comes from inode always) (dirid, objectid) and version from an inode, set - offset and type of key */ +/* + * take base of inode_key (it comes from inode always) (dirid, objectid) + * and version from an inode, set offset and type of key + */ void make_cpu_key(struct cpu_key *key, struct inode *inode, loff_t offset, int type, int length) { @@ -111,9 +132,7 @@ length); } -// -// when key is 0, do not set version and short key -// +/* when key is 0, do not set version and short key */ inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key, int version, loff_t offset, int type, int length, @@ -129,43 +148,47 @@ set_le_ih_k_type(ih, type); put_ih_item_len(ih, length); /* set_ih_free_space (ih, 0); */ - // for directory items it is entry count, for directs and stat - // datas - 0xffff, for indirects - 0 + /* + * for directory items it is entry count, for directs and stat + * datas - 0xffff, for indirects - 0 + */ put_ih_entry_count(ih, entry_count); } -// -// FIXME: we might cache recently accessed indirect item +/* + * FIXME: we might cache recently accessed indirect item + * Ugh. Not too eager for that.... + * I cut the code until such time as I see a convincing argument (benchmark). + * I don't want a bloated inode struct..., and I don't like code complexity.... + */ -// Ugh. Not too eager for that.... -// I cut the code until such time as I see a convincing argument (benchmark). -// I don't want a bloated inode struct..., and I don't like code complexity.... - -/* cutting the code is fine, since it really isn't in use yet and is easy -** to add back in. But, Vladimir has a really good idea here. Think -** about what happens for reading a file. For each page, -** The VFS layer calls reiserfs_readpage, who searches the tree to find -** an indirect item. This indirect item has X number of pointers, where -** X is a big number if we've done the block allocation right. But, -** we only use one or two of these pointers during each call to readpage, -** needlessly researching again later on. -** -** The size of the cache could be dynamic based on the size of the file. -** -** I'd also like to see us cache the location the stat data item, since -** we are needlessly researching for that frequently. -** -** --chris -*/ +/* + * cutting the code is fine, since it really isn't in use yet and is easy + * to add back in. But, Vladimir has a really good idea here. Think + * about what happens for reading a file. For each page, + * The VFS layer calls reiserfs_readpage, who searches the tree to find + * an indirect item. This indirect item has X number of pointers, where + * X is a big number if we've done the block allocation right. But, + * we only use one or two of these pointers during each call to readpage, + * needlessly researching again later on. + * + * The size of the cache could be dynamic based on the size of the file. + * + * I'd also like to see us cache the location the stat data item, since + * we are needlessly researching for that frequently. + * + * --chris + */ -/* If this page has a file tail in it, and -** it was read in by get_block_create_0, the page data is valid, -** but tail is still sitting in a direct item, and we can't write to -** it. So, look through this page, and check all the mapped buffers -** to make sure they have valid block numbers. Any that don't need -** to be unmapped, so that __block_write_begin will correctly call -** reiserfs_get_block to convert the tail into an unformatted node -*/ +/* + * If this page has a file tail in it, and + * it was read in by get_block_create_0, the page data is valid, + * but tail is still sitting in a direct item, and we can't write to + * it. So, look through this page, and check all the mapped buffers + * to make sure they have valid block numbers. Any that don't need + * to be unmapped, so that __block_write_begin will correctly call + * reiserfs_get_block to convert the tail into an unformatted node + */ static inline void fix_tail_page_for_writing(struct page *page) { struct buffer_head *head, *next, *bh; @@ -183,8 +206,10 @@ } } -/* reiserfs_get_block does not need to allocate a block only if it has been - done already or non-hole position has been found in the indirect item */ +/* + * reiserfs_get_block does not need to allocate a block only if it has been + * done already or non-hole position has been found in the indirect item + */ static inline int allocation_needed(int retval, b_blocknr_t allocated, struct item_head *ih, __le32 * item, int pos_in_item) @@ -208,14 +233,16 @@ map_bh(bh, inode->i_sb, block); } -// -// files which were created in the earlier version can not be longer, -// than 2 gb -// +/* + * files which were created in the earlier version can not be longer, + * than 2 gb + */ static int file_capable(struct inode *inode, sector_t block) { - if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 || // it is new file. - block < (1 << (31 - inode->i_sb->s_blocksize_bits))) // old file, but 'block' is inside of 2gb + /* it is new file. */ + if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 || + /* old file, but 'block' is inside of 2gb */ + block < (1 << (31 - inode->i_sb->s_blocksize_bits))) return 1; return 0; @@ -225,7 +252,6 @@ struct inode *inode, struct treepath *path) { struct super_block *s = th->t_super; - int len = th->t_blocks_allocated; int err; BUG_ON(!th->t_trans_id); @@ -238,7 +264,7 @@ return 0; } reiserfs_update_sd(th, inode); - err = journal_end(th, s, len); + err = journal_end(th); if (!err) { err = journal_begin(th, s, JOURNAL_PER_BALANCE_CNT * 6); if (!err) @@ -247,14 +273,14 @@ return err; } -// it is called by get_block when create == 0. Returns block number -// for 'block'-th logical block of file. When it hits direct item it -// returns 0 (being called from bmap) or read direct item into piece -// of page (bh_result) - -// Please improve the english/clarity in the comment above, as it is -// hard to understand. - +/* + * it is called by get_block when create == 0. Returns block number + * for 'block'-th logical block of file. When it hits direct item it + * returns 0 (being called from bmap) or read direct item into piece + * of page (bh_result) + * Please improve the english/clarity in the comment above, as it is + * hard to understand. + */ static int _get_block_create_0(struct inode *inode, sector_t block, struct buffer_head *bh_result, int args) { @@ -270,7 +296,7 @@ int done = 0; unsigned long offset; - // prepare the key to look for the 'block'-th block of file + /* prepare the key to look for the 'block'-th block of file */ make_cpu_key(&key, inode, (loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY, 3); @@ -282,23 +308,28 @@ kunmap(bh_result->b_page); if (result == IO_ERROR) return -EIO; - // We do not return -ENOENT if there is a hole but page is uptodate, because it means - // That there is some MMAPED data associated with it that is yet to be written to disk. + /* + * We do not return -ENOENT if there is a hole but page is + * uptodate, because it means that there is some MMAPED data + * associated with it that is yet to be written to disk. + */ if ((args & GET_BLOCK_NO_HOLE) && !PageUptodate(bh_result->b_page)) { return -ENOENT; } return 0; } - // + bh = get_last_bh(&path); - ih = get_ih(&path); + ih = tp_item_head(&path); if (is_indirect_le_ih(ih)) { - __le32 *ind_item = (__le32 *) B_I_PITEM(bh, ih); + __le32 *ind_item = (__le32 *) ih_item_body(bh, ih); - /* FIXME: here we could cache indirect item or part of it in - the inode to avoid search_by_key in case of subsequent - access to file */ + /* + * FIXME: here we could cache indirect item or part of it in + * the inode to avoid search_by_key in case of subsequent + * access to file + */ blocknr = get_block_num(ind_item, path.pos_in_item); ret = 0; if (blocknr) { @@ -308,8 +339,12 @@ set_buffer_boundary(bh_result); } } else - // We do not return -ENOENT if there is a hole but page is uptodate, because it means - // That there is some MMAPED data associated with it that is yet to be written to disk. + /* + * We do not return -ENOENT if there is a hole but + * page is uptodate, because it means that there is + * some MMAPED data associated with it that is + * yet to be written to disk. + */ if ((args & GET_BLOCK_NO_HOLE) && !PageUptodate(bh_result->b_page)) { ret = -ENOENT; @@ -320,41 +355,45 @@ kunmap(bh_result->b_page); return ret; } - // requested data are in direct item(s) + /* requested data are in direct item(s) */ if (!(args & GET_BLOCK_READ_DIRECT)) { - // we are called by bmap. FIXME: we can not map block of file - // when it is stored in direct item(s) + /* + * we are called by bmap. FIXME: we can not map block of file + * when it is stored in direct item(s) + */ pathrelse(&path); if (p) kunmap(bh_result->b_page); return -ENOENT; } - /* if we've got a direct item, and the buffer or page was uptodate, - ** we don't want to pull data off disk again. skip to the - ** end, where we map the buffer and return + /* + * if we've got a direct item, and the buffer or page was uptodate, + * we don't want to pull data off disk again. skip to the + * end, where we map the buffer and return */ if (buffer_uptodate(bh_result)) { goto finished; } else /* - ** grab_tail_page can trigger calls to reiserfs_get_block on up to date - ** pages without any buffers. If the page is up to date, we don't want - ** read old data off disk. Set the up to date bit on the buffer instead - ** and jump to the end + * grab_tail_page can trigger calls to reiserfs_get_block on + * up to date pages without any buffers. If the page is up + * to date, we don't want read old data off disk. Set the up + * to date bit on the buffer instead and jump to the end */ if (!bh_result->b_page || PageUptodate(bh_result->b_page)) { set_buffer_uptodate(bh_result); goto finished; } - // read file tail into part of page + /* read file tail into part of page */ offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1); copy_item_head(&tmp_ih, ih); - /* we only want to kmap if we are reading the tail into the page. - ** this is not the common case, so we don't kmap until we are - ** sure we need to. But, this means the item might move if - ** kmap schedules + /* + * we only want to kmap if we are reading the tail into the page. + * this is not the common case, so we don't kmap until we are + * sure we need to. But, this means the item might move if + * kmap schedules */ if (!p) p = (char *)kmap(bh_result->b_page); @@ -365,10 +404,11 @@ if (!is_direct_le_ih(ih)) { BUG(); } - /* make sure we don't read more bytes than actually exist in - ** the file. This can happen in odd cases where i_size isn't - ** correct, and when direct item padding results in a few - ** extra bytes at the end of the direct item + /* + * make sure we don't read more bytes than actually exist in + * the file. This can happen in odd cases where i_size isn't + * correct, and when direct item padding results in a few + * extra bytes at the end of the direct item */ if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size) break; @@ -380,40 +420,43 @@ } else { chars = ih_item_len(ih) - path.pos_in_item; } - memcpy(p, B_I_PITEM(bh, ih) + path.pos_in_item, chars); + memcpy(p, ih_item_body(bh, ih) + path.pos_in_item, chars); if (done) break; p += chars; + /* + * we done, if read direct item is not the last item of + * node FIXME: we could try to check right delimiting key + * to see whether direct item continues in the right + * neighbor or rely on i_size + */ if (PATH_LAST_POSITION(&path) != (B_NR_ITEMS(bh) - 1)) - // we done, if read direct item is not the last item of - // node FIXME: we could try to check right delimiting key - // to see whether direct item continues in the right - // neighbor or rely on i_size break; - // update key to look for the next piece + /* update key to look for the next piece */ set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + chars); result = search_for_position_by_key(inode->i_sb, &key, &path); if (result != POSITION_FOUND) - // i/o error most likely + /* i/o error most likely */ break; bh = get_last_bh(&path); - ih = get_ih(&path); + ih = tp_item_head(&path); } while (1); flush_dcache_page(bh_result->b_page); kunmap(bh_result->b_page); - finished: +finished: pathrelse(&path); if (result == IO_ERROR) return -EIO; - /* this buffer has valid data, but isn't valid for io. mapping it to + /* + * this buffer has valid data, but isn't valid for io. mapping it to * block #0 tells the rest of reiserfs it just has a tail in it */ map_bh(bh_result, inode->i_sb, 0); @@ -421,8 +464,10 @@ return 0; } -// this is called to create file map. So, _get_block_create_0 will not -// read direct item +/* + * this is called to create file map. So, _get_block_create_0 will not + * read direct item + */ static int reiserfs_bmap(struct inode *inode, sector_t block, struct buffer_head *bh_result, int create) { @@ -436,22 +481,23 @@ return 0; } -/* special version of get_block that is only used by grab_tail_page right -** now. It is sent to __block_write_begin, and when you try to get a -** block past the end of the file (or a block from a hole) it returns -** -ENOENT instead of a valid buffer. __block_write_begin expects to -** be able to do i/o on the buffers returned, unless an error value -** is also returned. -** -** So, this allows __block_write_begin to be used for reading a single block -** in a page. Where it does not produce a valid page for holes, or past the -** end of the file. This turns out to be exactly what we need for reading -** tails for conversion. -** -** The point of the wrapper is forcing a certain value for create, even -** though the VFS layer is calling this function with create==1. If you -** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block, -** don't use this function. +/* + * special version of get_block that is only used by grab_tail_page right + * now. It is sent to __block_write_begin, and when you try to get a + * block past the end of the file (or a block from a hole) it returns + * -ENOENT instead of a valid buffer. __block_write_begin expects to + * be able to do i/o on the buffers returned, unless an error value + * is also returned. + * + * So, this allows __block_write_begin to be used for reading a single block + * in a page. Where it does not produce a valid page for holes, or past the + * end of the file. This turns out to be exactly what we need for reading + * tails for conversion. + * + * The point of the wrapper is forcing a certain value for create, even + * though the VFS layer is calling this function with create==1. If you + * don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block, + * don't use this function. */ static int reiserfs_get_block_create_0(struct inode *inode, sector_t block, struct buffer_head *bh_result, @@ -460,8 +506,10 @@ return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE); } -/* This is special helper for reiserfs_get_block in case we are executing - direct_IO request. */ +/* + * This is special helper for reiserfs_get_block in case we are executing + * direct_IO request. + */ static int reiserfs_get_blocks_direct_io(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, @@ -471,9 +519,11 @@ bh_result->b_page = NULL; - /* We set the b_size before reiserfs_get_block call since it is - referenced in convert_tail_for_hole() that may be called from - reiserfs_get_block() */ + /* + * We set the b_size before reiserfs_get_block call since it is + * referenced in convert_tail_for_hole() that may be called from + * reiserfs_get_block() + */ bh_result->b_size = (1 << inode->i_blkbits); ret = reiserfs_get_block(inode, iblock, bh_result, @@ -483,14 +533,18 @@ /* don't allow direct io onto tail pages */ if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) { - /* make sure future calls to the direct io funcs for this offset - ** in the file fail by unmapping the buffer + /* + * make sure future calls to the direct io funcs for this + * offset in the file fail by unmapping the buffer */ clear_buffer_mapped(bh_result); ret = -EINVAL; } - /* Possible unpacked tail. Flush the data before pages have - disappeared */ + + /* + * Possible unpacked tail. Flush the data before pages have + * disappeared + */ if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) { int err; @@ -504,20 +558,20 @@ if (err < 0) ret = err; } - out: +out: return ret; } /* -** helper function for when reiserfs_get_block is called for a hole -** but the file tail is still in a direct item -** bh_result is the buffer head for the hole -** tail_offset is the offset of the start of the tail in the file -** -** This calls prepare_write, which will start a new transaction -** you should not be in a transaction, or have any paths held when you -** call this. -*/ + * helper function for when reiserfs_get_block is called for a hole + * but the file tail is still in a direct item + * bh_result is the buffer head for the hole + * tail_offset is the offset of the start of the tail in the file + * + * This calls prepare_write, which will start a new transaction + * you should not be in a transaction, or have any paths held when you + * call this. + */ static int convert_tail_for_hole(struct inode *inode, struct buffer_head *bh_result, loff_t tail_offset) @@ -537,9 +591,10 @@ tail_end = (tail_start | (bh_result->b_size - 1)) + 1; index = tail_offset >> PAGE_CACHE_SHIFT; - /* hole_page can be zero in case of direct_io, we are sure - that we cannot get here if we write with O_DIRECT into - tail page */ + /* + * hole_page can be zero in case of direct_io, we are sure + * that we cannot get here if we write with O_DIRECT into tail page + */ if (!hole_page || index != hole_page->index) { tail_page = grab_cache_page(inode->i_mapping, index); retval = -ENOMEM; @@ -550,14 +605,15 @@ tail_page = hole_page; } - /* we don't have to make sure the conversion did not happen while - ** we were locking the page because anyone that could convert - ** must first take i_mutex. - ** - ** We must fix the tail page for writing because it might have buffers - ** that are mapped, but have a block number of 0. This indicates tail - ** data that has been read directly into the page, and - ** __block_write_begin won't trigger a get_block in this case. + /* + * we don't have to make sure the conversion did not happen while + * we were locking the page because anyone that could convert + * must first take i_mutex. + * + * We must fix the tail page for writing because it might have buffers + * that are mapped, but have a block number of 0. This indicates tail + * data that has been read directly into the page, and + * __block_write_begin won't trigger a get_block in this case. */ fix_tail_page_for_writing(tail_page); retval = __reiserfs_write_begin(tail_page, tail_start, @@ -570,12 +626,12 @@ retval = reiserfs_commit_write(NULL, tail_page, tail_start, tail_end); - unlock: +unlock: if (tail_page != hole_page) { unlock_page(tail_page); page_cache_release(tail_page); } - out: +out: return retval; } @@ -601,7 +657,8 @@ struct buffer_head *bh_result, int create) { int repeat, retval = 0; - b_blocknr_t allocated_block_nr = 0; // b_blocknr_t is (unsigned) 32 bit int + /* b_blocknr_t is (unsigned) 32 bit int*/ + b_blocknr_t allocated_block_nr = 0; INITIALIZE_PATH(path); int pos_in_item; struct cpu_key key; @@ -610,14 +667,15 @@ __le32 *item; int done; int fs_gen; - int lock_depth; struct reiserfs_transaction_handle *th = NULL; - /* space reserved in transaction batch: - . 3 balancings in direct->indirect conversion - . 1 block involved into reiserfs_update_sd() - XXX in practically impossible worst case direct2indirect() - can incur (much) more than 3 balancings. - quota update for user, group */ + /* + * space reserved in transaction batch: + * . 3 balancings in direct->indirect conversion + * . 1 block involved into reiserfs_update_sd() + * XXX in practically impossible worst case direct2indirect() + * can incur (much) more than 3 balancings. + * quota update for user, group + */ int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb); @@ -626,25 +684,27 @@ loff_t new_offset = (((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1; - lock_depth = reiserfs_write_lock_once(inode->i_sb); + reiserfs_write_lock(inode->i_sb); version = get_inode_item_key_version(inode); if (!file_capable(inode, block)) { - reiserfs_write_unlock_once(inode->i_sb, lock_depth); + reiserfs_write_unlock(inode->i_sb); return -EFBIG; } - /* if !create, we aren't changing the FS, so we don't need to - ** log anything, so we don't need to start a transaction + /* + * if !create, we aren't changing the FS, so we don't need to + * log anything, so we don't need to start a transaction */ if (!(create & GET_BLOCK_CREATE)) { int ret; /* find number of block-th logical block of the file */ ret = _get_block_create_0(inode, block, bh_result, create | GET_BLOCK_READ_DIRECT); - reiserfs_write_unlock_once(inode->i_sb, lock_depth); + reiserfs_write_unlock(inode->i_sb); return ret; } + /* * if we're already in a transaction, make sure to close * any new transactions we start in this func @@ -653,8 +713,10 @@ reiserfs_transaction_running(inode->i_sb)) dangle = 0; - /* If file is of such a size, that it might have a tail and tails are enabled - ** we should mark it as possibly needing tail packing on close + /* + * If file is of such a size, that it might have a tail and + * tails are enabled we should mark it as possibly needing + * tail packing on close */ if ((have_large_tails(inode->i_sb) && inode->i_size < i_block_size(inode) * 4) @@ -665,7 +727,7 @@ /* set the key of the first byte in the 'block'-th block of file */ make_cpu_key(&key, inode, new_offset, TYPE_ANY, 3 /*key length */ ); if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) { - start_trans: +start_trans: th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count); if (!th) { retval = -ENOMEM; @@ -673,7 +735,7 @@ } reiserfs_update_inode_transaction(inode); } - research: +research: retval = search_for_position_by_key(inode->i_sb, &key, &path); if (retval == IO_ERROR) { @@ -682,8 +744,8 @@ } bh = get_last_bh(&path); - ih = get_ih(&path); - item = get_item(&path); + ih = tp_item_head(&path); + item = tp_item_body(&path); pos_in_item = path.pos_in_item; fs_gen = get_generation(inode->i_sb); @@ -701,11 +763,12 @@ _allocate_block(th, block, inode, &allocated_block_nr, &path, create); + /* + * restart the transaction to give the journal a chance to free + * some blocks. releases the path, so we have to go back to + * research if we succeed on the second try + */ if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) { - /* restart the transaction to give the journal a chance to free - ** some blocks. releases the path, so we have to go back to - ** research if we succeed on the second try - */ SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1; retval = restart_transaction(th, inode, &path); if (retval) @@ -732,9 +795,11 @@ if (indirect_item_found(retval, ih)) { b_blocknr_t unfm_ptr; - /* 'block'-th block is in the file already (there is - corresponding cell in some indirect item). But it may be - zero unformatted node pointer (hole) */ + /* + * 'block'-th block is in the file already (there is + * corresponding cell in some indirect item). But it may be + * zero unformatted node pointer (hole) + */ unfm_ptr = get_block_num(item, pos_in_item); if (unfm_ptr == 0) { /* use allocated block to plug the hole */ @@ -751,7 +816,7 @@ reiserfs_add_ordered_list(inode, bh_result); put_block_num(item, pos_in_item, allocated_block_nr); unfm_ptr = allocated_block_nr; - journal_mark_dirty(th, inode->i_sb, bh); + journal_mark_dirty(th, bh); reiserfs_update_sd(th, inode); } set_block_dev_mapped(bh_result, unfm_ptr, inode); @@ -760,11 +825,12 @@ if (!dangle && th) retval = reiserfs_end_persistent_transaction(th); - reiserfs_write_unlock_once(inode->i_sb, lock_depth); + reiserfs_write_unlock(inode->i_sb); - /* the item was found, so new blocks were not added to the file - ** there is no need to make sure the inode is updated with this - ** transaction + /* + * the item was found, so new blocks were not added to the file + * there is no need to make sure the inode is updated with this + * transaction */ return retval; } @@ -774,9 +840,11 @@ goto start_trans; } - /* desired position is not found or is in the direct item. We have - to append file with holes up to 'block'-th block converting - direct items to indirect one if necessary */ + /* + * desired position is not found or is in the direct item. We have + * to append file with holes up to 'block'-th block converting + * direct items to indirect one if necessary + */ done = 0; do { if (is_statdata_le_ih(ih)) { @@ -788,16 +856,18 @@ TYPE_INDIRECT, UNFM_P_SIZE, 0 /* free_space */ ); + /* + * we are going to add 'block'-th block to the file. + * Use allocated block for that + */ if (cpu_key_k_offset(&key) == 1) { - /* we are going to add 'block'-th block to the file. Use - allocated block for that */ unp = cpu_to_le32(allocated_block_nr); set_block_dev_mapped(bh_result, allocated_block_nr, inode); set_buffer_new(bh_result); done = 1; } - tmp_key = key; // ;) + tmp_key = key; /* ;) */ set_cpu_key_k_offset(&tmp_key, 1); PATH_LAST_POSITION(&path)++; @@ -807,9 +877,12 @@ if (retval) { reiserfs_free_block(th, inode, allocated_block_nr, 1); - goto failure; // retval == -ENOSPC, -EDQUOT or -EIO or -EEXIST + /* + * retval == -ENOSPC, -EDQUOT or -EIO + * or -EEXIST + */ + goto failure; } - //mark_tail_converted (inode); } else if (is_direct_le_ih(ih)) { /* direct item has to be converted */ loff_t tail_offset; @@ -817,18 +890,24 @@ tail_offset = ((le_ih_k_offset(ih) - 1) & ~(inode->i_sb->s_blocksize - 1)) + 1; + + /* + * direct item we just found fits into block we have + * to map. Convert it into unformatted node: use + * bh_result for the conversion + */ if (tail_offset == cpu_key_k_offset(&key)) { - /* direct item we just found fits into block we have - to map. Convert it into unformatted node: use - bh_result for the conversion */ set_block_dev_mapped(bh_result, allocated_block_nr, inode); unbh = bh_result; done = 1; } else { - /* we have to padd file tail stored in direct item(s) - up to block size and convert it to unformatted - node. FIXME: this should also get into page cache */ + /* + * we have to pad file tail stored in direct + * item(s) up to block size and convert it + * to unformatted node. FIXME: this should + * also get into page cache + */ pathrelse(&path); /* @@ -857,7 +936,10 @@ inode->i_ino, retval); if (allocated_block_nr) { - /* the bitmap, the super, and the stat data == 3 */ + /* + * the bitmap, the super, + * and the stat data == 3 + */ if (!th) th = reiserfs_persistent_transaction(inode->i_sb, 3); if (th) @@ -879,43 +961,57 @@ allocated_block_nr, 1); goto failure; } - /* it is important the set_buffer_uptodate is done after - ** the direct2indirect. The buffer might contain valid - ** data newer than the data on disk (read by readpage, changed, - ** and then sent here by writepage). direct2indirect needs - ** to know if unbh was already up to date, so it can decide - ** if the data in unbh needs to be replaced with data from - ** the disk + /* + * it is important the set_buffer_uptodate is done + * after the direct2indirect. The buffer might + * contain valid data newer than the data on disk + * (read by readpage, changed, and then sent here by + * writepage). direct2indirect needs to know if unbh + * was already up to date, so it can decide if the + * data in unbh needs to be replaced with data from + * the disk */ set_buffer_uptodate(unbh); - /* unbh->b_page == NULL in case of DIRECT_IO request, this means - buffer will disappear shortly, so it should not be added to + /* + * unbh->b_page == NULL in case of DIRECT_IO request, + * this means buffer will disappear shortly, so it + * should not be added to */ if (unbh->b_page) { - /* we've converted the tail, so we must - ** flush unbh before the transaction commits + /* + * we've converted the tail, so we must + * flush unbh before the transaction commits */ reiserfs_add_tail_list(inode, unbh); - /* mark it dirty now to prevent commit_write from adding - ** this buffer to the inode's dirty buffer list + /* + * mark it dirty now to prevent commit_write + * from adding this buffer to the inode's + * dirty buffer list */ /* - * AKPM: changed __mark_buffer_dirty to mark_buffer_dirty(). - * It's still atomic, but it sets the page dirty too, - * which makes it eligible for writeback at any time by the - * VM (which was also the case with __mark_buffer_dirty()) + * AKPM: changed __mark_buffer_dirty to + * mark_buffer_dirty(). It's still atomic, + * but it sets the page dirty too, which makes + * it eligible for writeback at any time by the + * VM (which was also the case with + * __mark_buffer_dirty()) */ mark_buffer_dirty(unbh); } } else { - /* append indirect item with holes if needed, when appending - pointer to 'block'-th block use block, which is already - allocated */ + /* + * append indirect item with holes if needed, when + * appending pointer to 'block'-th block use block, + * which is already allocated + */ struct cpu_key tmp_key; - unp_t unf_single = 0; // We use this in case we need to allocate only - // one block which is a fastpath + /* + * We use this in case we need to allocate + * only one block which is a fastpath + */ + unp_t unf_single = 0; unp_t *un; __u64 max_to_insert = MAX_ITEM_LEN(inode->i_sb->s_blocksize) / @@ -924,14 +1020,17 @@ RFALSE(pos_in_item != ih_item_len(ih) / UNFM_P_SIZE, "vs-804: invalid position for append"); - /* indirect item has to be appended, set up key of that position */ + /* + * indirect item has to be appended, + * set up key of that position + * (key type is unimportant) + */ make_cpu_key(&tmp_key, inode, le_key_k_offset(version, - &(ih->ih_key)) + + &ih->ih_key) + op_bytes_number(ih, inode->i_sb->s_blocksize), - //pos_in_item * inode->i_sb->s_blocksize, - TYPE_INDIRECT, 3); // key type is unimportant + TYPE_INDIRECT, 3); RFALSE(cpu_key_k_offset(&tmp_key) > cpu_key_k_offset(&key), "green-805: invalid offset"); @@ -952,8 +1051,10 @@ } } if (blocks_needed <= max_to_insert) { - /* we are going to add target block to the file. Use allocated - block for that */ + /* + * we are going to add target block to + * the file. Use allocated block for that + */ un[blocks_needed - 1] = cpu_to_le32(allocated_block_nr); set_block_dev_mapped(bh_result, @@ -962,8 +1063,11 @@ done = 1; } else { /* paste hole to the indirect item */ - /* If kmalloc failed, max_to_insert becomes zero and it means we - only have space for one block */ + /* + * If kmalloc failed, max_to_insert becomes + * zero and it means we only have space for + * one block + */ blocks_needed = max_to_insert ? max_to_insert : 1; } @@ -982,9 +1086,12 @@ goto failure; } if (!done) { - /* We need to mark new file size in case this function will be - interrupted/aborted later on. And we may do this only for - holes. */ + /* + * We need to mark new file size in case + * this function will be interrupted/aborted + * later on. And we may do this only for + * holes. + */ inode->i_size += inode->i_sb->s_blocksize * blocks_needed; } @@ -993,13 +1100,13 @@ if (done == 1) break; - /* this loop could log more blocks than we had originally asked - ** for. So, we have to allow the transaction to end if it is - ** too big or too full. Update the inode so things are - ** consistent if we crash before the function returns - ** - ** release the path so that anybody waiting on the path before - ** ending their transaction will be able to continue. + /* + * this loop could log more blocks than we had originally + * asked for. So, we have to allow the transaction to end + * if it is too big or too full. Update the inode so things + * are consistent if we crash before the function returns + * release the path so that anybody waiting on the path before + * ending their transaction will be able to continue. */ if (journal_transaction_should_end(th, th->t_blocks_allocated)) { retval = restart_transaction(th, inode, &path); @@ -1011,11 +1118,7 @@ * long time. reschedule if needed and also release the write * lock for others. */ - if (need_resched()) { - reiserfs_write_unlock_once(inode->i_sb, lock_depth); - schedule(); - lock_depth = reiserfs_write_lock_once(inode->i_sb); - } + reiserfs_cond_resched(inode->i_sb); retval = search_for_position_by_key(inode->i_sb, &key, &path); if (retval == IO_ERROR) { @@ -1033,14 +1136,14 @@ goto failure; } bh = get_last_bh(&path); - ih = get_ih(&path); - item = get_item(&path); + ih = tp_item_head(&path); + item = tp_item_body(&path); pos_in_item = path.pos_in_item; } while (1); retval = 0; - failure: +failure: if (th && (!dangle || (retval && !th->t_trans_id))) { int err; if (th->t_trans_id) @@ -1050,7 +1153,7 @@ retval = err; } - reiserfs_write_unlock_once(inode->i_sb, lock_depth); + reiserfs_write_unlock(inode->i_sb); reiserfs_check_path(&path); return retval; } @@ -1062,8 +1165,10 @@ return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block); } -/* Compute real number of used bytes by file - * Following three functions can go away when we'll have enough space in stat item +/* + * Compute real number of used bytes by file + * Following three functions can go away when we'll have enough space in + * stat item */ static int real_space_diff(struct inode *inode, int sd_size) { @@ -1073,13 +1178,14 @@ if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) return sd_size; - /* End of file is also in full block with indirect reference, so round - ** up to the next block. - ** - ** there is just no way to know if the tail is actually packed - ** on the file, so we have to assume it isn't. When we pack the - ** tail, we add 4 bytes to pretend there really is an unformatted - ** node pointer + /* + * End of file is also in full block with indirect reference, so round + * up to the next block. + * + * there is just no way to know if the tail is actually packed + * on the file, so we have to assume it isn't. When we pack the + * tail, we add 4 bytes to pretend there really is an unformatted + * node pointer */ bytes = ((inode->i_size + @@ -1110,36 +1216,36 @@ bytes += (loff_t) 511; } - /* files from before the quota patch might i_blocks such that - ** bytes < real_space. Deal with that here to prevent it from - ** going negative. + /* + * files from before the quota patch might i_blocks such that + * bytes < real_space. Deal with that here to prevent it from + * going negative. */ if (bytes < real_space) return 0; return (bytes - real_space) >> 9; } -// -// BAD: new directories have stat data of new type and all other items -// of old type. Version stored in the inode says about body items, so -// in update_stat_data we can not rely on inode, but have to check -// item version directly -// +/* + * BAD: new directories have stat data of new type and all other items + * of old type. Version stored in the inode says about body items, so + * in update_stat_data we can not rely on inode, but have to check + * item version directly + */ -// called by read_locked_inode +/* called by read_locked_inode */ static void init_inode(struct inode *inode, struct treepath *path) { struct buffer_head *bh; struct item_head *ih; __u32 rdev; - //int version = ITEM_VERSION_1; bh = PATH_PLAST_BUFFER(path); - ih = PATH_PITEM_HEAD(path); + ih = tp_item_head(path); - copy_key(INODE_PKEY(inode), &(ih->ih_key)); + copy_key(INODE_PKEY(inode), &ih->ih_key); - INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list)); + INIT_LIST_HEAD(&REISERFS_I(inode)->i_prealloc_list); REISERFS_I(inode)->i_flags = 0; REISERFS_I(inode)->i_prealloc_block = 0; REISERFS_I(inode)->i_prealloc_count = 0; @@ -1149,7 +1255,7 @@ if (stat_data_v1(ih)) { struct stat_data_v1 *sd = - (struct stat_data_v1 *)B_I_PITEM(bh, ih); + (struct stat_data_v1 *)ih_item_body(bh, ih); unsigned long blocks; set_inode_item_key_version(inode, KEY_FORMAT_3_5); @@ -1170,20 +1276,26 @@ inode->i_generation = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); blocks = (inode->i_size + 511) >> 9; blocks = _ROUND_UP(blocks, inode->i_sb->s_blocksize >> 9); + + /* + * there was a bug in <=3.5.23 when i_blocks could take + * negative values. Starting from 3.5.17 this value could + * even be stored in stat data. For such files we set + * i_blocks based on file size. Just 2 notes: this can be + * wrong for sparse files. On-disk value will be only + * updated if file's inode will ever change + */ if (inode->i_blocks > blocks) { - // there was a bug in <=3.5.23 when i_blocks could take negative - // values. Starting from 3.5.17 this value could even be stored in - // stat data. For such files we set i_blocks based on file - // size. Just 2 notes: this can be wrong for sparce files. On-disk value will be - // only updated if file's inode will ever change inode->i_blocks = blocks; } rdev = sd_v1_rdev(sd); REISERFS_I(inode)->i_first_direct_byte = sd_v1_first_direct_byte(sd); - /* an early bug in the quota code can give us an odd number for the - ** block count. This is incorrect, fix it here. + + /* + * an early bug in the quota code can give us an odd + * number for the block count. This is incorrect, fix it here. */ if (inode->i_blocks & 1) { inode->i_blocks++; @@ -1191,13 +1303,17 @@ inode_set_bytes(inode, to_real_used_space(inode, inode->i_blocks, SD_V1_SIZE)); - /* nopack is initially zero for v1 objects. For v2 objects, - nopack is initialised from sd_attrs */ + /* + * nopack is initially zero for v1 objects. For v2 objects, + * nopack is initialised from sd_attrs + */ REISERFS_I(inode)->i_flags &= ~i_nopack_mask; } else { - // new stat data found, but object may have old items - // (directories and symlinks) - struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih); + /* + * new stat data found, but object may have old items + * (directories and symlinks) + */ + struct stat_data *sd = (struct stat_data *)ih_item_body(bh, ih); inode->i_mode = sd_v2_mode(sd); set_nlink(inode, sd_v2_nlink(sd)); @@ -1227,8 +1343,10 @@ inode_set_bytes(inode, to_real_used_space(inode, inode->i_blocks, SD_V2_SIZE)); - /* read persistent inode attributes from sd and initialise - generic inode flags from them */ + /* + * read persistent inode attributes from sd and initialise + * generic inode flags from them + */ REISERFS_I(inode)->i_attrs = sd_v2_attrs(sd); sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode); } @@ -1251,7 +1369,7 @@ } } -// update new stat data with inode fields +/* update new stat data with inode fields */ static void inode2sd(void *sd, struct inode *inode, loff_t size) { struct stat_data *sd_v2 = (struct stat_data *)sd; @@ -1275,7 +1393,7 @@ set_sd_v2_attrs(sd_v2, flags); } -// used to copy inode's fields to old stat data +/* used to copy inode's fields to old stat data */ static void inode2sd_v1(void *sd, struct inode *inode, loff_t size) { struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd; @@ -1294,14 +1412,15 @@ else set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE)); - // Sigh. i_first_direct_byte is back + /* Sigh. i_first_direct_byte is back */ set_sd_v1_first_direct_byte(sd_v1, REISERFS_I(inode)->i_first_direct_byte); } -/* NOTE, you must prepare the buffer head before sending it here, -** and then log it after the call -*/ +/* + * NOTE, you must prepare the buffer head before sending it here, + * and then log it after the call + */ static void update_stat_data(struct treepath *path, struct inode *inode, loff_t size) { @@ -1309,17 +1428,17 @@ struct item_head *ih; bh = PATH_PLAST_BUFFER(path); - ih = PATH_PITEM_HEAD(path); + ih = tp_item_head(path); if (!is_statdata_le_ih(ih)) reiserfs_panic(inode->i_sb, "vs-13065", "key %k, found item %h", INODE_PKEY(inode), ih); + /* path points to old stat data */ if (stat_data_v1(ih)) { - // path points to old stat data - inode2sd_v1(B_I_PITEM(bh, ih), inode, size); + inode2sd_v1(ih_item_body(bh, ih), inode, size); } else { - inode2sd(B_I_PITEM(bh, ih), inode, size); + inode2sd(ih_item_body(bh, ih), inode, size); } return; @@ -1337,7 +1456,8 @@ BUG_ON(!th->t_trans_id); - make_cpu_key(&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3); //key type is unimportant + /* key type is unimportant */ + make_cpu_key(&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3); for (;;) { int pos; @@ -1365,45 +1485,48 @@ return; } - /* sigh, prepare_for_journal might schedule. When it schedules the - ** FS might change. We have to detect that, and loop back to the - ** search if the stat data item has moved + /* + * sigh, prepare_for_journal might schedule. When it + * schedules the FS might change. We have to detect that, + * and loop back to the search if the stat data item has moved */ bh = get_last_bh(&path); - ih = get_ih(&path); + ih = tp_item_head(&path); copy_item_head(&tmp_ih, ih); fs_gen = get_generation(inode->i_sb); reiserfs_prepare_for_journal(inode->i_sb, bh, 1); + + /* Stat_data item has been moved after scheduling. */ if (fs_changed(fs_gen, inode->i_sb) && item_moved(&tmp_ih, &path)) { reiserfs_restore_prepared_buffer(inode->i_sb, bh); - continue; /* Stat_data item has been moved after scheduling. */ + continue; } break; } update_stat_data(&path, inode, size); - journal_mark_dirty(th, th->t_super, bh); + journal_mark_dirty(th, bh); pathrelse(&path); return; } -/* reiserfs_read_locked_inode is called to read the inode off disk, and it -** does a make_bad_inode when things go wrong. But, we need to make sure -** and clear the key in the private portion of the inode, otherwise a -** corresponding iput might try to delete whatever object the inode last -** represented. -*/ +/* + * reiserfs_read_locked_inode is called to read the inode off disk, and it + * does a make_bad_inode when things go wrong. But, we need to make sure + * and clear the key in the private portion of the inode, otherwise a + * corresponding iput might try to delete whatever object the inode last + * represented. + */ static void reiserfs_make_bad_inode(struct inode *inode) { memset(INODE_PKEY(inode), 0, KEY_SIZE); make_bad_inode(inode); } -// -// initially this function was derived from minix or ext2's analog and -// evolved as the prototype did -// - +/* + * initially this function was derived from minix or ext2's analog and + * evolved as the prototype did + */ int reiserfs_init_locked_inode(struct inode *inode, void *p) { struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p; @@ -1412,8 +1535,10 @@ return 0; } -/* looks for stat data in the tree, and fills up the fields of in-core - inode stat data fields */ +/* + * looks for stat data in the tree, and fills up the fields of in-core + * inode stat data fields + */ void reiserfs_read_locked_inode(struct inode *inode, struct reiserfs_iget_args *args) { @@ -1424,8 +1549,10 @@ dirino = args->dirid; - /* set version 1, version 2 could be used too, because stat data - key is the same in both versions */ + /* + * set version 1, version 2 could be used too, because stat data + * key is the same in both versions + */ key.version = KEY_FORMAT_3_5; key.on_disk_key.k_dir_id = dirino; key.on_disk_key.k_objectid = inode->i_ino; @@ -1441,8 +1568,9 @@ reiserfs_make_bad_inode(inode); return; } + + /* a stale NFS handle can trigger this without it being an error */ if (retval != ITEM_FOUND) { - /* a stale NFS handle can trigger this without it being an error */ pathrelse(&path_to_sd); reiserfs_make_bad_inode(inode); clear_nlink(inode); @@ -1451,20 +1579,25 @@ init_inode(inode, &path_to_sd); - /* It is possible that knfsd is trying to access inode of a file - that is being removed from the disk by some other thread. As we - update sd on unlink all that is required is to check for nlink - here. This bug was first found by Sizif when debugging - SquidNG/Butterfly, forgotten, and found again after Philippe - Gramoulle reproduced it. - - More logical fix would require changes in fs/inode.c:iput() to - remove inode from hash-table _after_ fs cleaned disk stuff up and - in iget() to return NULL if I_FREEING inode is found in - hash-table. */ - /* Currently there is one place where it's ok to meet inode with - nlink==0: processing of open-unlinked and half-truncated files - during mount (fs/reiserfs/super.c:finish_unfinished()). */ + /* + * It is possible that knfsd is trying to access inode of a file + * that is being removed from the disk by some other thread. As we + * update sd on unlink all that is required is to check for nlink + * here. This bug was first found by Sizif when debugging + * SquidNG/Butterfly, forgotten, and found again after Philippe + * Gramoulle reproduced it. + + * More logical fix would require changes in fs/inode.c:iput() to + * remove inode from hash-table _after_ fs cleaned disk stuff up and + * in iget() to return NULL if I_FREEING inode is found in + * hash-table. + */ + + /* + * Currently there is one place where it's ok to meet inode with + * nlink==0: processing of open-unlinked and half-truncated files + * during mount (fs/reiserfs/super.c:finish_unfinished()). + */ if ((inode->i_nlink == 0) && !REISERFS_SB(inode->i_sb)->s_is_unlinked_ok) { reiserfs_warning(inode->i_sb, "vs-13075", @@ -1474,7 +1607,8 @@ reiserfs_make_bad_inode(inode); } - reiserfs_check_path(&path_to_sd); /* init inode should be relsing */ + /* init inode should be relsing */ + reiserfs_check_path(&path_to_sd); /* * Stat data v1 doesn't support ACLs. @@ -1483,7 +1617,7 @@ cache_no_acl(inode); } -/** +/* * reiserfs_find_actor() - "find actor" reiserfs supplies to iget5_locked(). * * @inode: inode from hash table to check @@ -1509,14 +1643,15 @@ { struct inode *inode; struct reiserfs_iget_args args; + int depth; args.objectid = key->on_disk_key.k_objectid; args.dirid = key->on_disk_key.k_dir_id; - reiserfs_write_unlock(s); + depth = reiserfs_write_unlock_nested(s); inode = iget5_locked(s, key->on_disk_key.k_objectid, reiserfs_find_actor, reiserfs_init_locked_inode, (void *)(&args)); - reiserfs_write_lock(s); + reiserfs_write_lock_nested(s, depth); if (!inode) return ERR_PTR(-ENOMEM); @@ -1557,7 +1692,8 @@ struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { - /* fhtype happens to reflect the number of u32s encoded. + /* + * fhtype happens to reflect the number of u32s encoded. * due to a bug in earlier code, fhtype might indicate there * are more u32s then actually fitted. * so if fhtype seems to be more than len, reduce fhtype. @@ -1626,13 +1762,16 @@ return *lenp; } -/* looks for stat data, then copies fields to it, marks the buffer - containing stat data as dirty */ -/* reiserfs inodes are never really dirty, since the dirty inode call -** always logs them. This call allows the VFS inode marking routines -** to properly mark inodes for datasync and such, but only actually -** does something when called for a synchronous update. -*/ +/* + * looks for stat data, then copies fields to it, marks the buffer + * containing stat data as dirty + */ +/* + * reiserfs inodes are never really dirty, since the dirty inode call + * always logs them. This call allows the VFS inode marking routines + * to properly mark inodes for datasync and such, but only actually + * does something when called for a synchronous update. + */ int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc) { struct reiserfs_transaction_handle th; @@ -1640,24 +1779,28 @@ if (inode->i_sb->s_flags & MS_RDONLY) return -EROFS; - /* memory pressure can sometimes initiate write_inode calls with sync == 1, - ** these cases are just when the system needs ram, not when the - ** inode needs to reach disk for safety, and they can safely be - ** ignored because the altered inode has already been logged. + /* + * memory pressure can sometimes initiate write_inode calls with + * sync == 1, + * these cases are just when the system needs ram, not when the + * inode needs to reach disk for safety, and they can safely be + * ignored because the altered inode has already been logged. */ if (wbc->sync_mode == WB_SYNC_ALL && !(current->flags & PF_MEMALLOC)) { reiserfs_write_lock(inode->i_sb); if (!journal_begin(&th, inode->i_sb, jbegin_count)) { reiserfs_update_sd(&th, inode); - journal_end_sync(&th, inode->i_sb, jbegin_count); + journal_end_sync(&th); } reiserfs_write_unlock(inode->i_sb); } return 0; } -/* stat data of new object is inserted already, this inserts the item - containing "." and ".." entries */ +/* + * stat data of new object is inserted already, this inserts the item + * containing "." and ".." entries + */ static int reiserfs_new_directory(struct reiserfs_transaction_handle *th, struct inode *inode, struct item_head *ih, struct treepath *path, @@ -1675,9 +1818,11 @@ le32_to_cpu(ih->ih_key.k_objectid), DOT_OFFSET, TYPE_DIRENTRY, 3 /*key length */ ); - /* compose item head for new item. Directories consist of items of - old type (ITEM_VERSION_1). Do not set key (second arg is 0), it - is done by reiserfs_new_inode */ + /* + * compose item head for new item. Directories consist of items of + * old type (ITEM_VERSION_1). Do not set key (second arg is 0), it + * is done by reiserfs_new_inode + */ if (old_format_only(sb)) { make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET, TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2); @@ -1715,9 +1860,12 @@ return reiserfs_insert_item(th, path, &key, ih, inode, body); } -/* stat data of object has been inserted, this inserts the item - containing the body of symlink */ -static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct inode *inode, /* Inode of symlink */ +/* + * stat data of object has been inserted, this inserts the item + * containing the body of symlink + */ +static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, + struct inode *inode, struct item_head *ih, struct treepath *path, const char *symname, int item_len) @@ -1755,15 +1903,26 @@ return reiserfs_insert_item(th, path, &key, ih, inode, symname); } -/* inserts the stat data into the tree, and then calls - reiserfs_new_directory (to insert ".", ".." item if new object is - directory) or reiserfs_new_symlink (to insert symlink body if new - object is symlink) or nothing (if new object is regular file) - - NOTE! uid and gid must already be set in the inode. If we return - non-zero due to an error, we have to drop the quota previously allocated - for the fresh inode. This can only be done outside a transaction, so - if we return non-zero, we also end the transaction. */ +/* + * inserts the stat data into the tree, and then calls + * reiserfs_new_directory (to insert ".", ".." item if new object is + * directory) or reiserfs_new_symlink (to insert symlink body if new + * object is symlink) or nothing (if new object is regular file) + + * NOTE! uid and gid must already be set in the inode. If we return + * non-zero due to an error, we have to drop the quota previously allocated + * for the fresh inode. This can only be done outside a transaction, so + * if we return non-zero, we also end the transaction. + * + * @th: active transaction handle + * @dir: parent directory for new inode + * @mode: mode of new inode + * @symname: symlink contents if inode is symlink + * @isize: 0 for regular file, EMPTY_DIR_SIZE for dirs, strlen(symname) for + * symlinks + * @inode: inode to be filled + * @security: optional security context to associate with this inode + */ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, struct inode *dir, umode_t mode, const char *symname, /* 0 for regular, EMTRY_DIR_SIZE for dirs, @@ -1772,7 +1931,7 @@ struct inode *inode, struct reiserfs_security_handle *security) { - struct super_block *sb; + struct super_block *sb = dir->i_sb; struct reiserfs_iget_args args; INITIALIZE_PATH(path_to_key); struct cpu_key key; @@ -1780,12 +1939,13 @@ struct stat_data sd; int retval; int err; + int depth; BUG_ON(!th->t_trans_id); - reiserfs_write_unlock(inode->i_sb); + depth = reiserfs_write_unlock_nested(sb); err = dquot_alloc_inode(inode); - reiserfs_write_lock(inode->i_sb); + reiserfs_write_lock_nested(sb, depth); if (err) goto out_end_trans; if (!dir->i_nlink) { @@ -1793,8 +1953,6 @@ goto out_bad_inode; } - sb = dir->i_sb; - /* item head of new item */ ih.ih_key.k_dir_id = reiserfs_choose_packing(dir); ih.ih_key.k_objectid = cpu_to_le32(reiserfs_get_unused_objectid(th)); @@ -1809,23 +1967,24 @@ else make_le_item_head(&ih, NULL, KEY_FORMAT_3_6, SD_OFFSET, TYPE_STAT_DATA, SD_SIZE, MAX_US_INT); - memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE); + memcpy(INODE_PKEY(inode), &ih.ih_key, KEY_SIZE); args.dirid = le32_to_cpu(ih.ih_key.k_dir_id); - reiserfs_write_unlock(inode->i_sb); + depth = reiserfs_write_unlock_nested(inode->i_sb); err = insert_inode_locked4(inode, args.objectid, reiserfs_find_actor, &args); - reiserfs_write_lock(inode->i_sb); + reiserfs_write_lock_nested(inode->i_sb, depth); if (err) { err = -EINVAL; goto out_bad_inode; } if (old_format_only(sb)) - /* not a perfect generation count, as object ids can be reused, but - ** this is as good as reiserfs can do right now. - ** note that the private part of inode isn't filled in yet, we have - ** to use the directory. + /* + * not a perfect generation count, as object ids can be reused, + * but this is as good as reiserfs can do right now. + * note that the private part of inode isn't filled in yet, + * we have to use the directory. */ inode->i_generation = le32_to_cpu(INODE_PKEY(dir)->k_objectid); else @@ -1852,7 +2011,7 @@ REISERFS_I(inode)->i_first_direct_byte = S_ISLNK(mode) ? 1 : U32_MAX /*NO_BYTES_IN_DIRECT_ITEM */ ; - INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list)); + INIT_LIST_HEAD(&REISERFS_I(inode)->i_prealloc_list); REISERFS_I(inode)->i_flags = 0; REISERFS_I(inode)->i_prealloc_block = 0; REISERFS_I(inode)->i_prealloc_count = 0; @@ -1880,9 +2039,9 @@ goto out_bad_inode; } if (old_format_only(sb)) { + /* i_uid or i_gid is too big to be stored in stat data v3.5 */ if (i_uid_read(inode) & ~0xffff || i_gid_read(inode) & ~0xffff) { pathrelse(&path_to_key); - /* i_uid or i_gid is too big to be stored in stat data v3.5 */ err = -EINVAL; goto out_bad_inode; } @@ -1890,9 +2049,11 @@ } else { inode2sd(&sd, inode, inode->i_size); } - // store in in-core inode the key of stat data and version all - // object items will have (directory items will have old offset - // format, other new objects will consist of new items) + /* + * store in in-core inode the key of stat data and version all + * object items will have (directory items will have old offset + * format, other new objects will consist of new items) + */ if (old_format_only(sb) || S_ISDIR(mode) || S_ISLNK(mode)) set_inode_item_key_version(inode, KEY_FORMAT_3_5); else @@ -1936,16 +2097,18 @@ if (retval) { err = retval; reiserfs_check_path(&path_to_key); - journal_end(th, th->t_super, th->t_blocks_allocated); + journal_end(th); goto out_inserted_sd; } if (reiserfs_posixacl(inode->i_sb)) { + reiserfs_write_unlock(inode->i_sb); retval = reiserfs_inherit_default_acl(th, dir, dentry, inode); + reiserfs_write_lock(inode->i_sb); if (retval) { err = retval; reiserfs_check_path(&path_to_key); - journal_end(th, th->t_super, th->t_blocks_allocated); + journal_end(th); goto out_inserted_sd; } } else if (inode->i_sb->s_flags & MS_POSIXACL) { @@ -1956,12 +2119,13 @@ inode->i_flags |= S_PRIVATE; if (security->name) { + reiserfs_write_unlock(inode->i_sb); retval = reiserfs_security_write(th, inode, security); + reiserfs_write_lock(inode->i_sb); if (retval) { err = retval; reiserfs_check_path(&path_to_key); - retval = journal_end(th, th->t_super, - th->t_blocks_allocated); + retval = journal_end(th); if (retval) err = retval; goto out_inserted_sd; @@ -1973,27 +2137,28 @@ return 0; -/* it looks like you can easily compress these two goto targets into - * one. Keeping it like this doesn't actually hurt anything, and they - * are place holders for what the quota code actually needs. - */ - out_bad_inode: +out_bad_inode: /* Invalidate the object, nothing was inserted yet */ INODE_PKEY(inode)->k_objectid = 0; /* Quota change must be inside a transaction for journaling */ + depth = reiserfs_write_unlock_nested(inode->i_sb); dquot_free_inode(inode); + reiserfs_write_lock_nested(inode->i_sb, depth); - out_end_trans: - journal_end(th, th->t_super, th->t_blocks_allocated); - reiserfs_write_unlock(inode->i_sb); - /* Drop can be outside and it needs more credits so it's better to have it outside */ +out_end_trans: + journal_end(th); + /* + * Drop can be outside and it needs more credits so it's better + * to have it outside + */ + depth = reiserfs_write_unlock_nested(inode->i_sb); dquot_drop(inode); - reiserfs_write_lock(inode->i_sb); + reiserfs_write_lock_nested(inode->i_sb, depth); inode->i_flags |= S_NOQUOTA; make_bad_inode(inode); - out_inserted_sd: +out_inserted_sd: clear_nlink(inode); th->t_trans_id = 0; /* so the caller can't use this handle later */ unlock_new_inode(inode); /* OK to do even if we hadn't locked it */ @@ -2002,25 +2167,26 @@ } /* -** finds the tail page in the page cache, -** reads the last block in. -** -** On success, page_result is set to a locked, pinned page, and bh_result -** is set to an up to date buffer for the last block in the file. returns 0. -** -** tail conversion is not done, so bh_result might not be valid for writing -** check buffer_mapped(bh_result) and bh_result->b_blocknr != 0 before -** trying to write the block. -** -** on failure, nonzero is returned, page_result and bh_result are untouched. -*/ + * finds the tail page in the page cache, + * reads the last block in. + * + * On success, page_result is set to a locked, pinned page, and bh_result + * is set to an up to date buffer for the last block in the file. returns 0. + * + * tail conversion is not done, so bh_result might not be valid for writing + * check buffer_mapped(bh_result) and bh_result->b_blocknr != 0 before + * trying to write the block. + * + * on failure, nonzero is returned, page_result and bh_result are untouched. + */ static int grab_tail_page(struct inode *inode, struct page **page_result, struct buffer_head **bh_result) { - /* we want the page with the last byte in the file, - ** not the page that will hold the next byte for appending + /* + * we want the page with the last byte in the file, + * not the page that will hold the next byte for appending */ unsigned long index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT; unsigned long pos = 0; @@ -2032,10 +2198,11 @@ struct page *page; int error; - /* we know that we are only called with inode->i_size > 0. - ** we also know that a file tail can never be as big as a block - ** If i_size % blocksize == 0, our file is currently block aligned - ** and it won't need converting or zeroing after a truncate. + /* + * we know that we are only called with inode->i_size > 0. + * we also know that a file tail can never be as big as a block + * If i_size % blocksize == 0, our file is currently block aligned + * and it won't need converting or zeroing after a truncate. */ if ((offset & (blocksize - 1)) == 0) { return -ENOENT; @@ -2064,10 +2231,11 @@ } while (bh != head); if (!buffer_uptodate(bh)) { - /* note, this should never happen, prepare_write should - ** be taking care of this for us. If the buffer isn't up to date, - ** I've screwed up the code to find the buffer, or the code to - ** call prepare_write + /* + * note, this should never happen, prepare_write should be + * taking care of this for us. If the buffer isn't up to + * date, I've screwed up the code to find the buffer, or the + * code to call prepare_write */ reiserfs_error(inode->i_sb, "clm-6000", "error reading block %lu", bh->b_blocknr); @@ -2077,21 +2245,21 @@ *bh_result = bh; *page_result = page; - out: +out: return error; - unlock: +unlock: unlock_page(page); page_cache_release(page); return error; } /* -** vfs version of truncate file. Must NOT be called with -** a transaction already started. -** -** some code taken from block_truncate_page -*/ + * vfs version of truncate file. Must NOT be called with + * a transaction already started. + * + * some code taken from block_truncate_page + */ int reiserfs_truncate_file(struct inode *inode, int update_timestamps) { struct reiserfs_transaction_handle th; @@ -2103,16 +2271,17 @@ int error; struct buffer_head *bh = NULL; int err2; - int lock_depth; - lock_depth = reiserfs_write_lock_once(inode->i_sb); + reiserfs_write_lock(inode->i_sb); if (inode->i_size > 0) { error = grab_tail_page(inode, &page, &bh); if (error) { - // -ENOENT means we truncated past the end of the file, - // and get_block_create_0 could not find a block to read in, - // which is ok. + /* + * -ENOENT means we truncated past the end of the + * file, and get_block_create_0 could not find a + * block to read in, which is ok. + */ if (error != -ENOENT) reiserfs_error(inode->i_sb, "clm-6001", "grab_tail_page failed %d", @@ -2122,29 +2291,33 @@ } } - /* so, if page != NULL, we have a buffer head for the offset at - ** the end of the file. if the bh is mapped, and bh->b_blocknr != 0, - ** then we have an unformatted node. Otherwise, we have a direct item, - ** and no zeroing is required on disk. We zero after the truncate, - ** because the truncate might pack the item anyway - ** (it will unmap bh if it packs). - */ - /* it is enough to reserve space in transaction for 2 balancings: - one for "save" link adding and another for the first - cut_from_item. 1 is for update_sd */ + /* + * so, if page != NULL, we have a buffer head for the offset at + * the end of the file. if the bh is mapped, and bh->b_blocknr != 0, + * then we have an unformatted node. Otherwise, we have a direct item, + * and no zeroing is required on disk. We zero after the truncate, + * because the truncate might pack the item anyway + * (it will unmap bh if it packs). + * + * it is enough to reserve space in transaction for 2 balancings: + * one for "save" link adding and another for the first + * cut_from_item. 1 is for update_sd + */ error = journal_begin(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1); if (error) goto out; reiserfs_update_inode_transaction(inode); if (update_timestamps) - /* we are doing real truncate: if the system crashes before the last - transaction of truncating gets committed - on reboot the file - either appears truncated properly or not truncated at all */ + /* + * we are doing real truncate: if the system crashes + * before the last transaction of truncating gets committed + * - on reboot the file either appears truncated properly + * or not truncated at all + */ add_save_link(&th, inode, 1); err2 = reiserfs_do_truncate(&th, inode, page, update_timestamps); - error = - journal_end(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1); + error = journal_end(&th); if (error) goto out; @@ -2174,16 +2347,16 @@ page_cache_release(page); } - reiserfs_write_unlock_once(inode->i_sb, lock_depth); + reiserfs_write_unlock(inode->i_sb); return 0; - out: +out: if (page) { unlock_page(page); page_cache_release(page); } - reiserfs_write_unlock_once(inode->i_sb, lock_depth); + reiserfs_write_unlock(inode->i_sb); return error; } @@ -2209,7 +2382,10 @@ int copy_size; int trans_running = 0; - /* catch places below that try to log something without starting a trans */ + /* + * catch places below that try to log something without + * starting a trans + */ th.t_trans_id = 0; if (!buffer_uptodate(bh_result)) { @@ -2217,11 +2393,11 @@ } kmap(bh_result->b_page); - start_over: +start_over: reiserfs_write_lock(inode->i_sb); make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3); - research: +research: retval = search_for_position_by_key(inode->i_sb, &key, &path); if (retval != POSITION_FOUND) { use_get_block = 1; @@ -2229,8 +2405,8 @@ } bh = get_last_bh(&path); - ih = get_ih(&path); - item = get_item(&path); + ih = tp_item_head(&path); + item = tp_item_body(&path); pos_in_item = path.pos_in_item; /* we've found an unformatted node */ @@ -2278,10 +2454,10 @@ goto research; } - memcpy(B_I_PITEM(bh, ih) + pos_in_item, p + bytes_copied, + memcpy(ih_item_body(bh, ih) + pos_in_item, p + bytes_copied, copy_size); - journal_mark_dirty(&th, inode->i_sb, bh); + journal_mark_dirty(&th, bh); bytes_copied += copy_size; set_block_dev_mapped(bh_result, 0, inode); @@ -2301,10 +2477,10 @@ } retval = 0; - out: +out: pathrelse(&path); if (trans_running) { - int err = journal_end(&th, inode->i_sb, jbegin_count); + int err = journal_end(&th); if (err) retval = err; trans_running = 0; @@ -2328,7 +2504,8 @@ kunmap(bh_result->b_page); if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) { - /* we've copied data from the page into the direct item, so the + /* + * we've copied data from the page into the direct item, so the * buffer in the page is now clean, mark it to reflect that. */ lock_buffer(bh_result); @@ -2367,7 +2544,8 @@ return 0; } - /* The page dirty bit is cleared before writepage is called, which + /* + * The page dirty bit is cleared before writepage is called, which * means we have to tell create_empty_buffers to make dirty buffers * The page really should be up to date at this point, so tossing * in the BH_Uptodate is just a sanity check. @@ -2378,8 +2556,9 @@ } head = page_buffers(page); - /* last page in the file, zero out any contents past the - ** last byte in the file + /* + * last page in the file, zero out any contents past the + * last byte in the file */ if (page->index >= end_index) { unsigned last_offset; @@ -2409,7 +2588,8 @@ (!buffer_mapped(bh) || (buffer_mapped(bh) && bh->b_blocknr == 0))) { - /* not mapped yet, or it points to a direct item, search + /* + * not mapped yet, or it points to a direct item, search * the btree for the mapping info, and log any direct * items found */ @@ -2447,10 +2627,11 @@ if (checked) { reiserfs_prepare_for_journal(s, bh, 1); - journal_mark_dirty(&th, s, bh); + journal_mark_dirty(&th, bh); continue; } - /* from this point on, we know the buffer is mapped to a + /* + * from this point on, we know the buffer is mapped to a * real block and not a direct item */ if (wbc->sync_mode != WB_SYNC_NONE) { @@ -2469,7 +2650,7 @@ } while ((bh = bh->b_this_page) != head); if (checked) { - error = journal_end(&th, s, bh_per_page + 1); + error = journal_end(&th); reiserfs_write_unlock(s); if (error) goto fail; @@ -2494,7 +2675,7 @@ } while (bh != head); error = 0; - done: +done: if (nr == 0) { /* * if this page only had a direct item, it is very possible for @@ -2516,8 +2697,9 @@ } return error; - fail: - /* catches various errors, we need to make sure any valid dirty blocks +fail: + /* + * catches various errors, we need to make sure any valid dirty blocks * get to the media. The page is currently locked and not marked for * writeback */ @@ -2530,8 +2712,8 @@ mark_buffer_async_write(bh); } else { /* - * clear any dirty bits that might have come from getting - * attached to a dirty page + * clear any dirty bits that might have come from + * getting attached to a dirty page */ clear_buffer_dirty(bh); } @@ -2584,7 +2766,7 @@ int old_ref = 0; inode = mapping->host; - *fsdata = 0; + *fsdata = NULL; if (flags & AOP_FLAG_CONT_EXPAND && (pos & (inode->i_sb->s_blocksize - 1)) == 0) { pos ++; @@ -2611,15 +2793,18 @@ ret = __block_write_begin(page, pos, len, reiserfs_get_block); if (ret && reiserfs_transaction_running(inode->i_sb)) { struct reiserfs_transaction_handle *th = current->journal_info; - /* this gets a little ugly. If reiserfs_get_block returned an - * error and left a transacstion running, we've got to close it, - * and we've got to free handle if it was a persistent transaction. + /* + * this gets a little ugly. If reiserfs_get_block returned an + * error and left a transacstion running, we've got to close + * it, and we've got to free handle if it was a persistent + * transaction. * * But, if we had nested into an existing transaction, we need * to just drop the ref count on the handle. * * If old_ref == 0, the transaction is from reiserfs_get_block, - * and it was a persistent trans. Otherwise, it was nested above. + * and it was a persistent trans. Otherwise, it was nested + * above. */ if (th->t_refcount > old_ref) { if (old_ref) @@ -2648,10 +2833,11 @@ struct inode *inode = page->mapping->host; int ret; int old_ref = 0; + int depth; - reiserfs_write_unlock(inode->i_sb); + depth = reiserfs_write_unlock_nested(inode->i_sb); reiserfs_wait_on_write_block(inode->i_sb); - reiserfs_write_lock(inode->i_sb); + reiserfs_write_lock_nested(inode->i_sb, depth); fix_tail_page_for_writing(page); if (reiserfs_transaction_running(inode->i_sb)) { @@ -2667,15 +2853,18 @@ ret = __block_write_begin(page, from, len, reiserfs_get_block); if (ret && reiserfs_transaction_running(inode->i_sb)) { struct reiserfs_transaction_handle *th = current->journal_info; - /* this gets a little ugly. If reiserfs_get_block returned an - * error and left a transacstion running, we've got to close it, - * and we've got to free handle if it was a persistent transaction. + /* + * this gets a little ugly. If reiserfs_get_block returned an + * error and left a transacstion running, we've got to close + * it, and we've got to free handle if it was a persistent + * transaction. * * But, if we had nested into an existing transaction, we need * to just drop the ref count on the handle. * * If old_ref == 0, the transaction is from reiserfs_get_block, - * and it was a persistent trans. Otherwise, it was nested above. + * and it was a persistent trans. Otherwise, it was nested + * above. */ if (th->t_refcount > old_ref) { if (old_ref) @@ -2708,7 +2897,6 @@ int update_sd = 0; struct reiserfs_transaction_handle *th; unsigned start; - int lock_depth = 0; bool locked = false; if ((unsigned long)fsdata & AOP_FLAG_CONT_EXPAND) @@ -2731,17 +2919,20 @@ reiserfs_commit_page(inode, page, start, start + copied); - /* generic_commit_write does this for us, but does not update the - ** transaction tracking stuff when the size changes. So, we have - ** to do the i_size updates here. + /* + * generic_commit_write does this for us, but does not update the + * transaction tracking stuff when the size changes. So, we have + * to do the i_size updates here. */ if (pos + copied > inode->i_size) { struct reiserfs_transaction_handle myth; - lock_depth = reiserfs_write_lock_once(inode->i_sb); + reiserfs_write_lock(inode->i_sb); locked = true; - /* If the file have grown beyond the border where it - can have a tail, unmark it as needing a tail - packing */ + /* + * If the file have grown beyond the border where it + * can have a tail, unmark it as needing a tail + * packing + */ if ((have_large_tails(inode->i_sb) && inode->i_size > i_block_size(inode) * 4) || (have_small_tails(inode->i_sb) @@ -2756,19 +2947,19 @@ inode->i_size = pos + copied; /* * this will just nest into our transaction. It's important - * to use mark_inode_dirty so the inode gets pushed around on the - * dirty lists, and so that O_SYNC works as expected + * to use mark_inode_dirty so the inode gets pushed around on + * the dirty lists, and so that O_SYNC works as expected */ mark_inode_dirty(inode); reiserfs_update_sd(&myth, inode); update_sd = 1; - ret = journal_end(&myth, inode->i_sb, 1); + ret = journal_end(&myth); if (ret) goto journal_error; } if (th) { if (!locked) { - lock_depth = reiserfs_write_lock_once(inode->i_sb); + reiserfs_write_lock(inode->i_sb); locked = true; } if (!update_sd) @@ -2778,9 +2969,9 @@ goto out; } - out: +out: if (locked) - reiserfs_write_unlock_once(inode->i_sb, lock_depth); + reiserfs_write_unlock(inode->i_sb); unlock_page(page); page_cache_release(page); @@ -2789,8 +2980,8 @@ return ret == 0 ? copied : ret; - journal_error: - reiserfs_write_unlock_once(inode->i_sb, lock_depth); +journal_error: + reiserfs_write_unlock(inode->i_sb); locked = false; if (th) { if (!update_sd) @@ -2808,25 +2999,29 @@ int ret = 0; int update_sd = 0; struct reiserfs_transaction_handle *th = NULL; + int depth; - reiserfs_write_unlock(inode->i_sb); + depth = reiserfs_write_unlock_nested(inode->i_sb); reiserfs_wait_on_write_block(inode->i_sb); - reiserfs_write_lock(inode->i_sb); + reiserfs_write_lock_nested(inode->i_sb, depth); if (reiserfs_transaction_running(inode->i_sb)) { th = current->journal_info; } reiserfs_commit_page(inode, page, from, to); - /* generic_commit_write does this for us, but does not update the - ** transaction tracking stuff when the size changes. So, we have - ** to do the i_size updates here. + /* + * generic_commit_write does this for us, but does not update the + * transaction tracking stuff when the size changes. So, we have + * to do the i_size updates here. */ if (pos > inode->i_size) { struct reiserfs_transaction_handle myth; - /* If the file have grown beyond the border where it - can have a tail, unmark it as needing a tail - packing */ + /* + * If the file have grown beyond the border where it + * can have a tail, unmark it as needing a tail + * packing + */ if ((have_large_tails(inode->i_sb) && inode->i_size > i_block_size(inode) * 4) || (have_small_tails(inode->i_sb) @@ -2841,13 +3036,13 @@ inode->i_size = pos; /* * this will just nest into our transaction. It's important - * to use mark_inode_dirty so the inode gets pushed around on the - * dirty lists, and so that O_SYNC works as expected + * to use mark_inode_dirty so the inode gets pushed around + * on the dirty lists, and so that O_SYNC works as expected */ mark_inode_dirty(inode); reiserfs_update_sd(&myth, inode); update_sd = 1; - ret = journal_end(&myth, inode->i_sb, 1); + ret = journal_end(&myth); if (ret) goto journal_error; } @@ -2859,10 +3054,10 @@ goto out; } - out: +out: return ret; - journal_error: +journal_error: if (th) { if (!update_sd) reiserfs_update_sd(th, inode); @@ -2920,9 +3115,10 @@ } } -/* decide if this buffer needs to stay around for data logging or ordered -** write purposes -*/ +/* + * decide if this buffer needs to stay around for data logging or ordered + * write purposes + */ static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh) { int ret = 1; @@ -2933,7 +3129,8 @@ if (!buffer_mapped(bh)) { goto free_jh; } - /* the page is locked, and the only places that log a data buffer + /* + * the page is locked, and the only places that log a data buffer * also lock the page. */ if (reiserfs_file_data_log(inode)) { @@ -2948,7 +3145,8 @@ struct reiserfs_journal_list *jl; struct reiserfs_jh *jh = bh->b_private; - /* why is this safe? + /* + * why is this safe? * reiserfs_setattr updates i_size in the on disk * stat data before allowing vmtruncate to be called. * @@ -2965,7 +3163,7 @@ && jl != SB_JOURNAL(inode->i_sb)->j_current_jl) ret = 0; } - free_jh: +free_jh: if (ret && bh->b_private) { reiserfs_free_jh(bh); } @@ -2975,16 +3173,19 @@ } /* clm -- taken from fs/buffer.c:block_invalidate_page */ -static void reiserfs_invalidatepage(struct page *page, unsigned long offset) +static void reiserfs_invalidatepage(struct page *page, unsigned int offset, + unsigned int length) { struct buffer_head *head, *bh, *next; struct inode *inode = page->mapping->host; unsigned int curr_off = 0; + unsigned int stop = offset + length; + int partial_page = (offset || length < PAGE_CACHE_SIZE); int ret = 1; BUG_ON(!PageLocked(page)); - if (offset == 0) + if (!partial_page) ClearPageChecked(page); if (!page_has_buffers(page)) @@ -2996,6 +3197,9 @@ unsigned int next_off = curr_off + bh->b_size; next = bh->b_this_page; + if (next_off > stop) + goto out; + /* * is this block fully invalidated? */ @@ -3014,11 +3218,11 @@ * The get_block cached value has been unconditionally invalidated, * so real IO is not possible anymore. */ - if (!offset && ret) { + if (!partial_page && ret) { ret = try_to_release_page(page, 0); /* maybe should BUG_ON(!ret); - neilb */ } - out: +out: return; } @@ -3070,26 +3274,28 @@ return ret; } -/* We thank Mingming Cao for helping us understand in great detail what - to do in this section of the code. */ -static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, - unsigned long nr_segs) +/* + * We thank Mingming Cao for helping us understand in great detail what + * to do in this section of the code. + */ +static ssize_t reiserfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; + size_t count = iov_iter_count(iter); ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, - reiserfs_get_blocks_direct_io); + ret = blockdev_direct_IO(iocb, inode, iter, offset, + reiserfs_get_blocks_direct_io); /* * In case of error extending write may have instantiated a few * blocks outside i_size. Trim these off again. */ - if (unlikely((rw & WRITE) && ret < 0)) { + if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) { loff_t isize = i_size_read(inode); - loff_t end = offset + iov_length(iov, nr_segs); + loff_t end = offset + count; if ((end > isize) && inode_newsize_ok(inode, isize) == 0) { truncate_setsize(inode, isize); @@ -3102,9 +3308,8 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) { - struct inode *inode = dentry->d_inode; + struct inode *inode = d_inode(dentry); unsigned int ia_valid; - int depth; int error; error = inode_change_ok(inode, attr); @@ -3114,15 +3319,20 @@ /* must be turned off for recursive notify_change calls */ ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID); - if (is_quota_modification(inode, attr)) - dquot_initialize(inode); - depth = reiserfs_write_lock_once(inode->i_sb); + if (is_quota_modification(inode, attr)) { + error = dquot_initialize(inode); + if (error) + return error; + } + reiserfs_write_lock(inode->i_sb); if (attr->ia_valid & ATTR_SIZE) { - /* version 2 items will be caught by the s_maxbytes check - ** done for us in vmtruncate + /* + * version 2 items will be caught by the s_maxbytes check + * done for us in vmtruncate */ if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 && attr->ia_size > MAX_NON_LFS) { + reiserfs_write_unlock(inode->i_sb); error = -EFBIG; goto out; } @@ -3139,13 +3349,15 @@ err = journal_begin(&th, inode->i_sb, 4); if (!err) { reiserfs_discard_prealloc(&th, inode); - err = journal_end(&th, inode->i_sb, 4); + err = journal_end(&th); } if (err) error = err; } - if (error) + if (error) { + reiserfs_write_unlock(inode->i_sb); goto out; + } /* * file size is changed, ctime and mtime are * to be updated @@ -3153,6 +3365,7 @@ attr->ia_valid |= (ATTR_MTIME | ATTR_CTIME); } } + reiserfs_write_unlock(inode->i_sb); if ((((attr->ia_valid & ATTR_UID) && (from_kuid(&init_user_ns, attr->ia_uid) & ~0xffff)) || ((attr->ia_valid & ATTR_GID) && (from_kgid(&init_user_ns, attr->ia_gid) & ~0xffff))) && @@ -3176,37 +3389,38 @@ if (error) return error; - /* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */ + /* + * (user+group)*(old+new) structure - we count quota + * info and , inode write (sb, inode) + */ + reiserfs_write_lock(inode->i_sb); error = journal_begin(&th, inode->i_sb, jbegin_count); + reiserfs_write_unlock(inode->i_sb); if (error) goto out; - reiserfs_write_unlock_once(inode->i_sb, depth); error = dquot_transfer(inode, attr); - depth = reiserfs_write_lock_once(inode->i_sb); + reiserfs_write_lock(inode->i_sb); if (error) { - journal_end(&th, inode->i_sb, jbegin_count); + journal_end(&th); + reiserfs_write_unlock(inode->i_sb); goto out; } - /* Update corresponding info in inode so that everything is in - * one transaction */ + /* + * Update corresponding info in inode so that everything + * is in one transaction + */ if (attr->ia_valid & ATTR_UID) inode->i_uid = attr->ia_uid; if (attr->ia_valid & ATTR_GID) inode->i_gid = attr->ia_gid; mark_inode_dirty(inode); - error = journal_end(&th, inode->i_sb, jbegin_count); + error = journal_end(&th); + reiserfs_write_unlock(inode->i_sb); if (error) goto out; } - /* - * Relax the lock here, as it might truncate the - * inode pages and wait for inode pages locks. - * To release such page lock, the owner needs the - * reiserfs lock - */ - reiserfs_write_unlock_once(inode->i_sb, depth); if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size != i_size_read(inode)) { error = inode_newsize_ok(inode, attr->ia_size); @@ -3226,16 +3440,13 @@ setattr_copy(inode, attr); mark_inode_dirty(inode); } - depth = reiserfs_write_lock_once(inode->i_sb); if (!error && reiserfs_posixacl(inode->i_sb)) { if (attr->ia_valid & ATTR_MODE) error = reiserfs_acl_chmod(inode); } - out: - reiserfs_write_unlock_once(inode->i_sb, depth); - +out: return error; }