--- zzzz-none-000/linux-3.10.107/fs/xfs/xfs_iomap.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/fs/xfs/xfs_iomap.c 2021-02-04 17:41:59.000000000 +0000 @@ -17,31 +17,23 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_log.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_inode_item.h" #include "xfs_btree.h" +#include "xfs_bmap_btree.h" #include "xfs_bmap.h" -#include "xfs_rtalloc.h" +#include "xfs_bmap_util.h" #include "xfs_error.h" -#include "xfs_itable.h" -#include "xfs_attr.h" -#include "xfs_buf_item.h" +#include "xfs_trans.h" #include "xfs_trans_space.h" -#include "xfs_utils.h" #include "xfs_iomap.h" #include "xfs_trace.h" #include "xfs_icache.h" +#include "xfs_quota.h" #include "xfs_dquot_item.h" #include "xfs_dquot.h" @@ -57,7 +49,6 @@ xfs_extlen_t extsize, xfs_fileoff_t *last_fsb) { - xfs_fileoff_t new_last_fsb = 0; xfs_extlen_t align = 0; int eof, error; @@ -75,8 +66,8 @@ else if (mp->m_dalign) align = mp->m_dalign; - if (align && XFS_ISIZE(ip) >= XFS_FSB_TO_B(mp, align)) - new_last_fsb = roundup_64(*last_fsb, align); + if (align && XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, align)) + align = 0; } /* @@ -84,14 +75,14 @@ * (when file on a real-time subvolume or has di_extsize hint). */ if (extsize) { - if (new_last_fsb) - align = roundup_64(new_last_fsb, extsize); + if (align) + align = roundup_64(align, extsize); else align = extsize; - new_last_fsb = roundup_64(*last_fsb, align); } - if (new_last_fsb) { + if (align) { + xfs_fileoff_t new_last_fsb = roundup_64(*last_fsb, align); error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof); if (error) return error; @@ -109,13 +100,13 @@ xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO, "Access to block zero in inode %llu " "start_block: %llx start_off: %llx " - "blkcnt: %llx extent-state: %x\n", + "blkcnt: %llx extent-state: %x", (unsigned long long)ip->i_ino, (unsigned long long)imap->br_startblock, (unsigned long long)imap->br_startoff, (unsigned long long)imap->br_blockcount, imap->br_state); - return EFSCORRUPTED; + return -EFSCORRUPTED; } int @@ -133,7 +124,6 @@ xfs_fsblock_t firstfsb; xfs_extlen_t extsz, temp; int nimaps; - int bmapi_flag; int quota_flag; int rt; xfs_trans_t *tp; @@ -141,20 +131,30 @@ uint qblocks, resblks, resrtextents; int committed; int error; - - error = xfs_qm_dqattach(ip, 0); - if (error) - return XFS_ERROR(error); + int lockmode; + int bmapi_flags = XFS_BMAPI_PREALLOC; rt = XFS_IS_REALTIME_INODE(ip); extsz = xfs_get_extsz_hint(ip); + lockmode = XFS_ILOCK_SHARED; /* locked by caller */ + + ASSERT(xfs_isilocked(ip, lockmode)); offset_fsb = XFS_B_TO_FSBT(mp, offset); last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); if ((offset + count) > XFS_ISIZE(ip)) { + /* + * Assert that the in-core extent list is present since this can + * call xfs_iread_extents() and we only have the ilock shared. + * This should be safe because the lock was held around a bmapi + * call in the caller and we only need it to access the in-core + * list. + */ + ASSERT(XFS_IFORK_PTR(ip, XFS_DATA_FORK)->if_flags & + XFS_IFEXTENTS); error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb); if (error) - return XFS_ERROR(error); + goto out_unlock; } else { if (nmaps && (imap->br_startblock == HOLESTARTBLOCK)) last_fsb = MIN(last_fsb, (xfs_fileoff_t) @@ -184,22 +184,47 @@ } /* + * Drop the shared lock acquired by the caller, attach the dquot if + * necessary and move on to transaction setup. + */ + xfs_iunlock(ip, lockmode); + error = xfs_qm_dqattach(ip, 0); + if (error) + return error; + + /* * Allocate and setup the transaction */ tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); - error = xfs_trans_reserve(tp, resblks, - XFS_WRITE_LOG_RES(mp), resrtextents, - XFS_TRANS_PERM_LOG_RES, - XFS_WRITE_LOG_COUNT); + + /* + * For DAX, we do not allocate unwritten extents, but instead we zero + * the block before we commit the transaction. Ideally we'd like to do + * this outside the transaction context, but if we commit and then crash + * we may not have zeroed the blocks and this will be exposed on + * recovery of the allocation. Hence we must zero before commit. + * Further, if we are mapping unwritten extents here, we need to zero + * and convert them to written so that we don't need an unwritten extent + * callback for DAX. This also means that we need to be able to dip into + * the reserve block pool if there is no space left but we need to do + * unwritten extent conversion. + */ + if (IS_DAX(VFS_I(ip))) { + bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO; + tp->t_flags |= XFS_TRANS_RESERVE; + } + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, + resblks, resrtextents); /* * Check for running out of space, note: need lock to return */ if (error) { - xfs_trans_cancel(tp, 0); - return XFS_ERROR(error); + xfs_trans_cancel(tp); + return error; } - xfs_ilock(ip, XFS_ILOCK_EXCL); + lockmode = XFS_ILOCK_EXCL; + xfs_ilock(ip, lockmode); error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); if (error) @@ -207,18 +232,15 @@ xfs_trans_ijoin(tp, ip, 0); - bmapi_flag = 0; - if (offset < XFS_ISIZE(ip) || extsz) - bmapi_flag |= XFS_BMAPI_PREALLOC; - /* * From this point onwards we overwrite the imap pointer that the * caller gave to us. */ xfs_bmap_init(&free_list, &firstfsb); nimaps = 1; - error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flag, - &firstfsb, 0, imap, &nimaps, &free_list); + error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, + bmapi_flags, &firstfsb, resblks, imap, + &nimaps, &free_list); if (error) goto out_bmap_cancel; @@ -228,7 +250,8 @@ error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) goto out_bmap_cancel; - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + + error = xfs_trans_commit(tp); if (error) goto out_unlock; @@ -236,7 +259,7 @@ * Copy any maps to caller's array and return any error. */ if (nimaps == 0) { - error = XFS_ERROR(ENOSPC); + error = -ENOSPC; goto out_unlock; } @@ -244,14 +267,14 @@ error = xfs_alert_fsblock_zero(ip, imap); out_unlock: - xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_iunlock(ip, lockmode); return error; out_bmap_cancel: xfs_bmap_cancel(&free_list); xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag); out_trans_cancel: - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); + xfs_trans_cancel(tp); goto out_unlock; } @@ -275,7 +298,6 @@ { xfs_fileoff_t start_fsb; xfs_filblks_t count_fsb; - xfs_fsblock_t firstblock; int n, error, imaps; int found_delalloc = 0; @@ -284,6 +306,15 @@ return 0; /* + * If the file is smaller than the minimum prealloc and we are using + * dynamic preallocation, don't do any preallocation at all as it is + * likely this is the only write to the file that is going to be done. + */ + if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) && + XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_writeio_blocks)) + return 0; + + /* * If there are any real blocks past eof, then don't * do any speculative allocation. */ @@ -291,7 +322,6 @@ count_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); while (count_fsb > 0) { imaps = nimaps; - firstblock = NULLFSBLOCK; error = xfs_bmapi_read(ip, start_fsb, count_fsb, imap, &imaps, 0); if (error) @@ -345,6 +375,10 @@ if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) return 0; + /* If the file is small, then use the minimum prealloc */ + if (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign)) + return 0; + /* * As we write multiple pages, the offset will always align to the * start of a page and hence point to a hole at EOF. i.e. if the size is @@ -395,15 +429,17 @@ struct xfs_inode *ip, int type, xfs_fsblock_t *qblocks, - int *qshift) + int *qshift, + int64_t *qfreesp) { int64_t freesp; int shift = 0; struct xfs_dquot *dq = xfs_inode_dquot(ip, type); - /* over hi wmark, squash the prealloc completely */ - if (dq->q_res_bcount >= dq->q_prealloc_hi_wmark) { + /* no dq, or over hi wmark, squash the prealloc completely */ + if (!dq || dq->q_res_bcount >= dq->q_prealloc_hi_wmark) { *qblocks = 0; + *qfreesp = 0; return; } @@ -416,6 +452,9 @@ shift += 2; } + if (freesp < *qfreesp) + *qfreesp = freesp; + /* only overwrite the throttle values if we are more aggressive */ if ((freesp >> shift) < (*qblocks >> *qshift)) { *qblocks = freesp; @@ -459,8 +498,7 @@ alloc_blocks = XFS_FILEOFF_MIN(roundup_pow_of_two(MAXEXTLEN), alloc_blocks); - xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT); - freesp = mp->m_sb.sb_fdblocks; + freesp = percpu_counter_read_positive(&mp->m_fdblocks); if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) { shift = 2; if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT]) @@ -474,15 +512,18 @@ } /* - * Check each quota to cap the prealloc size and provide a shift - * value to throttle with. + * Check each quota to cap the prealloc size, provide a shift value to + * throttle with and adjust amount of available space. */ if (xfs_quota_need_throttle(ip, XFS_DQ_USER, alloc_blocks)) - xfs_quota_calc_throttle(ip, XFS_DQ_USER, &qblocks, &qshift); + xfs_quota_calc_throttle(ip, XFS_DQ_USER, &qblocks, &qshift, + &freesp); if (xfs_quota_need_throttle(ip, XFS_DQ_GROUP, alloc_blocks)) - xfs_quota_calc_throttle(ip, XFS_DQ_GROUP, &qblocks, &qshift); + xfs_quota_calc_throttle(ip, XFS_DQ_GROUP, &qblocks, &qshift, + &freesp); if (xfs_quota_need_throttle(ip, XFS_DQ_PROJ, alloc_blocks)) - xfs_quota_calc_throttle(ip, XFS_DQ_PROJ, &qblocks, &qshift); + xfs_quota_calc_throttle(ip, XFS_DQ_PROJ, &qblocks, &qshift, + &freesp); /* * The final prealloc size is set to the minimum of free space available @@ -550,7 +591,7 @@ */ error = xfs_qm_dqattach_locked(ip, 0); if (error) - return XFS_ERROR(error); + return error; extsz = xfs_get_extsz_hint(ip); offset_fsb = XFS_B_TO_FSBT(mp, offset); @@ -594,11 +635,11 @@ imap, &nimaps, XFS_BMAPI_ENTIRE); switch (error) { case 0: - case ENOSPC: - case EDQUOT: + case -ENOSPC: + case -EDQUOT: break; default: - return XFS_ERROR(error); + return error; } /* @@ -612,7 +653,7 @@ error = 0; goto retry; } - return XFS_ERROR(error ? error : ENOSPC); + return error ? error : -ENOSPC; } if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip))) @@ -643,7 +684,6 @@ xfs_iomap_write_allocate( xfs_inode_t *ip, xfs_off_t offset, - size_t count, xfs_bmbt_irec_t *imap) { xfs_mount_t *mp = ip->i_mount; @@ -662,13 +702,13 @@ */ error = xfs_qm_dqattach(ip, 0); if (error) - return XFS_ERROR(error); + return error; offset_fsb = XFS_B_TO_FSBT(mp, offset); count_fsb = imap->br_blockcount; map_start_fsb = imap->br_startoff; - XFS_STATS_ADD(xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb)); + XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb)); while (count_fsb != 0) { /* @@ -685,13 +725,11 @@ tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); tp->t_flags |= XFS_TRANS_RESERVE; nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); - error = xfs_trans_reserve(tp, nres, - XFS_WRITE_LOG_RES(mp), - 0, XFS_TRANS_PERM_LOG_RES, - XFS_WRITE_LOG_COUNT); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, + nres, 0); if (error) { - xfs_trans_cancel(tp, 0); - return XFS_ERROR(error); + xfs_trans_cancel(tp); + return error; } xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); @@ -731,7 +769,7 @@ */ nimaps = 1; end_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)); - error = xfs_bmap_last_offset(NULL, ip, &last_block, + error = xfs_bmap_last_offset(ip, &last_block, XFS_DATA_FORK); if (error) goto trans_cancel; @@ -740,7 +778,7 @@ if ((map_start_fsb + count_fsb) > last_block) { count_fsb = last_block - map_start_fsb; if (count_fsb == 0) { - error = EAGAIN; + error = -EAGAIN; goto trans_cancel; } } @@ -750,10 +788,9 @@ * pointer that the caller gave to us. */ error = xfs_bmapi_write(tp, ip, map_start_fsb, - count_fsb, - XFS_BMAPI_STACK_SWITCH, - &first_block, 1, - imap, &nimaps, &free_list); + count_fsb, 0, &first_block, + nres, imap, &nimaps, + &free_list); if (error) goto trans_cancel; @@ -761,7 +798,7 @@ if (error) goto trans_cancel; - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + error = xfs_trans_commit(tp); if (error) goto error0; @@ -778,7 +815,7 @@ if ((offset_fsb >= imap->br_startoff) && (offset_fsb < (imap->br_startoff + imap->br_blockcount))) { - XFS_STATS_INC(xs_xstrat_quick); + XFS_STATS_INC(mp, xs_xstrat_quick); return 0; } @@ -792,17 +829,17 @@ trans_cancel: xfs_bmap_cancel(&free_list); - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); + xfs_trans_cancel(tp); error0: xfs_iunlock(ip, XFS_ILOCK_EXCL); - return XFS_ERROR(error); + return error; } int xfs_iomap_write_unwritten( xfs_inode_t *ip, xfs_off_t offset, - size_t count) + xfs_off_t count) { xfs_mount_t *mp = ip->i_mount; xfs_fileoff_t offset_fsb; @@ -851,13 +888,11 @@ sb_start_intwrite(mp->m_super); tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS); tp->t_flags |= XFS_TRANS_RESERVE | XFS_TRANS_FREEZE_PROT; - error = xfs_trans_reserve(tp, resblks, - XFS_WRITE_LOG_RES(mp), 0, - XFS_TRANS_PERM_LOG_RES, - XFS_WRITE_LOG_COUNT); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, + resblks, 0); if (error) { - xfs_trans_cancel(tp, 0); - return XFS_ERROR(error); + xfs_trans_cancel(tp); + return error; } xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -869,8 +904,8 @@ xfs_bmap_init(&free_list, &firstfsb); nimaps = 1; error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, - XFS_BMAPI_CONVERT, &firstfsb, - 1, &imap, &nimaps, &free_list); + XFS_BMAPI_CONVERT, &firstfsb, resblks, + &imap, &nimaps, &free_list); if (error) goto error_on_bmapi_transaction; @@ -893,10 +928,10 @@ if (error) goto error_on_bmapi_transaction; - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + error = xfs_trans_commit(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); if (error) - return XFS_ERROR(error); + return error; if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) return xfs_alert_fsblock_zero(ip, &imap); @@ -917,7 +952,7 @@ error_on_bmapi_transaction: xfs_bmap_cancel(&free_list); - xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT)); + xfs_trans_cancel(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); - return XFS_ERROR(error); + return error; }