--- zzzz-none-000/linux-3.10.107/fs/f2fs/segment.h 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/fs/f2fs/segment.h 2021-02-04 17:41:59.000000000 +0000 @@ -9,22 +9,20 @@ * published by the Free Software Foundation. */ #include +#include /* constant macro */ #define NULL_SEGNO ((unsigned int)(~0)) #define NULL_SECNO ((unsigned int)(~0)) +#define DEF_RECLAIM_PREFREE_SEGMENTS 5 /* 5% over total segments */ + /* L: Logical segment # in volume, R: Relative segment # in main area */ #define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno) #define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno) -#define IS_DATASEG(t) \ - ((t == CURSEG_HOT_DATA) || (t == CURSEG_COLD_DATA) || \ - (t == CURSEG_WARM_DATA)) - -#define IS_NODESEG(t) \ - ((t == CURSEG_HOT_NODE) || (t == CURSEG_COLD_NODE) || \ - (t == CURSEG_WARM_NODE)) +#define IS_DATASEG(t) (t <= CURSEG_COLD_DATA) +#define IS_NODESEG(t) (t >= CURSEG_HOT_NODE) #define IS_CURSEG(sbi, seg) \ ((seg == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ @@ -48,18 +46,31 @@ (secno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ sbi->segs_per_sec)) \ -#define START_BLOCK(sbi, segno) \ - (SM_I(sbi)->seg0_blkaddr + \ +#define MAIN_BLKADDR(sbi) (SM_I(sbi)->main_blkaddr) +#define SEG0_BLKADDR(sbi) (SM_I(sbi)->seg0_blkaddr) + +#define MAIN_SEGS(sbi) (SM_I(sbi)->main_segments) +#define MAIN_SECS(sbi) (sbi->total_sections) + +#define TOTAL_SEGS(sbi) (SM_I(sbi)->segment_count) +#define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << sbi->log_blocks_per_seg) + +#define MAX_BLKADDR(sbi) (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi)) +#define SEGMENT_SIZE(sbi) (1ULL << (sbi->log_blocksize + \ + sbi->log_blocks_per_seg)) + +#define START_BLOCK(sbi, segno) (SEG0_BLKADDR(sbi) + \ (GET_R2L_SEGNO(FREE_I(sbi), segno) << sbi->log_blocks_per_seg)) + #define NEXT_FREE_BLKADDR(sbi, curseg) \ (START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff) -#define MAIN_BASE_BLOCK(sbi) (SM_I(sbi)->main_blkaddr) - -#define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) \ - ((blk_addr) - SM_I(sbi)->seg0_blkaddr) +#define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) ((blk_addr) - SEG0_BLKADDR(sbi)) #define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \ (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg) +#define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \ + (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (sbi->blocks_per_seg - 1)) + #define GET_SEGNO(sbi, blk_addr) \ (((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \ NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ @@ -77,26 +88,21 @@ #define SIT_ENTRY_OFFSET(sit_i, segno) \ (segno % sit_i->sents_per_block) -#define SIT_BLOCK_OFFSET(sit_i, segno) \ +#define SIT_BLOCK_OFFSET(segno) \ (segno / SIT_ENTRY_PER_BLOCK) -#define START_SEGNO(sit_i, segno) \ - (SIT_BLOCK_OFFSET(sit_i, segno) * SIT_ENTRY_PER_BLOCK) +#define START_SEGNO(segno) \ + (SIT_BLOCK_OFFSET(segno) * SIT_ENTRY_PER_BLOCK) +#define SIT_BLK_CNT(sbi) \ + ((MAIN_SEGS(sbi) + SIT_ENTRY_PER_BLOCK - 1) / SIT_ENTRY_PER_BLOCK) #define f2fs_bitmap_size(nr) \ (BITS_TO_LONGS(nr) * sizeof(unsigned long)) -#define TOTAL_SEGS(sbi) (SM_I(sbi)->main_segments) -#define TOTAL_SECS(sbi) (sbi->total_sections) -#define SECTOR_FROM_BLOCK(sbi, blk_addr) \ - (blk_addr << ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE)) -#define SECTOR_TO_BLOCK(sbi, sectors) \ - (sectors >> ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE)) - -/* during checkpoint, bio_private is used to synchronize the last bio */ -struct bio_private { - struct f2fs_sb_info *sbi; - bool is_sync; - void *wait; -}; +#define SECTOR_FROM_BLOCK(blk_addr) \ + (((sector_t)blk_addr) << F2FS_LOG_SECTORS_PER_BLOCK) +#define SECTOR_TO_BLOCK(sectors) \ + (sectors >> F2FS_LOG_SECTORS_PER_BLOCK) +#define MAX_BIO_BLOCKS(sbi) \ + ((int)min((int)max_hw_blocks(sbi), BIO_MAX_PAGES)) /* * indicate a block allocation direction: RIGHT and LEFT. @@ -131,10 +137,12 @@ /* * BG_GC means the background cleaning job. * FG_GC means the on-demand cleaning job. + * FORCE_FG_GC means on-demand cleaning job in background. */ enum { BG_GC = 0, - FG_GC + FG_GC, + FORCE_FG_GC, }; /* for a function parameter to select a victim segment */ @@ -142,6 +150,7 @@ int alloc_mode; /* LFS or SSR */ int gc_mode; /* GC_CB or GC_GREEDY */ unsigned long *dirty_segmap; /* dirty segment bitmap */ + unsigned int max_search; /* maximum # of segments to search */ unsigned int offset; /* last scanned bitmap offset */ unsigned int ofs_unit; /* bitmap search unit */ unsigned int min_cost; /* minimum cost */ @@ -157,6 +166,7 @@ */ unsigned short ckpt_valid_blocks; unsigned char *ckpt_valid_map; + unsigned char *discard_map; unsigned char type; /* segment type like CURSEG_XXX_TYPE */ unsigned long long mtime; /* modification time of the segment */ }; @@ -169,6 +179,20 @@ void (*allocate_segment)(struct f2fs_sb_info *, int, bool); }; +/* + * this value is set in page as a private data which indicate that + * the page is atomically written, and it is in inmem_pages list. + */ +#define ATOMIC_WRITTEN_PAGE 0x0000ffff + +#define IS_ATOMIC_WRITTEN_PAGE(page) \ + (page_private(page) == (unsigned long)ATOMIC_WRITTEN_PAGE) + +struct inmem_pages { + struct list_head list; + struct page *page; +}; + struct sit_info { const struct segment_allocation *s_ops; @@ -178,6 +202,7 @@ char *sit_bitmap; /* SIT bitmap pointer */ unsigned int bitmap_size; /* SIT bitmap size */ + unsigned long *tmp_map; /* bitmap for temporal use */ unsigned long *dirty_sentries_bitmap; /* bitmap for dirty sentries */ unsigned int dirty_sentries; /* # of dirty sentries */ unsigned int sents_per_block; /* # of SIT entries per block */ @@ -196,7 +221,7 @@ unsigned int start_segno; /* start segment number logically */ unsigned int free_segments; /* # of free segments */ unsigned int free_sections; /* # of free sections */ - rwlock_t segmap_lock; /* free segmap lock */ + spinlock_t segmap_lock; /* free segmap lock */ unsigned long *free_segmap; /* free segment bitmap */ unsigned long *free_secmap; /* free section bitmap */ }; @@ -239,6 +264,12 @@ unsigned int next_segno; /* preallocated segment */ }; +struct sit_entry_set { + struct list_head set_list; /* link with all sit sets */ + unsigned int start_segno; /* start segno of sits in set */ + unsigned int entry_cnt; /* the # of sit entries in set */ +}; + /* * inline functions */ @@ -301,9 +332,9 @@ unsigned int max, unsigned int segno) { unsigned int ret; - read_lock(&free_i->segmap_lock); + spin_lock(&free_i->segmap_lock); ret = find_next_bit(free_i->free_segmap, max, segno); - read_unlock(&free_i->segmap_lock); + spin_unlock(&free_i->segmap_lock); return ret; } @@ -314,16 +345,17 @@ unsigned int start_segno = secno * sbi->segs_per_sec; unsigned int next; - write_lock(&free_i->segmap_lock); + spin_lock(&free_i->segmap_lock); clear_bit(segno, free_i->free_segmap); free_i->free_segments++; - next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi), start_segno); + next = find_next_bit(free_i->free_segmap, + start_segno + sbi->segs_per_sec, start_segno); if (next >= start_segno + sbi->segs_per_sec) { clear_bit(secno, free_i->free_secmap); free_i->free_sections++; } - write_unlock(&free_i->segmap_lock); + spin_unlock(&free_i->segmap_lock); } static inline void __set_inuse(struct f2fs_sb_info *sbi, @@ -345,18 +377,18 @@ unsigned int start_segno = secno * sbi->segs_per_sec; unsigned int next; - write_lock(&free_i->segmap_lock); + spin_lock(&free_i->segmap_lock); if (test_and_clear_bit(segno, free_i->free_segmap)) { free_i->free_segments++; - next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi), - start_segno); + next = find_next_bit(free_i->free_segmap, + start_segno + sbi->segs_per_sec, start_segno); if (next >= start_segno + sbi->segs_per_sec) { if (test_and_clear_bit(secno, free_i->free_secmap)) free_i->free_sections++; } } - write_unlock(&free_i->segmap_lock); + spin_unlock(&free_i->segmap_lock); } static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi, @@ -364,13 +396,13 @@ { struct free_segmap_info *free_i = FREE_I(sbi); unsigned int secno = segno / sbi->segs_per_sec; - write_lock(&free_i->segmap_lock); + spin_lock(&free_i->segmap_lock); if (!test_and_set_bit(segno, free_i->free_segmap)) { free_i->free_segments--; if (!test_and_set_bit(secno, free_i->free_secmap)) free_i->free_sections--; } - write_unlock(&free_i->segmap_lock); + spin_unlock(&free_i->segmap_lock); } static inline void get_sit_bitmap(struct f2fs_sb_info *sbi, @@ -382,26 +414,12 @@ static inline block_t written_block_count(struct f2fs_sb_info *sbi) { - struct sit_info *sit_i = SIT_I(sbi); - block_t vblocks; - - mutex_lock(&sit_i->sentry_lock); - vblocks = sit_i->written_valid_blocks; - mutex_unlock(&sit_i->sentry_lock); - - return vblocks; + return SIT_I(sbi)->written_valid_blocks; } static inline unsigned int free_segments(struct f2fs_sb_info *sbi) { - struct free_segmap_info *free_i = FREE_I(sbi); - unsigned int free_segs; - - read_lock(&free_i->segmap_lock); - free_segs = free_i->free_segments; - read_unlock(&free_i->segmap_lock); - - return free_segs; + return FREE_I(sbi)->free_segments; } static inline int reserved_segments(struct f2fs_sb_info *sbi) @@ -411,14 +429,7 @@ static inline unsigned int free_sections(struct f2fs_sb_info *sbi) { - struct free_segmap_info *free_i = FREE_I(sbi); - unsigned int free_secs; - - read_lock(&free_i->segmap_lock); - free_secs = free_i->free_sections; - read_unlock(&free_i->segmap_lock); - - return free_secs; + return FREE_I(sbi)->free_sections; } static inline unsigned int prefree_segments(struct f2fs_sb_info *sbi) @@ -453,7 +464,10 @@ static inline bool need_SSR(struct f2fs_sb_info *sbi) { - return (free_sections(sbi) < overprovision_sections(sbi)); + int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); + int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); + return free_sections(sbi) <= (node_secs + 2 * dent_secs + + reserved_sections(sbi) + 1); } static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) @@ -461,33 +475,74 @@ int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); - if (sbi->por_doing) + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) return false; - return ((free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs + - reserved_sections(sbi))); + return (free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs + + reserved_sections(sbi)); +} + +static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi) +{ + return prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments; } static inline int utilization(struct f2fs_sb_info *sbi) { - return div_u64(valid_user_blocks(sbi) * 100, sbi->user_block_count); + return div_u64((u64)valid_user_blocks(sbi) * 100, + sbi->user_block_count); } /* * Sometimes f2fs may be better to drop out-of-place update policy. - * So, if fs utilization is over MIN_IPU_UTIL, then f2fs tries to write - * data in the original place likewise other traditional file systems. - * But, currently set 100 in percentage, which means it is disabled. - * See below need_inplace_update(). + * And, users can control the policy through sysfs entries. + * There are five policies with triggering conditions as follows. + * F2FS_IPU_FORCE - all the time, + * F2FS_IPU_SSR - if SSR mode is activated, + * F2FS_IPU_UTIL - if FS utilization is over threashold, + * F2FS_IPU_SSR_UTIL - if SSR mode is activated and FS utilization is over + * threashold, + * F2FS_IPU_FSYNC - activated in fsync path only for high performance flash + * storages. IPU will be triggered only if the # of dirty + * pages over min_fsync_blocks. + * F2FS_IPUT_DISABLE - disable IPU. (=default option) */ -#define MIN_IPU_UTIL 100 +#define DEF_MIN_IPU_UTIL 70 +#define DEF_MIN_FSYNC_BLOCKS 8 + +enum { + F2FS_IPU_FORCE, + F2FS_IPU_SSR, + F2FS_IPU_UTIL, + F2FS_IPU_SSR_UTIL, + F2FS_IPU_FSYNC, +}; + static inline bool need_inplace_update(struct inode *inode) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - if (S_ISDIR(inode->i_mode)) + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + unsigned int policy = SM_I(sbi)->ipu_policy; + + /* IPU can be done only for the user data */ + if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode)) return false; - if (need_SSR(sbi) && utilization(sbi) > MIN_IPU_UTIL) + + if (policy & (0x1 << F2FS_IPU_FORCE)) + return true; + if (policy & (0x1 << F2FS_IPU_SSR) && need_SSR(sbi)) + return true; + if (policy & (0x1 << F2FS_IPU_UTIL) && + utilization(sbi) > SM_I(sbi)->min_ipu_util) + return true; + if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && need_SSR(sbi) && + utilization(sbi) > SM_I(sbi)->min_ipu_util) + return true; + + /* this is only set during fdatasync */ + if (policy & (0x1 << F2FS_IPU_FSYNC) && + is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU)) return true; + return false; } @@ -513,53 +568,52 @@ static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) { - unsigned int end_segno = SM_I(sbi)->segment_count - 1; - BUG_ON(segno > end_segno); + f2fs_bug_on(sbi, segno > TOTAL_SEGS(sbi) - 1); } -/* - * This function is used for only debugging. - * NOTE: In future, we have to remove this function. - */ static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) { - struct f2fs_sm_info *sm_info = SM_I(sbi); - block_t total_blks = sm_info->segment_count << sbi->log_blocks_per_seg; - block_t start_addr = sm_info->seg0_blkaddr; - block_t end_addr = start_addr + total_blks - 1; - BUG_ON(blk_addr < start_addr); - BUG_ON(blk_addr > end_addr); + f2fs_bug_on(sbi, blk_addr < SEG0_BLKADDR(sbi) + || blk_addr >= MAX_BLKADDR(sbi)); } /* - * Summary block is always treated as invalid block + * Summary block is always treated as an invalid block */ static inline void check_block_count(struct f2fs_sb_info *sbi, int segno, struct f2fs_sit_entry *raw_sit) { - struct f2fs_sm_info *sm_info = SM_I(sbi); - unsigned int end_segno = sm_info->segment_count - 1; +#ifdef CONFIG_F2FS_CHECK_FS + bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false; int valid_blocks = 0; - int i; - - /* check segment usage */ - BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg); - - /* check boundary of a given segment number */ - BUG_ON(segno > end_segno); + int cur_pos = 0, next_pos; /* check bitmap with valid block count */ - for (i = 0; i < sbi->blocks_per_seg; i++) - if (f2fs_test_bit(i, raw_sit->valid_map)) - valid_blocks++; + do { + if (is_valid) { + next_pos = find_next_zero_bit_le(&raw_sit->valid_map, + sbi->blocks_per_seg, + cur_pos); + valid_blocks += next_pos - cur_pos; + } else + next_pos = find_next_bit_le(&raw_sit->valid_map, + sbi->blocks_per_seg, + cur_pos); + cur_pos = next_pos; + is_valid = !is_valid; + } while (cur_pos < sbi->blocks_per_seg); BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks); +#endif + /* check segment usage, and check boundary of a given segment number */ + f2fs_bug_on(sbi, GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg + || segno > TOTAL_SEGS(sbi) - 1); } static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi, unsigned int start) { struct sit_info *sit_i = SIT_I(sbi); - unsigned int offset = SIT_BLOCK_OFFSET(sit_i, start); + unsigned int offset = SIT_BLOCK_OFFSET(start); block_t blk_addr = sit_i->sit_base_addr + offset; check_seg_range(sbi, start); @@ -586,12 +640,9 @@ static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start) { - unsigned int block_off = SIT_BLOCK_OFFSET(sit_i, start); + unsigned int block_off = SIT_BLOCK_OFFSET(start); - if (f2fs_test_bit(block_off, sit_i->sit_bitmap)) - f2fs_clear_bit(block_off, sit_i->sit_bitmap); - else - f2fs_set_bit(block_off, sit_i->sit_bitmap); + f2fs_change_bit(block_off, sit_i->sit_bitmap); } static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi) @@ -633,5 +684,51 @@ { struct block_device *bdev = sbi->sb->s_bdev; struct request_queue *q = bdev_get_queue(bdev); - return SECTOR_TO_BLOCK(sbi, queue_max_sectors(q)); + return SECTOR_TO_BLOCK(queue_max_sectors(q)); +} + +/* + * It is very important to gather dirty pages and write at once, so that we can + * submit a big bio without interfering other data writes. + * By default, 512 pages for directory data, + * 512 pages (2MB) * 3 for three types of nodes, and + * max_bio_blocks for meta are set. + */ +static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type) +{ + if (sbi->sb->s_bdi->wb.dirty_exceeded) + return 0; + + if (type == DATA) + return sbi->blocks_per_seg; + else if (type == NODE) + return 3 * sbi->blocks_per_seg; + else if (type == META) + return MAX_BIO_BLOCKS(sbi); + else + return 0; +} + +/* + * When writing pages, it'd better align nr_to_write for segment size. + */ +static inline long nr_pages_to_write(struct f2fs_sb_info *sbi, int type, + struct writeback_control *wbc) +{ + long nr_to_write, desired; + + if (wbc->sync_mode != WB_SYNC_NONE) + return 0; + + nr_to_write = wbc->nr_to_write; + + if (type == DATA) + desired = 4096; + else if (type == NODE) + desired = 3 * max_hw_blocks(sbi); + else + desired = MAX_BIO_BLOCKS(sbi); + + wbc->nr_to_write = desired; + return desired - nr_to_write; }