--- zzzz-none-000/linux-3.10.107/fs/f2fs/segment.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/fs/f2fs/segment.c 2021-02-04 17:41:59.000000000 +0000 @@ -13,13 +13,270 @@ #include #include #include -#include +#include +#include +#include #include "f2fs.h" #include "segment.h" #include "node.h" +#include "trace.h" #include +#define __reverse_ffz(x) __reverse_ffs(~(x)) + +static struct kmem_cache *discard_entry_slab; +static struct kmem_cache *sit_entry_set_slab; +static struct kmem_cache *inmem_entry_slab; + +static unsigned long __reverse_ulong(unsigned char *str) +{ + unsigned long tmp = 0; + int shift = 24, idx = 0; + +#if BITS_PER_LONG == 64 + shift = 56; +#endif + while (shift >= 0) { + tmp |= (unsigned long)str[idx++] << shift; + shift -= BITS_PER_BYTE; + } + return tmp; +} + +/* + * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since + * MSB and LSB are reversed in a byte by f2fs_set_bit. + */ +static inline unsigned long __reverse_ffs(unsigned long word) +{ + int num = 0; + +#if BITS_PER_LONG == 64 + if ((word & 0xffffffff00000000UL) == 0) + num += 32; + else + word >>= 32; +#endif + if ((word & 0xffff0000) == 0) + num += 16; + else + word >>= 16; + + if ((word & 0xff00) == 0) + num += 8; + else + word >>= 8; + + if ((word & 0xf0) == 0) + num += 4; + else + word >>= 4; + + if ((word & 0xc) == 0) + num += 2; + else + word >>= 2; + + if ((word & 0x2) == 0) + num += 1; + return num; +} + +/* + * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because + * f2fs_set_bit makes MSB and LSB reversed in a byte. + * Example: + * MSB <--> LSB + * f2fs_set_bit(0, bitmap) => 1000 0000 + * f2fs_set_bit(7, bitmap) => 0000 0001 + */ +static unsigned long __find_rev_next_bit(const unsigned long *addr, + unsigned long size, unsigned long offset) +{ + const unsigned long *p = addr + BIT_WORD(offset); + unsigned long result = offset & ~(BITS_PER_LONG - 1); + unsigned long tmp; + + if (offset >= size) + return size; + + size -= result; + offset %= BITS_PER_LONG; + if (!offset) + goto aligned; + + tmp = __reverse_ulong((unsigned char *)p); + tmp &= ~0UL >> offset; + + if (size < BITS_PER_LONG) + goto found_first; + if (tmp) + goto found_middle; + + size -= BITS_PER_LONG; + result += BITS_PER_LONG; + p++; +aligned: + while (size & ~(BITS_PER_LONG-1)) { + tmp = __reverse_ulong((unsigned char *)p); + if (tmp) + goto found_middle; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + p++; + } + if (!size) + return result; + + tmp = __reverse_ulong((unsigned char *)p); +found_first: + tmp &= (~0UL << (BITS_PER_LONG - size)); + if (!tmp) /* Are any bits set? */ + return result + size; /* Nope. */ +found_middle: + return result + __reverse_ffs(tmp); +} + +static unsigned long __find_rev_next_zero_bit(const unsigned long *addr, + unsigned long size, unsigned long offset) +{ + const unsigned long *p = addr + BIT_WORD(offset); + unsigned long result = offset & ~(BITS_PER_LONG - 1); + unsigned long tmp; + + if (offset >= size) + return size; + + size -= result; + offset %= BITS_PER_LONG; + if (!offset) + goto aligned; + + tmp = __reverse_ulong((unsigned char *)p); + tmp |= ~((~0UL << offset) >> offset); + + if (size < BITS_PER_LONG) + goto found_first; + if (tmp != ~0UL) + goto found_middle; + + size -= BITS_PER_LONG; + result += BITS_PER_LONG; + p++; +aligned: + while (size & ~(BITS_PER_LONG - 1)) { + tmp = __reverse_ulong((unsigned char *)p); + if (tmp != ~0UL) + goto found_middle; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + p++; + } + if (!size) + return result; + + tmp = __reverse_ulong((unsigned char *)p); +found_first: + tmp |= ~(~0UL << (BITS_PER_LONG - size)); + if (tmp == ~0UL) /* Are any bits zero? */ + return result + size; /* Nope. */ +found_middle: + return result + __reverse_ffz(tmp); +} + +void register_inmem_page(struct inode *inode, struct page *page) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + struct inmem_pages *new; + + f2fs_trace_pid(page); + + set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE); + SetPagePrivate(page); + + new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); + + /* add atomic page indices to the list */ + new->page = page; + INIT_LIST_HEAD(&new->list); + + /* increase reference count with clean state */ + mutex_lock(&fi->inmem_lock); + get_page(page); + list_add_tail(&new->list, &fi->inmem_pages); + inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); + mutex_unlock(&fi->inmem_lock); + + trace_f2fs_register_inmem_page(page, INMEM); +} + +int commit_inmem_pages(struct inode *inode, bool abort) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_inode_info *fi = F2FS_I(inode); + struct inmem_pages *cur, *tmp; + bool submit_bio = false; + struct f2fs_io_info fio = { + .sbi = sbi, + .type = DATA, + .rw = WRITE_SYNC | REQ_PRIO, + .encrypted_page = NULL, + }; + int err = 0; + + /* + * The abort is true only when f2fs_evict_inode is called. + * Basically, the f2fs_evict_inode doesn't produce any data writes, so + * that we don't need to call f2fs_balance_fs. + * Otherwise, f2fs_gc in f2fs_balance_fs can wait forever until this + * inode becomes free by iget_locked in f2fs_iget. + */ + if (!abort) { + f2fs_balance_fs(sbi); + f2fs_lock_op(sbi); + } + + mutex_lock(&fi->inmem_lock); + list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) { + lock_page(cur->page); + if (!abort) { + if (cur->page->mapping == inode->i_mapping) { + set_page_dirty(cur->page); + f2fs_wait_on_page_writeback(cur->page, DATA); + if (clear_page_dirty_for_io(cur->page)) + inode_dec_dirty_pages(inode); + trace_f2fs_commit_inmem_page(cur->page, INMEM); + fio.page = cur->page; + err = do_write_data_page(&fio); + if (err) { + unlock_page(cur->page); + break; + } + clear_cold_data(cur->page); + submit_bio = true; + } + } else { + trace_f2fs_commit_inmem_page(cur->page, INMEM_DROP); + } + set_page_private(cur->page, 0); + ClearPagePrivate(cur->page); + f2fs_put_page(cur->page, 1); + + list_del(&cur->list); + kmem_cache_free(inmem_entry_slab, cur); + dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); + } + mutex_unlock(&fi->inmem_lock); + + if (!abort) { + f2fs_unlock_op(sbi); + if (submit_bio) + f2fs_submit_merged_bio(sbi, DATA, WRITE); + } + return err; +} + /* * This function balances dirty node and dentry pages. * In addition, it controls garbage collection. @@ -32,10 +289,134 @@ */ if (has_not_enough_free_secs(sbi, 0)) { mutex_lock(&sbi->gc_mutex); - f2fs_gc(sbi); + f2fs_gc(sbi, false); } } +void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) +{ + /* try to shrink extent cache when there is no enough memory */ + if (!available_free_memory(sbi, EXTENT_CACHE)) + f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER); + + /* check the # of cached NAT entries */ + if (!available_free_memory(sbi, NAT_ENTRIES)) + try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK); + + if (!available_free_memory(sbi, FREE_NIDS)) + try_to_free_nids(sbi, NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES); + + /* checkpoint is the only way to shrink partial cached entries */ + if (!available_free_memory(sbi, NAT_ENTRIES) || + excess_prefree_segs(sbi) || + !available_free_memory(sbi, INO_ENTRIES) || + jiffies > sbi->cp_expires) + f2fs_sync_fs(sbi->sb, true); +} + +static int issue_flush_thread(void *data) +{ + struct f2fs_sb_info *sbi = data; + struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; + wait_queue_head_t *q = &fcc->flush_wait_queue; +repeat: + if (kthread_should_stop()) + return 0; + + if (!llist_empty(&fcc->issue_list)) { + struct bio *bio; + struct flush_cmd *cmd, *next; + int ret; + + bio = f2fs_bio_alloc(0); + + fcc->dispatch_list = llist_del_all(&fcc->issue_list); + fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list); + + bio->bi_bdev = sbi->sb->s_bdev; + ret = submit_bio_wait(WRITE_FLUSH, bio); + + llist_for_each_entry_safe(cmd, next, + fcc->dispatch_list, llnode) { + cmd->ret = ret; + complete(&cmd->wait); + } + bio_put(bio); + fcc->dispatch_list = NULL; + } + + wait_event_interruptible(*q, + kthread_should_stop() || !llist_empty(&fcc->issue_list)); + goto repeat; +} + +int f2fs_issue_flush(struct f2fs_sb_info *sbi) +{ + struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; + struct flush_cmd cmd; + + trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER), + test_opt(sbi, FLUSH_MERGE)); + + if (test_opt(sbi, NOBARRIER)) + return 0; + + if (!test_opt(sbi, FLUSH_MERGE)) { + struct bio *bio = f2fs_bio_alloc(0); + int ret; + + bio->bi_bdev = sbi->sb->s_bdev; + ret = submit_bio_wait(WRITE_FLUSH, bio); + bio_put(bio); + return ret; + } + + init_completion(&cmd.wait); + + llist_add(&cmd.llnode, &fcc->issue_list); + + if (!fcc->dispatch_list) + wake_up(&fcc->flush_wait_queue); + + wait_for_completion(&cmd.wait); + + return cmd.ret; +} + +int create_flush_cmd_control(struct f2fs_sb_info *sbi) +{ + dev_t dev = sbi->sb->s_bdev->bd_dev; + struct flush_cmd_control *fcc; + int err = 0; + + fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL); + if (!fcc) + return -ENOMEM; + init_waitqueue_head(&fcc->flush_wait_queue); + init_llist_head(&fcc->issue_list); + SM_I(sbi)->cmd_control_info = fcc; + fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, + "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); + if (IS_ERR(fcc->f2fs_issue_flush)) { + err = PTR_ERR(fcc->f2fs_issue_flush); + kfree(fcc); + SM_I(sbi)->cmd_control_info = NULL; + return err; + } + + return err; +} + +void destroy_flush_cmd_control(struct f2fs_sb_info *sbi) +{ + struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; + + if (fcc && fcc->f2fs_issue_flush) + kthread_stop(fcc->f2fs_issue_flush); + kfree(fcc); + SM_I(sbi)->cmd_control_info = NULL; +} + static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, enum dirty_type dirty_type) { @@ -50,20 +431,14 @@ if (dirty_type == DIRTY) { struct seg_entry *sentry = get_seg_entry(sbi, segno); - enum dirty_type t = DIRTY_HOT_DATA; + enum dirty_type t = sentry->type; - dirty_type = sentry->type; - - if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type])) - dirty_i->nr_dirty[dirty_type]++; - - /* Only one bitmap should be set */ - for (; t <= DIRTY_COLD_NODE; t++) { - if (t == dirty_type) - continue; - if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) - dirty_i->nr_dirty[t]--; + if (unlikely(t >= DIRTY)) { + f2fs_bug_on(sbi, 1); + return; } + if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t])) + dirty_i->nr_dirty[t]++; } } @@ -76,12 +451,11 @@ dirty_i->nr_dirty[dirty_type]--; if (dirty_type == DIRTY) { - enum dirty_type t = DIRTY_HOT_DATA; + struct seg_entry *sentry = get_seg_entry(sbi, segno); + enum dirty_type t = sentry->type; - /* clear all the bitmaps */ - for (; t <= DIRTY_COLD_NODE; t++) - if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) - dirty_i->nr_dirty[t]--; + if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) + dirty_i->nr_dirty[t]--; if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0) clear_bit(GET_SECNO(sbi, segno), @@ -94,7 +468,7 @@ * Adding dirty entry into seglist is not critical operation. * If a given segment is one of current working segments, it won't be added. */ -void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) +static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); unsigned short valid_blocks; @@ -117,7 +491,122 @@ } mutex_unlock(&dirty_i->seglist_lock); - return; +} + +static int f2fs_issue_discard(struct f2fs_sb_info *sbi, + block_t blkstart, block_t blklen) +{ + sector_t start = SECTOR_FROM_BLOCK(blkstart); + sector_t len = SECTOR_FROM_BLOCK(blklen); + struct seg_entry *se; + unsigned int offset; + block_t i; + + for (i = blkstart; i < blkstart + blklen; i++) { + se = get_seg_entry(sbi, GET_SEGNO(sbi, i)); + offset = GET_BLKOFF_FROM_SEG0(sbi, i); + + if (!f2fs_test_and_set_bit(offset, se->discard_map)) + sbi->discard_blks--; + } + trace_f2fs_issue_discard(sbi->sb, blkstart, blklen); + return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); +} + +bool discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr) +{ + int err = -ENOTSUPP; + + if (test_opt(sbi, DISCARD)) { + struct seg_entry *se = get_seg_entry(sbi, + GET_SEGNO(sbi, blkaddr)); + unsigned int offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); + + if (f2fs_test_bit(offset, se->discard_map)) + return false; + + err = f2fs_issue_discard(sbi, blkaddr, 1); + } + + if (err) { + update_meta_page(sbi, NULL, blkaddr); + return true; + } + return false; +} + +static void __add_discard_entry(struct f2fs_sb_info *sbi, + struct cp_control *cpc, struct seg_entry *se, + unsigned int start, unsigned int end) +{ + struct list_head *head = &SM_I(sbi)->discard_list; + struct discard_entry *new, *last; + + if (!list_empty(head)) { + last = list_last_entry(head, struct discard_entry, list); + if (START_BLOCK(sbi, cpc->trim_start) + start == + last->blkaddr + last->len) { + last->len += end - start; + goto done; + } + } + + new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); + INIT_LIST_HEAD(&new->list); + new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start; + new->len = end - start; + list_add_tail(&new->list, head); +done: + SM_I(sbi)->nr_discards += end - start; +} + +static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) +{ + int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); + int max_blocks = sbi->blocks_per_seg; + struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start); + unsigned long *cur_map = (unsigned long *)se->cur_valid_map; + unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; + unsigned long *discard_map = (unsigned long *)se->discard_map; + unsigned long *dmap = SIT_I(sbi)->tmp_map; + unsigned int start = 0, end = -1; + bool force = (cpc->reason == CP_DISCARD); + int i; + + if (se->valid_blocks == max_blocks) + return; + + if (!force) { + if (!test_opt(sbi, DISCARD) || !se->valid_blocks || + SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards) + return; + } + + /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */ + for (i = 0; i < entries; i++) + dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] : + (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]; + + while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) { + start = __find_rev_next_bit(dmap, max_blocks, end + 1); + if (start >= max_blocks) + break; + + end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1); + __add_discard_entry(sbi, cpc, se, start, end); + } +} + +void release_discard_addrs(struct f2fs_sb_info *sbi) +{ + struct list_head *head = &(SM_I(sbi)->discard_list); + struct discard_entry *entry, *this; + + /* drop caches */ + list_for_each_entry_safe(entry, this, head, list) { + list_del(&entry->list); + kmem_cache_free(discard_entry_slab, entry); + } } /* @@ -126,55 +615,68 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - unsigned int segno, offset = 0; - unsigned int total_segs = TOTAL_SEGS(sbi); + unsigned int segno; mutex_lock(&dirty_i->seglist_lock); - while (1) { - segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, - offset); - if (segno >= total_segs) - break; + for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi)) __set_test_and_free(sbi, segno); - offset = segno + 1; - } mutex_unlock(&dirty_i->seglist_lock); } -void clear_prefree_segments(struct f2fs_sb_info *sbi) +void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) { + struct list_head *head = &(SM_I(sbi)->discard_list); + struct discard_entry *entry, *this; struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - unsigned int segno, offset = 0; - unsigned int total_segs = TOTAL_SEGS(sbi); + unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; + unsigned int start = 0, end = -1; mutex_lock(&dirty_i->seglist_lock); + while (1) { - segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, - offset); - if (segno >= total_segs) + int i; + start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1); + if (start >= MAIN_SEGS(sbi)) break; + end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi), + start + 1); - offset = segno + 1; - if (test_and_clear_bit(segno, dirty_i->dirty_segmap[PRE])) - dirty_i->nr_dirty[PRE]--; + for (i = start; i < end; i++) + clear_bit(i, prefree_map); + + dirty_i->nr_dirty[PRE] -= end - start; - /* Let's use trim */ - if (test_opt(sbi, DISCARD)) - blkdev_issue_discard(sbi->sb->s_bdev, - START_BLOCK(sbi, segno) << - sbi->log_sectors_per_block, - 1 << (sbi->log_sectors_per_block + - sbi->log_blocks_per_seg), - GFP_NOFS, 0); + if (!test_opt(sbi, DISCARD)) + continue; + + f2fs_issue_discard(sbi, START_BLOCK(sbi, start), + (end - start) << sbi->log_blocks_per_seg); } mutex_unlock(&dirty_i->seglist_lock); + + /* send small discards */ + list_for_each_entry_safe(entry, this, head, list) { + if (cpc->reason == CP_DISCARD && entry->len < cpc->trim_minlen) + goto skip; + f2fs_issue_discard(sbi, entry->blkaddr, entry->len); + cpc->trimmed += entry->len; +skip: + list_del(&entry->list); + SM_I(sbi)->nr_discards -= entry->len; + kmem_cache_free(discard_entry_slab, entry); + } } -static void __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) +static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) { struct sit_info *sit_i = SIT_I(sbi); - if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) + + if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) { sit_i->dirty_sentries++; + return false; + } + + return true; } static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type, @@ -196,9 +698,9 @@ se = get_seg_entry(sbi, segno); new_vblocks = se->valid_blocks + del; - offset = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & (sbi->blocks_per_seg - 1); + offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); - BUG_ON((new_vblocks >> (sizeof(unsigned short) << 3) || + f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) || (new_vblocks > sbi->blocks_per_seg))); se->valid_blocks = new_vblocks; @@ -207,11 +709,15 @@ /* Update valid block bitmap */ if (del > 0) { - if (f2fs_set_bit(offset, se->cur_valid_map)) - BUG(); + if (f2fs_test_and_set_bit(offset, se->cur_valid_map)) + f2fs_bug_on(sbi, 1); + if (!f2fs_test_and_set_bit(offset, se->discard_map)) + sbi->discard_blks--; } else { - if (!f2fs_clear_bit(offset, se->cur_valid_map)) - BUG(); + if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) + f2fs_bug_on(sbi, 1); + if (f2fs_test_and_clear_bit(offset, se->discard_map)) + sbi->discard_blks++; } if (!f2fs_test_bit(offset, se->ckpt_valid_map)) se->ckpt_valid_blocks += del; @@ -225,12 +731,14 @@ get_sec_entry(sbi, segno)->valid_blocks += del; } -static void refresh_sit_entry(struct f2fs_sb_info *sbi, - block_t old_blkaddr, block_t new_blkaddr) +void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new) { - update_sit_entry(sbi, new_blkaddr, 1); - if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) - update_sit_entry(sbi, old_blkaddr, -1); + update_sit_entry(sbi, new, 1); + if (GET_SEGNO(sbi, old) != NULL_SEGNO) + update_sit_entry(sbi, old, -1); + + locate_dirty_segment(sbi, GET_SEGNO(sbi, old)); + locate_dirty_segment(sbi, GET_SEGNO(sbi, new)); } void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) @@ -238,7 +746,7 @@ unsigned int segno = GET_SEGNO(sbi, addr); struct sit_info *sit_i = SIT_I(sbi); - BUG_ON(addr == NULL_ADDR); + f2fs_bug_on(sbi, addr == NULL_ADDR); if (addr == NEW_ADDR) return; @@ -253,42 +761,68 @@ mutex_unlock(&sit_i->sentry_lock); } +bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr) +{ + struct sit_info *sit_i = SIT_I(sbi); + unsigned int segno, offset; + struct seg_entry *se; + bool is_cp = false; + + if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) + return true; + + mutex_lock(&sit_i->sentry_lock); + + segno = GET_SEGNO(sbi, blkaddr); + se = get_seg_entry(sbi, segno); + offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); + + if (f2fs_test_bit(offset, se->ckpt_valid_map)) + is_cp = true; + + mutex_unlock(&sit_i->sentry_lock); + + return is_cp; +} + /* * This function should be resided under the curseg_mutex lock */ static void __add_sum_entry(struct f2fs_sb_info *sbi, int type, - struct f2fs_summary *sum, unsigned short offset) + struct f2fs_summary *sum) { struct curseg_info *curseg = CURSEG_I(sbi, type); void *addr = curseg->sum_blk; - addr += offset * sizeof(struct f2fs_summary); + addr += curseg->next_blkoff * sizeof(struct f2fs_summary); memcpy(addr, sum, sizeof(struct f2fs_summary)); - return; } /* * Calculate the number of current summary pages for writing */ -int npages_for_summary_flush(struct f2fs_sb_info *sbi) +int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra) { - int total_size_bytes = 0; int valid_sum_count = 0; - int i, sum_space; + int i, sum_in_page; for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { if (sbi->ckpt->alloc_type[i] == SSR) valid_sum_count += sbi->blocks_per_seg; - else - valid_sum_count += curseg_blkoff(sbi, i); + else { + if (for_ra) + valid_sum_count += le16_to_cpu( + F2FS_CKPT(sbi)->cur_data_blkoff[i]); + else + valid_sum_count += curseg_blkoff(sbi, i); + } } - total_size_bytes = valid_sum_count * (SUMMARY_SIZE + 1) - + sizeof(struct nat_journal) + 2 - + sizeof(struct sit_journal) + 2; - sum_space = PAGE_CACHE_SIZE - SUM_FOOTER_SIZE; - if (total_size_bytes < sum_space) + sum_in_page = (PAGE_CACHE_SIZE - 2 * SUM_JOURNAL_SIZE - + SUM_FOOTER_SIZE) / SUMMARY_SIZE; + if (valid_sum_count <= sum_in_page) return 1; - else if (total_size_bytes < 2 * sum_space) + else if ((valid_sum_count - sum_in_page) <= + (PAGE_CACHE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE) return 2; return 3; } @@ -301,74 +835,33 @@ return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno)); } -static void write_sum_page(struct f2fs_sb_info *sbi, - struct f2fs_summary_block *sum_blk, block_t blk_addr) +void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr) { struct page *page = grab_meta_page(sbi, blk_addr); - void *kaddr = page_address(page); - memcpy(kaddr, sum_blk, PAGE_CACHE_SIZE); + void *dst = page_address(page); + + if (src) + memcpy(dst, src, PAGE_CACHE_SIZE); + else + memset(dst, 0, PAGE_CACHE_SIZE); set_page_dirty(page); f2fs_put_page(page, 1); } -static unsigned int check_prefree_segments(struct f2fs_sb_info *sbi, int type) +static void write_sum_page(struct f2fs_sb_info *sbi, + struct f2fs_summary_block *sum_blk, block_t blk_addr) { - struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - unsigned long *prefree_segmap = dirty_i->dirty_segmap[PRE]; - unsigned int segno; - unsigned int ofs = 0; - - /* - * If there is not enough reserved sections, - * we should not reuse prefree segments. - */ - if (has_not_enough_free_secs(sbi, 0)) - return NULL_SEGNO; - - /* - * NODE page should not reuse prefree segment, - * since those information is used for SPOR. - */ - if (IS_NODESEG(type)) - return NULL_SEGNO; -next: - segno = find_next_bit(prefree_segmap, TOTAL_SEGS(sbi), ofs); - ofs += sbi->segs_per_sec; - - if (segno < TOTAL_SEGS(sbi)) { - int i; - - /* skip intermediate segments in a section */ - if (segno % sbi->segs_per_sec) - goto next; - - /* skip if the section is currently used */ - if (sec_usage_check(sbi, GET_SECNO(sbi, segno))) - goto next; - - /* skip if whole section is not prefree */ - for (i = 1; i < sbi->segs_per_sec; i++) - if (!test_bit(segno + i, prefree_segmap)) - goto next; - - /* skip if whole section was not free at the last checkpoint */ - for (i = 0; i < sbi->segs_per_sec; i++) - if (get_seg_entry(sbi, segno + i)->ckpt_valid_blocks) - goto next; - - return segno; - } - return NULL_SEGNO; + update_meta_page(sbi, (void *)sum_blk, blk_addr); } static int is_next_segment_free(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg = CURSEG_I(sbi, type); - unsigned int segno = curseg->segno; + unsigned int segno = curseg->segno + 1; struct free_segmap_info *free_i = FREE_I(sbi); - if (segno + 1 < TOTAL_SEGS(sbi) && (segno + 1) % sbi->segs_per_sec) - return !test_bit(segno + 1, free_i->free_segmap); + if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec) + return !test_bit(segno, free_i->free_segmap); return 0; } @@ -381,7 +874,7 @@ { struct free_segmap_info *free_i = FREE_I(sbi); unsigned int segno, secno, zoneno; - unsigned int total_zones = TOTAL_SECS(sbi) / sbi->secs_per_zone; + unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone; unsigned int hint = *newseg / sbi->segs_per_sec; unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg); unsigned int left_start = hint; @@ -389,22 +882,22 @@ int go_left = 0; int i; - write_lock(&free_i->segmap_lock); + spin_lock(&free_i->segmap_lock); if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { segno = find_next_zero_bit(free_i->free_segmap, - TOTAL_SEGS(sbi), *newseg + 1); + MAIN_SEGS(sbi), *newseg + 1); if (segno - *newseg < sbi->segs_per_sec - (*newseg % sbi->segs_per_sec)) goto got_it; } find_other_zone: - secno = find_next_zero_bit(free_i->free_secmap, TOTAL_SECS(sbi), hint); - if (secno >= TOTAL_SECS(sbi)) { + secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint); + if (secno >= MAIN_SECS(sbi)) { if (dir == ALLOC_RIGHT) { secno = find_next_zero_bit(free_i->free_secmap, - TOTAL_SECS(sbi), 0); - BUG_ON(secno >= TOTAL_SECS(sbi)); + MAIN_SECS(sbi), 0); + f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi)); } else { go_left = 1; left_start = hint - 1; @@ -419,8 +912,8 @@ continue; } left_start = find_next_zero_bit(free_i->free_secmap, - TOTAL_SECS(sbi), 0); - BUG_ON(left_start >= TOTAL_SECS(sbi)); + MAIN_SECS(sbi), 0); + f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi)); break; } secno = left_start; @@ -459,10 +952,10 @@ } got_it: /* set it as dirty segment in free segmap */ - BUG_ON(test_bit(segno, free_i->free_segmap)); + f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap)); __set_inuse(sbi, segno); *newseg = segno; - write_unlock(&free_i->segmap_lock); + spin_unlock(&free_i->segmap_lock); } static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) @@ -495,7 +988,7 @@ int dir = ALLOC_LEFT; write_sum_page(sbi, curseg->sum_blk, - GET_SUM_BLOCK(sbi, curseg->segno)); + GET_SUM_BLOCK(sbi, segno)); if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA) dir = ALLOC_RIGHT; @@ -512,13 +1005,18 @@ struct curseg_info *seg, block_t start) { struct seg_entry *se = get_seg_entry(sbi, seg->segno); - block_t ofs; - for (ofs = start; ofs < sbi->blocks_per_seg; ofs++) { - if (!f2fs_test_bit(ofs, se->ckpt_valid_map) - && !f2fs_test_bit(ofs, se->cur_valid_map)) - break; - } - seg->next_blkoff = ofs; + int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); + unsigned long *target_map = SIT_I(sbi)->tmp_map; + unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; + unsigned long *cur_map = (unsigned long *)se->cur_valid_map; + int i, pos; + + for (i = 0; i < entries; i++) + target_map[i] = ckpt_map[i] | cur_map[i]; + + pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start); + + seg->next_blkoff = pos; } /* @@ -536,7 +1034,7 @@ } /* - * This function always allocates a used segment (from dirty seglist) by SSR + * This function always allocates a used segment(from dirty seglist) by SSR * manner, so it should recover the existing segment information of valid blocks */ static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse) @@ -594,15 +1092,8 @@ { struct curseg_info *curseg = CURSEG_I(sbi, type); - if (force) { + if (force) new_curseg(sbi, type, true); - goto out; - } - - curseg->next_segno = check_prefree_segments(sbi, type); - - if (curseg->next_segno != NULL_SEGNO) - change_curseg(sbi, type, false); else if (type == CURSEG_WARM_NODE) new_curseg(sbi, type, false); else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) @@ -611,148 +1102,74 @@ change_curseg(sbi, type, true); else new_curseg(sbi, type, false); -out: - sbi->segment_count[curseg->alloc_type]++; + + stat_inc_seg_type(sbi, curseg); +} + +static void __allocate_new_segments(struct f2fs_sb_info *sbi, int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + unsigned int old_segno; + + old_segno = curseg->segno; + SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true); + locate_dirty_segment(sbi, old_segno); } void allocate_new_segments(struct f2fs_sb_info *sbi) { - struct curseg_info *curseg; - unsigned int old_curseg; int i; - for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { - curseg = CURSEG_I(sbi, i); - old_curseg = curseg->segno; - SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true); - locate_dirty_segment(sbi, old_curseg); - } + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) + __allocate_new_segments(sbi, i); } static const struct segment_allocation default_salloc_ops = { .allocate_segment = allocate_segment_by_default, }; -static void f2fs_end_io_write(struct bio *bio, int err) +int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) { - const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - struct bio_private *p = bio->bi_private; + __u64 start = F2FS_BYTES_TO_BLK(range->start); + __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1; + unsigned int start_segno, end_segno; + struct cp_control cpc; - do { - struct page *page = bvec->bv_page; + if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize) + return -EINVAL; - if (--bvec >= bio->bi_io_vec) - prefetchw(&bvec->bv_page->flags); - if (!uptodate) { - SetPageError(page); - if (page->mapping) - set_bit(AS_EIO, &page->mapping->flags); - set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG); - p->sbi->sb->s_flags |= MS_RDONLY; - } - end_page_writeback(page); - dec_page_count(p->sbi, F2FS_WRITEBACK); - } while (bvec >= bio->bi_io_vec); - - if (p->is_sync) - complete(p->wait); - kfree(p); - bio_put(bio); -} - -struct bio *f2fs_bio_alloc(struct block_device *bdev, int npages) -{ - struct bio *bio; - struct bio_private *priv; -retry: - priv = kmalloc(sizeof(struct bio_private), GFP_NOFS); - if (!priv) { - cond_resched(); - goto retry; - } - - /* No failure on bio allocation */ - bio = bio_alloc(GFP_NOIO, npages); - bio->bi_bdev = bdev; - bio->bi_private = priv; - return bio; -} - -static void do_submit_bio(struct f2fs_sb_info *sbi, - enum page_type type, bool sync) -{ - int rw = sync ? WRITE_SYNC : WRITE; - enum page_type btype = type > META ? META : type; - - if (type >= META_FLUSH) - rw = WRITE_FLUSH_FUA; - - if (btype == META) - rw |= REQ_META; - - if (sbi->bio[btype]) { - struct bio_private *p = sbi->bio[btype]->bi_private; - p->sbi = sbi; - sbi->bio[btype]->bi_end_io = f2fs_end_io_write; - - trace_f2fs_do_submit_bio(sbi->sb, btype, sync, sbi->bio[btype]); - - if (type == META_FLUSH) { - DECLARE_COMPLETION_ONSTACK(wait); - p->is_sync = true; - p->wait = &wait; - submit_bio(rw, sbi->bio[btype]); - wait_for_completion(&wait); - } else { - p->is_sync = false; - submit_bio(rw, sbi->bio[btype]); - } - sbi->bio[btype] = NULL; - } -} - -void f2fs_submit_bio(struct f2fs_sb_info *sbi, enum page_type type, bool sync) -{ - down_write(&sbi->bio_sem); - do_submit_bio(sbi, type, sync); - up_write(&sbi->bio_sem); -} - -static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page, - block_t blk_addr, enum page_type type) -{ - struct block_device *bdev = sbi->sb->s_bdev; - - verify_block_addr(sbi, blk_addr); - - down_write(&sbi->bio_sem); + cpc.trimmed = 0; + if (end <= MAIN_BLKADDR(sbi)) + goto out; - inc_page_count(sbi, F2FS_WRITEBACK); + /* start/end segment number in main_area */ + start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start); + end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 : + GET_SEGNO(sbi, end); + cpc.reason = CP_DISCARD; + cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen)); + + /* do checkpoint to issue discard commands safely */ + for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) { + cpc.trim_start = start_segno; - if (sbi->bio[type] && sbi->last_block_in_bio[type] != blk_addr - 1) - do_submit_bio(sbi, type, false); -alloc_new: - if (sbi->bio[type] == NULL) { - sbi->bio[type] = f2fs_bio_alloc(bdev, max_hw_blocks(sbi)); - sbi->bio[type]->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); - /* - * The end_io will be assigned at the sumbission phase. - * Until then, let bio_add_page() merge consecutive IOs as much - * as possible. - */ - } + if (sbi->discard_blks == 0) + break; + else if (sbi->discard_blks < BATCHED_TRIM_BLOCKS(sbi)) + cpc.trim_end = end_segno; + else + cpc.trim_end = min_t(unsigned int, + rounddown(start_segno + + BATCHED_TRIM_SEGMENTS(sbi), + sbi->segs_per_sec) - 1, end_segno); - if (bio_add_page(sbi->bio[type], page, PAGE_CACHE_SIZE, 0) < - PAGE_CACHE_SIZE) { - do_submit_bio(sbi, type, false); - goto alloc_new; + mutex_lock(&sbi->gc_mutex); + write_checkpoint(sbi, &cpc); + mutex_unlock(&sbi->gc_mutex); } - - sbi->last_block_in_bio[type] = blk_addr; - - up_write(&sbi->bio_sem); - trace_f2fs_submit_write_page(page, blk_addr, type); +out: + range->len = F2FS_BLK_TO_BYTES(cpc.trimmed); + return 0; } static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) @@ -781,8 +1198,8 @@ else return CURSEG_COLD_DATA; } else { - if (IS_DNODE(page) && !is_cold_node(page)) - return CURSEG_HOT_NODE; + if (IS_DNODE(page) && is_cold_node(page)) + return CURSEG_WARM_NODE; else return CURSEG_COLD_NODE; } @@ -795,7 +1212,7 @@ if (S_ISDIR(inode->i_mode)) return CURSEG_HOT_DATA; - else if (is_cold_data(page) || is_cold_file(inode)) + else if (is_cold_data(page) || file_is_cold(inode)) return CURSEG_COLD_DATA; else return CURSEG_WARM_DATA; @@ -810,130 +1227,159 @@ static int __get_segment_type(struct page *page, enum page_type p_type) { - struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); - switch (sbi->active_logs) { + switch (F2FS_P_SB(page)->active_logs) { case 2: return __get_segment_type_2(page, p_type); case 4: return __get_segment_type_4(page, p_type); } /* NR_CURSEG_TYPE(6) logs by default */ - BUG_ON(sbi->active_logs != NR_CURSEG_TYPE); + f2fs_bug_on(F2FS_P_SB(page), + F2FS_P_SB(page)->active_logs != NR_CURSEG_TYPE); return __get_segment_type_6(page, p_type); } -static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, - block_t old_blkaddr, block_t *new_blkaddr, - struct f2fs_summary *sum, enum page_type p_type) +void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, + block_t old_blkaddr, block_t *new_blkaddr, + struct f2fs_summary *sum, int type) { struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg; - unsigned int old_cursegno; - int type; + bool direct_io = (type == CURSEG_DIRECT_IO); + + type = direct_io ? CURSEG_WARM_DATA : type; - type = __get_segment_type(page, p_type); curseg = CURSEG_I(sbi, type); mutex_lock(&curseg->curseg_mutex); + mutex_lock(&sit_i->sentry_lock); + + /* direct_io'ed data is aligned to the segment for better performance */ + if (direct_io && curseg->next_blkoff && + !has_not_enough_free_secs(sbi, 0)) + __allocate_new_segments(sbi, type); *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); - old_cursegno = curseg->segno; /* * __add_sum_entry should be resided under the curseg_mutex * because, this function updates a summary entry in the * current summary block. */ - __add_sum_entry(sbi, type, sum, curseg->next_blkoff); + __add_sum_entry(sbi, type, sum); - mutex_lock(&sit_i->sentry_lock); __refresh_next_blkoff(sbi, curseg); - sbi->block_count[curseg->alloc_type]++; + stat_inc_block_count(sbi, curseg); + + if (!__has_curseg_space(sbi, type)) + sit_i->s_ops->allocate_segment(sbi, type, false); /* * SIT information should be updated before segment allocation, * since SSR needs latest valid block information. */ refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); - if (!__has_curseg_space(sbi, type)) - sit_i->s_ops->allocate_segment(sbi, type, false); - - locate_dirty_segment(sbi, old_cursegno); - locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); mutex_unlock(&sit_i->sentry_lock); - if (p_type == NODE) + if (page && IS_NODESEG(type)) fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); - /* writeout dirty page into bdev */ - submit_write_page(sbi, page, *new_blkaddr, p_type); - mutex_unlock(&curseg->curseg_mutex); } +static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) +{ + int type = __get_segment_type(fio->page, fio->type); + + allocate_data_block(fio->sbi, fio->page, fio->blk_addr, + &fio->blk_addr, sum, type); + + /* writeout dirty page into bdev */ + f2fs_submit_page_mbio(fio); +} + void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) { + struct f2fs_io_info fio = { + .sbi = sbi, + .type = META, + .rw = WRITE_SYNC | REQ_META | REQ_PRIO, + .blk_addr = page->index, + .page = page, + .encrypted_page = NULL, + }; + + if (unlikely(page->index >= MAIN_BLKADDR(sbi))) + fio.rw &= ~REQ_META; + set_page_writeback(page); - submit_write_page(sbi, page, page->index, META); + f2fs_submit_page_mbio(&fio); } -void write_node_page(struct f2fs_sb_info *sbi, struct page *page, - unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr) +void write_node_page(unsigned int nid, struct f2fs_io_info *fio) { struct f2fs_summary sum; + set_summary(&sum, nid, 0, 0); - do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, NODE); + do_write_page(&sum, fio); } -void write_data_page(struct inode *inode, struct page *page, - struct dnode_of_data *dn, block_t old_blkaddr, - block_t *new_blkaddr) +void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_sb_info *sbi = fio->sbi; struct f2fs_summary sum; struct node_info ni; - BUG_ON(old_blkaddr == NULL_ADDR); + f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR); get_node_info(sbi, dn->nid, &ni); set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); - - do_write_page(sbi, page, old_blkaddr, - new_blkaddr, &sum, DATA); + do_write_page(&sum, fio); + dn->data_blkaddr = fio->blk_addr; } -void rewrite_data_page(struct f2fs_sb_info *sbi, struct page *page, - block_t old_blk_addr) +void rewrite_data_page(struct f2fs_io_info *fio) { - submit_write_page(sbi, page, old_blk_addr, DATA); + stat_inc_inplace_blocks(fio->sbi); + f2fs_submit_page_mbio(fio); } -void recover_data_page(struct f2fs_sb_info *sbi, - struct page *page, struct f2fs_summary *sum, - block_t old_blkaddr, block_t new_blkaddr) +static void __f2fs_replace_block(struct f2fs_sb_info *sbi, + struct f2fs_summary *sum, + block_t old_blkaddr, block_t new_blkaddr, + bool recover_curseg) { struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg; unsigned int segno, old_cursegno; struct seg_entry *se; int type; + unsigned short old_blkoff; segno = GET_SEGNO(sbi, new_blkaddr); se = get_seg_entry(sbi, segno); type = se->type; - if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) { - if (old_blkaddr == NULL_ADDR) - type = CURSEG_COLD_DATA; - else + if (!recover_curseg) { + /* for recovery flow */ + if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) { + if (old_blkaddr == NULL_ADDR) + type = CURSEG_COLD_DATA; + else + type = CURSEG_WARM_DATA; + } + } else { + if (!IS_CURSEG(sbi, segno)) type = CURSEG_WARM_DATA; } + curseg = CURSEG_I(sbi, type); mutex_lock(&curseg->curseg_mutex); mutex_lock(&sit_i->sentry_lock); old_cursegno = curseg->segno; + old_blkoff = curseg->next_blkoff; /* change the current segment */ if (segno != curseg->segno) { @@ -941,66 +1387,111 @@ change_curseg(sbi, type, true); } - curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & - (sbi->blocks_per_seg - 1); - __add_sum_entry(sbi, type, sum, curseg->next_blkoff); + curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr); + __add_sum_entry(sbi, type, sum); - refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); + if (!recover_curseg) + update_sit_entry(sbi, new_blkaddr, 1); + if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) + update_sit_entry(sbi, old_blkaddr, -1); - locate_dirty_segment(sbi, old_cursegno); locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); + locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr)); + + locate_dirty_segment(sbi, old_cursegno); + + if (recover_curseg) { + if (old_cursegno != curseg->segno) { + curseg->next_segno = old_cursegno; + change_curseg(sbi, type, true); + } + curseg->next_blkoff = old_blkoff; + } mutex_unlock(&sit_i->sentry_lock); mutex_unlock(&curseg->curseg_mutex); } -void rewrite_node_page(struct f2fs_sb_info *sbi, - struct page *page, struct f2fs_summary *sum, - block_t old_blkaddr, block_t new_blkaddr) +void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, + block_t old_addr, block_t new_addr, + unsigned char version, bool recover_curseg) { - struct sit_info *sit_i = SIT_I(sbi); - int type = CURSEG_WARM_NODE; - struct curseg_info *curseg; - unsigned int segno, old_cursegno; - block_t next_blkaddr = next_blkaddr_of_node(page); - unsigned int next_segno = GET_SEGNO(sbi, next_blkaddr); + struct f2fs_summary sum; - curseg = CURSEG_I(sbi, type); + set_summary(&sum, dn->nid, dn->ofs_in_node, version); - mutex_lock(&curseg->curseg_mutex); - mutex_lock(&sit_i->sentry_lock); + __f2fs_replace_block(sbi, &sum, old_addr, new_addr, recover_curseg); - segno = GET_SEGNO(sbi, new_blkaddr); - old_cursegno = curseg->segno; + dn->data_blkaddr = new_addr; + set_data_blkaddr(dn); + f2fs_update_extent_cache(dn); +} - /* change the current segment */ - if (segno != curseg->segno) { - curseg->next_segno = segno; - change_curseg(sbi, type, true); +static inline bool is_merged_page(struct f2fs_sb_info *sbi, + struct page *page, enum page_type type) +{ + enum page_type btype = PAGE_TYPE_OF_BIO(type); + struct f2fs_bio_info *io = &sbi->write_io[btype]; + struct bio_vec *bvec; + struct page *target; + int i; + + down_read(&io->io_rwsem); + if (!io->bio) { + up_read(&io->io_rwsem); + return false; } - curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & - (sbi->blocks_per_seg - 1); - __add_sum_entry(sbi, type, sum, curseg->next_blkoff); - - /* change the current log to the next block addr in advance */ - if (next_segno != segno) { - curseg->next_segno = next_segno; - change_curseg(sbi, type, true); + + bio_for_each_segment_all(bvec, io->bio, i) { + + if (bvec->bv_page->mapping) { + target = bvec->bv_page; + } else { + struct f2fs_crypto_ctx *ctx; + + /* encrypted page */ + ctx = (struct f2fs_crypto_ctx *)page_private( + bvec->bv_page); + target = ctx->w.control_page; + } + + if (page == target) { + up_read(&io->io_rwsem); + return true; + } } - curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, next_blkaddr) & - (sbi->blocks_per_seg - 1); - /* rewrite node page */ - set_page_writeback(page); - submit_write_page(sbi, page, new_blkaddr, NODE); - f2fs_submit_bio(sbi, NODE, true); - refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); + up_read(&io->io_rwsem); + return false; +} - locate_dirty_segment(sbi, old_cursegno); - locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); +void f2fs_wait_on_page_writeback(struct page *page, + enum page_type type) +{ + if (PageWriteback(page)) { + struct f2fs_sb_info *sbi = F2FS_P_SB(page); - mutex_unlock(&sit_i->sentry_lock); - mutex_unlock(&curseg->curseg_mutex); + if (is_merged_page(sbi, page, type)) + f2fs_submit_merged_bio(sbi, type, WRITE); + wait_on_page_writeback(page); + } +} + +void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *sbi, + block_t blkaddr) +{ + struct page *cpage; + + if (blkaddr == NEW_ADDR) + return; + + f2fs_bug_on(sbi, blkaddr == NULL_ADDR); + + cpage = find_lock_page(META_MAPPING(sbi), blkaddr); + if (cpage) { + f2fs_wait_on_page_writeback(cpage, DATA); + f2fs_put_page(cpage, 1); + } } static int read_compacted_summaries(struct f2fs_sb_info *sbi) @@ -1079,7 +1570,7 @@ segno = le32_to_cpu(ckpt->cur_data_segno[type]); blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type - CURSEG_HOT_DATA]); - if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) + if (__exist_node_summaries(sbi)) blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type); else blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type); @@ -1088,7 +1579,7 @@ CURSEG_HOT_NODE]); blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type - CURSEG_HOT_NODE]); - if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) + if (__exist_node_summaries(sbi)) blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE, type - CURSEG_HOT_NODE); else @@ -1099,7 +1590,7 @@ sum = (struct f2fs_summary_block *)page_address(new); if (IS_NODESEG(type)) { - if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) { + if (__exist_node_summaries(sbi)) { struct f2fs_summary *ns = &sum->entries[0]; int i; for (i = 0; i < sbi->blocks_per_seg; i++, ns++) { @@ -1107,9 +1598,12 @@ ns->ofs_in_node = 0; } } else { - if (restore_node_summary(sbi, segno, sum)) { + int err; + + err = restore_node_summary(sbi, segno, sum); + if (err) { f2fs_put_page(new, 1); - return -EINVAL; + return err; } } } @@ -1130,17 +1624,31 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi) { int type = CURSEG_HOT_DATA; + int err; if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) { + int npages = npages_for_summary_flush(sbi, true); + + if (npages >= 2) + ra_meta_pages(sbi, start_sum_block(sbi), npages, + META_CP, true); + /* restore for compacted data summary */ if (read_compacted_summaries(sbi)) return -EINVAL; type = CURSEG_HOT_NODE; } - for (; type <= CURSEG_COLD_NODE; type++) - if (read_normal_summaries(sbi, type)) - return -EINVAL; + if (__exist_node_summaries(sbi)) + ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type), + NR_CURSEG_TYPE - type, META_CP, true); + + for (; type <= CURSEG_COLD_NODE; type++) { + err = read_normal_summaries(sbi, type); + if (err) + return err; + } + return 0; } @@ -1167,8 +1675,6 @@ SUM_JOURNAL_SIZE); written_size += SUM_JOURNAL_SIZE; - set_page_dirty(page); - /* Step 3: write summary entries */ for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { unsigned short blkoff; @@ -1187,18 +1693,20 @@ summary = (struct f2fs_summary *)(kaddr + written_size); *summary = seg_i->sum_blk->entries[j]; written_size += SUMMARY_SIZE; - set_page_dirty(page); if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE - SUM_FOOTER_SIZE) continue; + set_page_dirty(page); f2fs_put_page(page, 1); page = NULL; } } - if (page) + if (page) { + set_page_dirty(page); f2fs_put_page(page, 1); + } } static void write_normal_summaries(struct f2fs_sb_info *sbi, @@ -1228,9 +1736,7 @@ void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk) { - if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)) - write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE); - return; + write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE); } int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type, @@ -1258,17 +1764,7 @@ static struct page *get_current_sit_page(struct f2fs_sb_info *sbi, unsigned int segno) { - struct sit_info *sit_i = SIT_I(sbi); - unsigned int offset = SIT_BLOCK_OFFSET(sit_i, segno); - block_t blk_addr = sit_i->sit_base_addr + offset; - - check_seg_range(sbi, segno); - - /* calculate sit block address */ - if (f2fs_test_bit(offset, sit_i->sit_bitmap)) - blk_addr += sit_i->sit_blocks; - - return get_meta_page(sbi, blk_addr); + return get_meta_page(sbi, current_sit_addr(sbi, segno)); } static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, @@ -1285,7 +1781,7 @@ /* get current sit block page without lock */ src_page = get_meta_page(sbi, src_off); dst_page = grab_meta_page(sbi, dst_off); - BUG_ON(PageDirty(src_page)); + f2fs_bug_on(sbi, PageDirty(src_page)); src_addr = page_address(src_page); dst_addr = page_address(dst_page); @@ -1299,97 +1795,192 @@ return dst_page; } -static bool flush_sits_in_journal(struct f2fs_sb_info *sbi) +static struct sit_entry_set *grab_sit_entry_set(void) +{ + struct sit_entry_set *ses = + f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_NOFS); + + ses->entry_cnt = 0; + INIT_LIST_HEAD(&ses->set_list); + return ses; +} + +static void release_sit_entry_set(struct sit_entry_set *ses) +{ + list_del(&ses->set_list); + kmem_cache_free(sit_entry_set_slab, ses); +} + +static void adjust_sit_entry_set(struct sit_entry_set *ses, + struct list_head *head) +{ + struct sit_entry_set *next = ses; + + if (list_is_last(&ses->set_list, head)) + return; + + list_for_each_entry_continue(next, head, set_list) + if (ses->entry_cnt <= next->entry_cnt) + break; + + list_move_tail(&ses->set_list, &next->set_list); +} + +static void add_sit_entry(unsigned int segno, struct list_head *head) +{ + struct sit_entry_set *ses; + unsigned int start_segno = START_SEGNO(segno); + + list_for_each_entry(ses, head, set_list) { + if (ses->start_segno == start_segno) { + ses->entry_cnt++; + adjust_sit_entry_set(ses, head); + return; + } + } + + ses = grab_sit_entry_set(); + + ses->start_segno = start_segno; + ses->entry_cnt++; + list_add(&ses->set_list, head); +} + +static void add_sits_in_set(struct f2fs_sb_info *sbi) +{ + struct f2fs_sm_info *sm_info = SM_I(sbi); + struct list_head *set_list = &sm_info->sit_entry_set; + unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap; + unsigned int segno; + + for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi)) + add_sit_entry(segno, set_list); +} + +static void remove_sits_in_journal(struct f2fs_sb_info *sbi) { struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); struct f2fs_summary_block *sum = curseg->sum_blk; int i; - /* - * If the journal area in the current summary is full of sit entries, - * all the sit entries will be flushed. Otherwise the sit entries - * are not able to replace with newly hot sit entries. - */ - if (sits_in_cursum(sum) >= SIT_JOURNAL_ENTRIES) { - for (i = sits_in_cursum(sum) - 1; i >= 0; i--) { - unsigned int segno; - segno = le32_to_cpu(segno_in_journal(sum, i)); - __mark_sit_entry_dirty(sbi, segno); - } - update_sits_in_cursum(sum, -sits_in_cursum(sum)); - return 1; + for (i = sits_in_cursum(sum) - 1; i >= 0; i--) { + unsigned int segno; + bool dirtied; + + segno = le32_to_cpu(segno_in_journal(sum, i)); + dirtied = __mark_sit_entry_dirty(sbi, segno); + + if (!dirtied) + add_sit_entry(segno, &SM_I(sbi)->sit_entry_set); } - return 0; + update_sits_in_cursum(sum, -sits_in_cursum(sum)); } /* * CP calls this function, which flushes SIT entries including sit_journal, * and moves prefree segs to free segs. */ -void flush_sit_entries(struct f2fs_sb_info *sbi) +void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct sit_info *sit_i = SIT_I(sbi); unsigned long *bitmap = sit_i->dirty_sentries_bitmap; struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); struct f2fs_summary_block *sum = curseg->sum_blk; - unsigned long nsegs = TOTAL_SEGS(sbi); - struct page *page = NULL; - struct f2fs_sit_block *raw_sit = NULL; - unsigned int start = 0, end = 0; - unsigned int segno = -1; - bool flushed; + struct sit_entry_set *ses, *tmp; + struct list_head *head = &SM_I(sbi)->sit_entry_set; + bool to_journal = true; + struct seg_entry *se; mutex_lock(&curseg->curseg_mutex); mutex_lock(&sit_i->sentry_lock); + if (!sit_i->dirty_sentries) + goto out; + /* - * "flushed" indicates whether sit entries in journal are flushed - * to the SIT area or not. + * add and account sit entries of dirty bitmap in sit entry + * set temporarily */ - flushed = flush_sits_in_journal(sbi); + add_sits_in_set(sbi); - while ((segno = find_next_bit(bitmap, nsegs, segno + 1)) < nsegs) { - struct seg_entry *se = get_seg_entry(sbi, segno); - int sit_offset, offset; - - sit_offset = SIT_ENTRY_OFFSET(sit_i, segno); - - if (flushed) - goto to_sit_page; - - offset = lookup_journal_in_cursum(sum, SIT_JOURNAL, segno, 1); - if (offset >= 0) { - segno_in_journal(sum, offset) = cpu_to_le32(segno); - seg_info_to_raw_sit(se, &sit_in_journal(sum, offset)); - goto flush_done; - } -to_sit_page: - if (!page || (start > segno) || (segno > end)) { - if (page) { - f2fs_put_page(page, 1); - page = NULL; - } + /* + * if there are no enough space in journal to store dirty sit + * entries, remove all entries from journal and add and account + * them in sit entry set. + */ + if (!__has_cursum_space(sum, sit_i->dirty_sentries, SIT_JOURNAL)) + remove_sits_in_journal(sbi); - start = START_SEGNO(sit_i, segno); - end = start + SIT_ENTRY_PER_BLOCK - 1; + /* + * there are two steps to flush sit entries: + * #1, flush sit entries to journal in current cold data summary block. + * #2, flush sit entries to sit page. + */ + list_for_each_entry_safe(ses, tmp, head, set_list) { + struct page *page = NULL; + struct f2fs_sit_block *raw_sit = NULL; + unsigned int start_segno = ses->start_segno; + unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK, + (unsigned long)MAIN_SEGS(sbi)); + unsigned int segno = start_segno; + + if (to_journal && + !__has_cursum_space(sum, ses->entry_cnt, SIT_JOURNAL)) + to_journal = false; - /* read sit block that will be updated */ - page = get_next_sit_page(sbi, start); + if (!to_journal) { + page = get_next_sit_page(sbi, start_segno); raw_sit = page_address(page); } - /* udpate entry in SIT block */ - seg_info_to_raw_sit(se, &raw_sit->entries[sit_offset]); -flush_done: - __clear_bit(segno, bitmap); - sit_i->dirty_sentries--; + /* flush dirty sit entries in region of current sit set */ + for_each_set_bit_from(segno, bitmap, end) { + int offset, sit_offset; + + se = get_seg_entry(sbi, segno); + + /* add discard candidates */ + if (cpc->reason != CP_DISCARD) { + cpc->trim_start = segno; + add_discard_addrs(sbi, cpc); + } + + if (to_journal) { + offset = lookup_journal_in_cursum(sum, + SIT_JOURNAL, segno, 1); + f2fs_bug_on(sbi, offset < 0); + segno_in_journal(sum, offset) = + cpu_to_le32(segno); + seg_info_to_raw_sit(se, + &sit_in_journal(sum, offset)); + } else { + sit_offset = SIT_ENTRY_OFFSET(sit_i, segno); + seg_info_to_raw_sit(se, + &raw_sit->entries[sit_offset]); + } + + __clear_bit(segno, bitmap); + sit_i->dirty_sentries--; + ses->entry_cnt--; + } + + if (!to_journal) + f2fs_put_page(page, 1); + + f2fs_bug_on(sbi, ses->entry_cnt); + release_sit_entry_set(ses); + } + + f2fs_bug_on(sbi, !list_empty(head)); + f2fs_bug_on(sbi, sit_i->dirty_sentries); +out: + if (cpc->reason == CP_DISCARD) { + for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) + add_discard_addrs(sbi, cpc); } mutex_unlock(&sit_i->sentry_lock); mutex_unlock(&curseg->curseg_mutex); - /* writeout last modified SIT block */ - f2fs_put_page(page, 1); - set_prefree_as_free_segments(sbi); } @@ -1409,28 +2000,36 @@ SM_I(sbi)->sit_info = sit_i; - sit_i->sentries = vzalloc(TOTAL_SEGS(sbi) * sizeof(struct seg_entry)); + sit_i->sentries = f2fs_kvzalloc(MAIN_SEGS(sbi) * + sizeof(struct seg_entry), GFP_KERNEL); if (!sit_i->sentries) return -ENOMEM; - bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); - sit_i->dirty_sentries_bitmap = kzalloc(bitmap_size, GFP_KERNEL); + bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); + sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(bitmap_size, GFP_KERNEL); if (!sit_i->dirty_sentries_bitmap) return -ENOMEM; - for (start = 0; start < TOTAL_SEGS(sbi); start++) { + for (start = 0; start < MAIN_SEGS(sbi); start++) { sit_i->sentries[start].cur_valid_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); sit_i->sentries[start].ckpt_valid_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); - if (!sit_i->sentries[start].cur_valid_map - || !sit_i->sentries[start].ckpt_valid_map) + sit_i->sentries[start].discard_map + = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + if (!sit_i->sentries[start].cur_valid_map || + !sit_i->sentries[start].ckpt_valid_map || + !sit_i->sentries[start].discard_map) return -ENOMEM; } + sit_i->tmp_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + if (!sit_i->tmp_map) + return -ENOMEM; + if (sbi->segs_per_sec > 1) { - sit_i->sec_entries = vzalloc(TOTAL_SECS(sbi) * - sizeof(struct sec_entry)); + sit_i->sec_entries = f2fs_kvzalloc(MAIN_SECS(sbi) * + sizeof(struct sec_entry), GFP_KERNEL); if (!sit_i->sec_entries) return -ENOMEM; } @@ -1464,7 +2063,6 @@ static int build_free_segmap(struct f2fs_sb_info *sbi) { - struct f2fs_sm_info *sm_info = SM_I(sbi); struct free_segmap_info *free_i; unsigned int bitmap_size, sec_bitmap_size; @@ -1475,13 +2073,13 @@ SM_I(sbi)->free_info = free_i; - bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); - free_i->free_segmap = kmalloc(bitmap_size, GFP_KERNEL); + bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); + free_i->free_segmap = f2fs_kvmalloc(bitmap_size, GFP_KERNEL); if (!free_i->free_segmap) return -ENOMEM; - sec_bitmap_size = f2fs_bitmap_size(TOTAL_SECS(sbi)); - free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL); + sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); + free_i->free_secmap = f2fs_kvmalloc(sec_bitmap_size, GFP_KERNEL); if (!free_i->free_secmap) return -ENOMEM; @@ -1490,11 +2088,10 @@ memset(free_i->free_secmap, 0xff, sec_bitmap_size); /* init free segmap information */ - free_i->start_segno = - (unsigned int) GET_SEGNO_FROM_SEG0(sbi, sm_info->main_blkaddr); + free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi)); free_i->free_segments = 0; free_i->free_sections = 0; - rwlock_init(&free_i->segmap_lock); + spin_lock_init(&free_i->segmap_lock); return 0; } @@ -1503,7 +2100,7 @@ struct curseg_info *array; int i; - array = kzalloc(sizeof(*array) * NR_CURSEG_TYPE, GFP_KERNEL); + array = kcalloc(NR_CURSEG_TYPE, sizeof(*array), GFP_KERNEL); if (!array) return -ENOMEM; @@ -1525,36 +2122,53 @@ struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); struct f2fs_summary_block *sum = curseg->sum_blk; - unsigned int start; + int sit_blk_cnt = SIT_BLK_CNT(sbi); + unsigned int i, start, end; + unsigned int readed, start_blk = 0; + int nrpages = MAX_BIO_BLOCKS(sbi); - for (start = 0; start < TOTAL_SEGS(sbi); start++) { - struct seg_entry *se = &sit_i->sentries[start]; - struct f2fs_sit_block *sit_blk; - struct f2fs_sit_entry sit; - struct page *page; - int i; + do { + readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT, true); - mutex_lock(&curseg->curseg_mutex); - for (i = 0; i < sits_in_cursum(sum); i++) { - if (le32_to_cpu(segno_in_journal(sum, i)) == start) { - sit = sit_in_journal(sum, i); - mutex_unlock(&curseg->curseg_mutex); - goto got_it; + start = start_blk * sit_i->sents_per_block; + end = (start_blk + readed) * sit_i->sents_per_block; + + for (; start < end && start < MAIN_SEGS(sbi); start++) { + struct seg_entry *se = &sit_i->sentries[start]; + struct f2fs_sit_block *sit_blk; + struct f2fs_sit_entry sit; + struct page *page; + + mutex_lock(&curseg->curseg_mutex); + for (i = 0; i < sits_in_cursum(sum); i++) { + if (le32_to_cpu(segno_in_journal(sum, i)) + == start) { + sit = sit_in_journal(sum, i); + mutex_unlock(&curseg->curseg_mutex); + goto got_it; + } } - } - mutex_unlock(&curseg->curseg_mutex); - page = get_current_sit_page(sbi, start); - sit_blk = (struct f2fs_sit_block *)page_address(page); - sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; - f2fs_put_page(page, 1); + mutex_unlock(&curseg->curseg_mutex); + + page = get_current_sit_page(sbi, start); + sit_blk = (struct f2fs_sit_block *)page_address(page); + sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; + f2fs_put_page(page, 1); got_it: - check_block_count(sbi, start, &sit); - seg_info_from_raw_sit(se, &sit); - if (sbi->segs_per_sec > 1) { - struct sec_entry *e = get_sec_entry(sbi, start); - e->valid_blocks += se->valid_blocks; + check_block_count(sbi, start, &sit); + seg_info_from_raw_sit(se, &sit); + + /* build discard map only one time */ + memcpy(se->discard_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE); + sbi->discard_blks += sbi->blocks_per_seg - se->valid_blocks; + + if (sbi->segs_per_sec > 1) { + struct sec_entry *e = get_sec_entry(sbi, start); + e->valid_blocks += se->valid_blocks; + } } - } + start_blk += readed; + } while (start_blk < sit_blk_cnt); } static void init_free_segmap(struct f2fs_sb_info *sbi) @@ -1562,7 +2176,7 @@ unsigned int start; int type; - for (start = 0; start < TOTAL_SEGS(sbi); start++) { + for (start = 0; start < MAIN_SEGS(sbi); start++) { struct seg_entry *sentry = get_seg_entry(sbi, start); if (!sentry->valid_blocks) __set_free(sbi, start); @@ -1582,15 +2196,19 @@ unsigned int segno = 0, offset = 0; unsigned short valid_blocks; - while (segno < TOTAL_SEGS(sbi)) { + while (1) { /* find dirty segment based on free segmap */ - segno = find_next_inuse(free_i, TOTAL_SEGS(sbi), offset); - if (segno >= TOTAL_SEGS(sbi)) + segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset); + if (segno >= MAIN_SEGS(sbi)) break; offset = segno + 1; valid_blocks = get_valid_blocks(sbi, segno, 0); - if (valid_blocks >= sbi->blocks_per_seg || !valid_blocks) + if (valid_blocks == sbi->blocks_per_seg || !valid_blocks) + continue; + if (valid_blocks > sbi->blocks_per_seg) { + f2fs_bug_on(sbi, 1); continue; + } mutex_lock(&dirty_i->seglist_lock); __locate_dirty_segment(sbi, segno, DIRTY); mutex_unlock(&dirty_i->seglist_lock); @@ -1600,9 +2218,9 @@ static int init_victim_secmap(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SECS(sbi)); + unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi)); - dirty_i->victim_secmap = kzalloc(bitmap_size, GFP_KERNEL); + dirty_i->victim_secmap = f2fs_kvzalloc(bitmap_size, GFP_KERNEL); if (!dirty_i->victim_secmap) return -ENOMEM; return 0; @@ -1621,10 +2239,10 @@ SM_I(sbi)->dirty_info = dirty_i; mutex_init(&dirty_i->seglist_lock); - bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi)); + bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi)); for (i = 0; i < NR_DIRTY_TYPE; i++) { - dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL); + dirty_i->dirty_segmap[i] = f2fs_kvzalloc(bitmap_size, GFP_KERNEL); if (!dirty_i->dirty_segmap[i]) return -ENOMEM; } @@ -1645,7 +2263,7 @@ sit_i->min_mtime = LLONG_MAX; - for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) { + for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { unsigned int i; unsigned long long mtime = 0; @@ -1674,8 +2292,6 @@ /* init sm info */ sbi->sm_info = sm_info; - INIT_LIST_HEAD(&sm_info->wblist_head); - spin_lock_init(&sm_info->wblist_lock); sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr); sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr); sm_info->segment_count = le32_to_cpu(raw_super->segment_count); @@ -1683,6 +2299,25 @@ sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count); sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main); sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); + sm_info->rec_prefree_segments = sm_info->main_segments * + DEF_RECLAIM_PREFREE_SEGMENTS / 100; + sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC; + sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; + sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; + + INIT_LIST_HEAD(&sm_info->discard_list); + sm_info->nr_discards = 0; + sm_info->max_discards = 0; + + sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS; + + INIT_LIST_HEAD(&sm_info->sit_entry_set); + + if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) { + err = create_flush_cmd_control(sbi); + if (err) + return err; + } err = build_sit_info(sbi); if (err) @@ -1712,7 +2347,7 @@ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); mutex_lock(&dirty_i->seglist_lock); - kfree(dirty_i->dirty_segmap[dirty_type]); + kvfree(dirty_i->dirty_segmap[dirty_type]); dirty_i->nr_dirty[dirty_type] = 0; mutex_unlock(&dirty_i->seglist_lock); } @@ -1720,7 +2355,7 @@ static void destroy_victim_secmap(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - kfree(dirty_i->victim_secmap); + kvfree(dirty_i->victim_secmap); } static void destroy_dirty_segmap(struct f2fs_sb_info *sbi) @@ -1759,8 +2394,8 @@ if (!free_i) return; SM_I(sbi)->free_info = NULL; - kfree(free_i->free_segmap); - kfree(free_i->free_secmap); + kvfree(free_i->free_segmap); + kvfree(free_i->free_secmap); kfree(free_i); } @@ -1773,14 +2408,17 @@ return; if (sit_i->sentries) { - for (start = 0; start < TOTAL_SEGS(sbi); start++) { + for (start = 0; start < MAIN_SEGS(sbi); start++) { kfree(sit_i->sentries[start].cur_valid_map); kfree(sit_i->sentries[start].ckpt_valid_map); + kfree(sit_i->sentries[start].discard_map); } } - vfree(sit_i->sentries); - vfree(sit_i->sec_entries); - kfree(sit_i->dirty_sentries_bitmap); + kfree(sit_i->tmp_map); + + kvfree(sit_i->sentries); + kvfree(sit_i->sec_entries); + kvfree(sit_i->dirty_sentries_bitmap); SM_I(sbi)->sit_info = NULL; kfree(sit_i->sit_bitmap); @@ -1790,6 +2428,10 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi) { struct f2fs_sm_info *sm_info = SM_I(sbi); + + if (!sm_info) + return; + destroy_flush_cmd_control(sbi); destroy_dirty_segmap(sbi); destroy_curseg(sbi); destroy_free_segmap(sbi); @@ -1797,3 +2439,36 @@ sbi->sm_info = NULL; kfree(sm_info); } + +int __init create_segment_manager_caches(void) +{ + discard_entry_slab = f2fs_kmem_cache_create("discard_entry", + sizeof(struct discard_entry)); + if (!discard_entry_slab) + goto fail; + + sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set", + sizeof(struct sit_entry_set)); + if (!sit_entry_set_slab) + goto destory_discard_entry; + + inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry", + sizeof(struct inmem_pages)); + if (!inmem_entry_slab) + goto destroy_sit_entry_set; + return 0; + +destroy_sit_entry_set: + kmem_cache_destroy(sit_entry_set_slab); +destory_discard_entry: + kmem_cache_destroy(discard_entry_slab); +fail: + return -ENOMEM; +} + +void destroy_segment_manager_caches(void) +{ + kmem_cache_destroy(sit_entry_set_slab); + kmem_cache_destroy(discard_entry_slab); + kmem_cache_destroy(inmem_entry_slab); +}