--- zzzz-none-000/linux-3.10.107/fs/btrfs/compression.c	2017-06-27 09:49:32.000000000 +0000
+++ scorpion-7490-727/linux-3.10.107/fs/btrfs/compression.c	2021-02-04 17:41:59.000000000 +0000
@@ -32,7 +32,6 @@
 #include <linux/writeback.h>
 #include <linux/bit_spinlock.h>
 #include <linux/slab.h>
-#include "compat.h"
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
@@ -92,17 +91,13 @@
 	u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
 
 	return sizeof(struct compressed_bio) +
-		((disk_size + root->sectorsize - 1) / root->sectorsize) *
-		csum_size;
+		(DIV_ROUND_UP(disk_size, root->sectorsize)) * csum_size;
 }
 
 static struct bio *compressed_bio_alloc(struct block_device *bdev,
 					u64 first_byte, gfp_t gfp_flags)
 {
-	int nr_vecs;
-
-	nr_vecs = bio_get_nr_vecs(bdev);
-	return btrfs_bio_alloc(bdev, first_byte >> 9, nr_vecs, gfp_flags);
+	return btrfs_bio_alloc(bdev, first_byte >> 9, BIO_MAX_PAGES, gfp_flags);
 }
 
 static int check_compressed_csum(struct inode *inode,
@@ -129,12 +124,10 @@
 		kunmap_atomic(kaddr);
 
 		if (csum != *cb_sum) {
-			printk(KERN_INFO "btrfs csum failed ino %llu "
-			       "extent %llu csum %u "
-			       "wanted %u mirror %d\n",
-			       (unsigned long long)btrfs_ino(inode),
-			       (unsigned long long)disk_start,
-			       csum, *cb_sum, cb->mirror_num);
+			btrfs_info(BTRFS_I(inode)->root->fs_info,
+			   "csum failed ino %llu extent %llu csum %u wanted %u mirror %d",
+			   btrfs_ino(inode), disk_start, csum, *cb_sum,
+			   cb->mirror_num);
 			ret = -EIO;
 			goto fail;
 		}
@@ -156,7 +149,7 @@
  * The compressed pages are freed here, and it must be run
  * in process context
  */
-static void end_compressed_bio_read(struct bio *bio, int err)
+static void end_compressed_bio_read(struct bio *bio)
 {
 	struct compressed_bio *cb = bio->bi_private;
 	struct inode *inode;
@@ -164,7 +157,7 @@
 	unsigned long index;
 	int ret;
 
-	if (err)
+	if (bio->bi_error)
 		cb->errors = 1;
 
 	/* if there are more bios still pending for this compressed
@@ -174,7 +167,8 @@
 		goto out;
 
 	inode = cb->inode;
-	ret = check_compressed_csum(inode, cb, (u64)bio->bi_sector << 9);
+	ret = check_compressed_csum(inode, cb,
+				    (u64)bio->bi_iter.bi_sector << 9);
 	if (ret)
 		goto csum_failed;
 
@@ -203,19 +197,17 @@
 	if (cb->errors) {
 		bio_io_error(cb->orig_bio);
 	} else {
-		int bio_index = 0;
-		struct bio_vec *bvec = cb->orig_bio->bi_io_vec;
+		int i;
+		struct bio_vec *bvec;
 
 		/*
 		 * we have verified the checksum already, set page
 		 * checked so the end_io handlers know about it
 		 */
-		while (bio_index < cb->orig_bio->bi_vcnt) {
+		bio_for_each_segment_all(bvec, cb->orig_bio, i)
 			SetPageChecked(bvec->bv_page);
-			bvec++;
-			bio_index++;
-		}
-		bio_endio(cb->orig_bio, 0);
+
+		bio_endio(cb->orig_bio);
 	}
 
 	/* finally free the cb struct */
@@ -229,16 +221,19 @@
  * Clear the writeback bits on all of the file
  * pages for a compressed write
 */
-static noinline void end_compressed_writeback(struct inode *inode, u64 start,
-					      unsigned long ram_size)
+static noinline void end_compressed_writeback(struct inode *inode,
+					      const struct compressed_bio *cb)
 {
-	unsigned long index = start >> PAGE_CACHE_SHIFT;
-	unsigned long end_index = (start + ram_size - 1) >> PAGE_CACHE_SHIFT;
+	unsigned long index = cb->start >> PAGE_CACHE_SHIFT;
+	unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_CACHE_SHIFT;
 	struct page *pages[16];
 	unsigned long nr_pages = end_index - index + 1;
 	int i;
 	int ret;
 
+	if (cb->errors)
+		mapping_set_error(inode->i_mapping, -EIO);
+
 	while (nr_pages > 0) {
 		ret = find_get_pages_contig(inode->i_mapping, index,
 				     min_t(unsigned long,
@@ -249,6 +244,8 @@
 			continue;
 		}
 		for (i = 0; i < ret; i++) {
+			if (cb->errors)
+				SetPageError(pages[i]);
 			end_page_writeback(pages[i]);
 			page_cache_release(pages[i]);
 		}
@@ -266,7 +263,7 @@
  * This also calls the writeback end hooks for the file pages so that
  * metadata and checksums can be updated in the file.
  */
-static void end_compressed_bio_write(struct bio *bio, int err)
+static void end_compressed_bio_write(struct bio *bio)
 {
 	struct extent_io_tree *tree;
 	struct compressed_bio *cb = bio->bi_private;
@@ -274,7 +271,7 @@
 	struct page *page;
 	unsigned long index;
 
-	if (err)
+	if (bio->bi_error)
 		cb->errors = 1;
 
 	/* if there are more bios still pending for this compressed
@@ -292,10 +289,11 @@
 		tree->ops->writepage_end_io_hook(cb->compressed_pages[0],
 						 cb->start,
 						 cb->start + cb->len - 1,
-						 NULL, 1);
+						 NULL,
+						 bio->bi_error ? 0 : 1);
 	cb->compressed_pages[0]->mapping = NULL;
 
-	end_compressed_writeback(inode, cb->start, cb->len);
+	end_compressed_writeback(inode, cb);
 	/* note, our inode could be gone now */
 
 	/*
@@ -361,7 +359,7 @@
 	bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
 
 	bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
-	if(!bio) {
+	if (!bio) {
 		kfree(cb);
 		return -ENOMEM;
 	}
@@ -374,7 +372,7 @@
 	for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
 		page = compressed_pages[pg_index];
 		page->mapping = inode->i_mapping;
-		if (bio->bi_size)
+		if (bio->bi_iter.bi_size)
 			ret = io_tree->ops->merge_bio_hook(WRITE, page, 0,
 							   PAGE_CACHE_SIZE,
 							   bio, 0);
@@ -393,7 +391,8 @@
 			 * freed before we're done setting it up
 			 */
 			atomic_inc(&cb->pending_bios);
-			ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
+			ret = btrfs_bio_wq_end_io(root->fs_info, bio,
+						  BTRFS_WQ_ENDIO_DATA);
 			BUG_ON(ret); /* -ENOMEM */
 
 			if (!skip_sum) {
@@ -414,7 +413,8 @@
 			bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
 		}
 		if (bytes_left < PAGE_CACHE_SIZE) {
-			printk("bytes left %lu compress len %lu nr %lu\n",
+			btrfs_info(BTRFS_I(inode)->root->fs_info,
+					"bytes left %lu compress len %lu nr %lu",
 			       bytes_left, cb->compressed_len, cb->nr_pages);
 		}
 		bytes_left -= PAGE_CACHE_SIZE;
@@ -423,7 +423,7 @@
 	}
 	bio_get(bio);
 
-	ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
+	ret = btrfs_bio_wq_end_io(root->fs_info, bio, BTRFS_WQ_ENDIO_DATA);
 	BUG_ON(ret); /* -ENOMEM */
 
 	if (!skip_sum) {
@@ -475,20 +475,19 @@
 		rcu_read_lock();
 		page = radix_tree_lookup(&mapping->page_tree, pg_index);
 		rcu_read_unlock();
-		if (page) {
+		if (page && !radix_tree_exceptional_entry(page)) {
 			misses++;
 			if (misses > 4)
 				break;
 			goto next;
 		}
 
-		page = __page_cache_alloc(mapping_gfp_mask(mapping) &
-								~__GFP_FS);
+		page = __page_cache_alloc(mapping_gfp_constraint(mapping,
+								 ~__GFP_FS));
 		if (!page)
 			break;
 
-		if (add_to_page_cache_lru(page, mapping, pg_index,
-								GFP_NOFS)) {
+		if (add_to_page_cache_lru(page, mapping, pg_index, GFP_NOFS)) {
			page_cache_release(page);
 			goto next;
 		}
@@ -508,7 +507,7 @@
 
 		if (!em || last_offset < em->start ||
 		    (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) ||
-		    (em->block_start >> 9) != cb->orig_bio->bi_sector) {
+		    (em->block_start >> 9) != cb->orig_bio->bi_iter.bi_sector) {
 			free_extent_map(em);
 			unlock_extent(tree, last_offset, end);
 			unlock_page(page);
@@ -554,7 +553,7 @@
  * in it. We don't actually do IO on those pages but allocate new ones
 * to hold the compressed pages on disk.
  *
- * bio->bi_sector points to the compressed extent on disk
+ * bio->bi_iter.bi_sector points to the compressed extent on disk
  * bio->bi_io_vec points to all of the inode pages
  * bio->bi_vcnt is a count of pages
  *
@@ -575,7 +574,7 @@
 	struct page *page;
 	struct block_device *bdev;
 	struct bio *comp_bio;
-	u64 cur_disk_byte = (u64)bio->bi_sector << 9;
+	u64 cur_disk_byte = (u64)bio->bi_iter.bi_sector << 9;
 	u64 em_len;
 	u64 em_start;
 	struct extent_map *em;
@@ -618,9 +617,8 @@
 	cb->compress_type = extent_compress_type(bio_flags);
 	cb->orig_bio = bio;
 
-	nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) /
-				 PAGE_CACHE_SIZE;
-	cb->compressed_pages = kzalloc(sizeof(struct page *) * nr_pages,
+	nr_pages = DIV_ROUND_UP(compressed_len, PAGE_CACHE_SIZE);
+	cb->compressed_pages = kcalloc(nr_pages, sizeof(struct page *),
 				       GFP_NOFS);
 	if (!cb->compressed_pages)
 		goto fail1;
@@ -639,7 +637,11 @@
 	faili = nr_pages - 1;
 	cb->nr_pages = nr_pages;
 
-	add_ra_bio_pages(inode, em_start + em_len, cb);
+	/* In the parent-locked case, we only locked the range we are
+	 * interested in. In all other cases, we can opportunistically
+	 * cache decompressed data that goes beyond the requested range. */
+	if (!(bio_flags & EXTENT_BIO_PARENT_LOCKED))
+		add_ra_bio_pages(inode, em_start + em_len, cb);
 
 	/* include any pages we added in add_ra-bio_pages */
 	uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
@@ -657,7 +659,7 @@
 		page->mapping = inode->i_mapping;
 		page->index = em_start >> PAGE_CACHE_SHIFT;
 
-		if (comp_bio->bi_size)
+		if (comp_bio->bi_iter.bi_size)
 			ret = tree->ops->merge_bio_hook(READ, page, 0,
 							PAGE_CACHE_SIZE,
 							comp_bio, 0);
@@ -669,7 +671,8 @@
 		    PAGE_CACHE_SIZE) {
 			bio_get(comp_bio);
 
-			ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
+			ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio,
+						  BTRFS_WQ_ENDIO_DATA);
 			BUG_ON(ret); /* -ENOMEM */
 
 			/*
@@ -685,13 +688,15 @@
 							comp_bio, sums);
 				BUG_ON(ret); /* -ENOMEM */
 			}
-			sums += (comp_bio->bi_size + root->sectorsize - 1) /
-				root->sectorsize;
+			sums += DIV_ROUND_UP(comp_bio->bi_iter.bi_size,
+					     root->sectorsize);
 
 			ret = btrfs_map_bio(root, READ, comp_bio,
 					    mirror_num, 0);
-			if (ret)
-				bio_endio(comp_bio, ret);
+			if (ret) {
+				comp_bio->bi_error = ret;
+				bio_endio(comp_bio);
+			}
 
 			bio_put(comp_bio);
 
@@ -707,7 +712,8 @@
 	}
 	bio_get(comp_bio);
 
-	ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
+	ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio,
+				  BTRFS_WQ_ENDIO_DATA);
 	BUG_ON(ret); /* -ENOMEM */
 
 	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
@@ -716,8 +722,10 @@
 	}
 
 	ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
-	if (ret)
-		bio_endio(comp_bio, ret);
+	if (ret) {
+		comp_bio->bi_error = ret;
+		bio_endio(comp_bio);
+	}
 
 	bio_put(comp_bio);
 	return 0;
@@ -736,13 +744,15 @@
 	return ret;
 }
 
-static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES];
-static spinlock_t comp_workspace_lock[BTRFS_COMPRESS_TYPES];
-static int comp_num_workspace[BTRFS_COMPRESS_TYPES];
-static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES];
-static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES];
+static struct {
+	struct list_head idle_ws;
+	spinlock_t ws_lock;
+	int num_ws;
+	atomic_t alloc_ws;
+	wait_queue_head_t ws_wait;
+} btrfs_comp_ws[BTRFS_COMPRESS_TYPES];
 
-static struct btrfs_compress_op *btrfs_compress_op[] = {
+static const struct btrfs_compress_op * const btrfs_compress_op[] = {
 	&btrfs_zlib_compress,
 	&btrfs_lzo_compress,
 };
@@ -752,10 +762,10 @@
 	int i;
 
 	for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
-		INIT_LIST_HEAD(&comp_idle_workspace[i]);
-		spin_lock_init(&comp_workspace_lock[i]);
-		atomic_set(&comp_alloc_workspace[i], 0);
-		init_waitqueue_head(&comp_workspace_wait[i]);
+		INIT_LIST_HEAD(&btrfs_comp_ws[i].idle_ws);
+		spin_lock_init(&btrfs_comp_ws[i].ws_lock);
+		atomic_set(&btrfs_comp_ws[i].alloc_ws, 0);
+		init_waitqueue_head(&btrfs_comp_ws[i].ws_wait);
 	}
 }
 
@@ -769,38 +779,38 @@
 	int cpus = num_online_cpus();
 	int idx = type - 1;
 
-	struct list_head *idle_workspace	= &comp_idle_workspace[idx];
-	spinlock_t *workspace_lock		= &comp_workspace_lock[idx];
-	atomic_t *alloc_workspace		= &comp_alloc_workspace[idx];
-	wait_queue_head_t *workspace_wait	= &comp_workspace_wait[idx];
-	int *num_workspace			= &comp_num_workspace[idx];
+	struct list_head *idle_ws	= &btrfs_comp_ws[idx].idle_ws;
+	spinlock_t *ws_lock		= &btrfs_comp_ws[idx].ws_lock;
+	atomic_t *alloc_ws		= &btrfs_comp_ws[idx].alloc_ws;
+	wait_queue_head_t *ws_wait	= &btrfs_comp_ws[idx].ws_wait;
+	int *num_ws			= &btrfs_comp_ws[idx].num_ws;
 again:
-	spin_lock(workspace_lock);
-	if (!list_empty(idle_workspace)) {
-		workspace = idle_workspace->next;
+	spin_lock(ws_lock);
+	if (!list_empty(idle_ws)) {
+		workspace = idle_ws->next;
 		list_del(workspace);
-		(*num_workspace)--;
-		spin_unlock(workspace_lock);
+		(*num_ws)--;
+		spin_unlock(ws_lock);
 		return workspace;
 
 	}
-	if (atomic_read(alloc_workspace) > cpus) {
+	if (atomic_read(alloc_ws) > cpus) {
 		DEFINE_WAIT(wait);
 
-		spin_unlock(workspace_lock);
-		prepare_to_wait(workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
-		if (atomic_read(alloc_workspace) > cpus && !*num_workspace)
+		spin_unlock(ws_lock);
+		prepare_to_wait(ws_wait, &wait, TASK_UNINTERRUPTIBLE);
+		if (atomic_read(alloc_ws) > cpus && !*num_ws)
 			schedule();
-		finish_wait(workspace_wait, &wait);
+		finish_wait(ws_wait, &wait);
 		goto again;
 	}
-	atomic_inc(alloc_workspace);
-	spin_unlock(workspace_lock);
+	atomic_inc(alloc_ws);
+	spin_unlock(ws_lock);
 
 	workspace = btrfs_compress_op[idx]->alloc_workspace();
 	if (IS_ERR(workspace)) {
-		atomic_dec(alloc_workspace);
-		wake_up(workspace_wait);
+		atomic_dec(alloc_ws);
+		wake_up(ws_wait);
 	}
 	return workspace;
 }
@@ -812,27 +822,30 @@
 static void free_workspace(int type, struct list_head *workspace)
 {
 	int idx = type - 1;
-	struct list_head *idle_workspace	= &comp_idle_workspace[idx];
-	spinlock_t *workspace_lock		= &comp_workspace_lock[idx];
-	atomic_t *alloc_workspace		= &comp_alloc_workspace[idx];
-	wait_queue_head_t *workspace_wait	= &comp_workspace_wait[idx];
-	int *num_workspace			= &comp_num_workspace[idx];
-
-	spin_lock(workspace_lock);
-	if (*num_workspace < num_online_cpus()) {
-		list_add_tail(workspace, idle_workspace);
-		(*num_workspace)++;
-		spin_unlock(workspace_lock);
+	struct list_head *idle_ws	= &btrfs_comp_ws[idx].idle_ws;
+	spinlock_t *ws_lock		= &btrfs_comp_ws[idx].ws_lock;
+	atomic_t *alloc_ws		= &btrfs_comp_ws[idx].alloc_ws;
+	wait_queue_head_t *ws_wait	= &btrfs_comp_ws[idx].ws_wait;
+	int *num_ws			= &btrfs_comp_ws[idx].num_ws;
+
+	spin_lock(ws_lock);
+	if (*num_ws < num_online_cpus()) {
+		list_add(workspace, idle_ws);
+		(*num_ws)++;
+		spin_unlock(ws_lock);
 		goto wake;
 	}
-	spin_unlock(workspace_lock);
+	spin_unlock(ws_lock);
 
 	btrfs_compress_op[idx]->free_workspace(workspace);
-	atomic_dec(alloc_workspace);
+	atomic_dec(alloc_ws);
 wake:
+	/*
+	 * Make sure counter is updated before we wake up waiters.
+	 */
 	smp_mb();
-	if (waitqueue_active(workspace_wait))
-		wake_up(workspace_wait);
+	if (waitqueue_active(ws_wait))
+		wake_up(ws_wait);
 }
 
 /*
@@ -844,11 +857,11 @@
 	int i;
 
 	for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
-		while (!list_empty(&comp_idle_workspace[i])) {
-			workspace = comp_idle_workspace[i].next;
+		while (!list_empty(&btrfs_comp_ws[i].idle_ws)) {
+			workspace = btrfs_comp_ws[i].idle_ws.next;
 			list_del(workspace);
 			btrfs_compress_op[i]->free_workspace(workspace);
-			atomic_dec(&comp_alloc_workspace[i]);
+			atomic_dec(&btrfs_comp_ws[i].alloc_ws);
 		}
 	}
 }
@@ -886,7 +899,7 @@
 
 	workspace = find_workspace(type);
 	if (IS_ERR(workspace))
-		return -1;
+		return PTR_ERR(workspace);
 
 	ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
 						      start, len, pages,
@@ -922,7 +935,7 @@
 
 	workspace = find_workspace(type);
 	if (IS_ERR(workspace))
-		return -ENOMEM;
+		return PTR_ERR(workspace);
 
 	ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in,
 							 disk_start,
@@ -944,7 +957,7 @@
 
 	workspace = find_workspace(type);
 	if (IS_ERR(workspace))
-		return -ENOMEM;
+		return PTR_ERR(workspace);
 
 	ret = btrfs_compress_op[type-1]->decompress(workspace, data_in,
 						  dest_page, start_byte,
@@ -1009,8 +1022,6 @@
 		bytes = min(bytes, working_bytes);
 		kaddr = kmap_atomic(page_out);
 		memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
-		if (*pg_index == (vcnt - 1) && *pg_offset == 0)
-			memset(kaddr + bytes, 0, PAGE_CACHE_SIZE - bytes);
 		kunmap_atomic(kaddr);
 		flush_dcache_page(page_out);
 
@@ -1052,3 +1063,34 @@
 
 	return 1;
 }
+
+/*
+ * When uncompressing data, we need to make sure and zero any parts of
+ * the biovec that were not filled in by the decompression code. pg_index
+ * and pg_offset indicate the last page and the last offset of that page
+ * that have been filled in. This will zero everything remaining in the
+ * biovec.
+ */
+void btrfs_clear_biovec_end(struct bio_vec *bvec, int vcnt,
+				   unsigned long pg_index,
+				   unsigned long pg_offset)
+{
+	while (pg_index < vcnt) {
+		struct page *page = bvec[pg_index].bv_page;
+		unsigned long off = bvec[pg_index].bv_offset;
+		unsigned long len = bvec[pg_index].bv_len;
+
+		if (pg_offset < off)
+			pg_offset = off;
+		if (pg_offset < off + len) {
+			unsigned long bytes = off + len - pg_offset;
+			char *kaddr;
+
+			kaddr = kmap_atomic(page);
+			memset(kaddr + pg_offset, 0, bytes);
+			kunmap_atomic(kaddr);
+		}
+		pg_index++;
+		pg_offset = 0;
+	}
+}