--- zzzz-none-000/linux-3.10.107/drivers/md/dm-crypt.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/drivers/md/dm-crypt.c 2021-02-04 17:41:59.000000000 +0000 @@ -1,7 +1,8 @@ /* - * Copyright (C) 2003 Christophe Saout + * Copyright (C) 2003 Jana Saout * Copyright (C) 2004 Clemens Fruhwirth - * Copyright (C) 2006-2009 Red Hat, Inc. All rights reserved. + * Copyright (C) 2006-2015 Red Hat, Inc. All rights reserved. + * Copyright (C) 2013 Milan Broz * * This file is released under the GPL. */ @@ -17,9 +18,11 @@ #include #include #include +#include #include #include #include +#include #include #include #include @@ -37,10 +40,8 @@ struct completion restart; struct bio *bio_in; struct bio *bio_out; - unsigned int offset_in; - unsigned int offset_out; - unsigned int idx_in; - unsigned int idx_out; + struct bvec_iter iter_in; + struct bvec_iter iter_out; sector_t cc_sector; atomic_t cc_pending; struct ablkcipher_request *req; @@ -59,8 +60,9 @@ atomic_t io_pending; int error; sector_t sector; - struct dm_crypt_io *base_io; -}; + + struct rb_node rb_node; +} CRYPTO_MINALIGN_ATTR; struct dm_crypt_request { struct convert_context *ctx; @@ -98,11 +100,19 @@ u8 *seed; }; +#define TCW_WHITENING_SIZE 16 +struct iv_tcw_private { + struct crypto_shash *crc32_tfm; + u8 *iv_seed; + u8 *whitening; +}; + /* * Crypt: maps a linear range of a block device * and encrypts / decrypts at the same time. */ -enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID }; +enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID, + DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD }; /* * The fields in here must be read only after initialization. @@ -115,14 +125,18 @@ * pool for per bio private data, crypto requests and * encryption requeusts/buffer pages */ - mempool_t *io_pool; mempool_t *req_pool; mempool_t *page_pool; struct bio_set *bs; + struct mutex bio_alloc_lock; struct workqueue_struct *io_queue; struct workqueue_struct *crypt_queue; + struct task_struct *write_thread; + wait_queue_head_t write_thread_wait; + struct rb_root write_tree; + char *cipher; char *cipher_string; @@ -131,6 +145,7 @@ struct iv_essiv_private essiv; struct iv_benbi_private benbi; struct iv_lmk_private lmk; + struct iv_tcw_private tcw; } iv_gen_private; sector_t iv_offset; unsigned int iv_size; @@ -155,16 +170,16 @@ */ unsigned int dmreq_start; + unsigned int per_bio_data_size; + unsigned long flags; unsigned int key_size; - unsigned int key_parts; + unsigned int key_parts; /* independent parts in key buffer */ + unsigned int key_extra_size; /* additional keys length */ u8 key[0]; }; #define MIN_IOS 16 -#define MIN_POOL_PAGES 32 - -static struct kmem_cache *_crypt_io_pool; static void clone_init(struct dm_crypt_io *, struct bio *); static void kcryptd_queue_crypt(struct dm_crypt_io *io); @@ -211,6 +226,16 @@ * version 3: the same as version 2 with additional IV seed * (it uses 65 keys, last key is used as IV seed) * + * tcw: Compatible implementation of the block chaining mode used + * by the TrueCrypt device encryption system (prior to version 4.1). + * For more info see: https://gitlab.com/cryptsetup/cryptsetup/wikis/TrueCryptOnDiskFormat + * It operates on full 512 byte sectors and uses CBC + * with an IV derived from initial key and the sector number. + * In addition, whitening value is applied on every sector, whitening + * is calculated from initial key, sector number and mixed using CRC32. + * Note that this encryption scheme is vulnerable to watermarking attacks + * and should be used for old compatible containers access only. + * * plumb: unimplemented, see: * http://article.gmane.org/gmane.linux.kernel.device-mapper.dm-crypt/454 */ @@ -506,29 +531,26 @@ u8 *data) { struct iv_lmk_private *lmk = &cc->iv_gen_private.lmk; - struct { - struct shash_desc desc; - char ctx[crypto_shash_descsize(lmk->hash_tfm)]; - } sdesc; + SHASH_DESC_ON_STACK(desc, lmk->hash_tfm); struct md5_state md5state; - u32 buf[4]; + __le32 buf[4]; int i, r; - sdesc.desc.tfm = lmk->hash_tfm; - sdesc.desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; + desc->tfm = lmk->hash_tfm; + desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; - r = crypto_shash_init(&sdesc.desc); + r = crypto_shash_init(desc); if (r) return r; if (lmk->seed) { - r = crypto_shash_update(&sdesc.desc, lmk->seed, LMK_SEED_SIZE); + r = crypto_shash_update(desc, lmk->seed, LMK_SEED_SIZE); if (r) return r; } /* Sector is always 512B, block size 16, add data of blocks 1-31 */ - r = crypto_shash_update(&sdesc.desc, data + 16, 16 * 31); + r = crypto_shash_update(desc, data + 16, 16 * 31); if (r) return r; @@ -537,12 +559,12 @@ buf[1] = cpu_to_le32((((u64)dmreq->iv_sector >> 32) & 0x00FFFFFF) | 0x80000000); buf[2] = cpu_to_le32(4024); buf[3] = 0; - r = crypto_shash_update(&sdesc.desc, (u8 *)buf, sizeof(buf)); + r = crypto_shash_update(desc, (u8 *)buf, sizeof(buf)); if (r) return r; /* No MD5 padding here */ - r = crypto_shash_export(&sdesc.desc, &md5state); + r = crypto_shash_export(desc, &md5state); if (r) return r; @@ -589,6 +611,150 @@ return r; } +static void crypt_iv_tcw_dtr(struct crypt_config *cc) +{ + struct iv_tcw_private *tcw = &cc->iv_gen_private.tcw; + + kzfree(tcw->iv_seed); + tcw->iv_seed = NULL; + kzfree(tcw->whitening); + tcw->whitening = NULL; + + if (tcw->crc32_tfm && !IS_ERR(tcw->crc32_tfm)) + crypto_free_shash(tcw->crc32_tfm); + tcw->crc32_tfm = NULL; +} + +static int crypt_iv_tcw_ctr(struct crypt_config *cc, struct dm_target *ti, + const char *opts) +{ + struct iv_tcw_private *tcw = &cc->iv_gen_private.tcw; + + if (cc->key_size <= (cc->iv_size + TCW_WHITENING_SIZE)) { + ti->error = "Wrong key size for TCW"; + return -EINVAL; + } + + tcw->crc32_tfm = crypto_alloc_shash("crc32", 0, 0); + if (IS_ERR(tcw->crc32_tfm)) { + ti->error = "Error initializing CRC32 in TCW"; + return PTR_ERR(tcw->crc32_tfm); + } + + tcw->iv_seed = kzalloc(cc->iv_size, GFP_KERNEL); + tcw->whitening = kzalloc(TCW_WHITENING_SIZE, GFP_KERNEL); + if (!tcw->iv_seed || !tcw->whitening) { + crypt_iv_tcw_dtr(cc); + ti->error = "Error allocating seed storage in TCW"; + return -ENOMEM; + } + + return 0; +} + +static int crypt_iv_tcw_init(struct crypt_config *cc) +{ + struct iv_tcw_private *tcw = &cc->iv_gen_private.tcw; + int key_offset = cc->key_size - cc->iv_size - TCW_WHITENING_SIZE; + + memcpy(tcw->iv_seed, &cc->key[key_offset], cc->iv_size); + memcpy(tcw->whitening, &cc->key[key_offset + cc->iv_size], + TCW_WHITENING_SIZE); + + return 0; +} + +static int crypt_iv_tcw_wipe(struct crypt_config *cc) +{ + struct iv_tcw_private *tcw = &cc->iv_gen_private.tcw; + + memset(tcw->iv_seed, 0, cc->iv_size); + memset(tcw->whitening, 0, TCW_WHITENING_SIZE); + + return 0; +} + +static int crypt_iv_tcw_whitening(struct crypt_config *cc, + struct dm_crypt_request *dmreq, + u8 *data) +{ + struct iv_tcw_private *tcw = &cc->iv_gen_private.tcw; + u64 sector = cpu_to_le64((u64)dmreq->iv_sector); + u8 buf[TCW_WHITENING_SIZE]; + SHASH_DESC_ON_STACK(desc, tcw->crc32_tfm); + int i, r; + + /* xor whitening with sector number */ + memcpy(buf, tcw->whitening, TCW_WHITENING_SIZE); + crypto_xor(buf, (u8 *)§or, 8); + crypto_xor(&buf[8], (u8 *)§or, 8); + + /* calculate crc32 for every 32bit part and xor it */ + desc->tfm = tcw->crc32_tfm; + desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; + for (i = 0; i < 4; i++) { + r = crypto_shash_init(desc); + if (r) + goto out; + r = crypto_shash_update(desc, &buf[i * 4], 4); + if (r) + goto out; + r = crypto_shash_final(desc, &buf[i * 4]); + if (r) + goto out; + } + crypto_xor(&buf[0], &buf[12], 4); + crypto_xor(&buf[4], &buf[8], 4); + + /* apply whitening (8 bytes) to whole sector */ + for (i = 0; i < ((1 << SECTOR_SHIFT) / 8); i++) + crypto_xor(data + i * 8, buf, 8); +out: + memzero_explicit(buf, sizeof(buf)); + return r; +} + +static int crypt_iv_tcw_gen(struct crypt_config *cc, u8 *iv, + struct dm_crypt_request *dmreq) +{ + struct iv_tcw_private *tcw = &cc->iv_gen_private.tcw; + u64 sector = cpu_to_le64((u64)dmreq->iv_sector); + u8 *src; + int r = 0; + + /* Remove whitening from ciphertext */ + if (bio_data_dir(dmreq->ctx->bio_in) != WRITE) { + src = kmap_atomic(sg_page(&dmreq->sg_in)); + r = crypt_iv_tcw_whitening(cc, dmreq, src + dmreq->sg_in.offset); + kunmap_atomic(src); + } + + /* Calculate IV */ + memcpy(iv, tcw->iv_seed, cc->iv_size); + crypto_xor(iv, (u8 *)§or, 8); + if (cc->iv_size > 8) + crypto_xor(&iv[8], (u8 *)§or, cc->iv_size - 8); + + return r; +} + +static int crypt_iv_tcw_post(struct crypt_config *cc, u8 *iv, + struct dm_crypt_request *dmreq) +{ + u8 *dst; + int r; + + if (bio_data_dir(dmreq->ctx->bio_in) != WRITE) + return 0; + + /* Apply whitening on ciphertext */ + dst = kmap_atomic(sg_page(&dmreq->sg_out)); + r = crypt_iv_tcw_whitening(cc, dmreq, dst + dmreq->sg_out.offset); + kunmap_atomic(dst); + + return r; +} + static struct crypt_iv_operations crypt_iv_plain_ops = { .generator = crypt_iv_plain_gen }; @@ -624,6 +790,15 @@ .post = crypt_iv_lmk_post }; +static struct crypt_iv_operations crypt_iv_tcw_ops = { + .ctr = crypt_iv_tcw_ctr, + .dtr = crypt_iv_tcw_dtr, + .init = crypt_iv_tcw_init, + .wipe = crypt_iv_tcw_wipe, + .generator = crypt_iv_tcw_gen, + .post = crypt_iv_tcw_post +}; + static void crypt_convert_init(struct crypt_config *cc, struct convert_context *ctx, struct bio *bio_out, struct bio *bio_in, @@ -631,10 +806,10 @@ { ctx->bio_in = bio_in; ctx->bio_out = bio_out; - ctx->offset_in = 0; - ctx->offset_out = 0; - ctx->idx_in = bio_in ? bio_in->bi_idx : 0; - ctx->idx_out = bio_out ? bio_out->bi_idx : 0; + if (bio_in) + ctx->iter_in = bio_in->bi_iter; + if (bio_out) + ctx->iter_out = bio_out->bi_iter; ctx->cc_sector = sector + cc->iv_offset; init_completion(&ctx->restart); } @@ -662,8 +837,8 @@ struct convert_context *ctx, struct ablkcipher_request *req) { - struct bio_vec *bv_in = bio_iovec_idx(ctx->bio_in, ctx->idx_in); - struct bio_vec *bv_out = bio_iovec_idx(ctx->bio_out, ctx->idx_out); + struct bio_vec bv_in = bio_iter_iovec(ctx->bio_in, ctx->iter_in); + struct bio_vec bv_out = bio_iter_iovec(ctx->bio_out, ctx->iter_out); struct dm_crypt_request *dmreq; u8 *iv; int r; @@ -674,24 +849,15 @@ dmreq->iv_sector = ctx->cc_sector; dmreq->ctx = ctx; sg_init_table(&dmreq->sg_in, 1); - sg_set_page(&dmreq->sg_in, bv_in->bv_page, 1 << SECTOR_SHIFT, - bv_in->bv_offset + ctx->offset_in); + sg_set_page(&dmreq->sg_in, bv_in.bv_page, 1 << SECTOR_SHIFT, + bv_in.bv_offset); sg_init_table(&dmreq->sg_out, 1); - sg_set_page(&dmreq->sg_out, bv_out->bv_page, 1 << SECTOR_SHIFT, - bv_out->bv_offset + ctx->offset_out); + sg_set_page(&dmreq->sg_out, bv_out.bv_page, 1 << SECTOR_SHIFT, + bv_out.bv_offset); - ctx->offset_in += 1 << SECTOR_SHIFT; - if (ctx->offset_in >= bv_in->bv_len) { - ctx->offset_in = 0; - ctx->idx_in++; - } - - ctx->offset_out += 1 << SECTOR_SHIFT; - if (ctx->offset_out >= bv_out->bv_len) { - ctx->offset_out = 0; - ctx->idx_out++; - } + bio_advance_iter(ctx->bio_in, &ctx->iter_in, 1 << SECTOR_SHIFT); + bio_advance_iter(ctx->bio_out, &ctx->iter_out, 1 << SECTOR_SHIFT); if (cc->iv_gen_ops) { r = cc->iv_gen_ops->generator(cc, iv, dmreq); @@ -725,11 +891,25 @@ ctx->req = mempool_alloc(cc->req_pool, GFP_NOIO); ablkcipher_request_set_tfm(ctx->req, cc->tfms[key_index]); + + /* + * Use REQ_MAY_BACKLOG so a cipher driver internally backlogs + * requests if driver request queue is full. + */ ablkcipher_request_set_callback(ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, kcryptd_async_done, dmreq_of_req(cc, ctx->req)); } +static void crypt_free_req(struct crypt_config *cc, + struct ablkcipher_request *req, struct bio *base_bio) +{ + struct dm_crypt_io *io = dm_per_bio_data(base_bio, cc->per_bio_data_size); + + if ((struct ablkcipher_request *)(io + 1) != req) + mempool_free(req, cc->req_pool); +} + /* * Encrypt / decrypt data from one bio to another one (can be the same one) */ @@ -740,8 +920,7 @@ atomic_set(&ctx->cc_pending, 1); - while(ctx->idx_in < ctx->bio_in->bi_vcnt && - ctx->idx_out < ctx->bio_out->bi_vcnt) { + while (ctx->iter_in.bi_size && ctx->iter_out.bi_size) { crypt_alloc_req(cc, ctx); @@ -750,24 +929,32 @@ r = crypt_convert_block(cc, ctx, ctx->req); switch (r) { - /* async */ + /* + * The request was queued by a crypto driver + * but the driver request queue is full, let's wait. + */ case -EBUSY: wait_for_completion(&ctx->restart); - INIT_COMPLETION(ctx->restart); - /* fall through*/ + reinit_completion(&ctx->restart); + /* fall through */ + /* + * The request is queued and processed asynchronously, + * completion function kcryptd_async_done() will be called. + */ case -EINPROGRESS: ctx->req = NULL; ctx->cc_sector++; continue; - - /* sync */ + /* + * The request was already processed (synchronously). + */ case 0: atomic_dec(&ctx->cc_pending); ctx->cc_sector++; cond_resched(); continue; - /* error */ + /* There was an error while processing the request. */ default: atomic_dec(&ctx->cc_pending); return r; @@ -777,57 +964,71 @@ return 0; } +static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone); + /* * Generate a new unfragmented bio with the given size - * This should never violate the device limitations - * May return a smaller bio when running out of pages, indicated by - * *out_of_pages set to 1. + * This should never violate the device limitations (but only because + * max_segment_size is being constrained to PAGE_SIZE). + * + * This function may be called concurrently. If we allocate from the mempool + * concurrently, there is a possibility of deadlock. For example, if we have + * mempool of 256 pages, two processes, each wanting 256, pages allocate from + * the mempool concurrently, it may deadlock in a situation where both processes + * have allocated 128 pages and the mempool is exhausted. + * + * In order to avoid this scenario we allocate the pages under a mutex. + * + * In order to not degrade performance with excessive locking, we try + * non-blocking allocations without a mutex first but on failure we fallback + * to blocking allocations with a mutex. */ -static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size, - unsigned *out_of_pages) +static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size) { struct crypt_config *cc = io->cc; struct bio *clone; unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; - gfp_t gfp_mask = GFP_NOIO | __GFP_HIGHMEM; - unsigned i, len; + gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM; + unsigned i, len, remaining_size; struct page *page; + struct bio_vec *bvec; + +retry: + if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM)) + mutex_lock(&cc->bio_alloc_lock); clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs); if (!clone) - return NULL; + goto return_clone; clone_init(io, clone); - *out_of_pages = 0; + + remaining_size = size; for (i = 0; i < nr_iovecs; i++) { page = mempool_alloc(cc->page_pool, gfp_mask); if (!page) { - *out_of_pages = 1; - break; + crypt_free_buffer_pages(cc, clone); + bio_put(clone); + gfp_mask |= __GFP_DIRECT_RECLAIM; + goto retry; } - /* - * If additional pages cannot be allocated without waiting, - * return a partially-allocated bio. The caller will then try - * to allocate more bios while submitting this partial bio. - */ - gfp_mask = (gfp_mask | __GFP_NOWARN) & ~__GFP_WAIT; + len = (remaining_size > PAGE_SIZE) ? PAGE_SIZE : remaining_size; - len = (size > PAGE_SIZE) ? PAGE_SIZE : size; + bvec = &clone->bi_io_vec[clone->bi_vcnt++]; + bvec->bv_page = page; + bvec->bv_len = len; + bvec->bv_offset = 0; - if (!bio_add_page(clone, page, len, 0)) { - mempool_free(page, cc->page_pool); - break; - } + clone->bi_iter.bi_size += len; - size -= len; + remaining_size -= len; } - if (!clone->bi_size) { - bio_put(clone); - return NULL; - } +return_clone: + if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM)) + mutex_unlock(&cc->bio_alloc_lock); return clone; } @@ -844,21 +1045,15 @@ } } -static struct dm_crypt_io *crypt_io_alloc(struct crypt_config *cc, - struct bio *bio, sector_t sector) +static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc, + struct bio *bio, sector_t sector) { - struct dm_crypt_io *io; - - io = mempool_alloc(cc->io_pool, GFP_NOIO); io->cc = cc; io->base_bio = bio; io->sector = sector; io->error = 0; - io->base_io = NULL; io->ctx.req = NULL; atomic_set(&io->io_pending, 0); - - return io; } static void crypt_inc_pending(struct dm_crypt_io *io) @@ -869,29 +1064,21 @@ /* * One of the bios was finished. Check for completion of * the whole request and correctly clean up the buffer. - * If base_io is set, wait for the last fragment to complete. */ static void crypt_dec_pending(struct dm_crypt_io *io) { struct crypt_config *cc = io->cc; struct bio *base_bio = io->base_bio; - struct dm_crypt_io *base_io = io->base_io; int error = io->error; if (!atomic_dec_and_test(&io->io_pending)) return; if (io->ctx.req) - mempool_free(io->ctx.req, cc->req_pool); - mempool_free(io, cc->io_pool); + crypt_free_req(cc, io->ctx.req, base_bio); - if (likely(!base_io)) - bio_endio(base_bio, error); - else { - if (error && !base_io->error) - base_io->error = error; - crypt_dec_pending(base_io); - } + base_bio->bi_error = error; + bio_endio(base_bio); } /* @@ -911,14 +1098,12 @@ * The work is done per CPU global for all dm-crypt instances. * They should not depend on each other and do not block. */ -static void crypt_endio(struct bio *clone, int error) +static void crypt_endio(struct bio *clone) { struct dm_crypt_io *io = clone->bi_private; struct crypt_config *cc = io->cc; unsigned rw = bio_data_dir(clone); - - if (unlikely(!bio_flagged(clone, BIO_UPTODATE) && !error)) - error = -EIO; + int error; /* * free the processed pages @@ -926,6 +1111,7 @@ if (rw == WRITE) crypt_free_buffer_pages(cc, clone); + error = clone->bi_error; bio_put(clone); if (rw == READ && !error) { @@ -952,58 +1138,118 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) { struct crypt_config *cc = io->cc; - struct bio *base_bio = io->base_bio; struct bio *clone; /* - * The block layer might modify the bvec array, so always - * copy the required bvecs because we need the original - * one in order to decrypt the whole bio data *afterwards*. + * We need the original biovec array in order to decrypt + * the whole bio data *afterwards* -- thanks to immutable + * biovecs we don't need to worry about the block layer + * modifying the biovec array; so leverage bio_clone_fast(). */ - clone = bio_clone_bioset(base_bio, gfp, cc->bs); + clone = bio_clone_fast(io->base_bio, gfp, cc->bs); if (!clone) return 1; crypt_inc_pending(io); clone_init(io, clone); - clone->bi_sector = cc->start + io->sector; + clone->bi_iter.bi_sector = cc->start + io->sector; generic_make_request(clone); return 0; } +static void kcryptd_io_read_work(struct work_struct *work) +{ + struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); + + crypt_inc_pending(io); + if (kcryptd_io_read(io, GFP_NOIO)) + io->error = -ENOMEM; + crypt_dec_pending(io); +} + +static void kcryptd_queue_read(struct dm_crypt_io *io) +{ + struct crypt_config *cc = io->cc; + + INIT_WORK(&io->work, kcryptd_io_read_work); + queue_work(cc->io_queue, &io->work); +} + static void kcryptd_io_write(struct dm_crypt_io *io) { struct bio *clone = io->ctx.bio_out; + generic_make_request(clone); } -static void kcryptd_io(struct work_struct *work) +#define crypt_io_from_node(node) rb_entry((node), struct dm_crypt_io, rb_node) + +static int dmcrypt_write(void *data) { - struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); + struct crypt_config *cc = data; + struct dm_crypt_io *io; - if (bio_data_dir(io->base_bio) == READ) { - crypt_inc_pending(io); - if (kcryptd_io_read(io, GFP_NOIO)) - io->error = -ENOMEM; - crypt_dec_pending(io); - } else - kcryptd_io_write(io); -} + while (1) { + struct rb_root write_tree; + struct blk_plug plug; -static void kcryptd_queue_io(struct dm_crypt_io *io) -{ - struct crypt_config *cc = io->cc; + DECLARE_WAITQUEUE(wait, current); - INIT_WORK(&io->work, kcryptd_io); - queue_work(cc->io_queue, &io->work); + spin_lock_irq(&cc->write_thread_wait.lock); +continue_locked: + + if (!RB_EMPTY_ROOT(&cc->write_tree)) + goto pop_from_list; + + set_current_state(TASK_INTERRUPTIBLE); + __add_wait_queue(&cc->write_thread_wait, &wait); + + spin_unlock_irq(&cc->write_thread_wait.lock); + + if (unlikely(kthread_should_stop())) { + set_task_state(current, TASK_RUNNING); + remove_wait_queue(&cc->write_thread_wait, &wait); + break; + } + + schedule(); + + set_task_state(current, TASK_RUNNING); + spin_lock_irq(&cc->write_thread_wait.lock); + __remove_wait_queue(&cc->write_thread_wait, &wait); + goto continue_locked; + +pop_from_list: + write_tree = cc->write_tree; + cc->write_tree = RB_ROOT; + spin_unlock_irq(&cc->write_thread_wait.lock); + + BUG_ON(rb_parent(write_tree.rb_node)); + + /* + * Note: we cannot walk the tree here with rb_next because + * the structures may be freed when kcryptd_io_write is called. + */ + blk_start_plug(&plug); + do { + io = crypt_io_from_node(rb_first(&write_tree)); + rb_erase(&io->rb_node, &write_tree); + kcryptd_io_write(io); + } while (!RB_EMPTY_ROOT(&write_tree)); + blk_finish_plug(&plug); + } + return 0; } static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async) { struct bio *clone = io->ctx.bio_out; struct crypt_config *cc = io->cc; + unsigned long flags; + sector_t sector; + struct rb_node **rbp, *parent; if (unlikely(io->error < 0)) { crypt_free_buffer_pages(cc, clone); @@ -1013,24 +1259,38 @@ } /* crypt_convert should have filled the clone bio */ - BUG_ON(io->ctx.idx_out < clone->bi_vcnt); + BUG_ON(io->ctx.iter_out.bi_size); - clone->bi_sector = cc->start + io->sector; + clone->bi_iter.bi_sector = cc->start + io->sector; - if (async) - kcryptd_queue_io(io); - else + if (likely(!async) && test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags)) { generic_make_request(clone); + return; + } + + spin_lock_irqsave(&cc->write_thread_wait.lock, flags); + rbp = &cc->write_tree.rb_node; + parent = NULL; + sector = io->sector; + while (*rbp) { + parent = *rbp; + if (sector < crypt_io_from_node(parent)->sector) + rbp = &(*rbp)->rb_left; + else + rbp = &(*rbp)->rb_right; + } + rb_link_node(&io->rb_node, parent, rbp); + rb_insert_color(&io->rb_node, &cc->write_tree); + + wake_up_locked(&cc->write_thread_wait); + spin_unlock_irqrestore(&cc->write_thread_wait.lock, flags); } static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) { struct crypt_config *cc = io->cc; struct bio *clone; - struct dm_crypt_io *new_io; int crypt_finished; - unsigned out_of_pages = 0; - unsigned remaining = io->base_bio->bi_size; sector_t sector = io->sector; int r; @@ -1040,81 +1300,30 @@ crypt_inc_pending(io); crypt_convert_init(cc, &io->ctx, NULL, io->base_bio, sector); - /* - * The allocated buffers can be smaller than the whole bio, - * so repeat the whole process until all the data can be handled. - */ - while (remaining) { - clone = crypt_alloc_buffer(io, remaining, &out_of_pages); - if (unlikely(!clone)) { - io->error = -ENOMEM; - break; - } - - io->ctx.bio_out = clone; - io->ctx.idx_out = 0; - - remaining -= clone->bi_size; - sector += bio_sectors(clone); - - crypt_inc_pending(io); - - r = crypt_convert(cc, &io->ctx); - if (r < 0) - io->error = -EIO; - - crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending); - - /* Encryption was already finished, submit io now */ - if (crypt_finished) { - kcryptd_crypt_write_io_submit(io, 0); - - /* - * If there was an error, do not try next fragments. - * For async, error is processed in async handler. - */ - if (unlikely(r < 0)) - break; + clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size); + if (unlikely(!clone)) { + io->error = -EIO; + goto dec; + } - io->sector = sector; - } + io->ctx.bio_out = clone; + io->ctx.iter_out = clone->bi_iter; - /* - * Out of memory -> run queues - * But don't wait if split was due to the io size restriction - */ - if (unlikely(out_of_pages)) - congestion_wait(BLK_RW_ASYNC, HZ/100); + sector += bio_sectors(clone); - /* - * With async crypto it is unsafe to share the crypto context - * between fragments, so switch to a new dm_crypt_io structure. - */ - if (unlikely(!crypt_finished && remaining)) { - new_io = crypt_io_alloc(io->cc, io->base_bio, - sector); - crypt_inc_pending(new_io); - crypt_convert_init(cc, &new_io->ctx, NULL, - io->base_bio, sector); - new_io->ctx.idx_in = io->ctx.idx_in; - new_io->ctx.offset_in = io->ctx.offset_in; - - /* - * Fragments after the first use the base_io - * pending count. - */ - if (!io->base_io) - new_io->base_io = io; - else { - new_io->base_io = io->base_io; - crypt_inc_pending(io->base_io); - crypt_dec_pending(io); - } + crypt_inc_pending(io); + r = crypt_convert(cc, &io->ctx); + if (r) + io->error = -EIO; + crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending); - io = new_io; - } + /* Encryption was already finished, submit io now */ + if (crypt_finished) { + kcryptd_crypt_write_io_submit(io, 0); + io->sector = sector; } +dec: crypt_dec_pending(io); } @@ -1151,6 +1360,11 @@ struct dm_crypt_io *io = container_of(ctx, struct dm_crypt_io, ctx); struct crypt_config *cc = io->cc; + /* + * A request from crypto driver backlog is going to be processed now, + * finish the completion and continue in crypt_convert(). + * (Callback will be called for the second time for this request.) + */ if (error == -EINPROGRESS) { complete(&ctx->restart); return; @@ -1162,7 +1376,7 @@ if (error < 0) io->error = -EIO; - mempool_free(req_of_dmreq(cc, dmreq), cc->req_pool); + crypt_free_req(cc, req_of_dmreq(cc, dmreq), io->base_bio); if (!atomic_dec_and_test(&ctx->cc_pending)) return; @@ -1256,9 +1470,12 @@ static int crypt_setkey_allcpus(struct crypt_config *cc) { - unsigned subkey_size = cc->key_size >> ilog2(cc->tfms_count); + unsigned subkey_size; int err = 0, i, r; + /* Ignore extra keys (which are used for IV etc) */ + subkey_size = (cc->key_size - cc->key_extra_size) >> ilog2(cc->tfms_count); + for (i = 0; i < cc->tfms_count; i++) { r = crypto_ablkcipher_setkey(cc->tfms[i], cc->key + (i * subkey_size), @@ -1317,6 +1534,9 @@ if (!cc) return; + if (cc->write_thread) + kthread_stop(cc->write_thread); + if (cc->io_queue) destroy_workqueue(cc->io_queue); if (cc->crypt_queue) @@ -1327,12 +1547,8 @@ if (cc->bs) bioset_free(cc->bs); - if (cc->page_pool) - mempool_destroy(cc->page_pool); - if (cc->req_pool) - mempool_destroy(cc->req_pool); - if (cc->io_pool) - mempool_destroy(cc->io_pool); + mempool_destroy(cc->page_pool); + mempool_destroy(cc->req_pool); if (cc->iv_gen_ops && cc->iv_gen_ops->dtr) cc->iv_gen_ops->dtr(cc); @@ -1351,7 +1567,7 @@ char *cipher_in, char *key) { struct crypt_config *cc = ti->private; - char *tmp, *cipher, *chainmode, *ivmode, *ivopts, *keycount; + char *tmp, *cipher, *chainmode, *ivmode, *ivopts, *keycount, *qcengine; char *cipher_api = NULL; int ret = -EINVAL; char dummy; @@ -1382,12 +1598,18 @@ return -EINVAL; } cc->key_parts = cc->tfms_count; + cc->key_extra_size = 0; cc->cipher = kstrdup(cipher, GFP_KERNEL); if (!cc->cipher) goto bad_mem; chainmode = strsep(&tmp, "-"); + if (strnstr(tmp, "qce", sizeof("qce"))) + qcengine = strsep(&tmp, "-"); + else + qcengine = NULL; + ivopts = strsep(&tmp, "-"); ivmode = strsep(&ivopts, ":"); @@ -1412,8 +1634,12 @@ if (!cipher_api) goto bad_mem; - ret = snprintf(cipher_api, CRYPTO_MAX_ALG_NAME, - "%s(%s)", chainmode, cipher); + if (qcengine) + ret = snprintf(cipher_api, CRYPTO_MAX_ALG_NAME, + "%s-%s-%s", chainmode, cipher, qcengine); + else + ret = snprintf(cipher_api, CRYPTO_MAX_ALG_NAME, + "%s(%s)", chainmode, cipher); if (ret < 0) { kfree(cipher_api); goto bad_mem; @@ -1426,13 +1652,6 @@ goto bad; } - /* Initialize and set key */ - ret = crypt_set_key(cc, key); - if (ret < 0) { - ti->error = "Error decoding and setting key"; - goto bad; - } - /* Initialize IV */ cc->iv_size = crypto_ablkcipher_ivsize(any_tfm(cc)); if (cc->iv_size) @@ -1459,18 +1678,33 @@ cc->iv_gen_ops = &crypt_iv_null_ops; else if (strcmp(ivmode, "lmk") == 0) { cc->iv_gen_ops = &crypt_iv_lmk_ops; - /* Version 2 and 3 is recognised according + /* + * Version 2 and 3 is recognised according * to length of provided multi-key string. * If present (version 3), last key is used as IV seed. + * All keys (including IV seed) are always the same size. */ - if (cc->key_size % cc->key_parts) + if (cc->key_size % cc->key_parts) { cc->key_parts++; + cc->key_extra_size = cc->key_size / cc->key_parts; + } + } else if (strcmp(ivmode, "tcw") == 0) { + cc->iv_gen_ops = &crypt_iv_tcw_ops; + cc->key_parts += 2; /* IV + whitening */ + cc->key_extra_size = cc->iv_size + TCW_WHITENING_SIZE; } else { ret = -EINVAL; ti->error = "Invalid IV mode"; goto bad; } + /* Initialize and set key */ + ret = crypt_set_key(cc, key); + if (ret < 0) { + ti->error = "Error decoding and setting key"; + goto bad; + } + /* Allocate IV */ if (cc->iv_gen_ops && cc->iv_gen_ops->ctr) { ret = cc->iv_gen_ops->ctr(cc, ti, ivopts); @@ -1515,7 +1749,7 @@ char dummy; static struct dm_arg _args[] = { - {0, 1, "Invalid number of feature args"}, + {0, 3, "Invalid number of feature args"}, }; if (argc < 5) { @@ -1537,13 +1771,6 @@ if (ret < 0) goto bad; - ret = -ENOMEM; - cc->io_pool = mempool_create_slab_pool(MIN_IOS, _crypt_io_pool); - if (!cc->io_pool) { - ti->error = "Cannot allocate crypt io mempool"; - goto bad; - } - cc->dmreq_start = sizeof(struct ablkcipher_request); cc->dmreq_start += crypto_ablkcipher_reqsize(any_tfm(cc)); cc->dmreq_start = ALIGN(cc->dmreq_start, __alignof__(struct dm_crypt_request)); @@ -1561,6 +1788,7 @@ iv_size_padding = crypto_ablkcipher_alignmask(any_tfm(cc)); } + ret = -ENOMEM; cc->req_pool = mempool_create_kmalloc_pool(MIN_IOS, cc->dmreq_start + sizeof(struct dm_crypt_request) + iv_size_padding + cc->iv_size); if (!cc->req_pool) { @@ -1568,7 +1796,12 @@ goto bad; } - cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0); + cc->per_bio_data_size = ti->per_bio_data_size = + ALIGN(sizeof(struct dm_crypt_io) + cc->dmreq_start + + sizeof(struct dm_crypt_request) + iv_size_padding + cc->iv_size, + ARCH_KMALLOC_MINALIGN); + + cc->page_pool = mempool_create_page_pool(BIO_MAX_PAGES, 0); if (!cc->page_pool) { ti->error = "Cannot allocate page mempool"; goto bad; @@ -1580,6 +1813,8 @@ goto bad; } + mutex_init(&cc->bio_alloc_lock); + ret = -EINVAL; if (sscanf(argv[2], "%llu%c", &tmpll, &dummy) != 1) { ti->error = "Invalid iv_offset sector"; @@ -1587,11 +1822,13 @@ } cc->iv_offset = tmpll; - if (dm_get_device(ti, argv[3], dm_table_get_mode(ti->table), &cc->dev)) { + ret = dm_get_device(ti, argv[3], dm_table_get_mode(ti->table), &cc->dev); + if (ret) { ti->error = "Device lookup failed"; goto bad; } + ret = -EINVAL; if (sscanf(argv[4], "%llu%c", &tmpll, &dummy) != 1) { ti->error = "Invalid device sector"; goto bad; @@ -1610,38 +1847,59 @@ if (ret) goto bad; - opt_string = dm_shift_arg(&as); + ret = -EINVAL; + while (opt_params--) { + opt_string = dm_shift_arg(&as); + if (!opt_string) { + ti->error = "Not enough feature arguments"; + goto bad; + } + + if (!strcasecmp(opt_string, "allow_discards")) + ti->num_discard_bios = 1; - if (opt_params == 1 && opt_string && - !strcasecmp(opt_string, "allow_discards")) - ti->num_discard_bios = 1; - else if (opt_params) { - ret = -EINVAL; - ti->error = "Invalid feature arguments"; - goto bad; + else if (!strcasecmp(opt_string, "same_cpu_crypt")) + set_bit(DM_CRYPT_SAME_CPU, &cc->flags); + + else if (!strcasecmp(opt_string, "submit_from_crypt_cpus")) + set_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags); + + else { + ti->error = "Invalid feature arguments"; + goto bad; + } } } ret = -ENOMEM; - cc->io_queue = alloc_workqueue("kcryptd_io", - WQ_NON_REENTRANT| - WQ_MEM_RECLAIM, - 1); + cc->io_queue = alloc_workqueue("kcryptd_io", WQ_MEM_RECLAIM, 1); if (!cc->io_queue) { ti->error = "Couldn't create kcryptd io queue"; goto bad; } - cc->crypt_queue = alloc_workqueue("kcryptd", - WQ_NON_REENTRANT| - WQ_CPU_INTENSIVE| - WQ_MEM_RECLAIM, - 1); + if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags)) + cc->crypt_queue = alloc_workqueue("kcryptd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1); + else + cc->crypt_queue = alloc_workqueue("kcryptd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND, + num_online_cpus()); if (!cc->crypt_queue) { ti->error = "Couldn't create kcryptd queue"; goto bad; } + init_waitqueue_head(&cc->write_thread_wait); + cc->write_tree = RB_ROOT; + + cc->write_thread = kthread_create(dmcrypt_write, cc, "dmcrypt_write"); + if (IS_ERR(cc->write_thread)) { + ret = PTR_ERR(cc->write_thread); + cc->write_thread = NULL; + ti->error = "Couldn't spawn write thread"; + goto bad; + } + wake_up_process(cc->write_thread); + ti->num_flush_bios = 1; ti->discard_zeroes_data_unsupported = true; @@ -1665,15 +1923,25 @@ if (unlikely(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))) { bio->bi_bdev = cc->dev->bdev; if (bio_sectors(bio)) - bio->bi_sector = cc->start + dm_target_offset(ti, bio->bi_sector); + bio->bi_iter.bi_sector = cc->start + + dm_target_offset(ti, bio->bi_iter.bi_sector); return DM_MAPIO_REMAPPED; } - io = crypt_io_alloc(cc, bio, dm_target_offset(ti, bio->bi_sector)); + /* + * Check if bio is too large, split as needed. + */ + if (unlikely(bio->bi_iter.bi_size > (BIO_MAX_PAGES << PAGE_SHIFT)) && + bio_data_dir(bio) == WRITE) + dm_accept_partial_bio(bio, ((BIO_MAX_PAGES << PAGE_SHIFT) >> SECTOR_SHIFT)); + + io = dm_per_bio_data(bio, cc->per_bio_data_size); + crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector)); + io->ctx.req = (struct ablkcipher_request *)(io + 1); if (bio_data_dir(io->base_bio) == READ) { if (kcryptd_io_read(io, GFP_NOWAIT)) - kcryptd_queue_io(io); + kcryptd_queue_read(io); } else kcryptd_queue_crypt(io); @@ -1685,6 +1953,7 @@ { struct crypt_config *cc = ti->private; unsigned i, sz = 0; + int num_feature_args = 0; switch (type) { case STATUSTYPE_INFO: @@ -1703,8 +1972,18 @@ DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset, cc->dev->name, (unsigned long long)cc->start); - if (ti->num_discard_bios) - DMEMIT(" 1 allow_discards"); + num_feature_args += !!ti->num_discard_bios; + num_feature_args += test_bit(DM_CRYPT_SAME_CPU, &cc->flags); + num_feature_args += test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags); + if (num_feature_args) { + DMEMIT(" %d", num_feature_args); + if (ti->num_discard_bios) + DMEMIT(" allow_discards"); + if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags)) + DMEMIT(" same_cpu_crypt"); + if (test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags)) + DMEMIT(" submit_from_crypt_cpus"); + } break; } @@ -1776,21 +2055,6 @@ return -EINVAL; } -static int crypt_merge(struct dm_target *ti, struct bvec_merge_data *bvm, - struct bio_vec *biovec, int max_size) -{ - struct crypt_config *cc = ti->private; - struct request_queue *q = bdev_get_queue(cc->dev->bdev); - - if (!q->merge_bvec_fn) - return max_size; - - bvm->bi_bdev = cc->dev->bdev; - bvm->bi_sector = cc->start + dm_target_offset(ti, bvm->bi_sector); - - return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); -} - static int crypt_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) { @@ -1799,9 +2063,20 @@ return fn(ti, cc->dev, cc->start, ti->len, data); } +static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits) +{ + /* + * Unfortunate constraint that is required to avoid the potential + * for exceeding underlying device's max_segments limits -- due to + * crypt_alloc_buffer() possibly allocating pages for the encryption + * bio that are not as physically contiguous as the original bio. + */ + limits->max_segment_size = PAGE_SIZE; +} + static struct target_type crypt_target = { .name = "crypt", - .version = {1, 12, 1}, + .version = {1, 14, 1}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, @@ -1811,23 +2086,17 @@ .preresume = crypt_preresume, .resume = crypt_resume, .message = crypt_message, - .merge = crypt_merge, .iterate_devices = crypt_iterate_devices, + .io_hints = crypt_io_hints, }; static int __init dm_crypt_init(void) { int r; - _crypt_io_pool = KMEM_CACHE(dm_crypt_io, 0); - if (!_crypt_io_pool) - return -ENOMEM; - r = dm_register_target(&crypt_target); - if (r < 0) { + if (r < 0) DMERR("register failed %d", r); - kmem_cache_destroy(_crypt_io_pool); - } return r; } @@ -1835,12 +2104,11 @@ static void __exit dm_crypt_exit(void) { dm_unregister_target(&crypt_target); - kmem_cache_destroy(_crypt_io_pool); } module_init(dm_crypt_init); module_exit(dm_crypt_exit); -MODULE_AUTHOR("Christophe Saout "); +MODULE_AUTHOR("Jana Saout "); MODULE_DESCRIPTION(DM_NAME " target for transparent encryption / decryption"); MODULE_LICENSE("GPL");