--- zzzz-none-000/linux-3.10.107/fs/btrfs/raid56.c	2017-06-27 09:49:32.000000000 +0000
+++ scorpion-7490-727/linux-3.10.107/fs/btrfs/raid56.c	2021-02-04 17:41:59.000000000 +0000
@@ -33,7 +33,6 @@
 #include
 #include
 #include
-#include "compat.h"
 #include "ctree.h"
 #include "extent_map.h"
 #include "disk-io.h"
@@ -59,20 +58,19 @@
 
  */
 #define RBIO_CACHE_READY_BIT 3
-
 #define RBIO_CACHE_SIZE 1024
 
+enum btrfs_rbio_ops {
+    BTRFS_RBIO_WRITE,
+    BTRFS_RBIO_READ_REBUILD,
+    BTRFS_RBIO_PARITY_SCRUB,
+    BTRFS_RBIO_REBUILD_MISSING,
+};
+
 struct btrfs_raid_bio {
     struct btrfs_fs_info *fs_info;
     struct btrfs_bio *bbio;
 
-    /*
-     * logical block numbers for the start of each stripe
-     * The last one or two are p/q.  These are sorted,
-     * so raid_map[0] is the start of our full stripe
-     */
-    u64 *raid_map;
-
     /* while we're doing rmw on a stripe
      * we put it into a hash table so we can
      * lock the stripe and merge more rbios
@@ -118,13 +116,16 @@
     /* number of data stripes (no p/q) */
     int nr_data;
 
+    int real_stripes;
+
+    int stripe_npages;
     /*
      * set if we're doing a parity rebuild
      * for a read from higher up, which is handled
      * differently from a parity rebuild as part of
      * rmw
      */
-    int read_rebuild;
+    enum btrfs_rbio_ops operation;
 
     /* first bad stripe */
     int faila;
@@ -132,6 +133,7 @@
     /* second bad stripe (for raid6 use) */
     int failb;
 
+    int scrubp;
     /*
      * number of pages needed to represent the full
      * stripe
@@ -145,8 +147,13 @@
      */
     int bio_list_bytes;
 
+    int generic_bio_cnt;
+
     atomic_t refs;
 
+    atomic_t stripes_pending;
+
+    atomic_t error;
     /*
      * these are two arrays of pointers.  We allocate the
      * rbio big enough to hold them both and setup their
@@ -163,6 +170,11 @@
      * here for faster lookup
      */
     struct page **bio_pages;
+
+    /*
+     * bitmap to record which horizontal stripe has data
+     */
+    unsigned long *dbitmap;
 };
 
 static int __raid56_parity_recover(struct btrfs_raid_bio *rbio);
@@ -177,6 +189,10 @@
 static void index_rbio_pages(struct btrfs_raid_bio *rbio);
 static int alloc_rbio_pages(struct btrfs_raid_bio *rbio);
 
+static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
+                                         int need_check);
+static void async_scrub_parity(struct btrfs_raid_bio *rbio);
+
 /*
  * the stripe hash table is used for locking, and to collect
  * bios in hopes of making a full stripe
@@ -222,12 +238,8 @@
     }
 
     x = cmpxchg(&info->stripe_hash_table, NULL, table);
-    if (x) {
-        if (is_vmalloc_addr(x))
-            vfree(x);
-        else
-            kfree(x);
-    }
+    if (x)
+        kvfree(x);
     return 0;
 }
 
@@ -272,7 +284,7 @@
  */
 static int rbio_bucket(struct btrfs_raid_bio *rbio)
 {
-    u64 num = rbio->raid_map[0];
+    u64 num = rbio->bbio->raid_map[0];
 
     /*
     * we shift down quite a bit.  We're using byte
@@ -325,6 +337,7 @@
 {
     bio_list_merge(&dest->bio_list, &victim->bio_list);
     dest->bio_list_bytes += victim->bio_list_bytes;
+    dest->generic_bio_cnt += victim->generic_bio_cnt;
     bio_list_init(&victim->bio_list);
 }
 
@@ -437,10 +450,7 @@
     if (!info->stripe_hash_table)
         return;
     btrfs_clear_rbio_cache(info);
-    if (is_vmalloc_addr(info->stripe_hash_table))
-        vfree(info->stripe_hash_table);
-    else
-        kfree(info->stripe_hash_table);
+    kvfree(info->stripe_hash_table);
     info->stripe_hash_table = NULL;
 }
 
@@ -574,15 +584,28 @@
         test_bit(RBIO_CACHE_BIT, &cur->flags))
         return 0;
 
-    if (last->raid_map[0] !=
-        cur->raid_map[0])
+    if (last->bbio->raid_map[0] !=
+        cur->bbio->raid_map[0])
         return 0;
 
-    /* reads can't merge with writes */
-    if (last->read_rebuild !=
-        cur->read_rebuild) {
+    /* we can't merge with different operations */
+    if (last->operation != cur->operation)
+        return 0;
+    /*
+     * We've need read the full stripe from the drive.
+     * check and repair the parity and write the new results.
+     *
+     * We're not allowed to add any new bios to the
+     * bio list here, anyone else that wants to
+     * change this stripe needs to do their own rmw.
+     */
+    if (last->operation == BTRFS_RBIO_PARITY_SCRUB ||
+        cur->operation == BTRFS_RBIO_PARITY_SCRUB)
+        return 0;
+
+    if (last->operation == BTRFS_RBIO_REBUILD_MISSING ||
+        cur->operation == BTRFS_RBIO_REBUILD_MISSING)
         return 0;
-    }
 
     return 1;
 }
@@ -602,7 +625,7 @@
  */
 static struct page *rbio_qstripe_page(struct btrfs_raid_bio *rbio, int index)
 {
-    if (rbio->nr_data + 1 == rbio->bbio->num_stripes)
+    if (rbio->nr_data + 1 == rbio->real_stripes)
         return NULL;
 
     index += ((rbio->nr_data + 1) * rbio->stripe_len) >>
@@ -648,7 +671,7 @@
     spin_lock_irqsave(&h->lock, flags);
     list_for_each_entry(cur, &h->hash_list, hash_list) {
         walk++;
-        if (cur->raid_map[0] == rbio->raid_map[0]) {
+        if (cur->bbio->raid_map[0] == rbio->bbio->raid_map[0]) {
             spin_lock(&cur->bio_list_lock);
 
             /* can we steal this cached rbio's pages? */
@@ -773,15 +796,25 @@
             spin_unlock(&rbio->bio_list_lock);
             spin_unlock_irqrestore(&h->lock, flags);
 
-            if (next->read_rebuild)
+            if (next->operation == BTRFS_RBIO_READ_REBUILD)
                 async_read_rebuild(next);
-            else {
+            else if (next->operation == BTRFS_RBIO_REBUILD_MISSING) {
+                steal_rbio(rbio, next);
+                async_read_rebuild(next);
+            } else if (next->operation == BTRFS_RBIO_WRITE) {
                 steal_rbio(rbio, next);
                 async_rmw_stripe(next);
+            } else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) {
+                steal_rbio(rbio, next);
+                async_scrub_parity(next);
             }
 
             goto done_nolock;
-    } else if (waitqueue_active(&h->wait)) {
+    /*
+     * The barrier for this waitqueue_active is not needed,
+     * we're protected by h->lock and can't miss a wakeup.
+     */
+    } else if (waitqueue_active(&h->wait)) {
         spin_unlock(&rbio->bio_list_lock);
         spin_unlock_irqrestore(&h->lock, flags);
         wake_up(&h->wait);
@@ -815,8 +848,8 @@
             rbio->stripe_pages[i] = NULL;
         }
     }
-    kfree(rbio->raid_map);
-    kfree(rbio->bbio);
+
+    btrfs_put_bbio(rbio->bbio);
     kfree(rbio);
 }
 
@@ -830,18 +863,21 @@
  * this frees the rbio and runs through all the bios in the
  * bio_list and calls end_io on them
  */
-static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err, int uptodate)
+static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err)
 {
     struct bio *cur = bio_list_get(&rbio->bio_list);
     struct bio *next;
+
+    if (rbio->generic_bio_cnt)
+        btrfs_bio_counter_sub(rbio->fs_info, rbio->generic_bio_cnt);
+
     free_raid_bio(rbio);
 
     while (cur) {
         next = cur->bi_next;
         cur->bi_next = NULL;
-        if (uptodate)
-            set_bit(BIO_UPTODATE, &cur->bi_flags);
-        bio_endio(cur, err);
+        cur->bi_error = err;
+        bio_endio(cur);
         cur = next;
     }
 }
@@ -850,25 +886,26 @@
  * end io function used by finish_rmw.  When we finally
  * get here, we've written a full stripe
  */
-static void raid_write_end_io(struct bio *bio, int err)
+static void raid_write_end_io(struct bio *bio)
 {
     struct btrfs_raid_bio *rbio = bio->bi_private;
+    int err = bio->bi_error;
 
     if (err)
         fail_bio_stripe(rbio, bio);
 
     bio_put(bio);
 
-    if (!atomic_dec_and_test(&rbio->bbio->stripes_pending))
+    if (!atomic_dec_and_test(&rbio->stripes_pending))
         return;
 
     err = 0;
 
     /* OK, we have read all the stripes we need to. */
-    if (atomic_read(&rbio->bbio->error) > rbio->bbio->max_errors)
+    if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
         err = -EIO;
 
-    rbio_orig_end_io(rbio, err, 0);
+    rbio_orig_end_io(rbio, err);
     return;
 }
@@ -913,7 +950,7 @@
 static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes)
 {
     unsigned long nr = stripe_len * nr_stripes;
-    return (nr + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+    return DIV_ROUND_UP(nr, PAGE_CACHE_SIZE);
 }
 
@@ -921,21 +958,20 @@
  * this does not allocate any pages for rbio->pages.
  */
 static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root,
-                          struct btrfs_bio *bbio, u64 *raid_map,
-                          u64 stripe_len)
+                          struct btrfs_bio *bbio, u64 stripe_len)
 {
     struct btrfs_raid_bio *rbio;
     int nr_data = 0;
-    int num_pages = rbio_nr_pages(stripe_len, bbio->num_stripes);
+    int real_stripes = bbio->num_stripes - bbio->num_tgtdevs;
+    int num_pages = rbio_nr_pages(stripe_len, real_stripes);
+    int stripe_npages = DIV_ROUND_UP(stripe_len, PAGE_SIZE);
     void *p;
 
-    rbio = kzalloc(sizeof(*rbio) + num_pages * sizeof(struct page *) * 2,
+    rbio = kzalloc(sizeof(*rbio) + num_pages * sizeof(struct page *) * 2 +
+                   DIV_ROUND_UP(stripe_npages, BITS_PER_LONG / 8),
                    GFP_NOFS);
-    if (!rbio) {
-        kfree(raid_map);
-        kfree(bbio);
+    if (!rbio)
         return ERR_PTR(-ENOMEM);
-    }
 
     bio_list_init(&rbio->bio_list);
     INIT_LIST_HEAD(&rbio->plug_list);
@@ -943,13 +979,16 @@
     INIT_LIST_HEAD(&rbio->stripe_cache);
     INIT_LIST_HEAD(&rbio->hash_list);
     rbio->bbio = bbio;
-    rbio->raid_map = raid_map;
     rbio->fs_info = root->fs_info;
     rbio->stripe_len = stripe_len;
     rbio->nr_pages = num_pages;
+    rbio->real_stripes = real_stripes;
+    rbio->stripe_npages = stripe_npages;
     rbio->faila = -1;
     rbio->failb = -1;
     atomic_set(&rbio->refs, 1);
+    atomic_set(&rbio->error, 0);
+    atomic_set(&rbio->stripes_pending, 0);
 
     /*
      * the stripe_pages and bio_pages array point to the extra
@@ -958,11 +997,14 @@
     p = rbio + 1;
     rbio->stripe_pages = p;
     rbio->bio_pages = p + sizeof(struct page *) * num_pages;
+    rbio->dbitmap = p + sizeof(struct page *) * num_pages * 2;
 
-    if (raid_map[bbio->num_stripes - 1] == RAID6_Q_STRIPE)
-        nr_data = bbio->num_stripes - 2;
+    if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
+        nr_data = real_stripes - 1;
+    else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
+        nr_data = real_stripes - 2;
     else
-        nr_data = bbio->num_stripes - 1;
+        BUG();
 
     rbio->nr_data = nr_data;
     return rbio;
@@ -1033,15 +1075,15 @@
     /* see if we can add this page onto our existing bio */
     if (last) {
-        last_end = (u64)last->bi_sector << 9;
-        last_end += last->bi_size;
+        last_end = (u64)last->bi_iter.bi_sector << 9;
+        last_end += last->bi_iter.bi_size;
 
         /*
          * we can't merge these if they are from different
         * devices or if they are not contiguous
         */
        if (last_end == disk_start && stripe->dev->bdev &&
-           test_bit(BIO_UPTODATE, &last->bi_flags) &&
+           !last->bi_error &&
            last->bi_bdev == stripe->dev->bdev) {
            ret = bio_add_page(last, page, PAGE_CACHE_SIZE, 0);
            if (ret == PAGE_CACHE_SIZE)
@@ -1054,10 +1096,9 @@
     if (!bio)
         return -ENOMEM;
 
-    bio->bi_size = 0;
+    bio->bi_iter.bi_size = 0;
     bio->bi_bdev = stripe->dev->bdev;
-    bio->bi_sector = disk_start >> 9;
-    set_bit(BIO_UPTODATE, &bio->bi_flags);
+    bio->bi_iter.bi_sector = disk_start >> 9;
     bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
 
     bio_list_add(bio_list, bio);
@@ -1074,7 +1115,7 @@
 static void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio)
 {
     if (rbio->faila >= 0 || rbio->failb >= 0) {
-        BUG_ON(rbio->faila == rbio->bbio->num_stripes - 1);
+        BUG_ON(rbio->faila == rbio->real_stripes - 1);
         __raid56_parity_recover(rbio);
     } else {
         finish_rmw(rbio);
@@ -1112,8 +1153,8 @@
 
     spin_lock_irq(&rbio->bio_list_lock);
     bio_list_for_each(bio, &rbio->bio_list) {
-        start = (u64)bio->bi_sector << 9;
-        stripe_offset = start - rbio->raid_map[0];
+        start = (u64)bio->bi_iter.bi_sector << 9;
+        stripe_offset = start - rbio->bbio->raid_map[0];
         page_index = stripe_offset >> PAGE_CACHE_SHIFT;
 
         for (i = 0; i < bio->bi_vcnt; i++) {
@@ -1135,7 +1176,7 @@
 static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
 {
     struct btrfs_bio *bbio = rbio->bbio;
-    void *pointers[bbio->num_stripes];
+    void *pointers[rbio->real_stripes];
     int stripe_len = rbio->stripe_len;
     int nr_data = rbio->nr_data;
     int stripe;
@@ -1149,11 +1190,11 @@
 
     bio_list_init(&bio_list);
 
-    if (bbio->num_stripes - rbio->nr_data == 1) {
-        p_stripe = bbio->num_stripes - 1;
-    } else if (bbio->num_stripes - rbio->nr_data == 2) {
-        p_stripe = bbio->num_stripes - 2;
-        q_stripe = bbio->num_stripes - 1;
+    if (rbio->real_stripes - rbio->nr_data == 1) {
+        p_stripe = rbio->real_stripes - 1;
+    } else if (rbio->real_stripes - rbio->nr_data == 2) {
+        p_stripe = rbio->real_stripes - 2;
+        q_stripe = rbio->real_stripes - 1;
     } else {
         BUG();
     }
@@ -1170,7 +1211,7 @@
     set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
     spin_unlock_irq(&rbio->bio_list_lock);
 
-    atomic_set(&rbio->bbio->error, 0);
+    atomic_set(&rbio->error, 0);
 
     /*
      * now that we've set rmw_locked, run through the
@@ -1210,7 +1251,7 @@
             SetPageUptodate(p);
             pointers[stripe++] = kmap(p);
 
-            raid6_call.gen_syndrome(bbio->num_stripes, PAGE_SIZE,
+            raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE,
                                     pointers);
         } else {
             /* raid5 */
@@ -1219,7 +1260,7 @@
         }
 
 
-        for (stripe = 0; stripe < bbio->num_stripes; stripe++)
+        for (stripe = 0; stripe < rbio->real_stripes; stripe++)
             kunmap(page_in_rbio(rbio, stripe, pagenr, 0));
     }
 
@@ -1228,7 +1269,7 @@
      * higher layers (the bio_list in our rbio) and our p/q.  Ignore
      * everything else.
      */
-    for (stripe = 0; stripe < bbio->num_stripes; stripe++) {
+    for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
         for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
             struct page *page;
             if (stripe < rbio->nr_data) {
@@ -1246,8 +1287,34 @@
         }
     }
 
-    atomic_set(&bbio->stripes_pending, bio_list_size(&bio_list));
-    BUG_ON(atomic_read(&bbio->stripes_pending) == 0);
+    if (likely(!bbio->num_tgtdevs))
+        goto write_data;
+
+    for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
+        if (!bbio->tgtdev_map[stripe])
+            continue;
+
+        for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
+            struct page *page;
+            if (stripe < rbio->nr_data) {
+                page = page_in_rbio(rbio, stripe, pagenr, 1);
+                if (!page)
+                    continue;
+            } else {
+                page = rbio_stripe_page(rbio, stripe, pagenr);
+            }
+
+            ret = rbio_add_io_page(rbio, &bio_list, page,
+                                   rbio->bbio->tgtdev_map[stripe],
+                                   pagenr, rbio->stripe_len);
+            if (ret)
+                goto cleanup;
+        }
+    }
+
+write_data:
+    atomic_set(&rbio->stripes_pending, bio_list_size(&bio_list));
+    BUG_ON(atomic_read(&rbio->stripes_pending) == 0);
 
     while (1) {
         bio = bio_list_pop(&bio_list);
@@ -1256,13 +1323,12 @@
 
         bio->bi_private = rbio;
         bio->bi_end_io = raid_write_end_io;
-        BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags));
         submit_bio(WRITE, bio);
     }
     return;
 
 cleanup:
-    rbio_orig_end_io(rbio, -EIO, 0);
+    rbio_orig_end_io(rbio, -EIO);
 }
 
@@ -1273,7 +1339,7 @@
 static int find_bio_stripe(struct btrfs_raid_bio *rbio,
                            struct bio *bio)
 {
-    u64 physical = bio->bi_sector;
+    u64 physical = bio->bi_iter.bi_sector;
     u64 stripe_start;
     int i;
     struct btrfs_bio_stripe *stripe;
@@ -1284,7 +1350,8 @@
         stripe = &rbio->bbio->stripes[i];
         stripe_start = stripe->physical;
         if (physical >= stripe_start &&
-            physical < stripe_start + rbio->stripe_len) {
+            physical < stripe_start + rbio->stripe_len &&
+            bio->bi_bdev == stripe->dev->bdev) {
             return i;
         }
     }
@@ -1299,14 +1366,14 @@
 static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio,
                                    struct bio *bio)
 {
-    u64 logical = bio->bi_sector;
+    u64 logical = bio->bi_iter.bi_sector;
     u64 stripe_start;
     int i;
 
     logical <<= 9;
 
     for (i = 0; i < rbio->nr_data; i++) {
-        stripe_start = rbio->raid_map[i];
+        stripe_start = rbio->bbio->raid_map[i];
         if (logical >= stripe_start &&
             logical < stripe_start + rbio->stripe_len) {
             return i;
@@ -1332,11 +1399,11 @@
     if (rbio->faila == -1) {
         /* first failure on this rbio */
         rbio->faila = failed;
-        atomic_inc(&rbio->bbio->error);
+        atomic_inc(&rbio->error);
     } else if (rbio->failb == -1) {
         /* second failure on this rbio */
         rbio->failb = failed;
-        atomic_inc(&rbio->bbio->error);
+        atomic_inc(&rbio->error);
     } else {
         ret = -EIO;
     }
@@ -1384,22 +1451,21 @@
 * This will usually kick off finish_rmw once all the bios are read in, but it
 * may trigger parity reconstruction if we had any errors along the way
 */
-static void raid_rmw_end_io(struct bio *bio, int err)
+static void raid_rmw_end_io(struct bio *bio)
 {
     struct btrfs_raid_bio *rbio = bio->bi_private;
 
-    if (err)
+    if (bio->bi_error)
         fail_bio_stripe(rbio, bio);
     else
         set_bio_pages_uptodate(bio);
 
     bio_put(bio);
 
-    if (!atomic_dec_and_test(&rbio->bbio->stripes_pending))
+    if (!atomic_dec_and_test(&rbio->stripes_pending))
         return;
 
-    err = 0;
-
-    if (atomic_read(&rbio->bbio->error) > rbio->bbio->max_errors)
+    if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
         goto cleanup;
 
     /*
@@ -1412,25 +1478,25 @@
 
 cleanup:
 
-    rbio_orig_end_io(rbio, -EIO, 0);
+    rbio_orig_end_io(rbio, -EIO);
 }
 
 static void async_rmw_stripe(struct btrfs_raid_bio *rbio)
 {
-    rbio->work.flags = 0;
-    rbio->work.func = rmw_work;
+    btrfs_init_work(&rbio->work, btrfs_rmw_helper,
+                    rmw_work, NULL, NULL);
 
-    btrfs_queue_worker(&rbio->fs_info->rmw_workers,
-                       &rbio->work);
+    btrfs_queue_work(rbio->fs_info->rmw_workers,
+                     &rbio->work);
 }
 
 static void async_read_rebuild(struct btrfs_raid_bio *rbio)
 {
-    rbio->work.flags = 0;
-    rbio->work.func = read_rebuild_work;
+    btrfs_init_work(&rbio->work, btrfs_rmw_helper,
+                    read_rebuild_work, NULL, NULL);
 
-    btrfs_queue_worker(&rbio->fs_info->rmw_workers,
-                       &rbio->work);
+    btrfs_queue_work(rbio->fs_info->rmw_workers,
+                     &rbio->work);
 }
 
@@ -1440,10 +1506,9 @@
 static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
 {
     int bios_to_read = 0;
-    struct btrfs_bio *bbio = rbio->bbio;
     struct bio_list bio_list;
     int ret;
-    int nr_pages = (rbio->stripe_len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+    int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
     int pagenr;
     int stripe;
     struct bio *bio;
@@ -1456,7 +1521,7 @@
 
     index_rbio_pages(rbio);
 
-    atomic_set(&rbio->bbio->error, 0);
+    atomic_set(&rbio->error, 0);
     /*
      * build a list of bios to read all the missing parts of this
      * stripe
@@ -1504,7 +1569,7 @@
     * the bbio may be freed once we submit the last bio.  Make sure
     * not to touch it after that
     */
-    atomic_set(&bbio->stripes_pending, bios_to_read);
+    atomic_set(&rbio->stripes_pending, bios_to_read);
     while (1) {
         bio = bio_list_pop(&bio_list);
         if (!bio)
             break;
@@ -1516,14 +1581,13 @@
         btrfs_bio_wq_end_io(rbio->fs_info, bio,
                             BTRFS_WQ_ENDIO_RAID56);
 
-        BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags));
         submit_bio(READ, bio);
     }
     /* the actual write will happen once the reads are done */
     return 0;
 
 cleanup:
-    rbio_orig_end_io(rbio, -EIO, 0);
+    rbio_orig_end_io(rbio, -EIO);
     return -EIO;
 
 finish:
@@ -1540,8 +1604,10 @@
     int ret;
 
     ret = alloc_rbio_parity_pages(rbio);
-    if (ret)
+    if (ret) {
+        __free_raid_bio(rbio);
         return ret;
+    }
 
     ret = lock_stripe_add(rbio);
     if (ret == 0)
@@ -1601,8 +1667,8 @@
                                                  plug_list);
     struct btrfs_raid_bio *rb = container_of(b, struct btrfs_raid_bio,
                                                  plug_list);
-    u64 a_sector = ra->bio_list.head->bi_sector;
-    u64 b_sector = rb->bio_list.head->bi_sector;
+    u64 a_sector = ra->bio_list.head->bi_iter.bi_sector;
+    u64 b_sector = rb->bio_list.head->bi_iter.bi_sector;
 
     if (a_sector < b_sector)
         return -1;
@@ -1666,10 +1732,10 @@
     plug = container_of(cb, struct btrfs_plug_cb, cb);
 
     if (from_schedule) {
-        plug->work.flags = 0;
-        plug->work.func = unplug_work;
-        btrfs_queue_worker(&plug->info->rmw_workers,
-                           &plug->work);
+        btrfs_init_work(&plug->work, btrfs_rmw_helper,
+                        unplug_work, NULL, NULL);
+        btrfs_queue_work(plug->info->rmw_workers,
+                         &plug->work);
         return;
     }
     run_plug(plug);
@@ -1679,28 +1745,35 @@
 * our main entry point for writes from the rest of the FS.
 */
 int raid56_parity_write(struct btrfs_root *root, struct bio *bio,
-                        struct btrfs_bio *bbio, u64 *raid_map,
-                        u64 stripe_len)
+                        struct btrfs_bio *bbio, u64 stripe_len)
 {
     struct btrfs_raid_bio *rbio;
     struct btrfs_plug_cb *plug = NULL;
     struct blk_plug_cb *cb;
+    int ret;
 
-    rbio = alloc_rbio(root, bbio, raid_map, stripe_len);
+    rbio = alloc_rbio(root, bbio, stripe_len);
     if (IS_ERR(rbio)) {
-        kfree(raid_map);
-        kfree(bbio);
+        btrfs_put_bbio(bbio);
         return PTR_ERR(rbio);
     }
     bio_list_add(&rbio->bio_list, bio);
-    rbio->bio_list_bytes = bio->bi_size;
+    rbio->bio_list_bytes = bio->bi_iter.bi_size;
+    rbio->operation = BTRFS_RBIO_WRITE;
+
+    btrfs_bio_counter_inc_noblocked(root->fs_info);
+    rbio->generic_bio_cnt = 1;
 
     /*
      * don't plug on full rbios, just get them out the door
      * as quickly as we can
      */
-    if (rbio_is_full(rbio))
-        return full_stripe_write(rbio);
+    if (rbio_is_full(rbio)) {
+        ret = full_stripe_write(rbio);
+        if (ret)
+            btrfs_bio_counter_dec(root->fs_info);
+        return ret;
+    }
 
     cb = blk_check_plugged(btrfs_raid_unplug, root->fs_info,
                            sizeof(*plug));
@@ -1711,10 +1784,13 @@
             INIT_LIST_HEAD(&plug->rbio_list);
         }
         list_add_tail(&rbio->plug_list, &plug->rbio_list);
+        ret = 0;
     } else {
-        return __raid56_parity_write(rbio);
+        ret = __raid56_parity_write(rbio);
+        if (ret)
+            btrfs_bio_counter_dec(root->fs_info);
     }
-    return 0;
+    return ret;
 }
 
@@ -1727,13 +1803,12 @@
     int pagenr, stripe;
     void **pointers;
     int faila = -1, failb = -1;
-    int nr_pages = (rbio->stripe_len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+    int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
     struct page *page;
     int err;
     int i;
 
-    pointers = kzalloc(rbio->bbio->num_stripes * sizeof(void *),
-                       GFP_NOFS);
+    pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS);
     if (!pointers) {
         err = -ENOMEM;
         goto cleanup_io;
     }
@@ -1742,7 +1817,8 @@
     faila = rbio->faila;
     failb = rbio->failb;
 
-    if (rbio->read_rebuild) {
+    if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
+        rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
         spin_lock_irq(&rbio->bio_list_lock);
         set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
         spin_unlock_irq(&rbio->bio_list_lock);
@@ -1751,15 +1827,24 @@
     index_rbio_pages(rbio);
 
     for (pagenr = 0; pagenr < nr_pages; pagenr++) {
+        /*
+         * Now we just use bitmap to mark the horizontal stripes in
+         * which we have data when doing parity scrub.
+         */
+        if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB &&
+            !test_bit(pagenr, rbio->dbitmap))
+            continue;
+
         /* setup our array of pointers with pages
          * from each stripe
          */
-        for (stripe = 0; stripe < rbio->bbio->num_stripes; stripe++) {
+        for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
             /*
              * if we're rebuilding a read, we have to use
              * pages from the bio list
             */
-            if (rbio->read_rebuild &&
+            if ((rbio->operation == BTRFS_RBIO_READ_REBUILD ||
+                 rbio->operation == BTRFS_RBIO_REBUILD_MISSING) &&
                 (stripe == faila || stripe == failb)) {
                 page = page_in_rbio(rbio, stripe, pagenr, 0);
             } else {
@@ -1769,9 +1854,7 @@
         }
 
         /* all raid6 handling here */
-        if (rbio->raid_map[rbio->bbio->num_stripes - 1] ==
-            RAID6_Q_STRIPE) {
-
+        if (rbio->bbio->map_type & BTRFS_BLOCK_GROUP_RAID6) {
             /*
              * single failure, rebuild from parity raid5
              * style
@@ -1806,8 +1889,9 @@
              * here due to a crc mismatch and we can't give them the
              * data they want
             */
-            if (rbio->raid_map[failb] == RAID6_Q_STRIPE) {
-                if (rbio->raid_map[faila] == RAID5_P_STRIPE) {
+            if (rbio->bbio->raid_map[failb] == RAID6_Q_STRIPE) {
+                if (rbio->bbio->raid_map[faila] ==
+                    RAID5_P_STRIPE) {
                     err = -EIO;
                     goto cleanup;
                 }
@@ -1818,11 +1902,11 @@
                 goto pstripe;
             }
 
-            if (rbio->raid_map[failb] == RAID5_P_STRIPE) {
-                raid6_datap_recov(rbio->bbio->num_stripes,
+            if (rbio->bbio->raid_map[failb] == RAID5_P_STRIPE) {
+                raid6_datap_recov(rbio->real_stripes,
                                   PAGE_SIZE, faila, pointers);
             } else {
-                raid6_2data_recov(rbio->bbio->num_stripes,
+                raid6_2data_recov(rbio->real_stripes,
                                   PAGE_SIZE, faila, failb,
                                   pointers);
             }
@@ -1852,7 +1936,7 @@
          * know they can be trusted.  If this was a read reconstruction,
          * other endio functions will fiddle the uptodate bits
         */
-        if (!rbio->read_rebuild) {
+        if (rbio->operation == BTRFS_RBIO_WRITE) {
             for (i = 0; i < nr_pages; i++) {
                 if (faila != -1) {
                     page = rbio_stripe_page(rbio, faila, i);
@@ -1864,12 +1948,13 @@
                 }
             }
         }
-        for (stripe = 0; stripe < rbio->bbio->num_stripes; stripe++) {
+        for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
             /*
              * if we're rebuilding a read, we have to use
              * pages from the bio list
             */
-            if (rbio->read_rebuild &&
+            if ((rbio->operation == BTRFS_RBIO_READ_REBUILD ||
+                 rbio->operation == BTRFS_RBIO_REBUILD_MISSING) &&
                 (stripe == faila || stripe == failb)) {
                 page = page_in_rbio(rbio, stripe, pagenr, 0);
             } else {
@@ -1884,20 +1969,27 @@
     kfree(pointers);
 
 cleanup_io:
-
-    if (rbio->read_rebuild) {
+    if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
         if (err == 0)
             cache_rbio_pages(rbio);
         else
             clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
 
-        rbio_orig_end_io(rbio, err, err == 0);
+        rbio_orig_end_io(rbio, err);
+    } else if (rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
+        rbio_orig_end_io(rbio, err);
     } else if (err == 0) {
         rbio->faila = -1;
         rbio->failb = -1;
-        finish_rmw(rbio);
+
+        if (rbio->operation == BTRFS_RBIO_WRITE)
+            finish_rmw(rbio);
+        else if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB)
+            finish_parity_scrub(rbio, 0);
+        else
+            BUG();
     } else {
-        rbio_orig_end_io(rbio, err, 0);
+        rbio_orig_end_io(rbio, err);
     }
 }
 
@@ -1905,7 +1997,7 @@
 * This is called only for stripes we've read from disk to
 * reconstruct the parity.
 */
-static void raid_recover_end_io(struct bio *bio, int err)
+static void raid_recover_end_io(struct bio *bio)
 {
     struct btrfs_raid_bio *rbio = bio->bi_private;
 
@@ -1913,17 +2005,17 @@
     * we only read stripe pages off the disk, set them
     * up to date if there were no errors
     */
-    if (err)
+    if (bio->bi_error)
         fail_bio_stripe(rbio, bio);
     else
         set_bio_pages_uptodate(bio);
     bio_put(bio);
 
-    if (!atomic_dec_and_test(&rbio->bbio->stripes_pending))
+    if (!atomic_dec_and_test(&rbio->stripes_pending))
         return;
 
-    if (atomic_read(&rbio->bbio->error) > rbio->bbio->max_errors)
-        rbio_orig_end_io(rbio, -EIO, 0);
+    if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
+        rbio_orig_end_io(rbio, -EIO);
     else
         __raid_recover_end_io(rbio);
 }
@@ -1939,10 +2031,9 @@
 static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
 {
     int bios_to_read = 0;
-    struct btrfs_bio *bbio = rbio->bbio;
     struct bio_list bio_list;
     int ret;
-    int nr_pages = (rbio->stripe_len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+    int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
     int pagenr;
     int stripe;
     struct bio *bio;
@@ -1953,17 +2044,18 @@
     if (ret)
         goto cleanup;
 
-    atomic_set(&rbio->bbio->error, 0);
+    atomic_set(&rbio->error, 0);
 
     /*
      * read everything that hasn't failed.  Thanks to the
      * stripe cache, it is possible that some or all of these
      * pages are going to be uptodate.
     */
-    for (stripe = 0; stripe < bbio->num_stripes; stripe++) {
-        if (rbio->faila == stripe ||
-            rbio->failb == stripe)
+    for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
+        if (rbio->faila == stripe || rbio->failb == stripe) {
+            atomic_inc(&rbio->error);
             continue;
+        }
 
         for (pagenr = 0; pagenr < nr_pages; pagenr++) {
             struct page *p;
@@ -1991,7 +2083,7 @@
          * were up to date, or we might have no bios to read because
          * the devices were gone.
         */
-        if (atomic_read(&rbio->bbio->error) <= rbio->bbio->max_errors) {
+        if (atomic_read(&rbio->error) <= rbio->bbio->max_errors) {
            __raid_recover_end_io(rbio);
            goto out;
        } else {
@@ -2003,7 +2095,7 @@
     * the bbio may be freed once we submit the last bio.  Make sure
     * not to touch it after that
     */
-    atomic_set(&bbio->stripes_pending, bios_to_read);
+    atomic_set(&rbio->stripes_pending, bios_to_read);
     while (1) {
         bio = bio_list_pop(&bio_list);
         if (!bio)
             break;
@@ -2015,15 +2107,15 @@
         btrfs_bio_wq_end_io(rbio->fs_info, bio,
                             BTRFS_WQ_ENDIO_RAID56);
 
-        BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags));
         submit_bio(READ, bio);
     }
 out:
     return 0;
 
 cleanup:
-    if (rbio->read_rebuild)
-        rbio_orig_end_io(rbio, -EIO, 0);
+    if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
+        rbio->operation == BTRFS_RBIO_REBUILD_MISSING)
+        rbio_orig_end_io(rbio, -EIO);
     return -EIO;
 }
 
@@ -2034,34 +2126,45 @@
 * of the drive.
 */
 int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
-                          struct btrfs_bio *bbio, u64 *raid_map,
-                          u64 stripe_len, int mirror_num)
+                          struct btrfs_bio *bbio, u64 stripe_len,
+                          int mirror_num, int generic_io)
 {
     struct btrfs_raid_bio *rbio;
     int ret;
 
-    rbio = alloc_rbio(root, bbio, raid_map, stripe_len);
+    rbio = alloc_rbio(root, bbio, stripe_len);
     if (IS_ERR(rbio)) {
+        if (generic_io)
+            btrfs_put_bbio(bbio);
         return PTR_ERR(rbio);
     }
 
-    rbio->read_rebuild = 1;
+    rbio->operation = BTRFS_RBIO_READ_REBUILD;
     bio_list_add(&rbio->bio_list, bio);
-    rbio->bio_list_bytes = bio->bi_size;
+    rbio->bio_list_bytes = bio->bi_iter.bi_size;
 
     rbio->faila = find_logical_bio_stripe(rbio, bio);
     if (rbio->faila == -1) {
         BUG();
+        if (generic_io)
+            btrfs_put_bbio(bbio);
         kfree(rbio);
         return -EIO;
     }
 
+    if (generic_io) {
+        btrfs_bio_counter_inc_noblocked(root->fs_info);
+        rbio->generic_bio_cnt = 1;
+    } else {
+        btrfs_get_bbio(bbio);
+    }
+
     /*
      * reconstruct from the q stripe if they are
      * asking for mirror 3
     */
     if (mirror_num == 3)
-        rbio->failb = bbio->num_stripes - 2;
+        rbio->failb = rbio->real_stripes - 2;
 
     ret = lock_stripe_add(rbio);
@@ -2098,3 +2201,535 @@
     rbio = container_of(work, struct btrfs_raid_bio, work);
     __raid56_parity_recover(rbio);
 }
+
+/*
+ * The following code is used to scrub/replace the parity stripe
+ *
+ * Note: We need make sure all the pages that add into the scrub/replace
+ * raid bio are correct and not be changed during the scrub/replace. That
+ * is those pages just hold metadata or file data with checksum.
+ */
+
+struct btrfs_raid_bio *
+raid56_parity_alloc_scrub_rbio(struct btrfs_root *root, struct bio *bio,
+                               struct btrfs_bio *bbio, u64 stripe_len,
+                               struct btrfs_device *scrub_dev,
+                               unsigned long *dbitmap, int stripe_nsectors)
+{
+    struct btrfs_raid_bio *rbio;
+    int i;
+
+    rbio = alloc_rbio(root, bbio, stripe_len);
+    if (IS_ERR(rbio))
+        return NULL;
+    bio_list_add(&rbio->bio_list, bio);
+    /*
+     * This is a special bio which is used to hold the completion handler
+     * and make the scrub rbio is similar to the other types
+     */
+    ASSERT(!bio->bi_iter.bi_size);
+    rbio->operation = BTRFS_RBIO_PARITY_SCRUB;
+
+    for (i = 0; i < rbio->real_stripes; i++) {
+        if (bbio->stripes[i].dev == scrub_dev) {
+            rbio->scrubp = i;
+            break;
+        }
+    }
+
+    /* Now we just support the sectorsize equals to page size */
+    ASSERT(root->sectorsize == PAGE_SIZE);
+    ASSERT(rbio->stripe_npages == stripe_nsectors);
+    bitmap_copy(rbio->dbitmap, dbitmap, stripe_nsectors);
+
+    return rbio;
+}
+
+/* Used for both parity scrub and missing. */
+void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page,
+                            u64 logical)
+{
+    int stripe_offset;
+    int index;
+
+    ASSERT(logical >= rbio->bbio->raid_map[0]);
+    ASSERT(logical + PAGE_SIZE <= rbio->bbio->raid_map[0] +
+           rbio->stripe_len * rbio->nr_data);
+    stripe_offset = (int)(logical - rbio->bbio->raid_map[0]);
+    index = stripe_offset >> PAGE_CACHE_SHIFT;
+    rbio->bio_pages[index] = page;
+}
+
+/*
+ * We just scrub the parity that we have correct data on the same horizontal,
+ * so we needn't allocate all pages for all the stripes.
+ */
+static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
+{
+    int i;
+    int bit;
+    int index;
+    struct page *page;
+
+    for_each_set_bit(bit, rbio->dbitmap, rbio->stripe_npages) {
+        for (i = 0; i < rbio->real_stripes; i++) {
+            index = i * rbio->stripe_npages + bit;
+            if (rbio->stripe_pages[index])
+                continue;
+
+            page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+            if (!page)
+                return -ENOMEM;
+            rbio->stripe_pages[index] = page;
+            ClearPageUptodate(page);
+        }
+    }
+    return 0;
+}
+
+/*
+ * end io function used by finish_rmw.  When we finally
+ * get here, we've written a full stripe
+ */
+static void raid_write_parity_end_io(struct bio *bio)
+{
+    struct btrfs_raid_bio *rbio = bio->bi_private;
+    int err = bio->bi_error;
+
+    if (bio->bi_error)
+        fail_bio_stripe(rbio, bio);
+
+    bio_put(bio);
+
+    if (!atomic_dec_and_test(&rbio->stripes_pending))
+        return;
+
+    err = 0;
+
+    if (atomic_read(&rbio->error))
+        err = -EIO;
+
+    rbio_orig_end_io(rbio, err);
+}
+
+static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
+                                         int need_check)
+{
+    struct btrfs_bio *bbio = rbio->bbio;
+    void *pointers[rbio->real_stripes];
+    DECLARE_BITMAP(pbitmap, rbio->stripe_npages);
+    int nr_data = rbio->nr_data;
+    int stripe;
+    int pagenr;
+    int p_stripe = -1;
+    int q_stripe = -1;
+    struct page *p_page = NULL;
+    struct page *q_page = NULL;
+    struct bio_list bio_list;
+    struct bio *bio;
+    int is_replace = 0;
+    int ret;
+
+    bio_list_init(&bio_list);
+
+    if (rbio->real_stripes - rbio->nr_data == 1) {
+        p_stripe = rbio->real_stripes - 1;
+    } else if (rbio->real_stripes - rbio->nr_data == 2) {
+        p_stripe = rbio->real_stripes - 2;
+        q_stripe = rbio->real_stripes - 1;
+    } else {
+        BUG();
+    }
+
+    if (bbio->num_tgtdevs && bbio->tgtdev_map[rbio->scrubp]) {
+        is_replace = 1;
+        bitmap_copy(pbitmap, rbio->dbitmap, rbio->stripe_npages);
+    }
+
+    /*
+     * Because the higher layers(scrubber) are unlikely to
+     * use this area of the disk again soon, so don't cache
+     * it.
+     */
+    clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
+
+    if (!need_check)
+        goto writeback;
+
+    p_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+    if (!p_page)
+        goto cleanup;
+    SetPageUptodate(p_page);
+
+    if (q_stripe != -1) {
+        q_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+        if (!q_page) {
+            __free_page(p_page);
+            goto cleanup;
+        }
+        SetPageUptodate(q_page);
+    }
+
+    atomic_set(&rbio->error, 0);
+
+    for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) {
+        struct page *p;
+        void *parity;
+        /* first collect one page from each data stripe */
+        for (stripe = 0; stripe < nr_data; stripe++) {
+            p = page_in_rbio(rbio, stripe, pagenr, 0);
+            pointers[stripe] = kmap(p);
+        }
+
+        /* then add the parity stripe */
+        pointers[stripe++] = kmap(p_page);
+
+        if (q_stripe != -1) {
+
+            /*
+             * raid6, add the qstripe and call the
+             * library function to fill in our p/q
+             */
+            pointers[stripe++] = kmap(q_page);
+
+            raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE,
+                                    pointers);
+        } else {
+            /* raid5 */
+            memcpy(pointers[nr_data], pointers[0], PAGE_SIZE);
+            run_xor(pointers + 1, nr_data - 1, PAGE_CACHE_SIZE);
+        }
+
+        /* Check scrubbing parity and repair it */
+        p = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
+        parity = kmap(p);
+        if (memcmp(parity, pointers[rbio->scrubp], PAGE_CACHE_SIZE))
+            memcpy(parity, pointers[rbio->scrubp], PAGE_CACHE_SIZE);
+        else
+            /* Parity is right, needn't writeback */
+            bitmap_clear(rbio->dbitmap, pagenr, 1);
+        kunmap(p);
+
+        for (stripe = 0; stripe < rbio->real_stripes; stripe++)
+            kunmap(page_in_rbio(rbio, stripe, pagenr, 0));
+    }
+
+    __free_page(p_page);
+    if (q_page)
+        __free_page(q_page);
+
+writeback:
+    /*
+     * time to start writing.  Make bios for everything from the
+     * higher layers (the bio_list in our rbio) and our p/q.  Ignore
+     * everything else.
+     */
+    for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) {
+        struct page *page;
+
+        page = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
+        ret = rbio_add_io_page(rbio, &bio_list,
+                               page, rbio->scrubp, pagenr, rbio->stripe_len);
+        if (ret)
+            goto cleanup;
+    }
+
+    if (!is_replace)
+        goto submit_write;
+
+    for_each_set_bit(pagenr, pbitmap, rbio->stripe_npages) {
+        struct page *page;
+
+        page = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
+        ret = rbio_add_io_page(rbio, &bio_list, page,
+                               bbio->tgtdev_map[rbio->scrubp],
+                               pagenr, rbio->stripe_len);
+        if (ret)
+            goto cleanup;
+    }
+
+submit_write:
+    nr_data = bio_list_size(&bio_list);
+    if (!nr_data) {
+        /* Every parity is right */
+        rbio_orig_end_io(rbio, 0);
+        return;
+    }
+
+    atomic_set(&rbio->stripes_pending, nr_data);
+
+    while (1) {
+        bio = bio_list_pop(&bio_list);
+        if (!bio)
+            break;
+
+        bio->bi_private = rbio;
+        bio->bi_end_io = raid_write_parity_end_io;
+        submit_bio(WRITE, bio);
+    }
+    return;
+
+cleanup:
+    rbio_orig_end_io(rbio, -EIO);
+}
+
+static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe)
+{
+    if (stripe >= 0 && stripe < rbio->nr_data)
+        return 1;
+    return 0;
+}
+
+/*
+ * While we're doing the parity check and repair, we could have errors
+ * in reading pages off the disk.  This checks for errors and if we're
+ * not able to read the page it'll trigger parity reconstruction.  The
+ * parity scrub will be finished after we've reconstructed the failed
+ * stripes
+ */
+static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio)
+{
+    if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
+        goto cleanup;
+
+    if (rbio->faila >= 0 || rbio->failb >= 0) {
+        int dfail = 0, failp = -1;
+
+        if (is_data_stripe(rbio, rbio->faila))
+            dfail++;
+        else if (is_parity_stripe(rbio->faila))
+            failp = rbio->faila;
+
+        if (is_data_stripe(rbio, rbio->failb))
+            dfail++;
+        else if (is_parity_stripe(rbio->failb))
+            failp = rbio->failb;
+
+        /*
+         * Because we can not use a scrubbing parity to repair
+         * the data, so the capability of the repair is declined.
+         * (In the case of RAID5, we can not repair anything)
+         */
+        if (dfail > rbio->bbio->max_errors - 1)
+            goto cleanup;
+
+        /*
+         * If all data is good, only parity is correctly, just
+         * repair the parity.
+         */
+        if (dfail == 0) {
+            finish_parity_scrub(rbio, 0);
+            return;
+        }
+
+        /*
+         * Here means we got one corrupted data stripe and one
+         * corrupted parity on RAID6, if the corrupted parity
+         * is scrubbing parity, luckily, use the other one to repair
+         * the data, or we can not repair the data stripe.
+         */
+        if (failp != rbio->scrubp)
+            goto cleanup;
+
+        __raid_recover_end_io(rbio);
+    } else {
+        finish_parity_scrub(rbio, 1);
+    }
+    return;
+
+cleanup:
+    rbio_orig_end_io(rbio, -EIO);
+}
+
+/*
+ * end io for the read phase of the rmw cycle.  All the bios here are physical
+ * stripe bios we've read from the disk so we can recalculate the parity of the
+ * stripe.
+ *
+ * This will usually kick off finish_rmw once all the bios are read in, but it
+ * may trigger parity reconstruction if we had any errors along the way
+ */
+static void raid56_parity_scrub_end_io(struct bio *bio)
+{
+    struct btrfs_raid_bio *rbio = bio->bi_private;
+
+    if (bio->bi_error)
+        fail_bio_stripe(rbio, bio);
+    else
+        set_bio_pages_uptodate(bio);
+
+    bio_put(bio);
+
+    if (!atomic_dec_and_test(&rbio->stripes_pending))
+        return;
+
+    /*
+     * this will normally call finish_rmw to start our write
+     * but if there are any failed stripes we'll reconstruct
+     * from parity first
+     */
+    validate_rbio_for_parity_scrub(rbio);
+}
+
+static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
+{
+    int bios_to_read = 0;
+    struct bio_list bio_list;
+    int ret;
+    int pagenr;
+    int stripe;
+    struct bio *bio;
+
+    ret = alloc_rbio_essential_pages(rbio);
+    if (ret)
+        goto cleanup;
+
+    bio_list_init(&bio_list);
+
+    atomic_set(&rbio->error, 0);
+    /*
+     * build a list of bios to read all the missing parts of this
+     * stripe
+     */
+    for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
+        for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) {
+            struct page *page;
+            /*
+             * we want to find all the pages missing from
+             * the rbio and read them from the disk.  If
+             * page_in_rbio finds a page in the bio list
+             * we don't need to read it off the stripe.
+             */
+            page = page_in_rbio(rbio, stripe, pagenr, 1);
+            if (page)
+                continue;
+
+            page = rbio_stripe_page(rbio, stripe, pagenr);
+            /*
+             * the bio cache may have handed us an uptodate
+             * page.  If so, be happy and use it
+             */
+            if (PageUptodate(page))
+                continue;
+
+            ret = rbio_add_io_page(rbio, &bio_list, page,
+                                   stripe, pagenr, rbio->stripe_len);
+            if (ret)
+                goto cleanup;
+        }
+    }
+
+    bios_to_read = bio_list_size(&bio_list);
+    if (!bios_to_read) {
+        /*
+         * this can happen if others have merged with
+         * us, it means there is nothing left to read.
+         * But if there are missing devices it may not be
+         * safe to do the full stripe write yet.
+         */
+        goto finish;
+    }
+
+    /*
+     * the bbio may be freed once we submit the last bio.  Make sure
+     * not to touch it after that
+     */
+    atomic_set(&rbio->stripes_pending, bios_to_read);
+    while (1) {
+        bio = bio_list_pop(&bio_list);
+        if (!bio)
+            break;
+
+        bio->bi_private = rbio;
+        bio->bi_end_io = raid56_parity_scrub_end_io;
+
+        btrfs_bio_wq_end_io(rbio->fs_info, bio,
+                            BTRFS_WQ_ENDIO_RAID56);
+
+        submit_bio(READ, bio);
+    }
+    /* the actual write will happen once the reads are done */
+    return;
+
+cleanup:
+    rbio_orig_end_io(rbio, -EIO);
+    return;
+
+finish:
+    validate_rbio_for_parity_scrub(rbio);
+}
+
+static void scrub_parity_work(struct btrfs_work *work)
+{
+    struct btrfs_raid_bio *rbio;
+
+    rbio = container_of(work, struct btrfs_raid_bio, work);
+    raid56_parity_scrub_stripe(rbio);
+}
+
+static void async_scrub_parity(struct btrfs_raid_bio *rbio)
+{
+    btrfs_init_work(&rbio->work, btrfs_rmw_helper,
+                    scrub_parity_work, NULL, NULL);
+
+    btrfs_queue_work(rbio->fs_info->rmw_workers,
+                     &rbio->work);
+}
+
+void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
+{
+    if (!lock_stripe_add(rbio))
+        async_scrub_parity(rbio);
+}
+
+/* The following code is used for dev replace of a missing RAID 5/6 device. */
+
+struct btrfs_raid_bio *
+raid56_alloc_missing_rbio(struct btrfs_root *root, struct bio *bio,
+                          struct btrfs_bio *bbio, u64 length)
+{
+    struct btrfs_raid_bio *rbio;
+
+    rbio = alloc_rbio(root, bbio, length);
+    if (IS_ERR(rbio))
+        return NULL;
+
+    rbio->operation = BTRFS_RBIO_REBUILD_MISSING;
+    bio_list_add(&rbio->bio_list, bio);
+    /*
+     * This is a special bio which is used to hold the completion handler
+     * and make the scrub rbio is similar to the other types
+     */
+    ASSERT(!bio->bi_iter.bi_size);
+
+    rbio->faila = find_logical_bio_stripe(rbio, bio);
+    if (rbio->faila == -1) {
+        BUG();
+        kfree(rbio);
+        return NULL;
+    }
+
+    return rbio;
+}
+
+static void missing_raid56_work(struct btrfs_work *work)
+{
+    struct btrfs_raid_bio *rbio;
+
+    rbio = container_of(work, struct btrfs_raid_bio, work);
+    __raid56_parity_recover(rbio);
+}
+
+static void async_missing_raid56(struct btrfs_raid_bio *rbio)
+{
+    btrfs_init_work(&rbio->work, btrfs_rmw_helper,
+                    missing_raid56_work, NULL, NULL);
+
+    btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
+}
+
+void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio)
+{
+    if (!lock_stripe_add(rbio))
+        async_missing_raid56(rbio);
+}