--- zzzz-none-000/linux-3.10.107/drivers/mtd/nand/gpmi-nand/gpmi-lib.c	2017-06-27 09:49:32.000000000 +0000
+++ scorpion-7490-727/linux-3.10.107/drivers/mtd/nand/gpmi-nand/gpmi-lib.c	2021-02-04 17:41:59.000000000 +0000
@@ -20,6 +20,7 @@
  */
 #include <linux/delay.h>
 #include <linux/clk.h>
+#include <linux/slab.h>
 
 #include "gpmi-nand.h"
 #include "gpmi-regs.h"
@@ -187,6 +188,12 @@
 	/* Select BCH ECC. */
 	writel(BM_GPMI_CTRL1_BCH_MODE, r->gpmi_regs + HW_GPMI_CTRL1_SET);
 
+	/*
+	 * Decouple the chip select from dma channel. We use dma0 for all
+	 * the chips.
+	 */
+	writel(BM_GPMI_CTRL1_DECOUPLE_CS, r->gpmi_regs + HW_GPMI_CTRL1_SET);
+
 	gpmi_disable_clk(this);
 	return 0;
 err_out:
@@ -201,30 +208,41 @@
 	u32 reg;
 	int i;
 
-	pr_err("Show GPMI registers :\n");
+	dev_err(this->dev, "Show GPMI registers :\n");
 	for (i = 0; i <= HW_GPMI_DEBUG / 0x10 + 1; i++) {
 		reg = readl(r->gpmi_regs + i * 0x10);
-		pr_err("offset 0x%.3x : 0x%.8x\n", i * 0x10, reg);
+		dev_err(this->dev, "offset 0x%.3x : 0x%.8x\n", i * 0x10, reg);
 	}
 
 	/* start to print out the BCH info */
-	pr_err("Show BCH registers :\n");
+	dev_err(this->dev, "Show BCH registers :\n");
 	for (i = 0; i <= HW_BCH_VERSION / 0x10 + 1; i++) {
 		reg = readl(r->bch_regs + i * 0x10);
-		pr_err("offset 0x%.3x : 0x%.8x\n", i * 0x10, reg);
+		dev_err(this->dev, "offset 0x%.3x : 0x%.8x\n", i * 0x10, reg);
 	}
-	pr_err("BCH Geometry :\n");
-	pr_err("GF length              : %u\n", geo->gf_len);
-	pr_err("ECC Strength           : %u\n", geo->ecc_strength);
-	pr_err("Page Size in Bytes     : %u\n", geo->page_size);
-	pr_err("Metadata Size in Bytes : %u\n", geo->metadata_size);
-	pr_err("ECC Chunk Size in Bytes: %u\n", geo->ecc_chunk_size);
-	pr_err("ECC Chunk Count        : %u\n", geo->ecc_chunk_count);
-	pr_err("Payload Size in Bytes  : %u\n", geo->payload_size);
-	pr_err("Auxiliary Size in Bytes: %u\n", geo->auxiliary_size);
-	pr_err("Auxiliary Status Offset: %u\n", geo->auxiliary_status_offset);
-	pr_err("Block Mark Byte Offset : %u\n", geo->block_mark_byte_offset);
-	pr_err("Block Mark Bit Offset  : %u\n", geo->block_mark_bit_offset);
+	dev_err(this->dev, "BCH Geometry :\n"
+		"GF length              : %u\n"
+		"ECC Strength           : %u\n"
+		"Page Size in Bytes     : %u\n"
+		"Metadata Size in Bytes : %u\n"
+		"ECC Chunk Size in Bytes: %u\n"
+		"ECC Chunk Count        : %u\n"
+		"Payload Size in Bytes  : %u\n"
+		"Auxiliary Size in Bytes: %u\n"
+		"Auxiliary Status Offset: %u\n"
+		"Block Mark Byte Offset : %u\n"
+		"Block Mark Bit Offset  : %u\n",
+		geo->gf_len,
+		geo->ecc_strength,
+		geo->page_size,
+		geo->metadata_size,
+		geo->ecc_chunk_size,
+		geo->ecc_chunk_count,
+		geo->payload_size,
+		geo->auxiliary_size,
+		geo->auxiliary_status_offset,
+		geo->block_mark_byte_offset,
+		geo->block_mark_bit_offset);
 }
 
 /* Configures the geometry for BCH. */
@@ -259,8 +277,8 @@
 	 * chip, otherwise it will lock up. So we skip resetting BCH on the MX23.
 	 * On the other hand, the MX28 needs the reset, because one case has been
 	 * seen where the BCH produced ECC errors constantly after 10000
-	 * consecutive reboots. The latter case has not been seen on the MX23 yet,
-	 * still we don't know if it could happen there as well.
+	 * consecutive reboots. The latter case has not been seen on the MX23
+	 * yet, still we don't know if it could happen there as well.
 	 */
 	ret = gpmi_reset_block(r->bch_regs, GPMI_IS_MX23(this));
 	if (ret)
@@ -347,7 +365,7 @@
 	improved_timing_is_available =
 		(target.tREA_in_ns >= 0) &&
 		(target.tRLOH_in_ns >= 0) &&
-		(target.tRHOH_in_ns >= 0) ;
+		(target.tRHOH_in_ns >= 0);
 
 	/* Inspect the clock. */
 	nfc->clock_frequency_in_hz = clk_get_rate(r->clock[0]);
@@ -843,7 +861,7 @@
 	struct resources *r = &this->resources;
 	unsigned long rate = clk_get_rate(r->clock[0]);
 	int mode = this->timing_mode;
-	int dll_threshold = 16; /* in ns */
+	int dll_threshold = this->devdata->max_chain_delay;
 	unsigned long delay;
 	unsigned long clk_period;
 	int t_rea;
@@ -868,9 +886,6 @@
 	/* [3] for GPMI_HW_GPMI_CTRL1 */
 	hw->wrn_dly_sel = BV_GPMI_CTRL1_WRN_DLY_SEL_NO_DELAY;
 
-	if (GPMI_IS_MX6Q(this))
-		dll_threshold = 12;
-
 	/*
 	 * Enlarge 10 times for the numerator and denominator in {3}.
 	 * This make us to get more accurate result.
@@ -905,10 +920,14 @@
 	struct resources *r = &this->resources;
 	struct nand_chip *nand = &this->nand;
 	struct mtd_info *mtd = &this->mtd;
-	uint8_t feature[ONFI_SUBFEATURE_PARAM_LEN] = {};
+	uint8_t *feature;
 	unsigned long rate;
 	int ret;
 
+	feature = kzalloc(ONFI_SUBFEATURE_PARAM_LEN, GFP_KERNEL);
+	if (!feature)
+		return -ENOMEM;
+
 	nand->select_chip(mtd, 0);
 
 	/* [1] send SET FEATURE commond to NAND */
@@ -936,11 +955,13 @@
 
 	this->flags |= GPMI_ASYNC_EDO_ENABLED;
 	this->timing_mode = mode;
+	kfree(feature);
 	dev_info(this->dev, "enable the asynchronous EDO mode %d\n", mode);
 	return 0;
 
 err_out:
 	nand->select_chip(mtd, -1);
+	kfree(feature);
 	dev_err(this->dev, "mode:%d ,failed in set feature.\n", mode);
 	return -EINVAL;
 }
@@ -950,7 +971,7 @@
 	struct nand_chip *chip = &this->nand;
 
 	/* Enable the asynchronous EDO feature. */
-	if (GPMI_IS_MX6Q(this) && chip->onfi_version) {
+	if (GPMI_IS_MX6(this) && chip->onfi_version) {
 		int mode = onfi_get_async_timing_mode(chip);
 
 		/* We only support the timing mode 4 and mode 5. */
@@ -980,7 +1001,7 @@
 	/* Enable the clock. */
 	ret = gpmi_enable_clk(this);
 	if (ret) {
-		pr_err("We failed in enable the clk\n");
+		dev_err(this->dev, "We failed in enable the clk\n");
 		goto err_out;
 	}
 
@@ -997,7 +1018,7 @@
 	/* [1] Set HW_GPMI_TIMING0 */
 	reg = BF_GPMI_TIMING0_ADDRESS_SETUP(hw.address_setup_in_cycles) |
 		BF_GPMI_TIMING0_DATA_HOLD(hw.data_hold_in_cycles) |
-		BF_GPMI_TIMING0_DATA_SETUP(hw.data_setup_in_cycles) ;
+		BF_GPMI_TIMING0_DATA_SETUP(hw.data_setup_in_cycles);
 
 	writel(reg, gpmi_regs + HW_GPMI_TIMING0);
 
@@ -1072,12 +1093,19 @@
 	if (GPMI_IS_MX23(this)) {
 		mask = MX23_BM_GPMI_DEBUG_READY0 << chip;
 		reg = readl(r->gpmi_regs + HW_GPMI_DEBUG);
-	} else if (GPMI_IS_MX28(this) || GPMI_IS_MX6Q(this)) {
+	} else if (GPMI_IS_MX28(this) || GPMI_IS_MX6(this)) {
+		/*
+		 * In the imx6, all the ready/busy pins are bound
+		 * together. So we only need to check chip 0.
+		 */
+		if (GPMI_IS_MX6(this))
+			chip = 0;
+
+		/* MX28 shares the same R/B register as MX6Q. */
 		mask = MX28_BF_GPMI_STAT_READY_BUSY(1 << chip);
 		reg = readl(r->gpmi_regs + HW_GPMI_STAT);
 	} else
-		pr_err("unknow arch.\n");
+		dev_err(this->dev, "unknown arch.\n");
 	return reg & mask;
 }
 
 static inline void set_dma_type(struct gpmi_nand_data *this,
@@ -1108,10 +1136,8 @@
 	desc = dmaengine_prep_slave_sg(channel,
 					(struct scatterlist *)pio,
 					ARRAY_SIZE(pio), DMA_TRANS_NONE, 0);
-	if (!desc) {
-		pr_err("step 1 error\n");
-		return -1;
-	}
+	if (!desc)
+		return -EINVAL;
 
 	/* [2] send out the COMMAND + ADDRESS string stored in @buffer */
 	sgl = &this->cmd_sgl;
@@ -1121,11 +1147,8 @@
 	desc = dmaengine_prep_slave_sg(channel,
 				sgl, 1, DMA_MEM_TO_DEV,
 				DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
-
-	if (!desc) {
-		pr_err("step 2 error\n");
-		return -1;
-	}
+	if (!desc)
+		return -EINVAL;
 
 	/* [3] submit the DMA */
 	set_dma_type(this, DMA_FOR_COMMAND);
@@ -1154,20 +1177,17 @@
 	pio[1] = 0;
 	desc = dmaengine_prep_slave_sg(channel, (struct scatterlist *)pio,
 					ARRAY_SIZE(pio), DMA_TRANS_NONE, 0);
-	if (!desc) {
-		pr_err("step 1 error\n");
-		return -1;
-	}
+	if (!desc)
+		return -EINVAL;
 
 	/* [2] send DMA request */
 	prepare_data_dma(this, DMA_TO_DEVICE);
 	desc = dmaengine_prep_slave_sg(channel, &this->data_sgl,
 					1, DMA_MEM_TO_DEV,
 					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
-	if (!desc) {
-		pr_err("step 2 error\n");
-		return -1;
-	}
+	if (!desc)
+		return -EINVAL;
+
 	/* [3] submit the DMA */
 	set_dma_type(this, DMA_FOR_WRITE_DATA);
 	return start_dma_without_bch_irq(this, desc);
@@ -1191,20 +1211,16 @@
 	desc = dmaengine_prep_slave_sg(channel,
 					(struct scatterlist *)pio,
 					ARRAY_SIZE(pio), DMA_TRANS_NONE, 0);
-	if (!desc) {
-		pr_err("step 1 error\n");
-		return -1;
-	}
+	if (!desc)
+		return -EINVAL;
 
 	/* [2] : send DMA request */
 	prepare_data_dma(this, DMA_FROM_DEVICE);
 	desc = dmaengine_prep_slave_sg(channel, &this->data_sgl,
 					1, DMA_DEV_TO_MEM,
 					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
-	if (!desc) {
-		pr_err("step 2 error\n");
-		return -1;
-	}
+	if (!desc)
+		return -EINVAL;
 
 	/* [3] : submit the DMA */
 	set_dma_type(this, DMA_FOR_READ_DATA);
@@ -1249,10 +1265,9 @@
 					(struct scatterlist *)pio,
 					ARRAY_SIZE(pio), DMA_TRANS_NONE,
 					DMA_CTRL_ACK);
-	if (!desc) {
-		pr_err("step 2 error\n");
-		return -1;
-	}
+	if (!desc)
+		return -EINVAL;
+
 	set_dma_type(this, DMA_FOR_WRITE_ECC_PAGE);
 	return start_dma_with_bch_irq(this, desc);
 }
@@ -1284,10 +1299,8 @@
 	desc = dmaengine_prep_slave_sg(channel,
 				(struct scatterlist *)pio, 2,
 				DMA_TRANS_NONE, 0);
-	if (!desc) {
-		pr_err("step 1 error\n");
-		return -1;
-	}
+	if (!desc)
+		return -EINVAL;
 
 	/* [2] Enable the BCH block and read. */
 	command_mode = BV_GPMI_CTRL0_COMMAND_MODE__READ;
@@ -1314,10 +1327,8 @@
 					(struct scatterlist *)pio,
 					ARRAY_SIZE(pio), DMA_TRANS_NONE,
 					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
-	if (!desc) {
-		pr_err("step 2 error\n");
-		return -1;
-	}
+	if (!desc)
+		return -EINVAL;
 
 	/* [3] Disable the BCH block */
 	command_mode = BV_GPMI_CTRL0_COMMAND_MODE__WAIT_FOR_READY;
@@ -1335,12 +1346,163 @@
 					(struct scatterlist *)pio, 3,
 					DMA_TRANS_NONE,
 					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
-	if (!desc) {
-		pr_err("step 3 error\n");
-		return -1;
-	}
+	if (!desc)
+		return -EINVAL;
 
 	/* [4] submit the DMA */
 	set_dma_type(this, DMA_FOR_READ_ECC_PAGE);
 	return start_dma_with_bch_irq(this, desc);
 }
+
+/**
+ * gpmi_copy_bits - copy bits from one memory region to another
+ * @dst: destination buffer
+ * @dst_bit_off: bit offset we're starting to write at
+ * @src: source buffer
+ * @src_bit_off: bit offset we're starting to read from
+ * @nbits: number of bits to copy
+ *
+ * This functions copies bits from one memory region to another, and is used by
+ * the GPMI driver to copy ECC sections which are not guaranteed to be byte
+ * aligned.
+ *
+ * src and dst should not overlap.
+ *
+ */
+void gpmi_copy_bits(u8 *dst, size_t dst_bit_off,
+		    const u8 *src, size_t src_bit_off,
+		    size_t nbits)
+{
+	size_t i;
+	size_t nbytes;
+	u32 src_buffer = 0;
+	size_t bits_in_src_buffer = 0;
+
+	if (!nbits)
+		return;
+
+	/*
+	 * Move src and dst pointers to the closest byte pointer and store bit
+	 * offsets within a byte.
+	 */
+	src += src_bit_off / 8;
+	src_bit_off %= 8;
+
+	dst += dst_bit_off / 8;
+	dst_bit_off %= 8;
+
+	/*
+	 * Initialize the src_buffer value with bits available in the first
+	 * byte of data so that we end up with a byte aligned src pointer.
+	 */
+	if (src_bit_off) {
+		src_buffer = src[0] >> src_bit_off;
+		if (nbits >= (8 - src_bit_off)) {
+			bits_in_src_buffer += 8 - src_bit_off;
+		} else {
+			src_buffer &= GENMASK(nbits - 1, 0);
+			bits_in_src_buffer += nbits;
+		}
+		nbits -= bits_in_src_buffer;
+		src++;
+	}
+
+	/* Calculate the number of bytes that can be copied from src to dst. */
+	nbytes = nbits / 8;
+
+	/* Try to align dst to a byte boundary. */
+	if (dst_bit_off) {
+		if (bits_in_src_buffer < (8 - dst_bit_off) && nbytes) {
+			src_buffer |= src[0] << bits_in_src_buffer;
+			bits_in_src_buffer += 8;
+			src++;
+			nbytes--;
+		}
+
+		if (bits_in_src_buffer >= (8 - dst_bit_off)) {
+			dst[0] &= GENMASK(dst_bit_off - 1, 0);
+			dst[0] |= src_buffer << dst_bit_off;
+			src_buffer >>= (8 - dst_bit_off);
+			bits_in_src_buffer -= (8 - dst_bit_off);
+			dst_bit_off = 0;
+			dst++;
+			if (bits_in_src_buffer > 7) {
+				bits_in_src_buffer -= 8;
+				dst[0] = src_buffer;
+				dst++;
+				src_buffer >>= 8;
+			}
+		}
+	}
+
+	if (!bits_in_src_buffer && !dst_bit_off) {
+		/*
+		 * Both src and dst pointers are byte aligned, thus we can
+		 * just use the optimized memcpy function.
+		 */
+		if (nbytes)
+			memcpy(dst, src, nbytes);
+	} else {
+		/*
+		 * src buffer is not byte aligned, hence we have to copy each
+		 * src byte to the src_buffer variable before extracting a byte
+		 * to store in dst.
+		 */
+		for (i = 0; i < nbytes; i++) {
+			src_buffer |= src[i] << bits_in_src_buffer;
+			dst[i] = src_buffer;
+			src_buffer >>= 8;
+		}
+	}
+	/* Update dst and src pointers */
+	dst += nbytes;
+	src += nbytes;
+
+	/*
+	 * nbits is the number of remaining bits. It should not exceed 8 as
+	 * we've already copied as much bytes as possible.
+	 */
+	nbits %= 8;
+
+	/*
+	 * If there's no more bits to copy to the destination and src buffer
+	 * was already byte aligned, then we're done.
+	 */
+	if (!nbits && !bits_in_src_buffer)
+		return;
+
+	/* Copy the remaining bits to src_buffer */
+	if (nbits)
+		src_buffer |= (*src & GENMASK(nbits - 1, 0)) <<
+			      bits_in_src_buffer;
+	bits_in_src_buffer += nbits;
+
+	/*
+	 * In case there were not enough bits to get a byte aligned dst buffer
+	 * prepare the src_buffer variable to match the dst organization (shift
+	 * src_buffer by dst_bit_off and retrieve the least significant bits
+	 * from dst).
+	 */
+	if (dst_bit_off)
+		src_buffer = (src_buffer << dst_bit_off) |
+			     (*dst & GENMASK(dst_bit_off - 1, 0));
+	bits_in_src_buffer += dst_bit_off;
+
+	/*
+	 * Keep most significant bits from dst if we end up with an unaligned
+	 * number of bits.
+	 */
+	nbytes = bits_in_src_buffer / 8;
+	if (bits_in_src_buffer % 8) {
+		src_buffer |= (dst[nbytes] &
+			       GENMASK(7, bits_in_src_buffer % 8)) <<
+			      (nbytes * 8);
+		nbytes++;
+	}
+
+	/* Copy the remaining bytes to dst */
+	for (i = 0; i < nbytes; i++) {
+		dst[i] = src_buffer;
+		src_buffer >>= 8;
+	}
+}
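Editor's note: the bit-packing convention used by the new gpmi_copy_bits() helper can be illustrated with a small hand-worked call. The sketch below is not part of the patch; the buffer values, the wrapper function name and the assumption that the helper's prototype is visible (e.g. through the driver's gpmi-nand.h) are made up for this note. Bit offsets count from the least significant bit of each byte, so copying 10 bits from bit 3 of src into dst at bit 6 rewrites only bits 6-7 of dst[0] and all of dst[1], leaving every other bit of dst untouched.

	#include <linux/types.h>

	/* Illustrative only: exercise gpmi_copy_bits() with made-up buffers. */
	static void gpmi_copy_bits_example(void)
	{
		u8 src[2] = { 0xab, 0xcd };		/* source bits, LSB first */
		u8 dst[3] = { 0xff, 0x00, 0xff };	/* destination to patch */

		/*
		 * Take 10 bits starting at bit 3 of src (bits 3..7 of src[0],
		 * then bits 0..4 of src[1]) and store them at bit 6 of dst.
		 */
		gpmi_copy_bits(dst, 6, src, 3, 10);

		/*
		 * Worked out by hand from the algorithm above, dst should now
		 * be { 0x7f, 0x6d, 0xff }: only the 10-bit window changed.
		 */
	}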