Merge branch 'master' of git://git.denx.de/u-boot-spi
diff --git a/arch/arm/dts/socfpga.dtsi b/arch/arm/dts/socfpga.dtsi
index 969e5ad..bf791c5 100644
--- a/arch/arm/dts/socfpga.dtsi
+++ b/arch/arm/dts/socfpga.dtsi
@@ -637,19 +637,19 @@
 			interrupts = <0 151 4>;
 			clocks = <&qspi_clk>;
 			ext-decoder = <0>;  /* external decoder */
-			num-chipselect = <4>;
+			num-cs = <4>;
 			fifo-depth = <128>;
 			bus-num = <2>;
 			status = "disabled";
 		};
 
 		spi0: spi@fff00000 {
-			compatible = "snps,dw-spi-mmio";
+			compatible = "snps,dw-apb-ssi";
 			#address-cells = <1>;
 			#size-cells = <0>;
 			reg = <0xfff00000 0x1000>;
 			interrupts = <0 154 4>;
-			num-chipselect = <4>;
+			num-cs = <4>;
 			bus-num = <0>;
 			tx-dma-channel = <&pdma 16>;
 			rx-dma-channel = <&pdma 17>;
@@ -658,12 +658,12 @@
 		};
 
 		spi1: spi@fff01000 {
-			compatible = "snps,dw-spi-mmio";
+			compatible = "snps,dw-apb-ssi";
 			#address-cells = <1>;
 			#size-cells = <0>;
 			reg = <0xfff01000 0x1000>;
 			interrupts = <0 156 4>;
-			num-chipselect = <4>;
+			num-cs = <4>;
 			bus-num = <1>;
 			tx-dma-channel = <&pdma 20>;
 			rx-dma-channel = <&pdma 21>;
diff --git a/drivers/spi/cadence_qspi.c b/drivers/spi/cadence_qspi.c
index fa95b19..98ae3b8 100644
--- a/drivers/spi/cadence_qspi.c
+++ b/drivers/spi/cadence_qspi.c
@@ -297,7 +297,7 @@
 
 	/* All other paramters are embedded in the child node */
 	subnode = fdt_first_subnode(blob, node);
-	if (!subnode) {
+	if (subnode < 0) {
 		printf("Error: subnode with SPI flash config missing!\n");
 		return -ENODEV;
 	}
diff --git a/drivers/spi/designware_spi.c b/drivers/spi/designware_spi.c
index 98c9f03..700f616 100644
--- a/drivers/spi/designware_spi.c
+++ b/drivers/spi/designware_spi.c
@@ -164,13 +164,13 @@
 	if (!priv->fifo_len) {
 		u32 fifo;
 
-		for (fifo = 2; fifo <= 257; fifo++) {
+		for (fifo = 2; fifo <= 256; fifo++) {
 			dw_writew(priv, DW_SPI_TXFLTR, fifo);
 			if (fifo != dw_readw(priv, DW_SPI_TXFLTR))
 				break;
 		}
 
-		priv->fifo_len = (fifo == 257) ? 0 : fifo;
+		priv->fifo_len = (fifo == 2) ? 0 : fifo - 1;
 		dw_writew(priv, DW_SPI_TXFLTR, 0);
 	}
 	debug("%s: fifo_len=%d\n", __func__, priv->fifo_len);
@@ -409,7 +409,7 @@
 };
 
 static const struct udevice_id dw_spi_ids[] = {
-	{ .compatible = "snps,dw-spi-mmio" },
+	{ .compatible = "snps,dw-apb-ssi" },
 	{ }
 };
 
diff --git a/drivers/spi/fsl_qspi.c b/drivers/spi/fsl_qspi.c
index d12f420..5e0b069 100644
--- a/drivers/spi/fsl_qspi.c
+++ b/drivers/spi/fsl_qspi.c
@@ -33,6 +33,12 @@
 #define SEQID_PP		6
 #define SEQID_RDID		7
 #define SEQID_BE_4K		8
+#ifdef CONFIG_SPI_FLASH_BAR
+#define SEQID_BRRD		9
+#define SEQID_BRWR		10
+#define SEQID_RDEAR		11
+#define SEQID_WREAR		12
+#endif
 
 /* QSPI CMD */
 #define QSPI_CMD_PP		0x02	/* Page program (up to 256 bytes) */
@@ -44,6 +50,14 @@
 #define QSPI_CMD_SE		0xd8	/* Sector erase (usually 64KiB) */
 #define QSPI_CMD_RDID		0x9f	/* Read JEDEC ID */
 
+/* Used for Micron, Winbond and Macronix flashes */
+#define	QSPI_CMD_WREAR		0xc5	/* EAR register write */
+#define	QSPI_CMD_RDEAR		0xc8	/* EAR register read */
+
+/* Used for Spansion flashes only. */
+#define	QSPI_CMD_BRRD		0x16	/* Bank register read */
+#define	QSPI_CMD_BRWR		0x17	/* Bank register write */
+
 /* 4-byte address QSPI CMD - used on Spansion and some Macronix flashes */
 #define QSPI_CMD_FAST_READ_4B	0x0c    /* Read data bytes (high frequency) */
 #define QSPI_CMD_PP_4B		0x12    /* Page program (up to 256 bytes) */
@@ -114,6 +128,11 @@
 
 	/* Fast Read */
 	lut_base = SEQID_FAST_READ * 4;
+#ifdef CONFIG_SPI_FLASH_BAR
+	qspi_write32(&regs->lut[lut_base], OPRND0(QSPI_CMD_FAST_READ) |
+		     PAD0(LUT_PAD1) | INSTR0(LUT_CMD) | OPRND1(ADDR24BIT) |
+		     PAD1(LUT_PAD1) | INSTR1(LUT_ADDR));
+#else
 	if (FSL_QSPI_FLASH_SIZE  <= SZ_16M)
 		qspi_write32(&regs->lut[lut_base], OPRND0(QSPI_CMD_FAST_READ) |
 			PAD0(LUT_PAD1) | INSTR0(LUT_CMD) | OPRND1(ADDR24BIT) |
@@ -124,6 +143,7 @@
 			     PAD0(LUT_PAD1) | INSTR0(LUT_CMD) |
 			     OPRND1(ADDR32BIT) | PAD1(LUT_PAD1) |
 			     INSTR1(LUT_ADDR));
+#endif
 	qspi_write32(&regs->lut[lut_base + 1], OPRND0(8) | PAD0(LUT_PAD1) |
 		INSTR0(LUT_DUMMY) | OPRND1(RX_BUFFER_SIZE) | PAD1(LUT_PAD1) |
 		INSTR1(LUT_READ));
@@ -141,6 +161,11 @@
 
 	/* Erase a sector */
 	lut_base = SEQID_SE * 4;
+#ifdef CONFIG_SPI_FLASH_BAR
+	qspi_write32(&regs->lut[lut_base], OPRND0(QSPI_CMD_SE) |
+		     PAD0(LUT_PAD1) | INSTR0(LUT_CMD) | OPRND1(ADDR24BIT) |
+		     PAD1(LUT_PAD1) | INSTR1(LUT_ADDR));
+#else
 	if (FSL_QSPI_FLASH_SIZE  <= SZ_16M)
 		qspi_write32(&regs->lut[lut_base], OPRND0(QSPI_CMD_SE) |
 			PAD0(LUT_PAD1) | INSTR0(LUT_CMD) | OPRND1(ADDR24BIT) |
@@ -149,6 +174,7 @@
 		qspi_write32(&regs->lut[lut_base], OPRND0(QSPI_CMD_SE_4B) |
 			PAD0(LUT_PAD1) | INSTR0(LUT_CMD) | OPRND1(ADDR32BIT) |
 			PAD1(LUT_PAD1) | INSTR1(LUT_ADDR));
+#endif
 	qspi_write32(&regs->lut[lut_base + 1], 0);
 	qspi_write32(&regs->lut[lut_base + 2], 0);
 	qspi_write32(&regs->lut[lut_base + 3], 0);
@@ -163,6 +189,11 @@
 
 	/* Page Program */
 	lut_base = SEQID_PP * 4;
+#ifdef CONFIG_SPI_FLASH_BAR
+	qspi_write32(&regs->lut[lut_base], OPRND0(QSPI_CMD_PP) |
+		     PAD0(LUT_PAD1) | INSTR0(LUT_CMD) | OPRND1(ADDR24BIT) |
+		     PAD1(LUT_PAD1) | INSTR1(LUT_ADDR));
+#else
 	if (FSL_QSPI_FLASH_SIZE  <= SZ_16M)
 		qspi_write32(&regs->lut[lut_base], OPRND0(QSPI_CMD_PP) |
 			PAD0(LUT_PAD1) | INSTR0(LUT_CMD) | OPRND1(ADDR24BIT) |
@@ -171,6 +202,7 @@
 		qspi_write32(&regs->lut[lut_base], OPRND0(QSPI_CMD_PP_4B) |
 			PAD0(LUT_PAD1) | INSTR0(LUT_CMD) | OPRND1(ADDR32BIT) |
 			PAD1(LUT_PAD1) | INSTR1(LUT_ADDR));
+#endif
 #ifdef CONFIG_MX6SX
 	/*
 	 * To MX6SX, OPRND0(TX_BUFFER_SIZE) can not work correctly.
@@ -200,10 +232,140 @@
 		     PAD0(LUT_PAD1) | INSTR0(LUT_CMD) | OPRND1(ADDR24BIT) |
 		     PAD1(LUT_PAD1) | INSTR1(LUT_ADDR));
 
+#ifdef CONFIG_SPI_FLASH_BAR
+	/*
+	 * BRRD, BRWR, RDEAR and WREAR are all programmed into the LUT,
+	 * because it is hard to determine dynamically during initialization
+	 * whether BRRD/BRWR or RDEAR/WREAR will be needed.
+	 */
+	lut_base = SEQID_BRRD * 4;
+	qspi_write32(&regs->lut[lut_base], OPRND0(QSPI_CMD_BRRD) |
+		     PAD0(LUT_PAD1) | INSTR0(LUT_CMD) | OPRND1(1) |
+		     PAD1(LUT_PAD1) | INSTR1(LUT_READ));
+
+	lut_base = SEQID_BRWR * 4;
+	qspi_write32(&regs->lut[lut_base], OPRND0(QSPI_CMD_BRWR) |
+		     PAD0(LUT_PAD1) | INSTR0(LUT_CMD) | OPRND1(1) |
+		     PAD1(LUT_PAD1) | INSTR1(LUT_WRITE));
+
+	lut_base = SEQID_RDEAR * 4;
+	qspi_write32(&regs->lut[lut_base], OPRND0(QSPI_CMD_RDEAR) |
+		     PAD0(LUT_PAD1) | INSTR0(LUT_CMD) | OPRND1(1) |
+		     PAD1(LUT_PAD1) | INSTR1(LUT_READ));
+
+	lut_base = SEQID_WREAR * 4;
+	qspi_write32(&regs->lut[lut_base], OPRND0(QSPI_CMD_WREAR) |
+		     PAD0(LUT_PAD1) | INSTR0(LUT_CMD) | OPRND1(1) |
+		     PAD1(LUT_PAD1) | INSTR1(LUT_WRITE));
+#endif
 	/* Lock the LUT */
 	qspi_write32(&regs->lutkey, LUT_KEY_VALUE);
 	qspi_write32(&regs->lckcr, QSPI_LCKCR_LOCK);
 }
+
+#if defined(CONFIG_SYS_FSL_QSPI_AHB)
+/*
+ * If we have changed the content of the flash by writing or erasing,
+ * we need to invalidate the AHB buffer. If we do not do so, we may read out
+ * the wrong data. The spec tells us to reset the AHB domain and Serial Flash
+ * domain at the same time.
+ */
+static inline void qspi_ahb_invalid(struct fsl_qspi *q)
+{
+	struct fsl_qspi_regs *regs = (struct fsl_qspi_regs *)q->reg_base;
+	u32 reg;
+
+	reg = qspi_read32(&regs->mcr);
+	reg |= QSPI_MCR_SWRSTHD_MASK | QSPI_MCR_SWRSTSD_MASK;
+	qspi_write32(&regs->mcr, reg);
+
+	/*
+	 * The minimum delay : 1 AHB + 2 SFCK clocks.
+	 * Delay 1 us is enough.
+	 */
+	udelay(1);
+
+	reg &= ~(QSPI_MCR_SWRSTHD_MASK | QSPI_MCR_SWRSTSD_MASK);
+	qspi_write32(&regs->mcr, reg);
+}
+
+/* Read out the data from the AHB buffer. */
+static inline void qspi_ahb_read(struct fsl_qspi *q, u8 *rxbuf, int len)
+{
+	struct fsl_qspi_regs *regs = (struct fsl_qspi_regs *)q->reg_base;
+	u32 mcr_reg;
+
+	mcr_reg = qspi_read32(&regs->mcr);
+
+	qspi_write32(&regs->mcr, QSPI_MCR_CLR_RXF_MASK | QSPI_MCR_CLR_TXF_MASK |
+		     QSPI_MCR_RESERVED_MASK | QSPI_MCR_END_CFD_LE);
+
+	/* Read out the data directly from the AHB buffer. */
+	memcpy(rxbuf, (u8 *)(q->amba_base + q->sf_addr), len);
+
+	qspi_write32(&regs->mcr, mcr_reg);
+}
+
+static void qspi_enable_ddr_mode(struct fsl_qspi_regs *regs)
+{
+	u32 reg, reg2;
+
+	reg = qspi_read32(&regs->mcr);
+	/* Disable the module */
+	qspi_write32(&regs->mcr, reg | QSPI_MCR_MDIS_MASK);
+
+	/* Set the Sampling Register for DDR */
+	reg2 = qspi_read32(&regs->smpr);
+	reg2 &= ~QSPI_SMPR_DDRSMP_MASK;
+	reg2 |= (2 << QSPI_SMPR_DDRSMP_SHIFT);
+	qspi_write32(&regs->smpr, reg2);
+
+	/* Enable the module again (enable the DDR too) */
+	reg |= QSPI_MCR_DDR_EN_MASK;
+	/* Enable bit 29 for imx6sx */
+	reg |= (1 << 29);
+
+	qspi_write32(&regs->mcr, reg);
+}
+
+/*
+ * There are two different ways to read out the data from the flash:
+ *  the "IP Command Read" and the "AHB Command Read".
+ *
+ * The IC guy suggests we use the "AHB Command Read" which is faster
+ * than the "IP Command Read". (What's more is that there is a bug in
+ * the "IP Command Read" in the Vybrid.)
+ *
+ * After we set up the registers for the "AHB Command Read", we can use
+ * the memcpy to read the data directly. A "missed" access to the buffer
+ * causes the controller to clear the buffer, and use the sequence pointed
+ * by the QUADSPI_BFGENCR[SEQID] to initiate a read from the flash.
+ */
+static void qspi_init_ahb_read(struct fsl_qspi_regs *regs)
+{
+	/* AHB configuration for access buffers 0/1/2. */
+	qspi_write32(&regs->buf0cr, QSPI_BUFXCR_INVALID_MSTRID);
+	qspi_write32(&regs->buf1cr, QSPI_BUFXCR_INVALID_MSTRID);
+	qspi_write32(&regs->buf2cr, QSPI_BUFXCR_INVALID_MSTRID);
+	qspi_write32(&regs->buf3cr, QSPI_BUF3CR_ALLMST_MASK |
+		     (0x80 << QSPI_BUF3CR_ADATSZ_SHIFT));
+
+	/* We only use the buffer3 */
+	qspi_write32(&regs->buf0ind, 0);
+	qspi_write32(&regs->buf1ind, 0);
+	qspi_write32(&regs->buf2ind, 0);
+
+	/*
+	 * Set the default lut sequence for AHB Read.
+	 * Parallel mode is disabled.
+	 */
+	qspi_write32(&regs->bfgencr,
+		     SEQID_FAST_READ << QSPI_BFGENCR_SEQID_SHIFT);
+
+	/*Enable DDR Mode*/
+	qspi_enable_ddr_mode(regs);
+}
+#endif
 
 void spi_init()
 {
@@ -215,8 +377,8 @@
 {
 	struct fsl_qspi *qspi;
 	struct fsl_qspi_regs *regs;
-	u32 reg_val, smpr_val;
-	u32 total_size, seq_id;
+	u32 smpr_val;
+	u32 total_size;
 
 	if (bus >= ARRAY_SIZE(spi_bases))
 		return NULL;
@@ -271,13 +433,9 @@
 	qspi_write32(&regs->smpr, smpr_val);
 	qspi_write32(&regs->mcr, QSPI_MCR_RESERVED_MASK);
 
-	seq_id = 0;
-	reg_val = qspi_read32(&regs->bfgencr);
-	reg_val &= ~QSPI_BFGENCR_SEQID_MASK;
-	reg_val |= (seq_id << QSPI_BFGENCR_SEQID_SHIFT);
-	reg_val &= ~QSPI_BFGENCR_PAR_EN_MASK;
-	qspi_write32(&regs->bfgencr, reg_val);
-
+#ifdef CONFIG_SYS_FSL_QSPI_AHB
+	qspi_init_ahb_read(regs);
+#endif
 	return &qspi->slave;
 }
 
@@ -293,6 +451,47 @@
 	return 0;
 }
 
+#ifdef CONFIG_SPI_FLASH_BAR
+/* Bank register read/write, EAR register read/write */
+static void qspi_op_rdbank(struct fsl_qspi *qspi, u8 *rxbuf, u32 len)
+{
+	struct fsl_qspi_regs *regs = (struct fsl_qspi_regs *)qspi->reg_base;
+	u32 reg, mcr_reg, data, seqid;
+
+	mcr_reg = qspi_read32(&regs->mcr);
+	qspi_write32(&regs->mcr, QSPI_MCR_CLR_RXF_MASK | QSPI_MCR_CLR_TXF_MASK |
+		     QSPI_MCR_RESERVED_MASK | QSPI_MCR_END_CFD_LE);
+	qspi_write32(&regs->rbct, QSPI_RBCT_RXBRD_USEIPS);
+
+	qspi_write32(&regs->sfar, qspi->amba_base);
+
+	if (qspi->cur_seqid == QSPI_CMD_BRRD)
+		seqid = SEQID_BRRD;
+	else
+		seqid = SEQID_RDEAR;
+
+	qspi_write32(&regs->ipcr, (seqid << QSPI_IPCR_SEQID_SHIFT) | len);
+
+	/* Wait previous command complete */
+	while (qspi_read32(&regs->sr) & QSPI_SR_BUSY_MASK)
+		;
+
+	while (1) {
+		reg = qspi_read32(&regs->rbsr);
+		if (reg & QSPI_RBSR_RDBFL_MASK) {
+			data = qspi_read32(&regs->rbdr[0]);
+			data = qspi_endian_xchg(data);
+			memcpy(rxbuf, &data, len);
+			qspi_write32(&regs->mcr, qspi_read32(&regs->mcr) |
+				     QSPI_MCR_CLR_RXF_MASK);
+			break;
+		}
+	}
+
+	qspi_write32(&regs->mcr, mcr_reg);
+}
+#endif
+
 static void qspi_op_rdid(struct fsl_qspi *qspi, u32 *rxbuf, u32 len)
 {
 	struct fsl_qspi_regs *regs = (struct fsl_qspi_regs *)qspi->reg_base;
@@ -327,6 +526,8 @@
 	qspi_write32(&regs->mcr, mcr_reg);
 }
 
+#ifndef CONFIG_SYS_FSL_QSPI_AHB
+/* When AHB read is not used, read data through the IP interface */
 static void qspi_op_read(struct fsl_qspi *qspi, u32 *rxbuf, u32 len)
 {
 	struct fsl_qspi_regs *regs = (struct fsl_qspi_regs *)qspi->reg_base;
@@ -370,11 +571,12 @@
 
 	qspi_write32(&regs->mcr, mcr_reg);
 }
+#endif
 
-static void qspi_op_pp(struct fsl_qspi *qspi, u32 *txbuf, u32 len)
+static void qspi_op_write(struct fsl_qspi *qspi, u8 *txbuf, u32 len)
 {
 	struct fsl_qspi_regs *regs = (struct fsl_qspi_regs *)qspi->reg_base;
-	u32 mcr_reg, data, reg, status_reg;
+	u32 mcr_reg, data, reg, status_reg, seqid;
 	int i, size, tx_size;
 	u32 to_or_from = 0;
 
@@ -404,22 +606,39 @@
 			qspi_read32(&regs->mcr) | QSPI_MCR_CLR_RXF_MASK);
 	}
 
+	/* Default is page programming */
+	seqid = SEQID_PP;
+#ifdef CONFIG_SPI_FLASH_BAR
+	if (qspi->cur_seqid == QSPI_CMD_BRWR)
+		seqid = SEQID_BRWR;
+	else if (qspi->cur_seqid == QSPI_CMD_WREAR)
+		seqid = SEQID_WREAR;
+#endif
+
 	to_or_from = qspi->sf_addr + qspi->amba_base;
+
 	qspi_write32(&regs->sfar, to_or_from);
 
 	tx_size = (len > TX_BUFFER_SIZE) ?
 		TX_BUFFER_SIZE : len;
 
-	size = (tx_size + 3) / 4;
-
+	size = tx_size / 4;
 	for (i = 0; i < size; i++) {
-		data = qspi_endian_xchg(*txbuf);
+		memcpy(&data, txbuf, 4);
+		data = qspi_endian_xchg(data);
 		qspi_write32(&regs->tbdr, data);
-		txbuf++;
+		txbuf += 4;
 	}
 
-	qspi_write32(&regs->ipcr,
-		(SEQID_PP << QSPI_IPCR_SEQID_SHIFT) | tx_size);
+	size = tx_size % 4;
+	if (size) {
+		data = 0;
+		memcpy(&data, txbuf, size);
+		data = qspi_endian_xchg(data);
+		qspi_write32(&regs->tbdr, data);
+	}
+
+	qspi_write32(&regs->ipcr, (seqid << QSPI_IPCR_SEQID_SHIFT) | tx_size);
 	while (qspi_read32(&regs->sr) & QSPI_SR_BUSY_MASK)
 		;
 
@@ -495,16 +714,18 @@
 {
 	struct fsl_qspi *qspi = to_qspi_spi(slave);
 	u32 bytes = DIV_ROUND_UP(bitlen, 8);
-	static u32 pp_sfaddr;
+	static u32 wr_sfaddr;
 	u32 txbuf;
 
 	if (dout) {
-		memcpy(&txbuf, dout, 4);
-		qspi->cur_seqid = *(u8 *)dout;
+		if (flags & SPI_XFER_BEGIN) {
+			qspi->cur_seqid = *(u8 *)dout;
+			memcpy(&txbuf, dout, 4);
+		}
 
 		if (flags == SPI_XFER_END) {
-			qspi->sf_addr = pp_sfaddr;
-			qspi_op_pp(qspi, (u32 *)dout, bytes);
+			qspi->sf_addr = wr_sfaddr;
+			qspi_op_write(qspi, (u8 *)dout, bytes);
 			return 0;
 		}
 
@@ -514,20 +735,46 @@
 			   (qspi->cur_seqid == QSPI_CMD_BE_4K)) {
 			qspi->sf_addr = swab32(txbuf) & OFFSET_BITS_MASK;
 			qspi_op_erase(qspi);
-		} else if (qspi->cur_seqid == QSPI_CMD_PP) {
-			pp_sfaddr = swab32(txbuf) & OFFSET_BITS_MASK;
+		} else if (qspi->cur_seqid == QSPI_CMD_PP)
+			wr_sfaddr = swab32(txbuf) & OFFSET_BITS_MASK;
+#ifdef CONFIG_SPI_FLASH_BAR
+		else if ((qspi->cur_seqid == QSPI_CMD_BRWR) ||
+			 (qspi->cur_seqid == QSPI_CMD_WREAR)) {
+			wr_sfaddr = 0;
 		}
+#endif
 	}
 
 	if (din) {
-		if (qspi->cur_seqid == QSPI_CMD_FAST_READ)
+		if (qspi->cur_seqid == QSPI_CMD_FAST_READ) {
+#ifdef CONFIG_SYS_FSL_QSPI_AHB
+			qspi_ahb_read(qspi, din, bytes);
+#else
 			qspi_op_read(qspi, din, bytes);
+#endif
+		}
 		else if (qspi->cur_seqid == QSPI_CMD_RDID)
 			qspi_op_rdid(qspi, din, bytes);
 		else if (qspi->cur_seqid == QSPI_CMD_RDSR)
 			qspi_op_rdsr(qspi, din);
+#ifdef CONFIG_SPI_FLASH_BAR
+		else if ((qspi->cur_seqid == QSPI_CMD_BRRD) ||
+			 (qspi->cur_seqid == QSPI_CMD_RDEAR)) {
+			qspi->sf_addr = 0;
+			qspi_op_rdbank(qspi, din, bytes);
+		}
+#endif
 	}
 
+#ifdef CONFIG_SYS_FSL_QSPI_AHB
+	if ((qspi->cur_seqid == QSPI_CMD_SE) ||
+	    (qspi->cur_seqid == QSPI_CMD_PP) ||
+	    (qspi->cur_seqid == QSPI_CMD_BE_4K) ||
+	    (qspi->cur_seqid == QSPI_CMD_WREAR) ||
+	    (qspi->cur_seqid == QSPI_CMD_BRWR))
+		qspi_ahb_invalid(qspi);
+#endif
+
 	return 0;
 }
 
diff --git a/drivers/spi/fsl_qspi.h b/drivers/spi/fsl_qspi.h
index db400e6..6cb3610 100644
--- a/drivers/spi/fsl_qspi.h
+++ b/drivers/spi/fsl_qspi.h
@@ -58,7 +58,12 @@
 
 #define QSPI_MCR_END_CFD_SHIFT		2
 #define QSPI_MCR_END_CFD_MASK		(3 << QSPI_MCR_END_CFD_SHIFT)
+#ifdef CONFIG_SYS_FSL_QSPI_AHB
+/* AHB needs 64bit operation */
+#define QSPI_MCR_END_CFD_LE		(3 << QSPI_MCR_END_CFD_SHIFT)
+#else
 #define QSPI_MCR_END_CFD_LE		(1 << QSPI_MCR_END_CFD_SHIFT)
+#endif
 #define QSPI_MCR_DDR_EN_SHIFT		7
 #define QSPI_MCR_DDR_EN_MASK		(1 << QSPI_MCR_DDR_EN_SHIFT)
 #define QSPI_MCR_CLR_RXF_SHIFT		10
@@ -69,6 +74,10 @@
 #define QSPI_MCR_MDIS_MASK		(1 << QSPI_MCR_MDIS_SHIFT)
 #define QSPI_MCR_RESERVED_SHIFT		16
 #define QSPI_MCR_RESERVED_MASK		(0xf << QSPI_MCR_RESERVED_SHIFT)
+#define QSPI_MCR_SWRSTHD_SHIFT		1
+#define QSPI_MCR_SWRSTHD_MASK		(1 << QSPI_MCR_SWRSTHD_SHIFT)
+#define QSPI_MCR_SWRSTSD_SHIFT		0
+#define QSPI_MCR_SWRSTSD_MASK		(1 << QSPI_MCR_SWRSTSD_SHIFT)
 
 #define QSPI_SMPR_HSENA_SHIFT		0
 #define QSPI_SMPR_HSENA_MASK		(1 << QSPI_SMPR_HSENA_SHIFT)
@@ -79,6 +88,12 @@
 #define QSPI_SMPR_DDRSMP_SHIFT		16
 #define QSPI_SMPR_DDRSMP_MASK		(7 << QSPI_SMPR_DDRSMP_SHIFT)
 
+#define QSPI_BUFXCR_INVALID_MSTRID	0xe
+#define QSPI_BUF3CR_ALLMST_SHIFT	31
+#define QSPI_BUF3CR_ALLMST_MASK		(1 << QSPI_BUF3CR_ALLMST_SHIFT)
+#define QSPI_BUF3CR_ADATSZ_SHIFT	8
+#define QSPI_BUF3CR_ADATSZ_MASK		(0xFF << QSPI_BUF3CR_ADATSZ_SHIFT)
+
 #define QSPI_BFGENCR_SEQID_SHIFT	12
 #define QSPI_BFGENCR_SEQID_MASK		(0xf << QSPI_BFGENCR_SEQID_SHIFT)
 #define QSPI_BFGENCR_PAR_EN_SHIFT	16
diff --git a/drivers/spi/ftssp010_spi.c b/drivers/spi/ftssp010_spi.c
index aa3b5a0..267e4d8 100644
--- a/drivers/spi/ftssp010_spi.c
+++ b/drivers/spi/ftssp010_spi.c
@@ -169,61 +169,49 @@
 static int ftssp010_wait(struct ftssp010_spi *chip)
 {
 	struct ftssp010_regs *regs = chip->regs;
-	int ret = -1;
 	ulong t;
 
 	/* wait until device idle */
 	for (t = get_timer(0); get_timer(t) < CONFIG_FTSSP010_TIMEOUT; ) {
-		if (readl(&regs->sr) & SR_BUSY)
-			continue;
-		ret = 0;
-		break;
+		if (!(readl(&regs->sr) & SR_BUSY))
+			return 0;
 	}
 
-	if (ret)
-		puts("ftspi010: busy timeout\n");
+	puts("ftspi010: busy timeout\n");
 
-	return ret;
+	return -1;
 }
 
 static int ftssp010_wait_tx(struct ftssp010_spi *chip)
 {
 	struct ftssp010_regs *regs = chip->regs;
-	int ret = -1;
 	ulong t;
 
 	/* wait until tx fifo not full */
 	for (t = get_timer(0); get_timer(t) < CONFIG_FTSSP010_TIMEOUT; ) {
-		if (!(readl(&regs->sr) & SR_TFNF))
-			continue;
-		ret = 0;
-		break;
+		if (readl(&regs->sr) & SR_TFNF)
+			return 0;
 	}
 
-	if (ret)
-		puts("ftssp010: tx timeout\n");
+	puts("ftssp010: tx timeout\n");
 
-	return ret;
+	return -1;
 }
 
 static int ftssp010_wait_rx(struct ftssp010_spi *chip)
 {
 	struct ftssp010_regs *regs = chip->regs;
-	int ret = -1;
 	ulong t;
 
 	/* wait until rx fifo not empty */
 	for (t = get_timer(0); get_timer(t) < CONFIG_FTSSP010_TIMEOUT; ) {
-		if (!SR_RFVE(readl(&regs->sr)))
-			continue;
-		ret = 0;
-		break;
+		if (SR_RFVE(readl(&regs->sr)))
+			return 0;
 	}
 
-	if (ret)
-		puts("ftssp010: rx timeout\n");
+	puts("ftssp010: rx timeout\n");
 
-	return ret;
+	return -1;
 }
 
 static int ftssp010_spi_work_transfer_v2(struct ftssp010_spi *chip,
diff --git a/include/configs/mx6sxsabresd.h b/include/configs/mx6sxsabresd.h
index 469d250..fbaae3f 100644
--- a/include/configs/mx6sxsabresd.h
+++ b/include/configs/mx6sxsabresd.h
@@ -235,15 +235,23 @@
 /* FLASH and environment organization */
 #define CONFIG_SYS_NO_FLASH
 
+#define CONFIG_CMD_TIME
+
 #define CONFIG_FSL_QSPI
 
 #ifdef CONFIG_FSL_QSPI
 #define CONFIG_CMD_SF
 #define CONFIG_SPI_FLASH
+#define CONFIG_SPI_FLASH_BAR
 #define CONFIG_SPI_FLASH_SPANSION
 #define CONFIG_SPI_FLASH_STMICRO
 #define CONFIG_SYS_FSL_QSPI_LE
+#define CONFIG_SYS_FSL_QSPI_AHB
+#ifdef CONFIG_MX6SX_SABRESD_REVA
 #define FSL_QSPI_FLASH_SIZE		SZ_16M
+#else
+#define FSL_QSPI_FLASH_SIZE		SZ_32M
+#endif
 #define FSL_QSPI_FLASH_NUM		2
 #endif