Merge branch 'next' of https://source.denx.de/u-boot/custodians/u-boot-spi into next

- xSPI Octal DTR support (Pratyush Yadav)
- MXIC SPI driver (Zhengxun)
diff --git a/drivers/mtd/spi/Kconfig b/drivers/mtd/spi/Kconfig
index f8db8e5..1b2ef37 100644
--- a/drivers/mtd/spi/Kconfig
+++ b/drivers/mtd/spi/Kconfig
@@ -88,6 +88,32 @@
 	 SPI NOR flashes using Serial Flash Discoverable Parameters (SFDP)
 	 tables as per JESD216 standard.
 
+config SPI_FLASH_SMART_HWCAPS
+	bool "Smart hardware capability detection based on SPI MEM supports_op() hook"
+	default y
+	help
+	 Enable support for smart hardware capability detection based on SPI
+	 MEM supports_op() hook that lets controllers express whether they
+	 can support a type of operation in a much more refined way compared
+	 to using flags like SPI_RX_DUAL, SPI_TX_QUAD, etc.
+
+config SPI_FLASH_SOFT_RESET
+	bool "Software Reset support for SPI NOR flashes"
+	default n
+	help
+	 Enable support for xSPI Software Reset. It will be used to switch from
+	 Octal DTR mode to legacy mode on shutdown and boot (if enabled).
+
+config SPI_FLASH_SOFT_RESET_ON_BOOT
+	bool "Perform a Software Reset on boot on flashes that boot in stateful mode"
+	depends on SPI_FLASH_SOFT_RESET
+	default n
+	help
+	 Perform a Software Reset on boot to allow detecting flashes that are
+	 handed to us in Octal DTR mode. Do not enable this config on flashes
+	 that are not supposed to be handed to U-Boot in Octal DTR mode, even
+	 if they _do_ support the Soft Reset sequence.
+
 config SPI_FLASH_BAR
 	bool "SPI flash Bank/Extended address register support"
 	help
@@ -141,11 +167,27 @@
 	help
 	  Add support for various Spansion SPI flash chips (S25FLxxx)
 
+config SPI_FLASH_S28HS512T
+	bool "Cypress S28HS512T chip support"
+	depends on SPI_FLASH_SPANSION
+	help
+	 Add support for the Cypress S28HS512T chip. This is a separate config
+	 because the fixup hooks for this flash add extra size overhead. Boards
+	 that don't use the flash can disable this to save space.
+
 config SPI_FLASH_STMICRO
 	bool "STMicro SPI flash support"
 	help
 	  Add support for various STMicro SPI flash chips (M25Pxxx and N25Qxxx)
 
+config SPI_FLASH_MT35XU
+	bool "Micron MT35XU chip support"
+	depends on SPI_FLASH_STMICRO
+	help
+	 Add support for the Micron MT35XU chip. This is a separate config
+	 because the fixup hooks for this flash add extra size overhead. Boards
+	 that don't use the flash can disable this to save space.
+
 config SPI_FLASH_SST
 	bool "SST SPI flash support"
 	help
diff --git a/drivers/mtd/spi/sf_internal.h b/drivers/mtd/spi/sf_internal.h
index 0b63e1b..d3ef69e 100644
--- a/drivers/mtd/spi/sf_internal.h
+++ b/drivers/mtd/spi/sf_internal.h
@@ -68,6 +68,7 @@
 #define USE_CLSR		BIT(14)	/* use CLSR command */
 #define SPI_NOR_HAS_SST26LOCK	BIT(15)	/* Flash supports lock/unlock via BPR */
 #define SPI_NOR_OCTAL_READ	BIT(16)	/* Flash supports Octal Read */
+#define SPI_NOR_OCTAL_DTR_READ	BIT(17)	/* Flash supports Octal DTR Read */
 };
 
 extern const struct flash_info spi_nor_ids[];
diff --git a/drivers/mtd/spi/sf_probe.c b/drivers/mtd/spi/sf_probe.c
index 7edb875..f461082 100644
--- a/drivers/mtd/spi/sf_probe.c
+++ b/drivers/mtd/spi/sf_probe.c
@@ -151,6 +151,11 @@
 static int spi_flash_std_remove(struct udevice *dev)
 {
 	struct spi_flash *flash = dev_get_uclass_priv(dev);
+	int ret;
+
+	ret = spi_nor_remove(flash);
+	if (ret)
+		return ret;
 
 	if (CONFIG_IS_ENABLED(SPI_FLASH_MTD))
 		spi_flash_mtd_unregister(flash);
@@ -178,6 +183,7 @@
 	.remove		= spi_flash_std_remove,
 	.priv_auto	= sizeof(struct spi_nor),
 	.ops		= &spi_flash_std_ops,
+	.flags		= DM_FLAG_OS_PREPARE,
 };
 
 DM_DRIVER_ALIAS(jedec_spi_nor, spansion_m25p16)
diff --git a/drivers/mtd/spi/spi-nor-core.c b/drivers/mtd/spi/spi-nor-core.c
index 6af9c67..8dd44c0 100644
--- a/drivers/mtd/spi/spi-nor-core.c
+++ b/drivers/mtd/spi/spi-nor-core.c
@@ -21,6 +21,8 @@
 #include <linux/log2.h>
 #include <linux/math64.h>
 #include <linux/sizes.h>
+#include <linux/bitfield.h>
+#include <linux/delay.h>
 
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/spi-nor.h>
@@ -40,6 +42,237 @@
 
 #define DEFAULT_READY_WAIT_JIFFIES		(40UL * HZ)
 
+#define ROUND_UP_TO(x, y)	(((x) + (y) - 1) / (y) * (y))
+
+struct sfdp_parameter_header {
+	u8		id_lsb;
+	u8		minor;
+	u8		major;
+	u8		length; /* in double words */
+	u8		parameter_table_pointer[3]; /* byte address */
+	u8		id_msb;
+};
+
+#define SFDP_PARAM_HEADER_ID(p)	(((p)->id_msb << 8) | (p)->id_lsb)
+#define SFDP_PARAM_HEADER_PTP(p) \
+	(((p)->parameter_table_pointer[2] << 16) | \
+	 ((p)->parameter_table_pointer[1] <<  8) | \
+	 ((p)->parameter_table_pointer[0] <<  0))
+
+#define SFDP_BFPT_ID		0xff00	/* Basic Flash Parameter Table */
+#define SFDP_SECTOR_MAP_ID	0xff81	/* Sector Map Table */
+#define SFDP_SST_ID		0x01bf	/* Manufacturer specific Table */
+#define SFDP_PROFILE1_ID	0xff05	/* xSPI Profile 1.0 Table */
+
+#define SFDP_SIGNATURE		0x50444653U
+#define SFDP_JESD216_MAJOR	1
+#define SFDP_JESD216_MINOR	0
+#define SFDP_JESD216A_MINOR	5
+#define SFDP_JESD216B_MINOR	6
+
+struct sfdp_header {
+	u32		signature; /* Ox50444653U <=> "SFDP" */
+	u8		minor;
+	u8		major;
+	u8		nph; /* 0-base number of parameter headers */
+	u8		unused;
+
+	/* Basic Flash Parameter Table. */
+	struct sfdp_parameter_header	bfpt_header;
+};
+
+/* Basic Flash Parameter Table */
+
+/*
+ * JESD216 rev D defines a Basic Flash Parameter Table of 20 DWORDs.
+ * They are indexed from 1 but C arrays are indexed from 0.
+ */
+#define BFPT_DWORD(i)		((i) - 1)
+#define BFPT_DWORD_MAX		20
+
+/* The first version of JESB216 defined only 9 DWORDs. */
+#define BFPT_DWORD_MAX_JESD216			9
+#define BFPT_DWORD_MAX_JESD216B			16
+
+/* 1st DWORD. */
+#define BFPT_DWORD1_FAST_READ_1_1_2		BIT(16)
+#define BFPT_DWORD1_ADDRESS_BYTES_MASK		GENMASK(18, 17)
+#define BFPT_DWORD1_ADDRESS_BYTES_3_ONLY	(0x0UL << 17)
+#define BFPT_DWORD1_ADDRESS_BYTES_3_OR_4	(0x1UL << 17)
+#define BFPT_DWORD1_ADDRESS_BYTES_4_ONLY	(0x2UL << 17)
+#define BFPT_DWORD1_DTR				BIT(19)
+#define BFPT_DWORD1_FAST_READ_1_2_2		BIT(20)
+#define BFPT_DWORD1_FAST_READ_1_4_4		BIT(21)
+#define BFPT_DWORD1_FAST_READ_1_1_4		BIT(22)
+
+/* 5th DWORD. */
+#define BFPT_DWORD5_FAST_READ_2_2_2		BIT(0)
+#define BFPT_DWORD5_FAST_READ_4_4_4		BIT(4)
+
+/* 11th DWORD. */
+#define BFPT_DWORD11_PAGE_SIZE_SHIFT		4
+#define BFPT_DWORD11_PAGE_SIZE_MASK		GENMASK(7, 4)
+
+/* 15th DWORD. */
+
+/*
+ * (from JESD216 rev B)
+ * Quad Enable Requirements (QER):
+ * - 000b: Device does not have a QE bit. Device detects 1-1-4 and 1-4-4
+ *         reads based on instruction. DQ3/HOLD# functions are hold during
+ *         instruction phase.
+ * - 001b: QE is bit 1 of status register 2. It is set via Write Status with
+ *         two data bytes where bit 1 of the second byte is one.
+ *         [...]
+ *         Writing only one byte to the status register has the side-effect of
+ *         clearing status register 2, including the QE bit. The 100b code is
+ *         used if writing one byte to the status register does not modify
+ *         status register 2.
+ * - 010b: QE is bit 6 of status register 1. It is set via Write Status with
+ *         one data byte where bit 6 is one.
+ *         [...]
+ * - 011b: QE is bit 7 of status register 2. It is set via Write status
+ *         register 2 instruction 3Eh with one data byte where bit 7 is one.
+ *         [...]
+ *         The status register 2 is read using instruction 3Fh.
+ * - 100b: QE is bit 1 of status register 2. It is set via Write Status with
+ *         two data bytes where bit 1 of the second byte is one.
+ *         [...]
+ *         In contrast to the 001b code, writing one byte to the status
+ *         register does not modify status register 2.
+ * - 101b: QE is bit 1 of status register 2. Status register 1 is read using
+ *         Read Status instruction 05h. Status register2 is read using
+ *         instruction 35h. QE is set via Writ Status instruction 01h with
+ *         two data bytes where bit 1 of the second byte is one.
+ *         [...]
+ */
+#define BFPT_DWORD15_QER_MASK			GENMASK(22, 20)
+#define BFPT_DWORD15_QER_NONE			(0x0UL << 20) /* Micron */
+#define BFPT_DWORD15_QER_SR2_BIT1_BUGGY		(0x1UL << 20)
+#define BFPT_DWORD15_QER_SR1_BIT6		(0x2UL << 20) /* Macronix */
+#define BFPT_DWORD15_QER_SR2_BIT7		(0x3UL << 20)
+#define BFPT_DWORD15_QER_SR2_BIT1_NO_RD		(0x4UL << 20)
+#define BFPT_DWORD15_QER_SR2_BIT1		(0x5UL << 20) /* Spansion */
+
+#define BFPT_DWORD16_SOFT_RST			BIT(12)
+
+#define BFPT_DWORD18_CMD_EXT_MASK		GENMASK(30, 29)
+#define BFPT_DWORD18_CMD_EXT_REP		(0x0UL << 29) /* Repeat */
+#define BFPT_DWORD18_CMD_EXT_INV		(0x1UL << 29) /* Invert */
+#define BFPT_DWORD18_CMD_EXT_RES		(0x2UL << 29) /* Reserved */
+#define BFPT_DWORD18_CMD_EXT_16B		(0x3UL << 29) /* 16-bit opcode */
+
+/* xSPI Profile 1.0 table (from JESD216D.01). */
+#define PROFILE1_DWORD1_RD_FAST_CMD		GENMASK(15, 8)
+#define PROFILE1_DWORD1_RDSR_DUMMY		BIT(28)
+#define PROFILE1_DWORD1_RDSR_ADDR_BYTES		BIT(29)
+#define PROFILE1_DWORD4_DUMMY_200MHZ		GENMASK(11, 7)
+#define PROFILE1_DWORD5_DUMMY_166MHZ		GENMASK(31, 27)
+#define PROFILE1_DWORD5_DUMMY_133MHZ		GENMASK(21, 17)
+#define PROFILE1_DWORD5_DUMMY_100MHZ		GENMASK(11, 7)
+#define PROFILE1_DUMMY_DEFAULT			20
+
+struct sfdp_bfpt {
+	u32	dwords[BFPT_DWORD_MAX];
+};
+
+/**
+ * struct spi_nor_fixups - SPI NOR fixup hooks
+ * @default_init: called after default flash parameters init. Used to tweak
+ *                flash parameters when information provided by the flash_info
+ *                table is incomplete or wrong.
+ * @post_bfpt: called after the BFPT table has been parsed
+ * @post_sfdp: called after SFDP has been parsed (is also called for SPI NORs
+ *             that do not support RDSFDP). Typically used to tweak various
+ *             parameters that could not be extracted by other means (i.e.
+ *             when information provided by the SFDP/flash_info tables are
+ *             incomplete or wrong).
+ *
+ * Those hooks can be used to tweak the SPI NOR configuration when the SFDP
+ * table is broken or not available.
+ */
+struct spi_nor_fixups {
+	void (*default_init)(struct spi_nor *nor);
+	int (*post_bfpt)(struct spi_nor *nor,
+			 const struct sfdp_parameter_header *bfpt_header,
+			 const struct sfdp_bfpt *bfpt,
+			 struct spi_nor_flash_parameter *params);
+	void (*post_sfdp)(struct spi_nor *nor,
+			  struct spi_nor_flash_parameter *params);
+};
+
+#define SPI_NOR_SRST_SLEEP_LEN			200
+
+/**
+ * spi_nor_get_cmd_ext() - Get the command opcode extension based on the
+ *			   extension type.
+ * @nor:		pointer to a 'struct spi_nor'
+ * @op:			pointer to the 'struct spi_mem_op' whose properties
+ *			need to be initialized.
+ *
+ * Right now, only "repeat" and "invert" are supported.
+ *
+ * Return: The opcode extension.
+ */
+static u8 spi_nor_get_cmd_ext(const struct spi_nor *nor,
+			      const struct spi_mem_op *op)
+{
+	switch (nor->cmd_ext_type) {
+	case SPI_NOR_EXT_INVERT:
+		return ~op->cmd.opcode;
+
+	case SPI_NOR_EXT_REPEAT:
+		return op->cmd.opcode;
+
+	default:
+		dev_dbg(nor->dev, "Unknown command extension type\n");
+		return 0;
+	}
+}
+
+/**
+ * spi_nor_setup_op() - Set up common properties of a spi-mem op.
+ * @nor:		pointer to a 'struct spi_nor'
+ * @op:			pointer to the 'struct spi_mem_op' whose properties
+ *			need to be initialized.
+ * @proto:		the protocol from which the properties need to be set.
+ */
+static void spi_nor_setup_op(const struct spi_nor *nor,
+			     struct spi_mem_op *op,
+			     const enum spi_nor_protocol proto)
+{
+	u8 ext;
+
+	op->cmd.buswidth = spi_nor_get_protocol_inst_nbits(proto);
+
+	if (op->addr.nbytes)
+		op->addr.buswidth = spi_nor_get_protocol_addr_nbits(proto);
+
+	if (op->dummy.nbytes)
+		op->dummy.buswidth = spi_nor_get_protocol_addr_nbits(proto);
+
+	if (op->data.nbytes)
+		op->data.buswidth = spi_nor_get_protocol_data_nbits(proto);
+
+	if (spi_nor_protocol_is_dtr(proto)) {
+		/*
+		 * spi-mem supports mixed DTR modes, but right now we can only
+		 * have all phases either DTR or STR. IOW, spi-mem can have
+		 * something like 4S-4D-4D, but spi-nor can't. So, set all 4
+		 * phases to either DTR or STR.
+		 */
+		op->cmd.dtr = op->addr.dtr = op->dummy.dtr =
+			op->data.dtr = true;
+
+		/* 2 bytes per clock cycle in DTR mode. */
+		op->dummy.nbytes *= 2;
+
+		ext = spi_nor_get_cmd_ext(nor, op);
+		op->cmd.opcode = (op->cmd.opcode << 8) | ext;
+		op->cmd.nbytes = 2;
+	}
+}
+
 static int spi_nor_read_write_reg(struct spi_nor *nor, struct spi_mem_op
 		*op, void *buf)
 {
@@ -52,12 +285,14 @@
 
 static int spi_nor_read_reg(struct spi_nor *nor, u8 code, u8 *val, int len)
 {
-	struct spi_mem_op op = SPI_MEM_OP(SPI_MEM_OP_CMD(code, 1),
+	struct spi_mem_op op = SPI_MEM_OP(SPI_MEM_OP_CMD(code, 0),
 					  SPI_MEM_OP_NO_ADDR,
 					  SPI_MEM_OP_NO_DUMMY,
-					  SPI_MEM_OP_DATA_IN(len, NULL, 1));
+					  SPI_MEM_OP_DATA_IN(len, NULL, 0));
 	int ret;
 
+	spi_nor_setup_op(nor, &op, nor->reg_proto);
+
 	ret = spi_nor_read_write_reg(nor, &op, val);
 	if (ret < 0)
 		dev_dbg(nor->dev, "error %d reading %x\n", ret, code);
@@ -67,10 +302,15 @@
 
 static int spi_nor_write_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len)
 {
-	struct spi_mem_op op = SPI_MEM_OP(SPI_MEM_OP_CMD(opcode, 1),
+	struct spi_mem_op op = SPI_MEM_OP(SPI_MEM_OP_CMD(opcode, 0),
 					  SPI_MEM_OP_NO_ADDR,
 					  SPI_MEM_OP_NO_DUMMY,
-					  SPI_MEM_OP_DATA_OUT(len, NULL, 1));
+					  SPI_MEM_OP_DATA_OUT(len, NULL, 0));
+
+	spi_nor_setup_op(nor, &op, nor->reg_proto);
+
+	if (len == 0)
+		op.data.dir = SPI_MEM_NO_DATA;
 
 	return spi_nor_read_write_reg(nor, &op, buf);
 }
@@ -79,21 +319,19 @@
 				 u_char *buf)
 {
 	struct spi_mem_op op =
-			SPI_MEM_OP(SPI_MEM_OP_CMD(nor->read_opcode, 1),
-				   SPI_MEM_OP_ADDR(nor->addr_width, from, 1),
-				   SPI_MEM_OP_DUMMY(nor->read_dummy, 1),
-				   SPI_MEM_OP_DATA_IN(len, buf, 1));
+			SPI_MEM_OP(SPI_MEM_OP_CMD(nor->read_opcode, 0),
+				   SPI_MEM_OP_ADDR(nor->addr_width, from, 0),
+				   SPI_MEM_OP_DUMMY(nor->read_dummy, 0),
+				   SPI_MEM_OP_DATA_IN(len, buf, 0));
 	size_t remaining = len;
 	int ret;
 
-	/* get transfer protocols. */
-	op.cmd.buswidth = spi_nor_get_protocol_inst_nbits(nor->read_proto);
-	op.addr.buswidth = spi_nor_get_protocol_addr_nbits(nor->read_proto);
-	op.dummy.buswidth = op.addr.buswidth;
-	op.data.buswidth = spi_nor_get_protocol_data_nbits(nor->read_proto);
+	spi_nor_setup_op(nor, &op, nor->read_proto);
 
 	/* convert the dummy cycles to the number of bytes */
 	op.dummy.nbytes = (nor->read_dummy * op.dummy.buswidth) / 8;
+	if (spi_nor_protocol_is_dtr(nor->read_proto))
+		op.dummy.nbytes *= 2;
 
 	while (remaining) {
 		op.data.nbytes = remaining < UINT_MAX ? remaining : UINT_MAX;
@@ -117,20 +355,17 @@
 				  const u_char *buf)
 {
 	struct spi_mem_op op =
-			SPI_MEM_OP(SPI_MEM_OP_CMD(nor->program_opcode, 1),
-				   SPI_MEM_OP_ADDR(nor->addr_width, to, 1),
+			SPI_MEM_OP(SPI_MEM_OP_CMD(nor->program_opcode, 0),
+				   SPI_MEM_OP_ADDR(nor->addr_width, to, 0),
 				   SPI_MEM_OP_NO_DUMMY,
-				   SPI_MEM_OP_DATA_OUT(len, buf, 1));
+				   SPI_MEM_OP_DATA_OUT(len, buf, 0));
 	int ret;
 
-	/* get transfer protocols. */
-	op.cmd.buswidth = spi_nor_get_protocol_inst_nbits(nor->write_proto);
-	op.addr.buswidth = spi_nor_get_protocol_addr_nbits(nor->write_proto);
-	op.data.buswidth = spi_nor_get_protocol_data_nbits(nor->write_proto);
-
 	if (nor->program_opcode == SPINOR_OP_AAI_WP && nor->sst_write_second)
 		op.addr.nbytes = 0;
 
+	spi_nor_setup_op(nor, &op, nor->write_proto);
+
 	ret = spi_mem_adjust_op_size(nor->spi, &op);
 	if (ret)
 		return ret;
@@ -150,16 +385,40 @@
  */
 static int read_sr(struct spi_nor *nor)
 {
+	struct spi_mem_op op;
 	int ret;
-	u8 val;
+	u8 val[2];
+	u8 addr_nbytes, dummy;
+
+	if (nor->reg_proto == SNOR_PROTO_8_8_8_DTR) {
+		addr_nbytes = nor->rdsr_addr_nbytes;
+		dummy = nor->rdsr_dummy;
+	} else {
+		addr_nbytes = 0;
+		dummy = 0;
+	}
+
+	op = (struct spi_mem_op)SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDSR, 0),
+					   SPI_MEM_OP_ADDR(addr_nbytes, 0, 0),
+					   SPI_MEM_OP_DUMMY(dummy, 0),
+					   SPI_MEM_OP_DATA_IN(1, NULL, 0));
+
+	spi_nor_setup_op(nor, &op, nor->reg_proto);
 
-	ret = nor->read_reg(nor, SPINOR_OP_RDSR, &val, 1);
+	/*
+	 * We don't want to read only one byte in DTR mode. So, read 2 and then
+	 * discard the second byte.
+	 */
+	if (spi_nor_protocol_is_dtr(nor->reg_proto))
+		op.data.nbytes = 2;
+
+	ret = spi_nor_read_write_reg(nor, &op, val);
 	if (ret < 0) {
 		pr_debug("error %d reading SR\n", (int)ret);
 		return ret;
 	}
 
-	return val;
+	return *val;
 }
 
 /*
@@ -169,16 +428,40 @@
  */
 static int read_fsr(struct spi_nor *nor)
 {
+	struct spi_mem_op op;
 	int ret;
-	u8 val;
+	u8 val[2];
+	u8 addr_nbytes, dummy;
+
+	if (nor->reg_proto == SNOR_PROTO_8_8_8_DTR) {
+		addr_nbytes = nor->rdsr_addr_nbytes;
+		dummy = nor->rdsr_dummy;
+	} else {
+		addr_nbytes = 0;
+		dummy = 0;
+	}
+
+	op = (struct spi_mem_op)SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDFSR, 0),
+					   SPI_MEM_OP_ADDR(addr_nbytes, 0, 0),
+					   SPI_MEM_OP_DUMMY(dummy, 0),
+					   SPI_MEM_OP_DATA_IN(1, NULL, 0));
+
+	spi_nor_setup_op(nor, &op, nor->reg_proto);
+
+	/*
+	 * We don't want to read only one byte in DTR mode. So, read 2 and then
+	 * discard the second byte.
+	 */
+	if (spi_nor_protocol_is_dtr(nor->reg_proto))
+		op.data.nbytes = 2;
 
-	ret = nor->read_reg(nor, SPINOR_OP_RDFSR, &val, 1);
+	ret = spi_nor_read_write_reg(nor, &op, val);
 	if (ret < 0) {
 		pr_debug("error %d reading FSR\n", ret);
 		return ret;
 	}
 
-	return val;
+	return *val;
 }
 
 /*
@@ -523,15 +806,19 @@
 #endif
 
 /*
- * Initiate the erasure of a single sector
+ * Initiate the erasure of a single sector. Returns the number of bytes erased
+ * on success, a negative error code on error.
  */
 static int spi_nor_erase_sector(struct spi_nor *nor, u32 addr)
 {
 	struct spi_mem_op op =
-		SPI_MEM_OP(SPI_MEM_OP_CMD(nor->erase_opcode, 1),
-			   SPI_MEM_OP_ADDR(nor->addr_width, addr, 1),
+		SPI_MEM_OP(SPI_MEM_OP_CMD(nor->erase_opcode, 0),
+			   SPI_MEM_OP_ADDR(nor->addr_width, addr, 0),
 			   SPI_MEM_OP_NO_DUMMY,
 			   SPI_MEM_OP_NO_DATA);
+	int ret;
+
+	spi_nor_setup_op(nor, &op, nor->write_proto);
 
 	if (nor->erase)
 		return nor->erase(nor, addr);
@@ -540,7 +827,11 @@
 	 * Default implementation, if driver doesn't have a specialized HW
 	 * control
 	 */
-	return spi_mem_exec_op(nor->spi, &op);
+	ret = spi_mem_exec_op(nor->spi, &op);
+	if (ret)
+		return ret;
+
+	return nor->mtd.erasesize;
 }
 
 /*
@@ -576,11 +867,11 @@
 		write_enable(nor);
 
 		ret = spi_nor_erase_sector(nor, addr);
-		if (ret)
+		if (ret < 0)
 			goto erase_err;
 
-		addr += mtd->erasesize;
-		len -= mtd->erasesize;
+		addr += ret;
+		len -= ret;
 
 		ret = spi_nor_wait_till_ready(nor);
 		if (ret)
@@ -595,6 +886,67 @@
 
 	return ret;
 }
+
+#ifdef CONFIG_SPI_FLASH_S28HS512T
+/**
+ * spansion_erase_non_uniform() - erase non-uniform sectors for Spansion/Cypress
+ *                                chips
+ * @nor:	pointer to a 'struct spi_nor'
+ * @addr:	address of the sector to erase
+ * @opcode_4k:	opcode for 4K sector erase
+ * @ovlsz_top:	size of overlaid portion at the top address
+ * @ovlsz_btm:	size of overlaid portion at the bottom address
+ *
+ * Erase an address range on the nor chip that can contain 4KB sectors overlaid
+ * on top and/or bottom. The appropriate erase opcode and size are chosen by
+ * address to erase and size of overlaid portion.
+ *
+ * Return: number of bytes erased on success, -errno otherwise.
+ */
+static int spansion_erase_non_uniform(struct spi_nor *nor, u32 addr,
+				      u8 opcode_4k, u32 ovlsz_top,
+				      u32 ovlsz_btm)
+{
+	struct spi_mem_op op =
+		SPI_MEM_OP(SPI_MEM_OP_CMD(nor->erase_opcode, 0),
+			   SPI_MEM_OP_ADDR(nor->addr_width, addr, 0),
+			   SPI_MEM_OP_NO_DUMMY,
+			   SPI_MEM_OP_NO_DATA);
+	struct mtd_info *mtd = &nor->mtd;
+	u32 erasesize;
+	int ret;
+
+	/* 4KB sectors */
+	if (op.addr.val < ovlsz_btm ||
+	    op.addr.val >= mtd->size - ovlsz_top) {
+		op.cmd.opcode = opcode_4k;
+		erasesize = SZ_4K;
+
+	/* Non-overlaid portion in the normal sector at the bottom */
+	} else if (op.addr.val == ovlsz_btm) {
+		op.cmd.opcode = nor->erase_opcode;
+		erasesize = mtd->erasesize - ovlsz_btm;
+
+	/* Non-overlaid portion in the normal sector at the top */
+	} else if (op.addr.val == mtd->size - mtd->erasesize) {
+		op.cmd.opcode = nor->erase_opcode;
+		erasesize = mtd->erasesize - ovlsz_top;
+
+	/* Normal sectors */
+	} else {
+		op.cmd.opcode = nor->erase_opcode;
+		erasesize = mtd->erasesize;
+	}
+
+	spi_nor_setup_op(nor, &op, nor->write_proto);
+
+	ret = spi_mem_exec_op(nor->spi, &op);
+	if (ret)
+		return ret;
+
+	return erasesize;
+}
+#endif
 
 #if defined(CONFIG_SPI_FLASH_STMICRO) || defined(CONFIG_SPI_FLASH_SST)
 /* Write status register and ensure bits in mask match written values */
@@ -1451,71 +1803,6 @@
 #endif /* CONFIG_SPI_FLASH_SFDP_SUPPORT */
 #endif /* CONFIG_SPI_FLASH_SPANSION */
 
-struct spi_nor_read_command {
-	u8			num_mode_clocks;
-	u8			num_wait_states;
-	u8			opcode;
-	enum spi_nor_protocol	proto;
-};
-
-struct spi_nor_pp_command {
-	u8			opcode;
-	enum spi_nor_protocol	proto;
-};
-
-enum spi_nor_read_command_index {
-	SNOR_CMD_READ,
-	SNOR_CMD_READ_FAST,
-	SNOR_CMD_READ_1_1_1_DTR,
-
-	/* Dual SPI */
-	SNOR_CMD_READ_1_1_2,
-	SNOR_CMD_READ_1_2_2,
-	SNOR_CMD_READ_2_2_2,
-	SNOR_CMD_READ_1_2_2_DTR,
-
-	/* Quad SPI */
-	SNOR_CMD_READ_1_1_4,
-	SNOR_CMD_READ_1_4_4,
-	SNOR_CMD_READ_4_4_4,
-	SNOR_CMD_READ_1_4_4_DTR,
-
-	/* Octo SPI */
-	SNOR_CMD_READ_1_1_8,
-	SNOR_CMD_READ_1_8_8,
-	SNOR_CMD_READ_8_8_8,
-	SNOR_CMD_READ_1_8_8_DTR,
-
-	SNOR_CMD_READ_MAX
-};
-
-enum spi_nor_pp_command_index {
-	SNOR_CMD_PP,
-
-	/* Quad SPI */
-	SNOR_CMD_PP_1_1_4,
-	SNOR_CMD_PP_1_4_4,
-	SNOR_CMD_PP_4_4_4,
-
-	/* Octo SPI */
-	SNOR_CMD_PP_1_1_8,
-	SNOR_CMD_PP_1_8_8,
-	SNOR_CMD_PP_8_8_8,
-
-	SNOR_CMD_PP_MAX
-};
-
-struct spi_nor_flash_parameter {
-	u64				size;
-	u32				page_size;
-
-	struct spi_nor_hwcaps		hwcaps;
-	struct spi_nor_read_command	reads[SNOR_CMD_READ_MAX];
-	struct spi_nor_pp_command	page_programs[SNOR_CMD_PP_MAX];
-
-	int (*quad_enable)(struct spi_nor *nor);
-};
-
 static void
 spi_nor_set_read_settings(struct spi_nor_read_command *read,
 			  u8 num_mode_clocks,
@@ -1593,119 +1880,7 @@
 	return ret;
 }
 
-struct sfdp_parameter_header {
-	u8		id_lsb;
-	u8		minor;
-	u8		major;
-	u8		length; /* in double words */
-	u8		parameter_table_pointer[3]; /* byte address */
-	u8		id_msb;
-};
-
-#define SFDP_PARAM_HEADER_ID(p)	(((p)->id_msb << 8) | (p)->id_lsb)
-#define SFDP_PARAM_HEADER_PTP(p) \
-	(((p)->parameter_table_pointer[2] << 16) | \
-	 ((p)->parameter_table_pointer[1] <<  8) | \
-	 ((p)->parameter_table_pointer[0] <<  0))
-
-#define SFDP_BFPT_ID		0xff00	/* Basic Flash Parameter Table */
-#define SFDP_SECTOR_MAP_ID	0xff81	/* Sector Map Table */
-#define SFDP_SST_ID		0x01bf	/* Manufacturer specific Table */
-
-#define SFDP_SIGNATURE		0x50444653U
-#define SFDP_JESD216_MAJOR	1
-#define SFDP_JESD216_MINOR	0
-#define SFDP_JESD216A_MINOR	5
-#define SFDP_JESD216B_MINOR	6
-
-struct sfdp_header {
-	u32		signature; /* Ox50444653U <=> "SFDP" */
-	u8		minor;
-	u8		major;
-	u8		nph; /* 0-base number of parameter headers */
-	u8		unused;
-
-	/* Basic Flash Parameter Table. */
-	struct sfdp_parameter_header	bfpt_header;
-};
-
-/* Basic Flash Parameter Table */
-
-/*
- * JESD216 rev B defines a Basic Flash Parameter Table of 16 DWORDs.
- * They are indexed from 1 but C arrays are indexed from 0.
- */
-#define BFPT_DWORD(i)		((i) - 1)
-#define BFPT_DWORD_MAX		16
-
-/* The first version of JESB216 defined only 9 DWORDs. */
-#define BFPT_DWORD_MAX_JESD216			9
-
-/* 1st DWORD. */
-#define BFPT_DWORD1_FAST_READ_1_1_2		BIT(16)
-#define BFPT_DWORD1_ADDRESS_BYTES_MASK		GENMASK(18, 17)
-#define BFPT_DWORD1_ADDRESS_BYTES_3_ONLY	(0x0UL << 17)
-#define BFPT_DWORD1_ADDRESS_BYTES_3_OR_4	(0x1UL << 17)
-#define BFPT_DWORD1_ADDRESS_BYTES_4_ONLY	(0x2UL << 17)
-#define BFPT_DWORD1_DTR				BIT(19)
-#define BFPT_DWORD1_FAST_READ_1_2_2		BIT(20)
-#define BFPT_DWORD1_FAST_READ_1_4_4		BIT(21)
-#define BFPT_DWORD1_FAST_READ_1_1_4		BIT(22)
-
-/* 5th DWORD. */
-#define BFPT_DWORD5_FAST_READ_2_2_2		BIT(0)
-#define BFPT_DWORD5_FAST_READ_4_4_4		BIT(4)
-
-/* 11th DWORD. */
-#define BFPT_DWORD11_PAGE_SIZE_SHIFT		4
-#define BFPT_DWORD11_PAGE_SIZE_MASK		GENMASK(7, 4)
-
-/* 15th DWORD. */
-
-/*
- * (from JESD216 rev B)
- * Quad Enable Requirements (QER):
- * - 000b: Device does not have a QE bit. Device detects 1-1-4 and 1-4-4
- *         reads based on instruction. DQ3/HOLD# functions are hold during
- *         instruction phase.
- * - 001b: QE is bit 1 of status register 2. It is set via Write Status with
- *         two data bytes where bit 1 of the second byte is one.
- *         [...]
- *         Writing only one byte to the status register has the side-effect of
- *         clearing status register 2, including the QE bit. The 100b code is
- *         used if writing one byte to the status register does not modify
- *         status register 2.
- * - 010b: QE is bit 6 of status register 1. It is set via Write Status with
- *         one data byte where bit 6 is one.
- *         [...]
- * - 011b: QE is bit 7 of status register 2. It is set via Write status
- *         register 2 instruction 3Eh with one data byte where bit 7 is one.
- *         [...]
- *         The status register 2 is read using instruction 3Fh.
- * - 100b: QE is bit 1 of status register 2. It is set via Write Status with
- *         two data bytes where bit 1 of the second byte is one.
- *         [...]
- *         In contrast to the 001b code, writing one byte to the status
- *         register does not modify status register 2.
- * - 101b: QE is bit 1 of status register 2. Status register 1 is read using
- *         Read Status instruction 05h. Status register2 is read using
- *         instruction 35h. QE is set via Writ Status instruction 01h with
- *         two data bytes where bit 1 of the second byte is one.
- *         [...]
- */
-#define BFPT_DWORD15_QER_MASK			GENMASK(22, 20)
-#define BFPT_DWORD15_QER_NONE			(0x0UL << 20) /* Micron */
-#define BFPT_DWORD15_QER_SR2_BIT1_BUGGY		(0x1UL << 20)
-#define BFPT_DWORD15_QER_SR1_BIT6		(0x2UL << 20) /* Macronix */
-#define BFPT_DWORD15_QER_SR2_BIT7		(0x3UL << 20)
-#define BFPT_DWORD15_QER_SR2_BIT1_NO_RD		(0x4UL << 20)
-#define BFPT_DWORD15_QER_SR2_BIT1		(0x5UL << 20) /* Spansion */
-
-struct sfdp_bfpt {
-	u32	dwords[BFPT_DWORD_MAX];
-};
-
-/* Fast Read settings. */
+/* Fast Read settings. */
 
 static void
 spi_nor_set_read_settings_from_bfpt(struct spi_nor_read_command *read,
@@ -1816,6 +1991,18 @@
 
 static int spi_nor_hwcaps_read2cmd(u32 hwcaps);
 
+static int
+spi_nor_post_bfpt_fixups(struct spi_nor *nor,
+			 const struct sfdp_parameter_header *bfpt_header,
+			 const struct sfdp_bfpt *bfpt,
+			 struct spi_nor_flash_parameter *params)
+{
+	if (nor->fixups && nor->fixups->post_bfpt)
+		return nor->fixups->post_bfpt(nor, bfpt_header, bfpt, params);
+
+	return 0;
+}
+
 /**
  * spi_nor_parse_bfpt() - read and parse the Basic Flash Parameter Table.
  * @nor:		pointer to a 'struct spi_nor'
@@ -1953,8 +2140,9 @@
 	}
 
 	/* Stop here if not JESD216 rev A or later. */
-	if (bfpt_header->length < BFPT_DWORD_MAX)
-		return 0;
+	if (bfpt_header->length == BFPT_DWORD_MAX_JESD216)
+		return spi_nor_post_bfpt_fixups(nor, bfpt_header, &bfpt,
+						params);
 
 	/* Page size: this field specifies 'N' so the page size = 2^N bytes. */
 	params->page_size = bfpt.dwords[BFPT_DWORD(11)];
@@ -1984,10 +2172,38 @@
 		break;
 #endif
 	default:
+		dev_dbg(nor->dev, "BFPT QER reserved value used\n");
+		break;
+	}
+
+	/* Soft Reset support. */
+	if (bfpt.dwords[BFPT_DWORD(16)] & BFPT_DWORD16_SOFT_RST)
+		nor->flags |= SNOR_F_SOFT_RESET;
+
+	/* Stop here if JESD216 rev B. */
+	if (bfpt_header->length == BFPT_DWORD_MAX_JESD216B)
+		return spi_nor_post_bfpt_fixups(nor, bfpt_header, &bfpt,
+						params);
+
+	/* 8D-8D-8D command extension. */
+	switch (bfpt.dwords[BFPT_DWORD(18)] & BFPT_DWORD18_CMD_EXT_MASK) {
+	case BFPT_DWORD18_CMD_EXT_REP:
+		nor->cmd_ext_type = SPI_NOR_EXT_REPEAT;
+		break;
+
+	case BFPT_DWORD18_CMD_EXT_INV:
+		nor->cmd_ext_type = SPI_NOR_EXT_INVERT;
+		break;
+
+	case BFPT_DWORD18_CMD_EXT_RES:
 		return -EINVAL;
+
+	case BFPT_DWORD18_CMD_EXT_16B:
+		dev_err(nor->dev, "16-bit opcodes not supported\n");
+		return -ENOTSUPP;
 	}
 
-	return 0;
+	return spi_nor_post_bfpt_fixups(nor, bfpt_header, &bfpt, params);
 }
 
 /**
@@ -2015,6 +2231,86 @@
 
 	ret = spi_nor_read_sfdp(nor, addr, size, nor->manufacturer_sfdp);
 
+	return ret;
+}
+
+/**
+ * spi_nor_parse_profile1() - parse the xSPI Profile 1.0 table
+ * @nor:		pointer to a 'struct spi_nor'
+ * @profile1_header:	pointer to the 'struct sfdp_parameter_header' describing
+ *			the 4-Byte Address Instruction Table length and version.
+ * @params:		pointer to the 'struct spi_nor_flash_parameter' to be.
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
+static int spi_nor_parse_profile1(struct spi_nor *nor,
+				  const struct sfdp_parameter_header *profile1_header,
+				  struct spi_nor_flash_parameter *params)
+{
+	u32 *table, opcode, addr;
+	size_t len;
+	int ret, i;
+	u8 dummy;
+
+	len = profile1_header->length * sizeof(*table);
+	table = kmalloc(len, GFP_KERNEL);
+	if (!table)
+		return -ENOMEM;
+
+	addr = SFDP_PARAM_HEADER_PTP(profile1_header);
+	ret = spi_nor_read_sfdp(nor, addr, len, table);
+	if (ret)
+		goto out;
+
+	/* Fix endianness of the table DWORDs. */
+	for (i = 0; i < profile1_header->length; i++)
+		table[i] = le32_to_cpu(table[i]);
+
+	/* Get 8D-8D-8D fast read opcode and dummy cycles. */
+	opcode = FIELD_GET(PROFILE1_DWORD1_RD_FAST_CMD, table[0]);
+
+	/*
+	 * We don't know what speed the controller is running at. Find the
+	 * dummy cycles for the fastest frequency the flash can run at to be
+	 * sure we are never short of dummy cycles. A value of 0 means the
+	 * frequency is not supported.
+	 *
+	 * Default to PROFILE1_DUMMY_DEFAULT if we don't find anything, and let
+	 * flashes set the correct value if needed in their fixup hooks.
+	 */
+	dummy = FIELD_GET(PROFILE1_DWORD4_DUMMY_200MHZ, table[3]);
+	if (!dummy)
+		dummy = FIELD_GET(PROFILE1_DWORD5_DUMMY_166MHZ, table[4]);
+	if (!dummy)
+		dummy = FIELD_GET(PROFILE1_DWORD5_DUMMY_133MHZ, table[4]);
+	if (!dummy)
+		dummy = FIELD_GET(PROFILE1_DWORD5_DUMMY_100MHZ, table[4]);
+	if (!dummy)
+		dummy = PROFILE1_DUMMY_DEFAULT;
+
+	/* Round up to an even value to avoid tripping controllers up. */
+	dummy = ROUND_UP_TO(dummy, 2);
+
+	/* Update the fast read settings. */
+	spi_nor_set_read_settings(&params->reads[SNOR_CMD_READ_8_8_8_DTR],
+				  0, dummy, opcode,
+				  SNOR_PROTO_8_8_8_DTR);
+
+	/*
+	 * Set the Read Status Register dummy cycles and dummy address bytes.
+	 */
+	if (table[0] & PROFILE1_DWORD1_RDSR_DUMMY)
+		params->rdsr_dummy = 8;
+	else
+		params->rdsr_dummy = 4;
+
+	if (table[0] & PROFILE1_DWORD1_RDSR_ADDR_BYTES)
+		params->rdsr_addr_nbytes = 4;
+	else
+		params->rdsr_addr_nbytes = 0;
+
+out:
+	kfree(table);
 	return ret;
 }
 
@@ -2120,6 +2416,10 @@
 			err = spi_nor_parse_microchip_sfdp(nor, param_header);
 			break;
 
+		case SFDP_PROFILE1_ID:
+			err = spi_nor_parse_profile1(nor, param_header, params);
+			break;
+
 		default:
 			break;
 		}
@@ -2150,6 +2450,29 @@
 }
 #endif /* SPI_FLASH_SFDP_SUPPORT */
 
+/**
+ * spi_nor_post_sfdp_fixups() - Updates the flash's parameters and settings
+ * after SFDP has been parsed (is also called for SPI NORs that do not
+ * support RDSFDP).
+ * @nor:	pointer to a 'struct spi_nor'
+ *
+ * Typically used to tweak various parameters that could not be extracted by
+ * other means (i.e. when information provided by the SFDP/flash_info tables
+ * are incomplete or wrong).
+ */
+static void spi_nor_post_sfdp_fixups(struct spi_nor *nor,
+				     struct spi_nor_flash_parameter *params)
+{
+	if (nor->fixups && nor->fixups->post_sfdp)
+		nor->fixups->post_sfdp(nor, params);
+}
+
+static void spi_nor_default_init_fixups(struct spi_nor *nor)
+{
+	if (nor->fixups && nor->fixups->default_init)
+		nor->fixups->default_init(nor);
+}
+
 static int spi_nor_init_params(struct spi_nor *nor,
 			       const struct flash_info *info,
 			       struct spi_nor_flash_parameter *params)
@@ -2195,11 +2518,25 @@
 					  SNOR_PROTO_1_1_8);
 	}
 
+	if (info->flags & SPI_NOR_OCTAL_DTR_READ) {
+		params->hwcaps.mask |= SNOR_HWCAPS_READ_8_8_8_DTR;
+		spi_nor_set_read_settings(&params->reads[SNOR_CMD_READ_8_8_8_DTR],
+					  0, 20, SPINOR_OP_READ_FAST,
+					  SNOR_PROTO_8_8_8_DTR);
+	}
+
 	/* Page Program settings. */
 	params->hwcaps.mask |= SNOR_HWCAPS_PP;
 	spi_nor_set_pp_settings(&params->page_programs[SNOR_CMD_PP],
 				SPINOR_OP_PP, SNOR_PROTO_1_1_1);
 
+	/*
+	 * Since xSPI Page Program opcode is backward compatible with
+	 * Legacy SPI, use Legacy SPI opcode there as well.
+	 */
+	spi_nor_set_pp_settings(&params->page_programs[SNOR_CMD_PP_8_8_8_DTR],
+				SPINOR_OP_PP, SNOR_PROTO_8_8_8_DTR);
+
 	if (info->flags & SPI_NOR_QUAD_READ) {
 		params->hwcaps.mask |= SNOR_HWCAPS_PP_1_1_4;
 		spi_nor_set_pp_settings(&params->page_programs[SNOR_CMD_PP_1_1_4],
@@ -2229,10 +2566,13 @@
 		}
 	}
 
+	spi_nor_default_init_fixups(nor);
+
 	/* Override the parameters with data read from SFDP tables. */
 	nor->addr_width = 0;
 	nor->mtd.erasesize = 0;
-	if ((info->flags & (SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ)) &&
+	if ((info->flags & (SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ |
+	     SPI_NOR_OCTAL_DTR_READ)) &&
 	    !(info->flags & SPI_NOR_SKIP_SFDP)) {
 		struct spi_nor_flash_parameter sfdp_params;
 
@@ -2245,6 +2585,8 @@
 		}
 	}
 
+	spi_nor_post_sfdp_fixups(nor, params);
+
 	return 0;
 }
 
@@ -2277,6 +2619,7 @@
 		{ SNOR_HWCAPS_READ_1_8_8,	SNOR_CMD_READ_1_8_8 },
 		{ SNOR_HWCAPS_READ_8_8_8,	SNOR_CMD_READ_8_8_8 },
 		{ SNOR_HWCAPS_READ_1_8_8_DTR,	SNOR_CMD_READ_1_8_8_DTR },
+		{ SNOR_HWCAPS_READ_8_8_8_DTR,	SNOR_CMD_READ_8_8_8_DTR },
 	};
 
 	return spi_nor_hwcaps2cmd(hwcaps, hwcaps_read2cmd,
@@ -2293,11 +2636,202 @@
 		{ SNOR_HWCAPS_PP_1_1_8,		SNOR_CMD_PP_1_1_8 },
 		{ SNOR_HWCAPS_PP_1_8_8,		SNOR_CMD_PP_1_8_8 },
 		{ SNOR_HWCAPS_PP_8_8_8,		SNOR_CMD_PP_8_8_8 },
+		{ SNOR_HWCAPS_PP_8_8_8_DTR,	SNOR_CMD_PP_8_8_8_DTR },
 	};
 
 	return spi_nor_hwcaps2cmd(hwcaps, hwcaps_pp2cmd,
 				  ARRAY_SIZE(hwcaps_pp2cmd));
 }
+
+#ifdef CONFIG_SPI_FLASH_SMART_HWCAPS
+/**
+ * spi_nor_check_op - check if the operation is supported by controller
+ * @nor:        pointer to a 'struct spi_nor'
+ * @op:         pointer to op template to be checked
+ *
+ * Returns 0 if operation is supported, -ENOTSUPP otherwise.
+ */
+static int spi_nor_check_op(struct spi_nor *nor,
+			    struct spi_mem_op *op)
+{
+	/*
+	 * First test with 4 address bytes. The opcode itself might be a 3B
+	 * addressing opcode but we don't care, because SPI controller
+	 * implementation should not check the opcode, but just the sequence.
+	 */
+	op->addr.nbytes = 4;
+	if (!spi_mem_supports_op(nor->spi, op)) {
+		if (nor->mtd.size > SZ_16M)
+			return -ENOTSUPP;
+
+		/* If flash size <= 16MB, 3 address bytes are sufficient */
+		op->addr.nbytes = 3;
+		if (!spi_mem_supports_op(nor->spi, op))
+			return -ENOTSUPP;
+	}
+
+	return 0;
+}
+
+/**
+ * spi_nor_check_readop - check if the read op is supported by controller
+ * @nor:         pointer to a 'struct spi_nor'
+ * @read:        pointer to op template to be checked
+ *
+ * Returns 0 if operation is supported, -ENOTSUPP otherwise.
+ */
+static int spi_nor_check_readop(struct spi_nor *nor,
+				const struct spi_nor_read_command *read)
+{
+	struct spi_mem_op op = SPI_MEM_OP(SPI_MEM_OP_CMD(read->opcode, 0),
+					  SPI_MEM_OP_ADDR(3, 0, 0),
+					  SPI_MEM_OP_DUMMY(1, 0),
+					  SPI_MEM_OP_DATA_IN(2, NULL, 0));
+
+	spi_nor_setup_op(nor, &op, read->proto);
+
+	op.dummy.nbytes = (read->num_mode_clocks + read->num_wait_states) *
+			  op.dummy.buswidth / 8;
+	if (spi_nor_protocol_is_dtr(nor->read_proto))
+		op.dummy.nbytes *= 2;
+
+	return spi_nor_check_op(nor, &op);
+}
+
+/**
+ * spi_nor_check_pp - check if the page program op is supported by controller
+ * @nor:         pointer to a 'struct spi_nor'
+ * @pp:          pointer to op template to be checked
+ *
+ * Returns 0 if operation is supported, -ENOTSUPP otherwise.
+ */
+static int spi_nor_check_pp(struct spi_nor *nor,
+			    const struct spi_nor_pp_command *pp)
+{
+	struct spi_mem_op op = SPI_MEM_OP(SPI_MEM_OP_CMD(pp->opcode, 0),
+					  SPI_MEM_OP_ADDR(3, 0, 0),
+					  SPI_MEM_OP_NO_DUMMY,
+					  SPI_MEM_OP_DATA_OUT(2, NULL, 0));
+
+	spi_nor_setup_op(nor, &op, pp->proto);
+
+	return spi_nor_check_op(nor, &op);
+}
+
+/**
+ * spi_nor_adjust_hwcaps - Find optimal Read/Write protocol based on SPI
+ *                         controller capabilities
+ * @nor:        pointer to a 'struct spi_nor'
+ * @params:     pointer to the 'struct spi_nor_flash_parameter'
+ *              representing SPI NOR flash capabilities
+ * @hwcaps:     pointer to resulting capabilities after adjusting
+ *              according to controller and flash's capability
+ *
+ * Discard caps based on what the SPI controller actually supports (using
+ * spi_mem_supports_op()).
+ */
+static void
+spi_nor_adjust_hwcaps(struct spi_nor *nor,
+		      const struct spi_nor_flash_parameter *params,
+		      u32 *hwcaps)
+{
+	unsigned int cap;
+
+	/*
+	 * Enable all caps by default. We will mask them after checking what's
+	 * really supported using spi_mem_supports_op().
+	 */
+	*hwcaps = SNOR_HWCAPS_ALL;
+
+	/* X-X-X modes are not supported yet, mask them all. */
+	*hwcaps &= ~SNOR_HWCAPS_X_X_X;
+
+	/*
+	 * If the reset line is broken, we do not want to enter a stateful
+	 * mode.
+	 */
+	if (nor->flags & SNOR_F_BROKEN_RESET)
+		*hwcaps &= ~(SNOR_HWCAPS_X_X_X | SNOR_HWCAPS_X_X_X_DTR);
+
+	for (cap = 0; cap < sizeof(*hwcaps) * BITS_PER_BYTE; cap++) {
+		int rdidx, ppidx;
+
+		if (!(*hwcaps & BIT(cap)))
+			continue;
+
+		rdidx = spi_nor_hwcaps_read2cmd(BIT(cap));
+		if (rdidx >= 0 &&
+		    spi_nor_check_readop(nor, &params->reads[rdidx]))
+			*hwcaps &= ~BIT(cap);
+
+		ppidx = spi_nor_hwcaps_pp2cmd(BIT(cap));
+		if (ppidx < 0)
+			continue;
+
+		if (spi_nor_check_pp(nor, &params->page_programs[ppidx]))
+			*hwcaps &= ~BIT(cap);
+	}
+}
+#else
+/**
+ * spi_nor_adjust_hwcaps - Find optimal Read/Write protocol based on SPI
+ *                         controller capabilities
+ * @nor:        pointer to a 'struct spi_nor'
+ * @params:     pointer to the 'struct spi_nor_flash_parameter'
+ *              representing SPI NOR flash capabilities
+ * @hwcaps:     pointer to resulting capabilities after adjusting
+ *              according to controller and flash's capability
+ *
+ * Select caps based on what the SPI controller and SPI flash both support.
+ */
+static void
+spi_nor_adjust_hwcaps(struct spi_nor *nor,
+		      const struct spi_nor_flash_parameter *params,
+		      u32 *hwcaps)
+{
+	struct spi_slave *spi = nor->spi;
+	u32 ignored_mask = (SNOR_HWCAPS_READ_2_2_2 |
+			    SNOR_HWCAPS_READ_4_4_4 |
+			    SNOR_HWCAPS_READ_8_8_8 |
+			    SNOR_HWCAPS_PP_4_4_4   |
+			    SNOR_HWCAPS_PP_8_8_8);
+	u32 spi_hwcaps = (SNOR_HWCAPS_READ | SNOR_HWCAPS_READ_FAST |
+			  SNOR_HWCAPS_PP);
+
+	/* Get the hardware capabilities the SPI controller supports. */
+	if (spi->mode & SPI_RX_OCTAL) {
+		spi_hwcaps |= SNOR_HWCAPS_READ_1_1_8;
+
+		if (spi->mode & SPI_TX_OCTAL)
+			spi_hwcaps |= (SNOR_HWCAPS_READ_1_8_8 |
+					SNOR_HWCAPS_PP_1_1_8 |
+					SNOR_HWCAPS_PP_1_8_8);
+	} else if (spi->mode & SPI_RX_QUAD) {
+		spi_hwcaps |= SNOR_HWCAPS_READ_1_1_4;
+
+		if (spi->mode & SPI_TX_QUAD)
+			spi_hwcaps |= (SNOR_HWCAPS_READ_1_4_4 |
+					SNOR_HWCAPS_PP_1_1_4 |
+					SNOR_HWCAPS_PP_1_4_4);
+	} else if (spi->mode & SPI_RX_DUAL) {
+		spi_hwcaps |= SNOR_HWCAPS_READ_1_1_2;
+
+		if (spi->mode & SPI_TX_DUAL)
+			spi_hwcaps |= SNOR_HWCAPS_READ_1_2_2;
+	}
+
+	/*
+	 * Keep only the hardware capabilities supported by both the SPI
+	 * controller and the SPI flash memory.
+	 */
+	*hwcaps = spi_hwcaps & params->hwcaps.mask;
+	if (*hwcaps & ignored_mask) {
+		dev_dbg(nor->dev,
+			"SPI n-n-n protocols are not supported yet.\n");
+		*hwcaps &= ~ignored_mask;
+	}
+}
+#endif /* CONFIG_SPI_FLASH_SMART_HWCAPS */
 
 static int spi_nor_select_read(struct spi_nor *nor,
 			       const struct spi_nor_flash_parameter *params,
@@ -2377,31 +2911,15 @@
 	return 0;
 }
 
-static int spi_nor_setup(struct spi_nor *nor, const struct flash_info *info,
-			 const struct spi_nor_flash_parameter *params,
-			 const struct spi_nor_hwcaps *hwcaps)
+static int spi_nor_default_setup(struct spi_nor *nor,
+				 const struct flash_info *info,
+				 const struct spi_nor_flash_parameter *params)
 {
-	u32 ignored_mask, shared_mask;
+	u32 shared_mask;
 	bool enable_quad_io;
 	int err;
 
-	/*
-	 * Keep only the hardware capabilities supported by both the SPI
-	 * controller and the SPI flash memory.
-	 */
-	shared_mask = hwcaps->mask & params->hwcaps.mask;
-
-	/* SPI n-n-n protocols are not supported yet. */
-	ignored_mask = (SNOR_HWCAPS_READ_2_2_2 |
-			SNOR_HWCAPS_READ_4_4_4 |
-			SNOR_HWCAPS_READ_8_8_8 |
-			SNOR_HWCAPS_PP_4_4_4 |
-			SNOR_HWCAPS_PP_8_8_8);
-	if (shared_mask & ignored_mask) {
-		dev_dbg(nor->dev,
-			"SPI n-n-n protocols are not supported yet.\n");
-		shared_mask &= ~ignored_mask;
-	}
+	spi_nor_adjust_hwcaps(nor, params, &shared_mask);
 
 	/* Select the (Fast) Read command. */
 	err = spi_nor_select_read(nor, params, shared_mask);
@@ -2438,10 +2956,315 @@
 	return 0;
 }
 
+static int spi_nor_setup(struct spi_nor *nor, const struct flash_info *info,
+			 const struct spi_nor_flash_parameter *params)
+{
+	if (!nor->setup)
+		return 0;
+
+	return nor->setup(nor, info, params);
+}
+
+#ifdef CONFIG_SPI_FLASH_S28HS512T
+/**
+ * spi_nor_cypress_octal_dtr_enable() - Enable octal DTR on Cypress flashes.
+ * @nor:		pointer to a 'struct spi_nor'
+ *
+ * This also sets the memory access latency cycles to 24 to allow the flash to
+ * run at up to 200MHz.
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
+static int spi_nor_cypress_octal_dtr_enable(struct spi_nor *nor)
+{
+	struct spi_mem_op op;
+	u8 buf;
+	u8 addr_width = 3;
+	int ret;
+
+	/* Use 24 dummy cycles for memory array reads. */
+	ret = write_enable(nor);
+	if (ret)
+		return ret;
+
+	buf = SPINOR_REG_CYPRESS_CFR2V_MEMLAT_11_24;
+	op = (struct spi_mem_op)SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WR_ANY_REG, 1),
+			SPI_MEM_OP_ADDR(addr_width, SPINOR_REG_CYPRESS_CFR2V, 1),
+			SPI_MEM_OP_NO_DUMMY,
+			SPI_MEM_OP_DATA_OUT(1, &buf, 1));
+	ret = spi_mem_exec_op(nor->spi, &op);
+	if (ret) {
+		dev_warn(nor->dev,
+			 "failed to set default memory latency value: %d\n",
+			 ret);
+		return ret;
+	}
+	ret = spi_nor_wait_till_ready(nor);
+	if (ret)
+		return ret;
+
+	nor->read_dummy = 24;
+
+	/* Set the octal and DTR enable bits. */
+	ret = write_enable(nor);
+	if (ret)
+		return ret;
+
+	buf = SPINOR_REG_CYPRESS_CFR5V_OCT_DTR_EN;
+	op = (struct spi_mem_op)SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WR_ANY_REG, 1),
+			SPI_MEM_OP_ADDR(addr_width, SPINOR_REG_CYPRESS_CFR5V, 1),
+			SPI_MEM_OP_NO_DUMMY,
+			SPI_MEM_OP_DATA_OUT(1, &buf, 1));
+	ret = spi_mem_exec_op(nor->spi, &op);
+	if (ret) {
+		dev_warn(nor->dev, "Failed to enable octal DTR mode\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int s28hs512t_erase_non_uniform(struct spi_nor *nor, loff_t addr)
+{
+	/* Factory default configuration: 32 x 4 KiB sectors at bottom. */
+	return spansion_erase_non_uniform(nor, addr, SPINOR_OP_S28_SE_4K,
+					  0, SZ_128K);
+}
+
+static int s28hs512t_setup(struct spi_nor *nor, const struct flash_info *info,
+			   const struct spi_nor_flash_parameter *params)
+{
+	struct spi_mem_op op;
+	u8 buf;
+	u8 addr_width = 3;
+	int ret;
+
+	ret = spi_nor_wait_till_ready(nor);
+	if (ret)
+		return ret;
+
+	/*
+	 * Check CFR3V to check if non-uniform sector mode is selected. If it
+	 * is, set the erase hook to the non-uniform erase procedure.
+	 */
+	op = (struct spi_mem_op)
+		SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RD_ANY_REG, 1),
+			   SPI_MEM_OP_ADDR(addr_width,
+					   SPINOR_REG_CYPRESS_CFR3V, 1),
+			   SPI_MEM_OP_NO_DUMMY,
+			   SPI_MEM_OP_DATA_IN(1, &buf, 1));
+
+	ret = spi_mem_exec_op(nor->spi, &op);
+	if (ret)
+		return ret;
+
+	if (!(buf & SPINOR_REG_CYPRESS_CFR3V_UNISECT))
+		nor->erase = s28hs512t_erase_non_uniform;
+
+	return spi_nor_default_setup(nor, info, params);
+}
+
+static void s28hs512t_default_init(struct spi_nor *nor)
+{
+	nor->octal_dtr_enable = spi_nor_cypress_octal_dtr_enable;
+	nor->setup = s28hs512t_setup;
+}
+
+static void s28hs512t_post_sfdp_fixup(struct spi_nor *nor,
+				      struct spi_nor_flash_parameter *params)
+{
+	/*
+	 * On older versions of the flash the xSPI Profile 1.0 table has the
+	 * 8D-8D-8D Fast Read opcode as 0x00. But it actually should be 0xEE.
+	 */
+	if (params->reads[SNOR_CMD_READ_8_8_8_DTR].opcode == 0)
+		params->reads[SNOR_CMD_READ_8_8_8_DTR].opcode =
+			SPINOR_OP_CYPRESS_RD_FAST;
+
+	params->hwcaps.mask |= SNOR_HWCAPS_PP_8_8_8_DTR;
+
+	/* This flash is also missing the 4-byte Page Program opcode bit. */
+	spi_nor_set_pp_settings(&params->page_programs[SNOR_CMD_PP],
+				SPINOR_OP_PP_4B, SNOR_PROTO_1_1_1);
+	/*
+	 * Since xSPI Page Program opcode is backward compatible with
+	 * Legacy SPI, use Legacy SPI opcode there as well.
+	 */
+	spi_nor_set_pp_settings(&params->page_programs[SNOR_CMD_PP_8_8_8_DTR],
+				SPINOR_OP_PP_4B, SNOR_PROTO_8_8_8_DTR);
+
+	/*
+	 * The xSPI Profile 1.0 table advertises the number of additional
+	 * address bytes needed for Read Status Register command as 0 but the
+	 * actual value for that is 4.
+	 */
+	params->rdsr_addr_nbytes = 4;
+}
+
+static int s28hs512t_post_bfpt_fixup(struct spi_nor *nor,
+				     const struct sfdp_parameter_header *bfpt_header,
+				     const struct sfdp_bfpt *bfpt,
+				     struct spi_nor_flash_parameter *params)
+{
+	struct spi_mem_op op;
+	u8 buf;
+	u8 addr_width = 3;
+	int ret;
+
+	/*
+	 * The BFPT table advertises a 512B page size but the page size is
+	 * actually configurable (with the default being 256B). Read from
+	 * CFR3V[4] and set the correct size.
+	 */
+	op = (struct spi_mem_op)
+		SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RD_ANY_REG, 1),
+			   SPI_MEM_OP_ADDR(addr_width, SPINOR_REG_CYPRESS_CFR3V, 1),
+			   SPI_MEM_OP_NO_DUMMY,
+			   SPI_MEM_OP_DATA_IN(1, &buf, 1));
+	ret = spi_mem_exec_op(nor->spi, &op);
+	if (ret)
+		return ret;
+
+	if (buf & SPINOR_REG_CYPRESS_CFR3V_PGSZ)
+		params->page_size = 512;
+	else
+		params->page_size = 256;
+
+	/*
+	 * The BFPT advertises that it supports 4k erases, and the datasheet
+	 * says the same. But 4k erases did not work when testing. So, use 256k
+	 * erases for now.
+	 */
+	nor->erase_opcode = SPINOR_OP_SE_4B;
+	nor->mtd.erasesize = 0x40000;
+
+	return 0;
+}
+
+static struct spi_nor_fixups s28hs512t_fixups = {
+	.default_init = s28hs512t_default_init,
+	.post_sfdp = s28hs512t_post_sfdp_fixup,
+	.post_bfpt = s28hs512t_post_bfpt_fixup,
+};
+#endif /* CONFIG_SPI_FLASH_S28HS512T */
+
+#ifdef CONFIG_SPI_FLASH_MT35XU
+static int spi_nor_micron_octal_dtr_enable(struct spi_nor *nor)
+{
+	struct spi_mem_op op;
+	u8 buf;
+	u8 addr_width = 3;
+	int ret;
+
+	/* Set dummy cycles for Fast Read to the default of 20. */
+	ret = write_enable(nor);
+	if (ret)
+		return ret;
+
+	buf = 20;
+	op = (struct spi_mem_op)
+		SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_MT_WR_ANY_REG, 1),
+			   SPI_MEM_OP_ADDR(addr_width, SPINOR_REG_MT_CFR1V, 1),
+			   SPI_MEM_OP_NO_DUMMY,
+			   SPI_MEM_OP_DATA_OUT(1, &buf, 1));
+	ret = spi_mem_exec_op(nor->spi, &op);
+	if (ret)
+		return ret;
+
+	ret = spi_nor_wait_till_ready(nor);
+	if (ret)
+		return ret;
+
+	nor->read_dummy = 20;
+
+	ret = write_enable(nor);
+	if (ret)
+		return ret;
+
+	buf = SPINOR_MT_OCT_DTR;
+	op = (struct spi_mem_op)
+		SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_MT_WR_ANY_REG, 1),
+			   SPI_MEM_OP_ADDR(addr_width, SPINOR_REG_MT_CFR0V, 1),
+			   SPI_MEM_OP_NO_DUMMY,
+			   SPI_MEM_OP_DATA_OUT(1, &buf, 1));
+	ret = spi_mem_exec_op(nor->spi, &op);
+	if (ret) {
+		dev_err(nor->dev, "Failed to enable octal DTR mode\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static void mt35xu512aba_default_init(struct spi_nor *nor)
+{
+	nor->octal_dtr_enable = spi_nor_micron_octal_dtr_enable;
+}
+
+static void mt35xu512aba_post_sfdp_fixup(struct spi_nor *nor,
+					 struct spi_nor_flash_parameter *params)
+{
+	/* Set the Fast Read settings. */
+	params->hwcaps.mask |= SNOR_HWCAPS_READ_8_8_8_DTR;
+	spi_nor_set_read_settings(&params->reads[SNOR_CMD_READ_8_8_8_DTR],
+				  0, 20, SPINOR_OP_MT_DTR_RD,
+				  SNOR_PROTO_8_8_8_DTR);
+
+	params->hwcaps.mask |= SNOR_HWCAPS_PP_8_8_8_DTR;
+
+	nor->cmd_ext_type = SPI_NOR_EXT_REPEAT;
+	params->rdsr_dummy = 8;
+	params->rdsr_addr_nbytes = 0;
+
+	/*
+	 * The BFPT quad enable field is set to a reserved value so the quad
+	 * enable function is ignored by spi_nor_parse_bfpt(). Make sure we
+	 * disable it.
+	 */
+	params->quad_enable = NULL;
+}
+
+static struct spi_nor_fixups mt35xu512aba_fixups = {
+	.default_init = mt35xu512aba_default_init,
+	.post_sfdp = mt35xu512aba_post_sfdp_fixup,
+};
+#endif /* CONFIG_SPI_FLASH_MT35XU */
+
+/** spi_nor_octal_dtr_enable() - enable Octal DTR I/O if needed
+ * @nor:                 pointer to a 'struct spi_nor'
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
+static int spi_nor_octal_dtr_enable(struct spi_nor *nor)
+{
+	int ret;
+
+	if (!nor->octal_dtr_enable)
+		return 0;
+
+	if (!(nor->read_proto == SNOR_PROTO_8_8_8_DTR &&
+	      nor->write_proto == SNOR_PROTO_8_8_8_DTR))
+		return 0;
+
+	ret = nor->octal_dtr_enable(nor);
+	if (ret)
+		return ret;
+
+	nor->reg_proto = SNOR_PROTO_8_8_8_DTR;
+
+	return 0;
+}
+
 static int spi_nor_init(struct spi_nor *nor)
 {
 	int err;
 
+	err = spi_nor_octal_dtr_enable(nor);
+	if (err) {
+		dev_dbg(nor->dev, "Octal DTR mode not supported\n");
+		return err;
+	}
+
 	/*
 	 * Atmel, SST, Intel/Numonyx, and others serial NOR tend to power up
 	 * with the software protection bits set
@@ -2465,6 +3288,7 @@
 	}
 
 	if (nor->addr_width == 4 &&
+	    !(nor->info->flags & SPI_NOR_OCTAL_DTR_READ) &&
 	    (JEDEC_MFR(nor->info) != SNOR_MFR_SPANSION) &&
 	    !(nor->info->flags & SPI_NOR_4B_OPCODES)) {
 		/*
@@ -2482,16 +3306,89 @@
 	return 0;
 }
 
+#ifdef CONFIG_SPI_FLASH_SOFT_RESET
+/**
+ * spi_nor_soft_reset() - perform the JEDEC Software Reset sequence
+ * @nor:	the spi_nor structure
+ *
+ * This function can be used to switch from Octal DTR mode to legacy mode on a
+ * flash that supports it. The soft reset is executed in Octal DTR mode.
+ *
+ * Return: 0 for success, -errno for failure.
+ */
+static int spi_nor_soft_reset(struct spi_nor *nor)
+{
+	struct spi_mem_op op;
+	int ret;
+	enum spi_nor_cmd_ext ext;
+
+	ext = nor->cmd_ext_type;
+	nor->cmd_ext_type = SPI_NOR_EXT_REPEAT;
+
+	op = (struct spi_mem_op)SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_SRSTEN, 0),
+			SPI_MEM_OP_NO_DUMMY,
+			SPI_MEM_OP_NO_ADDR,
+			SPI_MEM_OP_NO_DATA);
+	spi_nor_setup_op(nor, &op, SNOR_PROTO_8_8_8_DTR);
+	ret = spi_mem_exec_op(nor->spi, &op);
+	if (ret) {
+		dev_warn(nor->dev, "Software reset enable failed: %d\n", ret);
+		goto out;
+	}
+
+	op = (struct spi_mem_op)SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_SRST, 0),
+			SPI_MEM_OP_NO_DUMMY,
+			SPI_MEM_OP_NO_ADDR,
+			SPI_MEM_OP_NO_DATA);
+	spi_nor_setup_op(nor, &op, SNOR_PROTO_8_8_8_DTR);
+	ret = spi_mem_exec_op(nor->spi, &op);
+	if (ret) {
+		dev_warn(nor->dev, "Software reset failed: %d\n", ret);
+		goto out;
+	}
+
+	/*
+	 * Software Reset is not instant, and the delay varies from flash to
+	 * flash. Looking at a few flashes, most range somewhere below 100
+	 * microseconds. So, wait for 200ms just to be sure.
+	 */
+	udelay(SPI_NOR_SRST_SLEEP_LEN);
+
+out:
+	nor->cmd_ext_type = ext;
+	return ret;
+}
+#endif /* CONFIG_SPI_FLASH_SOFT_RESET */
+
+int spi_nor_remove(struct spi_nor *nor)
+{
+#ifdef CONFIG_SPI_FLASH_SOFT_RESET
+	if (nor->info->flags & SPI_NOR_OCTAL_DTR_READ &&
+	    nor->flags & SNOR_F_SOFT_RESET)
+		return spi_nor_soft_reset(nor);
+#endif
+
+	return 0;
+}
+
+void spi_nor_set_fixups(struct spi_nor *nor)
+{
+#ifdef CONFIG_SPI_FLASH_S28HS512T
+	if (!strcmp(nor->info->name, "s28hs512t"))
+		nor->fixups = &s28hs512t_fixups;
+#endif
+
+#ifdef CONFIG_SPI_FLASH_MT35XU
+	if (!strcmp(nor->info->name, "mt35xu512aba"))
+		nor->fixups = &mt35xu512aba_fixups;
+#endif
+}
+
 int spi_nor_scan(struct spi_nor *nor)
 {
 	struct spi_nor_flash_parameter params;
 	const struct flash_info *info = NULL;
 	struct mtd_info *mtd = &nor->mtd;
-	struct spi_nor_hwcaps hwcaps = {
-		.mask = SNOR_HWCAPS_READ |
-			SNOR_HWCAPS_READ_FAST |
-			SNOR_HWCAPS_PP,
-	};
 	struct spi_slave *spi = nor->spi;
 	int ret;
 
@@ -2504,30 +3401,42 @@
 	nor->read_reg = spi_nor_read_reg;
 	nor->write_reg = spi_nor_write_reg;
 
-	if (spi->mode & SPI_RX_OCTAL) {
-		hwcaps.mask |= SNOR_HWCAPS_READ_1_1_8;
-
-		if (spi->mode & SPI_TX_OCTAL)
-			hwcaps.mask |= (SNOR_HWCAPS_READ_1_8_8 |
-					SNOR_HWCAPS_PP_1_1_8 |
-					SNOR_HWCAPS_PP_1_8_8);
-	} else if (spi->mode & SPI_RX_QUAD) {
-		hwcaps.mask |= SNOR_HWCAPS_READ_1_1_4;
+	nor->setup = spi_nor_default_setup;
 
-		if (spi->mode & SPI_TX_QUAD)
-			hwcaps.mask |= (SNOR_HWCAPS_READ_1_4_4 |
-					SNOR_HWCAPS_PP_1_1_4 |
-					SNOR_HWCAPS_PP_1_4_4);
-	} else if (spi->mode & SPI_RX_DUAL) {
-		hwcaps.mask |= SNOR_HWCAPS_READ_1_1_2;
-
-		if (spi->mode & SPI_TX_DUAL)
-			hwcaps.mask |= SNOR_HWCAPS_READ_1_2_2;
-	}
+#ifdef CONFIG_SPI_FLASH_SOFT_RESET_ON_BOOT
+	/*
+	 * When the flash is handed to us in a stateful mode like 8D-8D-8D, it
+	 * is difficult to detect the mode the flash is in. One option is to
+	 * read SFDP in all modes and see which one gives the correct "SFDP"
+	 * signature, but not all flashes support SFDP in 8D-8D-8D mode.
+	 *
+	 * Further, even if you detect the mode of the flash via SFDP, you
+	 * still have the problem of actually reading the ID. The Read ID
+	 * command is not standardized across flash vendors. Flashes can have
+	 * different dummy cycles needed for reading the ID. Some flashes even
+	 * expect a 4-byte dummy address with the Read ID command. All this
+	 * information cannot be obtained from the SFDP table.
+	 *
+	 * So, perform a Software Reset sequence before reading the ID and
+	 * initializing the flash. A Soft Reset will bring back the flash in
+	 * its default protocol mode assuming no non-volatile configuration was
+	 * set. This will let us detect the flash even if ROM hands it to us in
+	 * Octal DTR mode.
+	 *
+	 * To accommodate cases where there is more than one flash on a board,
+	 * and only one of them needs a soft reset, failure to reset is not
+	 * made fatal, and we still try to read ID if possible.
+	 */
+	spi_nor_soft_reset(nor);
+#endif /* CONFIG_SPI_FLASH_SOFT_RESET_ON_BOOT */
 
 	info = spi_nor_read_id(nor);
 	if (IS_ERR_OR_NULL(info))
 		return -ENOENT;
+	nor->info = info;
+
+	spi_nor_set_fixups(nor);
+
 	/* Parse the Serial Flash Discoverable Parameters table. */
 	ret = spi_nor_init_params(nor, info, &params);
 	if (ret)
@@ -2595,15 +3504,22 @@
 	 * - set the SPI protocols for register and memory accesses.
 	 * - set the Quad Enable bit if needed (required by SPI x-y-4 protos).
 	 */
-	ret = spi_nor_setup(nor, info, &params, &hwcaps);
+	ret = spi_nor_setup(nor, info, &params);
 	if (ret)
 		return ret;
 
-	if (nor->addr_width) {
+	if (spi_nor_protocol_is_dtr(nor->read_proto)) {
+		 /* Always use 4-byte addresses in DTR mode. */
+		nor->addr_width = 4;
+	} else if (nor->addr_width) {
 		/* already configured from SFDP */
 	} else if (info->addr_width) {
 		nor->addr_width = info->addr_width;
-	} else if (mtd->size > SZ_16M) {
+	} else {
+		nor->addr_width = 3;
+	}
+
+	if (nor->addr_width == 3 && mtd->size > SZ_16M) {
 #ifndef CONFIG_SPI_FLASH_BAR
 		/* enable 4-byte addressing if the device exceeds 16MiB */
 		nor->addr_width = 4;
@@ -2617,8 +3533,6 @@
 	if (ret < 0)
 		return ret;
 #endif
-	} else {
-		nor->addr_width = 3;
 	}
 
 	if (nor->addr_width > SPI_NOR_MAX_ADDR_WIDTH) {
@@ -2628,11 +3542,12 @@
 	}
 
 	/* Send all the required SPI flash commands to initialize device */
-	nor->info = info;
 	ret = spi_nor_init(nor);
 	if (ret)
 		return ret;
 
+	nor->rdsr_dummy = params.rdsr_dummy;
+	nor->rdsr_addr_nbytes = params.rdsr_addr_nbytes;
 	nor->name = mtd->name;
 	nor->size = mtd->size;
 	nor->erase_size = mtd->erasesize;
diff --git a/drivers/mtd/spi/spi-nor-ids.c b/drivers/mtd/spi/spi-nor-ids.c
index 8d2b73b..59f2d3e 100644
--- a/drivers/mtd/spi/spi-nor-ids.c
+++ b/drivers/mtd/spi/spi-nor-ids.c
@@ -193,7 +193,9 @@
 	{ INFO("n25q00a",     0x20bb21, 0, 64 * 1024, 2048, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ | NO_CHIP_ERASE) },
 	{ INFO("mt25ql01g",   0x21ba20, 0, 64 * 1024, 2048, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ | NO_CHIP_ERASE) },
 	{ INFO("mt25qu02g",   0x20bb22, 0, 64 * 1024, 4096, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ | NO_CHIP_ERASE) },
-	{ INFO("mt35xu512aba", 0x2c5b1a, 0,  128 * 1024,  512, USE_FSR | SPI_NOR_OCTAL_READ | SPI_NOR_4B_OPCODES) },
+#ifdef CONFIG_SPI_FLASH_MT35XU
+	{ INFO("mt35xu512aba", 0x2c5b1a, 0,  128 * 1024,  512, USE_FSR | SPI_NOR_OCTAL_READ | SPI_NOR_4B_OPCODES | SPI_NOR_OCTAL_DTR_READ) },
+#endif /* CONFIG_SPI_FLASH_MT35XU */
 	{ INFO("mt35xu02g",  0x2c5b1c, 0, 128 * 1024,  2048, USE_FSR | SPI_NOR_OCTAL_READ | SPI_NOR_4B_OPCODES) },
 #endif
 #ifdef CONFIG_SPI_FLASH_SPANSION	/* SPANSION */
@@ -223,6 +225,9 @@
 	{ INFO("s25fl208k",  0x014014,      0,  64 * 1024,  16, SECT_4K | SPI_NOR_DUAL_READ) },
 	{ INFO("s25fl064l",  0x016017,      0,  64 * 1024, 128, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ | SPI_NOR_4B_OPCODES) },
 	{ INFO("s25fl128l",  0x016018,      0,  64 * 1024, 256, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ | SPI_NOR_4B_OPCODES) },
+#ifdef CONFIG_SPI_FLASH_S28HS512T
+	{ INFO("s28hs512t",  0x345b1a,      0, 256 * 1024, 256, SPI_NOR_OCTAL_DTR_READ) },
+#endif
 #endif
 #ifdef CONFIG_SPI_FLASH_SST		/* SST */
 	/* SST -- large erase sizes are "overlays", "sectors" are 4K */
diff --git a/drivers/mtd/spi/spi-nor-tiny.c b/drivers/mtd/spi/spi-nor-tiny.c
index b0aa97d..70061f1 100644
--- a/drivers/mtd/spi/spi-nor-tiny.c
+++ b/drivers/mtd/spi/spi-nor-tiny.c
@@ -555,28 +555,6 @@
 }
 #endif /* CONFIG_SPI_FLASH_SPANSION */
 
-struct spi_nor_read_command {
-	u8			num_mode_clocks;
-	u8			num_wait_states;
-	u8			opcode;
-	enum spi_nor_protocol	proto;
-};
-
-enum spi_nor_read_command_index {
-	SNOR_CMD_READ,
-	SNOR_CMD_READ_FAST,
-
-	/* Quad SPI */
-	SNOR_CMD_READ_1_1_4,
-
-	SNOR_CMD_READ_MAX
-};
-
-struct spi_nor_flash_parameter {
-	struct spi_nor_hwcaps		hwcaps;
-	struct spi_nor_read_command	reads[SNOR_CMD_READ_MAX];
-};
-
 static void
 spi_nor_set_read_settings(struct spi_nor_read_command *read,
 			  u8 num_mode_clocks,
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 1494c91..e317d8a 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -255,6 +255,13 @@
 	  Enable the MXS SPI controller driver. This driver can be used
 	  on the i.MX23 and i.MX28 SoCs.
 
+config SPI_MXIC
+	bool "Macronix MX25F0A SPI controller"
+	help
+	  Enable the Macronix MX25F0A SPI controller driver. This driver
+	  can be used to access the SPI flash on platforms embedding
+	  this Macronix IP core.
+
 config NXP_FSPI
 	bool "NXP FlexSPI driver"
 	depends on SPI_MEM
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index cfe4fae..3dc8308 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -51,6 +51,7 @@
 obj-$(CONFIG_PIC32_SPI) += pic32_spi.o
 obj-$(CONFIG_PL022_SPI) += pl022_spi.o
 obj-$(CONFIG_SPI_QUP) += spi-qup.o
+obj-$(CONFIG_SPI_MXIC) += spi-mxic.o
 obj-$(CONFIG_RENESAS_RPC_SPI) += renesas_rpc_spi.o
 obj-$(CONFIG_ROCKCHIP_SPI) += rk_spi.o
 obj-$(CONFIG_SANDBOX_SPI) += sandbox_spi.o
diff --git a/drivers/spi/cadence_qspi.c b/drivers/spi/cadence_qspi.c
index 6798043..d1b3808 100644
--- a/drivers/spi/cadence_qspi.c
+++ b/drivers/spi/cadence_qspi.c
@@ -20,6 +20,8 @@
 #include <linux/sizes.h>
 #include "cadence_qspi.h"
 
+#define NSEC_PER_SEC			1000000000L
+
 #define CQSPI_STIG_READ			0
 #define CQSPI_STIG_WRITE		1
 #define CQSPI_READ			2
@@ -41,20 +43,22 @@
 	return 0;
 }
 
-static int cadence_spi_read_id(void *reg_base, u8 len, u8 *idcode)
+static int cadence_spi_read_id(struct cadence_spi_plat *plat, u8 len,
+			       u8 *idcode)
 {
 	struct spi_mem_op op = SPI_MEM_OP(SPI_MEM_OP_CMD(0x9F, 1),
 					  SPI_MEM_OP_NO_ADDR,
 					  SPI_MEM_OP_NO_DUMMY,
 					  SPI_MEM_OP_DATA_IN(len, idcode, 1));
 
-	return cadence_qspi_apb_command_read(reg_base, &op);
+	return cadence_qspi_apb_command_read(plat, &op);
 }
 
 /* Calibration sequence to determine the read data capture delay register */
 static int spi_calibration(struct udevice *bus, uint hz)
 {
 	struct cadence_spi_priv *priv = dev_get_priv(bus);
+	struct cadence_spi_plat *plat = dev_get_plat(bus);
 	void *base = priv->regbase;
 	unsigned int idcode = 0, temp = 0;
 	int err = 0, i, range_lo = -1, range_hi = -1;
@@ -69,7 +73,7 @@
 	cadence_qspi_apb_controller_enable(base);
 
 	/* read the ID which will be our golden value */
-	err = cadence_spi_read_id(base, 3, (u8 *)&idcode);
+	err = cadence_spi_read_id(plat, 3, (u8 *)&idcode);
 	if (err) {
 		puts("SF: Calibration failed (read)\n");
 		return err;
@@ -88,7 +92,7 @@
 		cadence_qspi_apb_controller_enable(base);
 
 		/* issue a RDID to get the ID value */
-		err = cadence_spi_read_id(base, 3, (u8 *)&temp);
+		err = cadence_spi_read_id(plat, 3, (u8 *)&temp);
 		if (err) {
 			puts("SF: Calibration failed (read)\n");
 			return err;
@@ -141,12 +145,20 @@
 	cadence_qspi_apb_controller_disable(priv->regbase);
 
 	/*
-	 * Calibration required for different current SCLK speed, requested
-	 * SCLK speed or chip select
+	 * If the device tree already provides a read delay value, use that
+	 * instead of calibrating.
 	 */
-	if (priv->previous_hz != hz ||
-	    priv->qspi_calibrated_hz != hz ||
-	    priv->qspi_calibrated_cs != spi_chip_select(bus)) {
+	if (plat->read_delay >= 0) {
+		cadence_spi_write_speed(bus, hz);
+		cadence_qspi_apb_readdata_capture(priv->regbase, 1,
+						  plat->read_delay);
+	} else if (priv->previous_hz != hz ||
+		   priv->qspi_calibrated_hz != hz ||
+		   priv->qspi_calibrated_cs != spi_chip_select(bus)) {
+		/*
+		 * Calibration required for different current SCLK speed,
+		 * requested SCLK speed or chip select
+		 */
 		err = spi_calibration(bus, hz);
 		if (err)
 			return err;
@@ -200,6 +212,8 @@
 		priv->qspi_is_init = 1;
 	}
 
+	plat->wr_delay = 50 * DIV_ROUND_UP(NSEC_PER_SEC, plat->ref_clk_hz);
+
 	return 0;
 }
 
@@ -259,10 +273,14 @@
 
 	switch (mode) {
 	case CQSPI_STIG_READ:
-		err = cadence_qspi_apb_command_read(base, op);
+		err = cadence_qspi_apb_command_read_setup(plat, op);
+		if (!err)
+			err = cadence_qspi_apb_command_read(plat, op);
 		break;
 	case CQSPI_STIG_WRITE:
-		err = cadence_qspi_apb_command_write(base, op);
+		err = cadence_qspi_apb_command_write_setup(plat, op);
+		if (!err)
+			err = cadence_qspi_apb_command_write(plat, op);
 		break;
 	case CQSPI_READ:
 		err = cadence_qspi_apb_read_setup(plat, op);
@@ -282,6 +300,26 @@
 	return err;
 }
 
+static bool cadence_spi_mem_supports_op(struct spi_slave *slave,
+					const struct spi_mem_op *op)
+{
+	bool all_true, all_false;
+
+	all_true = op->cmd.dtr && op->addr.dtr && op->dummy.dtr &&
+		   op->data.dtr;
+	all_false = !op->cmd.dtr && !op->addr.dtr && !op->dummy.dtr &&
+		    !op->data.dtr;
+
+	/* Mixed DTR modes not supported. */
+	if (!(all_true || all_false))
+		return false;
+
+	if (all_true)
+		return spi_mem_dtr_supports_op(slave, op);
+	else
+		return spi_mem_default_supports_op(slave, op);
+}
+
 static int cadence_spi_of_to_plat(struct udevice *bus)
 {
 	struct cadence_spi_plat *plat = dev_get_plat(bus);
@@ -320,6 +358,14 @@
 						 255);
 	plat->tchsh_ns = ofnode_read_u32_default(subnode, "cdns,tchsh-ns", 20);
 	plat->tslch_ns = ofnode_read_u32_default(subnode, "cdns,tslch-ns", 20);
+	/*
+	 * Read delay should be an unsigned value but we use a signed integer
+	 * so that negative values can indicate that the device tree did not
+	 * specify any signed values and we need to perform the calibration
+	 * sequence to find it out.
+	 */
+	plat->read_delay = ofnode_read_s32_default(subnode, "cdns,read-delay",
+						   -1);
 
 	debug("%s: regbase=%p ahbbase=%p max-frequency=%d page-size=%d\n",
 	      __func__, plat->regbase, plat->ahbbase, plat->max_hz,
@@ -330,6 +376,7 @@
 
 static const struct spi_controller_mem_ops cadence_spi_mem_ops = {
 	.exec_op = cadence_spi_mem_exec_op,
+	.supports_op = cadence_spi_mem_supports_op,
 };
 
 static const struct dm_spi_ops cadence_spi_ops = {
diff --git a/drivers/spi/cadence_qspi.h b/drivers/spi/cadence_qspi.h
index 64c5867..49b4011 100644
--- a/drivers/spi/cadence_qspi.h
+++ b/drivers/spi/cadence_qspi.h
@@ -26,6 +26,8 @@
 	u32		trigger_address;
 	fdt_addr_t	ahbsize;
 	bool		use_dac_mode;
+	int		read_delay;
+	u32		wr_delay;
 
 	/* Flash parameters */
 	u32		page_size;
@@ -34,6 +36,12 @@
 	u32		tsd2d_ns;
 	u32		tchsh_ns;
 	u32		tslch_ns;
+
+	/* Transaction protocol parameters. */
+	u8		inst_width;
+	u8		addr_width;
+	u8		data_width;
+	bool		dtr;
 };
 
 struct cadence_spi_priv {
@@ -57,9 +65,13 @@
 void cadence_qspi_apb_controller_disable(void *reg_base_addr);
 void cadence_qspi_apb_dac_mode_enable(void *reg_base);
 
-int cadence_qspi_apb_command_read(void *reg_base_addr,
+int cadence_qspi_apb_command_read_setup(struct cadence_spi_plat *plat,
+					const struct spi_mem_op *op);
+int cadence_qspi_apb_command_read(struct cadence_spi_plat *plat,
 				  const struct spi_mem_op *op);
-int cadence_qspi_apb_command_write(void *reg_base_addr,
+int cadence_qspi_apb_command_write_setup(struct cadence_spi_plat *plat,
+					 const struct spi_mem_op *op);
+int cadence_qspi_apb_command_write(struct cadence_spi_plat *plat,
 				   const struct spi_mem_op *op);
 
 int cadence_qspi_apb_read_setup(struct cadence_spi_plat *plat,
diff --git a/drivers/spi/cadence_qspi_apb.c b/drivers/spi/cadence_qspi_apb.c
index b051f46..c36a652 100644
--- a/drivers/spi/cadence_qspi_apb.c
+++ b/drivers/spi/cadence_qspi_apb.c
@@ -51,7 +51,7 @@
 #define CQSPI_STIG_DATA_LEN_MAX			8
 
 #define CQSPI_DUMMY_CLKS_PER_BYTE		8
-#define CQSPI_DUMMY_BYTES_MAX			4
+#define CQSPI_DUMMY_CLKS_MAX			31
 
 /****************************************************************************
  * Controller's configuration and status register (offset from QSPI_BASE)
@@ -65,6 +65,8 @@
 #define	CQSPI_REG_CONFIG_XIP_IMM		BIT(18)
 #define	CQSPI_REG_CONFIG_CHIPSELECT_LSB		10
 #define	CQSPI_REG_CONFIG_BAUD_LSB		19
+#define CQSPI_REG_CONFIG_DTR_PROTO		BIT(24)
+#define CQSPI_REG_CONFIG_DUAL_OPCODE		BIT(30)
 #define	CQSPI_REG_CONFIG_IDLE_LSB		31
 #define	CQSPI_REG_CONFIG_CHIPSELECT_MASK	0xF
 #define	CQSPI_REG_CONFIG_BAUD_MASK		0xF
@@ -83,6 +85,7 @@
 
 #define	CQSPI_REG_WR_INSTR			0x08
 #define	CQSPI_REG_WR_INSTR_OPCODE_LSB		0
+#define CQSPI_REG_WR_INSTR_TYPE_ADDR_LSB	12
 #define	CQSPI_REG_WR_INSTR_TYPE_DATA_LSB	16
 
 #define	CQSPI_REG_DELAY				0x0C
@@ -120,6 +123,9 @@
 #define	CQSPI_REG_SDRAMLEVEL_RD_MASK		0xFFFF
 #define	CQSPI_REG_SDRAMLEVEL_WR_MASK		0xFFFF
 
+#define CQSPI_REG_WR_COMPLETION_CTRL		0x38
+#define CQSPI_REG_WR_DISABLE_AUTO_POLL		BIT(14)
+
 #define	CQSPI_REG_IRQSTATUS			0x40
 #define	CQSPI_REG_IRQMASK			0x44
 
@@ -166,6 +172,11 @@
 #define	CQSPI_REG_CMDWRITEDATALOWER		0xA8
 #define	CQSPI_REG_CMDWRITEDATAUPPER		0xAC
 
+#define CQSPI_REG_OP_EXT_LOWER			0xE0
+#define CQSPI_REG_OP_EXT_READ_LSB		24
+#define CQSPI_REG_OP_EXT_WRITE_LSB		16
+#define CQSPI_REG_OP_EXT_STIG_LSB		0
+
 #define CQSPI_REG_IS_IDLE(base)					\
 	((readl(base + CQSPI_REG_CONFIG) >>		\
 		CQSPI_REG_CONFIG_IDLE_LSB) & 0x1)
@@ -203,6 +214,75 @@
 	writel(reg, reg_base + CQSPI_REG_CONFIG);
 }
 
+static unsigned int cadence_qspi_calc_dummy(const struct spi_mem_op *op,
+					    bool dtr)
+{
+	unsigned int dummy_clk;
+
+	dummy_clk = op->dummy.nbytes * (8 / op->dummy.buswidth);
+	if (dtr)
+		dummy_clk /= 2;
+
+	return dummy_clk;
+}
+
+static u32 cadence_qspi_calc_rdreg(struct cadence_spi_plat *plat)
+{
+	u32 rdreg = 0;
+
+	rdreg |= plat->inst_width << CQSPI_REG_RD_INSTR_TYPE_INSTR_LSB;
+	rdreg |= plat->addr_width << CQSPI_REG_RD_INSTR_TYPE_ADDR_LSB;
+	rdreg |= plat->data_width << CQSPI_REG_RD_INSTR_TYPE_DATA_LSB;
+
+	return rdreg;
+}
+
+static int cadence_qspi_buswidth_to_inst_type(u8 buswidth)
+{
+	switch (buswidth) {
+	case 0:
+	case 1:
+		return CQSPI_INST_TYPE_SINGLE;
+
+	case 2:
+		return CQSPI_INST_TYPE_DUAL;
+
+	case 4:
+		return CQSPI_INST_TYPE_QUAD;
+
+	case 8:
+		return CQSPI_INST_TYPE_OCTAL;
+
+	default:
+		return -ENOTSUPP;
+	}
+}
+
+static int cadence_qspi_set_protocol(struct cadence_spi_plat *plat,
+				     const struct spi_mem_op *op)
+{
+	int ret;
+
+	plat->dtr = op->data.dtr && op->cmd.dtr && op->addr.dtr;
+
+	ret = cadence_qspi_buswidth_to_inst_type(op->cmd.buswidth);
+	if (ret < 0)
+		return ret;
+	plat->inst_width = ret;
+
+	ret = cadence_qspi_buswidth_to_inst_type(op->addr.buswidth);
+	if (ret < 0)
+		return ret;
+	plat->addr_width = ret;
+
+	ret = cadence_qspi_buswidth_to_inst_type(op->data.buswidth);
+	if (ret < 0)
+		return ret;
+	plat->data_width = ret;
+
+	return 0;
+}
+
 /* Return 1 if idle, otherwise return 0 (busy). */
 static unsigned int cadence_qspi_wait_idle(void *reg_base)
 {
@@ -434,21 +514,109 @@
 	return 0;
 }
 
+static int cadence_qspi_setup_opcode_ext(struct cadence_spi_plat *plat,
+					 const struct spi_mem_op *op,
+					 unsigned int shift)
+{
+	unsigned int reg;
+	u8 ext;
+
+	if (op->cmd.nbytes != 2)
+		return -EINVAL;
+
+	/* Opcode extension is the LSB. */
+	ext = op->cmd.opcode & 0xff;
+
+	reg = readl(plat->regbase + CQSPI_REG_OP_EXT_LOWER);
+	reg &= ~(0xff << shift);
+	reg |= ext << shift;
+	writel(reg, plat->regbase + CQSPI_REG_OP_EXT_LOWER);
+
+	return 0;
+}
+
+static int cadence_qspi_enable_dtr(struct cadence_spi_plat *plat,
+				   const struct spi_mem_op *op,
+				   unsigned int shift,
+				   bool enable)
+{
+	unsigned int reg;
+	int ret;
+
+	reg = readl(plat->regbase + CQSPI_REG_CONFIG);
+
+	if (enable) {
+		reg |= CQSPI_REG_CONFIG_DTR_PROTO;
+		reg |= CQSPI_REG_CONFIG_DUAL_OPCODE;
+
+		/* Set up command opcode extension. */
+		ret = cadence_qspi_setup_opcode_ext(plat, op, shift);
+		if (ret)
+			return ret;
+	} else {
+		reg &= ~CQSPI_REG_CONFIG_DTR_PROTO;
+		reg &= ~CQSPI_REG_CONFIG_DUAL_OPCODE;
+	}
+
+	writel(reg, plat->regbase + CQSPI_REG_CONFIG);
+
+	return 0;
+}
+
+int cadence_qspi_apb_command_read_setup(struct cadence_spi_plat *plat,
+					const struct spi_mem_op *op)
+{
+	int ret;
+	unsigned int reg;
+
+	ret = cadence_qspi_set_protocol(plat, op);
+	if (ret)
+		return ret;
+
+	ret = cadence_qspi_enable_dtr(plat, op, CQSPI_REG_OP_EXT_STIG_LSB,
+				      plat->dtr);
+	if (ret)
+		return ret;
+
+	reg = cadence_qspi_calc_rdreg(plat);
+	writel(reg, plat->regbase + CQSPI_REG_RD_INSTR);
+
+	return 0;
+}
+
 /* For command RDID, RDSR. */
-int cadence_qspi_apb_command_read(void *reg_base, const struct spi_mem_op *op)
+int cadence_qspi_apb_command_read(struct cadence_spi_plat *plat,
+				  const struct spi_mem_op *op)
 {
+	void *reg_base = plat->regbase;
 	unsigned int reg;
 	unsigned int read_len;
 	int status;
 	unsigned int rxlen = op->data.nbytes;
 	void *rxbuf = op->data.buf.in;
+	unsigned int dummy_clk;
+	u8 opcode;
 
 	if (rxlen > CQSPI_STIG_DATA_LEN_MAX || !rxbuf) {
 		printf("QSPI: Invalid input arguments rxlen %u\n", rxlen);
 		return -EINVAL;
 	}
 
+	if (plat->dtr)
+		opcode = op->cmd.opcode >> 8;
+	else
+		opcode = op->cmd.opcode;
+
+	reg = opcode << CQSPI_REG_CMDCTRL_OPCODE_LSB;
+
-	reg = op->cmd.opcode << CQSPI_REG_CMDCTRL_OPCODE_LSB;
+	/* Set up dummy cycles. */
+	dummy_clk = cadence_qspi_calc_dummy(op, plat->dtr);
+	if (dummy_clk > CQSPI_DUMMY_CLKS_MAX)
+		return -ENOTSUPP;
+
+	if (dummy_clk)
+		reg |= (dummy_clk & CQSPI_REG_CMDCTRL_DUMMY_MASK)
+		     << CQSPI_REG_CMDCTRL_DUMMY_LSB;
 
 	reg |= (0x1 << CQSPI_REG_CMDCTRL_RD_EN_LSB);
 
@@ -475,15 +643,39 @@
 	return 0;
 }
 
+int cadence_qspi_apb_command_write_setup(struct cadence_spi_plat *plat,
+					 const struct spi_mem_op *op)
+{
+	int ret;
+	unsigned int reg;
+
+	ret = cadence_qspi_set_protocol(plat, op);
+	if (ret)
+		return ret;
+
+	ret = cadence_qspi_enable_dtr(plat, op, CQSPI_REG_OP_EXT_STIG_LSB,
+				      plat->dtr);
+	if (ret)
+		return ret;
+
+	reg = cadence_qspi_calc_rdreg(plat);
+	writel(reg, plat->regbase + CQSPI_REG_RD_INSTR);
+
+	return 0;
+}
+
 /* For commands: WRSR, WREN, WRDI, CHIP_ERASE, BE, etc. */
-int cadence_qspi_apb_command_write(void *reg_base, const struct spi_mem_op *op)
+int cadence_qspi_apb_command_write(struct cadence_spi_plat *plat,
+				   const struct spi_mem_op *op)
 {
 	unsigned int reg = 0;
 	unsigned int wr_data;
 	unsigned int wr_len;
 	unsigned int txlen = op->data.nbytes;
 	const void *txbuf = op->data.buf.out;
+	void *reg_base = plat->regbase;
 	u32 addr;
+	u8 opcode;
 
 	/* Reorder address to SPI bus order if only transferring address */
 	if (!txlen) {
@@ -499,7 +691,12 @@
 		return -EINVAL;
 	}
 
-	reg |= op->cmd.opcode << CQSPI_REG_CMDCTRL_OPCODE_LSB;
+	if (plat->dtr)
+		opcode = op->cmd.opcode >> 8;
+	else
+		opcode = op->cmd.opcode;
+
+	reg |= opcode << CQSPI_REG_CMDCTRL_OPCODE_LSB;
 
 	if (txlen) {
 		/* writing data = yes */
@@ -533,29 +730,39 @@
 	unsigned int rd_reg;
 	unsigned int dummy_clk;
 	unsigned int dummy_bytes = op->dummy.nbytes;
+	int ret;
+	u8 opcode;
+
+	ret = cadence_qspi_set_protocol(plat, op);
+	if (ret)
+		return ret;
+
+	ret = cadence_qspi_enable_dtr(plat, op, CQSPI_REG_OP_EXT_READ_LSB,
+				      plat->dtr);
+	if (ret)
+		return ret;
 
 	/* Setup the indirect trigger address */
 	writel(plat->trigger_address,
 	       plat->regbase + CQSPI_REG_INDIRECTTRIGGER);
 
 	/* Configure the opcode */
-	rd_reg = op->cmd.opcode << CQSPI_REG_RD_INSTR_OPCODE_LSB;
+	if (plat->dtr)
+		opcode = op->cmd.opcode >> 8;
+	else
+		opcode = op->cmd.opcode;
 
-	if (op->data.buswidth == 8)
-		/* Instruction and address at DQ0, data at DQ0-7. */
-		rd_reg |= CQSPI_INST_TYPE_OCTAL << CQSPI_REG_RD_INSTR_TYPE_DATA_LSB;
-	else if (op->data.buswidth == 4)
-		/* Instruction and address at DQ0, data at DQ0-3. */
-		rd_reg |= CQSPI_INST_TYPE_QUAD << CQSPI_REG_RD_INSTR_TYPE_DATA_LSB;
+	rd_reg = opcode << CQSPI_REG_RD_INSTR_OPCODE_LSB;
+	rd_reg |= cadence_qspi_calc_rdreg(plat);
 
 	writel(op->addr.val, plat->regbase + CQSPI_REG_INDIRECTRDSTARTADDR);
 
 	if (dummy_bytes) {
-		if (dummy_bytes > CQSPI_DUMMY_BYTES_MAX)
-			dummy_bytes = CQSPI_DUMMY_BYTES_MAX;
-
 		/* Convert to clock cycles. */
-		dummy_clk = dummy_bytes * CQSPI_DUMMY_CLKS_PER_BYTE;
+		dummy_clk = cadence_qspi_calc_dummy(op, plat->dtr);
+
+		if (dummy_clk > CQSPI_DUMMY_CLKS_MAX)
+			return -ENOTSUPP;
 
 		if (dummy_clk)
 			rd_reg |= (dummy_clk & CQSPI_REG_RD_INSTR_DUMMY_MASK)
@@ -682,17 +889,52 @@
 				 const struct spi_mem_op *op)
 {
 	unsigned int reg;
+	int ret;
+	u8 opcode;
+
+	ret = cadence_qspi_set_protocol(plat, op);
+	if (ret)
+		return ret;
+
+	ret = cadence_qspi_enable_dtr(plat, op, CQSPI_REG_OP_EXT_WRITE_LSB,
+				      plat->dtr);
+	if (ret)
+		return ret;
 
 	/* Setup the indirect trigger address */
 	writel(plat->trigger_address,
 	       plat->regbase + CQSPI_REG_INDIRECTTRIGGER);
 
 	/* Configure the opcode */
-	reg = op->cmd.opcode << CQSPI_REG_WR_INSTR_OPCODE_LSB;
+	if (plat->dtr)
+		opcode = op->cmd.opcode >> 8;
+	else
+		opcode = op->cmd.opcode;
+
+	reg = opcode << CQSPI_REG_WR_INSTR_OPCODE_LSB;
+	reg |= plat->data_width << CQSPI_REG_WR_INSTR_TYPE_DATA_LSB;
+	reg |= plat->addr_width << CQSPI_REG_WR_INSTR_TYPE_ADDR_LSB;
 	writel(reg, plat->regbase + CQSPI_REG_WR_INSTR);
 
+	reg = cadence_qspi_calc_rdreg(plat);
+	writel(reg, plat->regbase + CQSPI_REG_RD_INSTR);
+
 	writel(op->addr.val, plat->regbase + CQSPI_REG_INDIRECTWRSTARTADDR);
 
+	if (plat->dtr) {
+		/*
+		 * Some flashes like the cypress Semper flash expect a 4-byte
+		 * dummy address with the Read SR command in DTR mode, but this
+		 * controller does not support sending address with the Read SR
+		 * command. So, disable write completion polling on the
+		 * controller's side. spi-nor will take care of polling the
+		 * status register.
+		 */
+		reg = readl(plat->regbase + CQSPI_REG_WR_COMPLETION_CTRL);
+		reg |= CQSPI_REG_WR_DISABLE_AUTO_POLL;
+		writel(reg, plat->regbase + CQSPI_REG_WR_COMPLETION_CTRL);
+	}
+
 	reg = readl(plat->regbase + CQSPI_REG_SIZE);
 	reg &= ~CQSPI_REG_SIZE_ADDRESS_MASK;
 	reg |= (op->addr.nbytes - 1);
@@ -730,6 +972,12 @@
 	writel(CQSPI_REG_INDIRECTWR_START,
 	       plat->regbase + CQSPI_REG_INDIRECTWR);
 
+	/*
+	 * Some delay is required for the above bit to be internally
+	 * synchronized by the QSPI module.
+	 */
+	ndelay(plat->wr_delay);
+
 	while (remaining > 0) {
 		write_bytes = remaining > page_size ? page_size : remaining;
 		writesl(plat->ahbbase, bb_txbuf, write_bytes >> 2);
@@ -781,7 +1029,15 @@
 	const void *buf = op->data.buf.out;
 	size_t len = op->data.nbytes;
 
-	if (plat->use_dac_mode && (to + len < plat->ahbsize)) {
+	/*
+	 * Some flashes like the Cypress Semper flash expect a dummy 4-byte
+	 * address (all 0s) with the read status register command in DTR mode.
+	 * But this controller does not support sending dummy address bytes to
+	 * the flash when it is polling the write completion register in DTR
+	 * mode. So, we can not use direct mode when in DTR mode for writing
+	 * data.
+	 */
+	if (!plat->dtr && plat->use_dac_mode && (to + len < plat->ahbsize)) {
 		memcpy_toio(plat->ahbbase + to, buf, len);
 		if (!cadence_qspi_wait_idle(plat->regbase))
 			return -EIO;
diff --git a/drivers/spi/mtk_snfi_spi.c b/drivers/spi/mtk_snfi_spi.c
index b6ab5fa..65d0ce0 100644
--- a/drivers/spi/mtk_snfi_spi.c
+++ b/drivers/spi/mtk_snfi_spi.c
@@ -64,8 +64,7 @@
 	 * or the output+input data must not exceed the GPRAM size.
 	 */
 
-	nbytes = sizeof(op->cmd.opcode) + op->addr.nbytes +
-		op->dummy.nbytes;
+	nbytes = op->cmd.nbytes + op->addr.nbytes + op->dummy.nbytes;
 
 	if (nbytes + op->data.nbytes <= SNFI_GPRAM_SIZE)
 		return 0;
diff --git a/drivers/spi/spi-mem-nodm.c b/drivers/spi/spi-mem-nodm.c
index 765f05f..a228c80 100644
--- a/drivers/spi/spi-mem-nodm.c
+++ b/drivers/spi/spi-mem-nodm.c
@@ -27,7 +27,7 @@
 			tx_buf = op->data.buf.out;
 	}
 
-	op_len = sizeof(op->cmd.opcode) + op->addr.nbytes + op->dummy.nbytes;
+	op_len = op->cmd.nbytes + op->addr.nbytes + op->dummy.nbytes;
 	op_buf = calloc(1, op_len);
 
 	ret = spi_claim_bus(slave);
@@ -89,7 +89,7 @@
 {
 	unsigned int len;
 
-	len = sizeof(op->cmd.opcode) + op->addr.nbytes + op->dummy.nbytes;
+	len = op->cmd.nbytes + op->addr.nbytes + op->dummy.nbytes;
 	if (slave->max_write_size && len > slave->max_write_size)
 		return -EINVAL;
 
@@ -105,3 +105,65 @@
 
 	return 0;
 }
+
+static int spi_check_buswidth_req(struct spi_slave *slave, u8 buswidth, bool tx)
+{
+	u32 mode = slave->mode;
+
+	switch (buswidth) {
+	case 1:
+		return 0;
+
+	case 2:
+		if ((tx && (mode & (SPI_TX_DUAL | SPI_TX_QUAD))) ||
+		    (!tx && (mode & (SPI_RX_DUAL | SPI_RX_QUAD))))
+			return 0;
+
+		break;
+
+	case 4:
+		if ((tx && (mode & SPI_TX_QUAD)) ||
+		    (!tx && (mode & SPI_RX_QUAD)))
+			return 0;
+
+		break;
+	case 8:
+		if ((tx && (mode & SPI_TX_OCTAL)) ||
+		    (!tx && (mode & SPI_RX_OCTAL)))
+			return 0;
+
+		break;
+
+	default:
+		break;
+	}
+
+	return -ENOTSUPP;
+}
+
+bool spi_mem_supports_op(struct spi_slave *slave, const struct spi_mem_op *op)
+{
+	if (spi_check_buswidth_req(slave, op->cmd.buswidth, true))
+		return false;
+
+	if (op->addr.nbytes &&
+	    spi_check_buswidth_req(slave, op->addr.buswidth, true))
+		return false;
+
+	if (op->dummy.nbytes &&
+	    spi_check_buswidth_req(slave, op->dummy.buswidth, true))
+		return false;
+
+	if (op->data.nbytes &&
+	    spi_check_buswidth_req(slave, op->data.buswidth,
+				   op->data.dir == SPI_MEM_DATA_OUT))
+		return false;
+
+	if (op->cmd.dtr || op->addr.dtr || op->dummy.dtr || op->data.dtr)
+		return false;
+
+	if (op->cmd.nbytes != 1)
+		return false;
+
+	return true;
+}
diff --git a/drivers/spi/spi-mem.c b/drivers/spi/spi-mem.c
index c095ae9..9c1ede1 100644
--- a/drivers/spi/spi-mem.c
+++ b/drivers/spi/spi-mem.c
@@ -145,8 +145,8 @@
 	return -ENOTSUPP;
 }
 
-bool spi_mem_default_supports_op(struct spi_slave *slave,
-				 const struct spi_mem_op *op)
+static bool spi_mem_check_buswidth(struct spi_slave *slave,
+				   const struct spi_mem_op *op)
 {
 	if (spi_check_buswidth_req(slave, op->cmd.buswidth, true))
 		return false;
@@ -166,6 +166,38 @@
 
 	return true;
 }
+
+bool spi_mem_dtr_supports_op(struct spi_slave *slave,
+			     const struct spi_mem_op *op)
+{
+	if (op->cmd.buswidth == 8 && op->cmd.nbytes % 2)
+		return false;
+
+	if (op->addr.nbytes && op->addr.buswidth == 8 && op->addr.nbytes % 2)
+		return false;
+
+	if (op->dummy.nbytes && op->dummy.buswidth == 8 && op->dummy.nbytes % 2)
+		return false;
+
+	if (op->data.dir != SPI_MEM_NO_DATA &&
+	    op->dummy.buswidth == 8 && op->data.nbytes % 2)
+		return false;
+
+	return spi_mem_check_buswidth(slave, op);
+}
+EXPORT_SYMBOL_GPL(spi_mem_dtr_supports_op);
+
+bool spi_mem_default_supports_op(struct spi_slave *slave,
+				 const struct spi_mem_op *op)
+{
+	if (op->cmd.dtr || op->addr.dtr || op->dummy.dtr || op->data.dtr)
+		return false;
+
+	if (op->cmd.nbytes != 1)
+		return false;
+
+	return spi_mem_check_buswidth(slave, op);
+}
 EXPORT_SYMBOL_GPL(spi_mem_default_supports_op);
 
 /**
@@ -270,8 +302,7 @@
 	}
 
 #ifndef __UBOOT__
-	tmpbufsize = sizeof(op->cmd.opcode) + op->addr.nbytes +
-		     op->dummy.nbytes;
+	tmpbufsize = op->cmd.nbytes + op->addr.nbytes + op->dummy.nbytes;
 
 	/*
 	 * Allocate a buffer to transmit the CMD, ADDR cycles with kmalloc() so
@@ -286,7 +317,7 @@
 
 	tmpbuf[0] = op->cmd.opcode;
 	xfers[xferpos].tx_buf = tmpbuf;
-	xfers[xferpos].len = sizeof(op->cmd.opcode);
+	xfers[xferpos].len = op->cmd.nbytes;
 	xfers[xferpos].tx_nbits = op->cmd.buswidth;
 	spi_message_add_tail(&xfers[xferpos], &msg);
 	xferpos++;
@@ -350,7 +381,7 @@
 			tx_buf = op->data.buf.out;
 	}
 
-	op_len = sizeof(op->cmd.opcode) + op->addr.nbytes + op->dummy.nbytes;
+	op_len = op->cmd.nbytes + op->addr.nbytes + op->dummy.nbytes;
 
 	/*
 	 * Avoid using malloc() here so that we can use this code in SPL where
@@ -439,8 +470,7 @@
 	if (!ops->mem_ops || !ops->mem_ops->exec_op) {
 		unsigned int len;
 
-		len = sizeof(op->cmd.opcode) + op->addr.nbytes +
-			op->dummy.nbytes;
+		len = op->cmd.nbytes + op->addr.nbytes + op->dummy.nbytes;
 		if (slave->max_write_size && len > slave->max_write_size)
 			return -EINVAL;
 
diff --git a/drivers/spi/spi-mxic.c b/drivers/spi/spi-mxic.c
new file mode 100644
index 0000000..6aae9f7
--- /dev/null
+++ b/drivers/spi/spi-mxic.c
@@ -0,0 +1,547 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Macronix International Co., Ltd.
+ *
+ * Authors:
+ *	zhengxunli <zhengxunli@mxic.com.tw>
+ */
+
+#include <common.h>
+#include <clk.h>
+#include <dm.h>
+#include <errno.h>
+#include <asm/io.h>
+#include <malloc.h>
+#include <spi.h>
+#include <spi-mem.h>
+#include <linux/bug.h>
+#include <linux/iopoll.h>
+
+#define HC_CFG			0x0
+#define HC_CFG_IF_CFG(x)	((x) << 27)
+#define HC_CFG_DUAL_SLAVE	BIT(31)
+#define HC_CFG_INDIVIDUAL	BIT(30)
+#define HC_CFG_NIO(x)		(((x) / 4) << 27)
+#define HC_CFG_TYPE(s, t)	((t) << (23 + ((s) * 2)))
+#define HC_CFG_TYPE_SPI_NOR	0
+#define HC_CFG_TYPE_SPI_NAND	1
+#define HC_CFG_TYPE_SPI_RAM	2
+#define HC_CFG_TYPE_RAW_NAND	3
+#define HC_CFG_SLV_ACT(x)	((x) << 21)
+#define HC_CFG_CLK_PH_EN	BIT(20)
+#define HC_CFG_CLK_POL_INV	BIT(19)
+#define HC_CFG_BIG_ENDIAN	BIT(18)
+#define HC_CFG_DATA_PASS	BIT(17)
+#define HC_CFG_IDLE_SIO_LVL(x)	((x) << 16)
+#define HC_CFG_MAN_START_EN	BIT(3)
+#define HC_CFG_MAN_START	BIT(2)
+#define HC_CFG_MAN_CS_EN	BIT(1)
+#define HC_CFG_MAN_CS_ASSERT	BIT(0)
+
+#define INT_STS			0x4
+#define INT_STS_EN		0x8
+#define INT_SIG_EN		0xc
+#define INT_STS_ALL		GENMASK(31, 0)
+#define INT_RDY_PIN		BIT(26)
+#define INT_RDY_SR		BIT(25)
+#define INT_LNR_SUSP		BIT(24)
+#define INT_ECC_ERR		BIT(17)
+#define INT_CRC_ERR		BIT(16)
+#define INT_LWR_DIS		BIT(12)
+#define INT_LRD_DIS		BIT(11)
+#define INT_SDMA_INT		BIT(10)
+#define INT_DMA_FINISH		BIT(9)
+#define INT_RX_NOT_FULL		BIT(3)
+#define INT_RX_NOT_EMPTY	BIT(2)
+#define INT_TX_NOT_FULL		BIT(1)
+#define INT_TX_EMPTY		BIT(0)
+
+#define HC_EN			0x10
+#define HC_EN_BIT		BIT(0)
+
+#define TXD(x)			(0x14 + ((x) * 4))
+#define RXD			0x24
+
+#define SS_CTRL(s)		(0x30 + ((s) * 4))
+#define LRD_CFG			0x44
+#define LWR_CFG			0x80
+#define RWW_CFG			0x70
+#define OP_READ			BIT(23)
+#define OP_DUMMY_CYC(x)		((x) << 17)
+#define OP_ADDR_BYTES(x)	((x) << 14)
+#define OP_CMD_BYTES(x)		(((x) - 1) << 13)
+#define OP_OCTA_CRC_EN		BIT(12)
+#define OP_DQS_EN		BIT(11)
+#define OP_ENHC_EN		BIT(10)
+#define OP_PREAMBLE_EN		BIT(9)
+#define OP_DATA_DDR		BIT(8)
+#define OP_DATA_BUSW(x)		((x) << 6)
+#define OP_ADDR_DDR		BIT(5)
+#define OP_ADDR_BUSW(x)		((x) << 3)
+#define OP_CMD_DDR		BIT(2)
+#define OP_CMD_BUSW(x)		(x)
+#define OP_BUSW_1		0
+#define OP_BUSW_2		1
+#define OP_BUSW_4		2
+#define OP_BUSW_8		3
+
+#define OCTA_CRC		0x38
+#define OCTA_CRC_IN_EN(s)	BIT(3 + ((s) * 16))
+#define OCTA_CRC_CHUNK(s, x)	((fls((x) / 32)) << (1 + ((s) * 16)))
+#define OCTA_CRC_OUT_EN(s)	BIT(0 + ((s) * 16))
+
+#define ONFI_DIN_CNT(s)		(0x3c + (s))
+
+#define LRD_CTRL		0x48
+#define RWW_CTRL		0x74
+#define LWR_CTRL		0x84
+#define LMODE_EN		BIT(31)
+#define LMODE_SLV_ACT(x)	((x) << 21)
+#define LMODE_CMD1(x)		((x) << 8)
+#define LMODE_CMD0(x)		(x)
+
+#define LRD_ADDR		0x4c
+#define LWR_ADDR		0x88
+#define LRD_RANGE		0x50
+#define LWR_RANGE		0x8c
+
+#define AXI_SLV_ADDR		0x54
+
+#define DMAC_RD_CFG		0x58
+#define DMAC_WR_CFG		0x94
+#define DMAC_CFG_PERIPH_EN	BIT(31)
+#define DMAC_CFG_ALLFLUSH_EN	BIT(30)
+#define DMAC_CFG_LASTFLUSH_EN	BIT(29)
+#define DMAC_CFG_QE(x)		(((x) + 1) << 16)
+#define DMAC_CFG_BURST_LEN(x)	(((x) + 1) << 12)
+#define DMAC_CFG_BURST_SZ(x)	((x) << 8)
+#define DMAC_CFG_DIR_READ	BIT(1)
+#define DMAC_CFG_START		BIT(0)
+
+#define DMAC_RD_CNT		0x5c
+#define DMAC_WR_CNT		0x98
+
+#define SDMA_ADDR		0x60
+
+#define DMAM_CFG		0x64
+#define DMAM_CFG_START		BIT(31)
+#define DMAM_CFG_CONT		BIT(30)
+#define DMAM_CFG_SDMA_GAP(x)	(fls((x) / 8192) << 2)
+#define DMAM_CFG_DIR_READ	BIT(1)
+#define DMAM_CFG_EN		BIT(0)
+
+#define DMAM_CNT		0x68
+
+#define LNR_TIMER_TH		0x6c
+
+#define RDM_CFG0		0x78
+#define RDM_CFG0_POLY(x)	(x)
+
+#define RDM_CFG1		0x7c
+#define RDM_CFG1_RDM_EN		BIT(31)
+#define RDM_CFG1_SEED(x)	(x)
+
+#define LWR_SUSP_CTRL		0x90
+#define LWR_SUSP_CTRL_EN	BIT(31)
+
+#define DMAS_CTRL		0x9c
+#define DMAS_CTRL_EN		BIT(31)
+#define DMAS_CTRL_DIR_READ	BIT(30)
+
+#define DATA_STROB		0xa0
+#define DATA_STROB_EDO_EN	BIT(2)
+#define DATA_STROB_INV_POL	BIT(1)
+#define DATA_STROB_DELAY_2CYC	BIT(0)
+
+#define IDLY_CODE(x)		(0xa4 + ((x) * 4))
+#define IDLY_CODE_VAL(x, v)	((v) << (((x) % 4) * 8))
+
+#define GPIO			0xc4
+#define GPIO_PT(x)		BIT(3 + ((x) * 16))
+#define GPIO_RESET(x)		BIT(2 + ((x) * 16))
+#define GPIO_HOLDB(x)		BIT(1 + ((x) * 16))
+#define GPIO_WPB(x)		BIT((x) * 16)
+
+#define HC_VER			0xd0
+
+#define HW_TEST(x)		(0xe0 + ((x) * 4))
+
+struct mxic_spi_priv {
+	struct clk *send_clk;
+	struct clk *send_dly_clk;
+	void __iomem *regs;
+	u32 cur_speed_hz;
+};
+
+static int mxic_spi_clk_enable(struct mxic_spi_priv *priv)
+{
+	int ret;
+
+	ret = clk_prepare_enable(priv->send_clk);
+	if (ret)
+		return ret;
+
+	ret = clk_prepare_enable(priv->send_dly_clk);
+	if (ret)
+		goto err_send_dly_clk;
+
+	return ret;
+
+err_send_dly_clk:
+	clk_disable_unprepare(priv->send_clk);
+
+	return ret;
+}
+
+static void mxic_spi_clk_disable(struct mxic_spi_priv *priv)
+{
+	clk_disable_unprepare(priv->send_clk);
+	clk_disable_unprepare(priv->send_dly_clk);
+}
+
+static void mxic_spi_set_input_delay_dqs(struct mxic_spi_priv *priv,
+					 u8 idly_code)
+{
+	writel(IDLY_CODE_VAL(0, idly_code) |
+	       IDLY_CODE_VAL(1, idly_code) |
+	       IDLY_CODE_VAL(2, idly_code) |
+	       IDLY_CODE_VAL(3, idly_code),
+	       priv->regs + IDLY_CODE(0));
+	writel(IDLY_CODE_VAL(4, idly_code) |
+	       IDLY_CODE_VAL(5, idly_code) |
+	       IDLY_CODE_VAL(6, idly_code) |
+	       IDLY_CODE_VAL(7, idly_code),
+	       priv->regs + IDLY_CODE(1));
+}
+
+static int mxic_spi_clk_setup(struct mxic_spi_priv *priv, uint freq)
+{
+	int ret;
+
+	ret = clk_set_rate(priv->send_clk, freq);
+	if (ret)
+		return ret;
+
+	ret = clk_set_rate(priv->send_dly_clk, freq);
+	if (ret)
+		return ret;
+
+	/*
+	 * A constant delay range from 0x0 ~ 0x1F for input delay,
+	 * the unit is 78 ps, the max input delay is 2.418 ns.
+	 */
+	mxic_spi_set_input_delay_dqs(priv, 0xf);
+
+	return 0;
+}
+
+static int mxic_spi_set_speed(struct udevice *bus, uint freq)
+{
+	struct mxic_spi_priv *priv = dev_get_priv(bus);
+	int ret;
+
+	if (priv->cur_speed_hz == freq)
+		return 0;
+
+	mxic_spi_clk_disable(priv);
+	ret = mxic_spi_clk_setup(priv, freq);
+	if (ret)
+		return ret;
+
+	ret = mxic_spi_clk_enable(priv);
+	if (ret)
+		return ret;
+
+	priv->cur_speed_hz = freq;
+
+	return 0;
+}
+
+static int mxic_spi_set_mode(struct udevice *bus, uint mode)
+{
+	struct mxic_spi_priv *priv = dev_get_priv(bus);
+	u32 hc_config = 0;
+
+	if (mode & SPI_CPHA)
+		hc_config |= HC_CFG_CLK_PH_EN;
+	if (mode & SPI_CPOL)
+		hc_config |= HC_CFG_CLK_POL_INV;
+
+	writel(hc_config, priv->regs + HC_CFG);
+
+	return 0;
+}
+
+static void mxic_spi_hw_init(struct mxic_spi_priv *priv)
+{
+	writel(0, priv->regs + DATA_STROB);
+	writel(INT_STS_ALL, priv->regs + INT_STS_EN);
+	writel(0, priv->regs + HC_EN);
+	writel(0, priv->regs + LRD_CFG);
+	writel(0, priv->regs + LRD_CTRL);
+	writel(HC_CFG_NIO(1) | HC_CFG_TYPE(0, HC_CFG_TYPE_SPI_NOR) |
+	       HC_CFG_SLV_ACT(0) | HC_CFG_MAN_CS_EN | HC_CFG_IDLE_SIO_LVL(1),
+	       priv->regs + HC_CFG);
+}
+
+static int mxic_spi_data_xfer(struct mxic_spi_priv *priv, const void *txbuf,
+			      void *rxbuf, unsigned int len)
+{
+	unsigned int pos = 0;
+
+	while (pos < len) {
+		unsigned int nbytes = len - pos;
+		u32 data = 0xffffffff;
+		u32 sts;
+		int ret;
+
+		if (nbytes > 4)
+			nbytes = 4;
+
+		if (txbuf)
+			memcpy(&data, txbuf + pos, nbytes);
+
+		ret = readl_poll_timeout(priv->regs + INT_STS, sts,
+					 sts & INT_TX_EMPTY, 1000000);
+		if (ret)
+			return ret;
+
+		writel(data, priv->regs + TXD(nbytes % 4));
+
+		if (rxbuf) {
+			ret = readl_poll_timeout(priv->regs + INT_STS, sts,
+						 sts & INT_TX_EMPTY,
+						 1000000);
+			if (ret)
+				return ret;
+
+			ret = readl_poll_timeout(priv->regs + INT_STS, sts,
+						 sts & INT_RX_NOT_EMPTY,
+						 1000000);
+			if (ret)
+				return ret;
+
+			data = readl(priv->regs + RXD);
+			data >>= (8 * (4 - nbytes));
+			memcpy(rxbuf + pos, &data, nbytes);
+			WARN_ON(readl(priv->regs + INT_STS) & INT_RX_NOT_EMPTY);
+		} else {
+			readl(priv->regs + RXD);
+		}
+		WARN_ON(readl(priv->regs + INT_STS) & INT_RX_NOT_EMPTY);
+
+		pos += nbytes;
+	}
+
+	return 0;
+}
+
+static bool mxic_spi_mem_supports_op(struct spi_slave *slave,
+				     const struct spi_mem_op *op)
+{
+	if (op->data.buswidth > 8 || op->addr.buswidth > 8 ||
+	    op->dummy.buswidth > 8 || op->cmd.buswidth > 8)
+		return false;
+
+	if (op->addr.nbytes > 7)
+		return false;
+
+	return spi_mem_default_supports_op(slave, op);
+}
+
+static int mxic_spi_mem_exec_op(struct spi_slave *slave,
+				const struct spi_mem_op *op)
+{
+	struct dm_spi_slave_plat *slave_plat = dev_get_parent_plat(slave->dev);
+	struct udevice *bus = slave->dev->parent;
+	struct mxic_spi_priv *priv = dev_get_priv(bus);
+	int nio = 1, i, ret;
+	u32 ss_ctrl;
+	u8 addr[8], dummy_bytes = 0;
+
+	if (slave->mode & (SPI_TX_OCTAL | SPI_RX_OCTAL))
+		nio = 8;
+	else if (slave->mode & (SPI_TX_QUAD | SPI_RX_QUAD))
+		nio = 4;
+	else if (slave->mode & (SPI_TX_DUAL | SPI_RX_DUAL))
+		nio = 2;
+
+	writel(HC_CFG_NIO(nio) |
+	       HC_CFG_TYPE(slave_plat->cs, HC_CFG_TYPE_SPI_NOR) |
+	       HC_CFG_SLV_ACT(slave_plat->cs) | HC_CFG_IDLE_SIO_LVL(1) |
+	       HC_CFG_MAN_CS_EN,
+	       priv->regs + HC_CFG);
+	writel(HC_EN_BIT, priv->regs + HC_EN);
+
+	ss_ctrl = OP_CMD_BYTES(1) | OP_CMD_BUSW(fls(op->cmd.buswidth) - 1);
+
+	if (op->addr.nbytes)
+		ss_ctrl |= OP_ADDR_BYTES(op->addr.nbytes) |
+			   OP_ADDR_BUSW(fls(op->addr.buswidth) - 1);
+
+	/*
+	 * Since the SPI MXIC dummy buswidth is aligned with the data buswidth,
+	 * the dummy byte needs to be recalculated to send out the correct
+	 * dummy cycle.
+	 */
+	if (op->dummy.nbytes) {
+		dummy_bytes = op->dummy.nbytes /
+			      op->addr.buswidth *
+			      op->data.buswidth;
+		ss_ctrl |= OP_DUMMY_CYC(dummy_bytes);
+	}
+
+	if (op->data.nbytes) {
+		ss_ctrl |= OP_DATA_BUSW(fls(op->data.buswidth) - 1);
+		if (op->data.dir == SPI_MEM_DATA_IN)
+			ss_ctrl |= OP_READ;
+	}
+
+	writel(ss_ctrl, priv->regs + SS_CTRL(slave_plat->cs));
+
+	writel(readl(priv->regs + HC_CFG) | HC_CFG_MAN_CS_ASSERT,
+	       priv->regs + HC_CFG);
+
+	ret = mxic_spi_data_xfer(priv, &op->cmd.opcode, NULL, 1);
+	if (ret)
+		goto out;
+
+	for (i = 0; i < op->addr.nbytes; i++)
+		addr[i] = op->addr.val >> (8 * (op->addr.nbytes - i - 1));
+
+	ret = mxic_spi_data_xfer(priv, addr, NULL, op->addr.nbytes);
+	if (ret)
+		goto out;
+
+	ret = mxic_spi_data_xfer(priv, NULL, NULL, dummy_bytes);
+	if (ret)
+		goto out;
+
+	ret = mxic_spi_data_xfer(priv,
+				 op->data.dir == SPI_MEM_DATA_OUT ?
+				 op->data.buf.out : NULL,
+				 op->data.dir == SPI_MEM_DATA_IN ?
+				 op->data.buf.in : NULL,
+				 op->data.nbytes);
+
+out:
+	writel(readl(priv->regs + HC_CFG) & ~HC_CFG_MAN_CS_ASSERT,
+	       priv->regs + HC_CFG);
+	writel(0, priv->regs + HC_EN);
+
+	return ret;
+}
+
+static const struct spi_controller_mem_ops mxic_spi_mem_ops = {
+	.supports_op = mxic_spi_mem_supports_op,
+	.exec_op = mxic_spi_mem_exec_op,
+};
+
+static int mxic_spi_claim_bus(struct udevice *dev)
+{
+	struct udevice *bus = dev_get_parent(dev);
+	struct mxic_spi_priv *priv = dev_get_priv(bus);
+
+	writel(readl(priv->regs + HC_CFG) | HC_CFG_MAN_CS_EN,
+	       priv->regs + HC_CFG);
+	writel(HC_EN_BIT, priv->regs + HC_EN);
+	writel(readl(priv->regs + HC_CFG) | HC_CFG_MAN_CS_ASSERT,
+	       priv->regs + HC_CFG);
+
+	return 0;
+}
+
+static int mxic_spi_release_bus(struct udevice *dev)
+{
+	struct udevice *bus = dev_get_parent(dev);
+	struct mxic_spi_priv *priv = dev_get_priv(bus);
+
+	writel(readl(priv->regs + HC_CFG) & ~HC_CFG_MAN_CS_ASSERT,
+	       priv->regs + HC_CFG);
+	writel(0, priv->regs + HC_EN);
+
+	return 0;
+}
+
+static int mxic_spi_xfer(struct udevice *dev, unsigned int bitlen,
+			 const void *dout, void *din, unsigned long flags)
+{
+	struct udevice *bus = dev_get_parent(dev);
+	struct mxic_spi_priv *priv = dev_get_priv(bus);
+	struct spi_slave *slave = dev_get_parent_priv(dev);
+	unsigned int busw = OP_BUSW_1;
+	unsigned int len = bitlen / 8;
+	int ret;
+
+	if (dout && din) {
+		if (((slave->mode & SPI_TX_QUAD) &&
+		     !(slave->mode & SPI_RX_QUAD)) ||
+		    ((slave->mode & SPI_TX_DUAL) &&
+		     !(slave->mode & SPI_RX_DUAL)))
+			return -ENOTSUPP;
+	}
+
+	if (din) {
+		if (slave->mode & SPI_TX_QUAD)
+			busw = OP_BUSW_4;
+		else if (slave->mode & SPI_TX_DUAL)
+			busw = OP_BUSW_2;
+	} else if (dout) {
+		if (slave->mode & SPI_RX_QUAD)
+			busw = OP_BUSW_4;
+		else if (slave->mode & SPI_RX_DUAL)
+			busw = OP_BUSW_2;
+	}
+
+	writel(OP_CMD_BYTES(1) | OP_CMD_BUSW(busw) |
+	       OP_DATA_BUSW(busw) | (din ? OP_READ : 0),
+	       priv->regs + SS_CTRL(0));
+
+	ret = mxic_spi_data_xfer(priv, dout, din, len);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int mxic_spi_probe(struct udevice *bus)
+{
+	struct mxic_spi_priv *priv = dev_get_priv(bus);
+
+	priv->regs = (void *)dev_read_addr(bus);
+
+	priv->send_clk = devm_clk_get(bus, "send_clk");
+	if (IS_ERR(priv->send_clk))
+		return PTR_ERR(priv->send_clk);
+
+	priv->send_dly_clk = devm_clk_get(bus, "send_dly_clk");
+	if (IS_ERR(priv->send_dly_clk))
+		return PTR_ERR(priv->send_dly_clk);
+
+	mxic_spi_hw_init(priv);
+
+	return 0;
+}
+
+static const struct dm_spi_ops mxic_spi_ops = {
+	.claim_bus	= mxic_spi_claim_bus,
+	.release_bus	= mxic_spi_release_bus,
+	.xfer		= mxic_spi_xfer,
+	.set_speed	= mxic_spi_set_speed,
+	.set_mode	= mxic_spi_set_mode,
+	.mem_ops	= &mxic_spi_mem_ops,
+};
+
+static const struct udevice_id mxic_spi_ids[] = {
+	{ .compatible = "mxicy,mx25f0a-spi", },
+	{ }
+};
+
+U_BOOT_DRIVER(mxic_spi) = {
+	.name	= "mxic_spi",
+	.id	= UCLASS_SPI,
+	.of_match = mxic_spi_ids,
+	.ops	= &mxic_spi_ops,
+	.priv_auto = sizeof(struct mxic_spi_priv),
+	.probe	= mxic_spi_probe,
+};
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index c3e38e4..d68e48f 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -67,6 +67,8 @@
 #define SPINOR_OP_CLFSR		0x50	/* Clear flag status register */
 #define SPINOR_OP_RDEAR		0xc8	/* Read Extended Address Register */
 #define SPINOR_OP_WREAR		0xc5	/* Write Extended Address Register */
+#define SPINOR_OP_SRSTEN	0x66	/* Software Reset Enable */
+#define SPINOR_OP_SRST		0x99	/* Software Reset */
 
 /* 4-byte address opcodes - used on Spansion and some Macronix flashes. */
 #define SPINOR_OP_READ_4B	0x13	/* Read data bytes (low frequency) */
@@ -124,6 +126,12 @@
 /* Used for Micron flashes only. */
 #define SPINOR_OP_RD_EVCR	0x65	/* Read EVCR register */
 #define SPINOR_OP_WD_EVCR	0x61	/* Write EVCR register */
+#define SPINOR_OP_MT_DTR_RD	0xfd	/* Fast Read opcode in DTR mode */
+#define SPINOR_OP_MT_RD_ANY_REG	0x85	/* Read volatile register */
+#define SPINOR_OP_MT_WR_ANY_REG	0x81	/* Write volatile register */
+#define SPINOR_REG_MT_CFR0V	0x00	/* For setting octal DTR mode */
+#define SPINOR_REG_MT_CFR1V	0x01	/* For setting dummy cycles */
+#define SPINOR_MT_OCT_DTR	0xe7	/* Enable Octal DTR with DQS. */
 
 /* Status Register bits. */
 #define SR_WIP			BIT(0)	/* Write in progress */
@@ -155,6 +163,19 @@
 /* Status Register 2 bits. */
 #define SR2_QUAD_EN_BIT7	BIT(7)
 
+/* For Cypress flash. */
+#define SPINOR_OP_RD_ANY_REG			0x65	/* Read any register */
+#define SPINOR_OP_WR_ANY_REG			0x71	/* Write any register */
+#define SPINOR_OP_S28_SE_4K			0x21
+#define SPINOR_REG_CYPRESS_CFR2V		0x00800003
+#define SPINOR_REG_CYPRESS_CFR2V_MEMLAT_11_24	0xb
+#define SPINOR_REG_CYPRESS_CFR3V		0x00800004
+#define SPINOR_REG_CYPRESS_CFR3V_PGSZ		BIT(4) /* Page size. */
+#define SPINOR_REG_CYPRESS_CFR3V_UNISECT	BIT(3) /* Uniform sector mode */
+#define SPINOR_REG_CYPRESS_CFR5V		0x00800006
+#define SPINOR_REG_CYPRESS_CFR5V_OCT_DTR_EN	0x3
+#define SPINOR_OP_CYPRESS_RD_FAST		0xee
+
 /* Supported SPI protocols */
 #define SNOR_PROTO_INST_MASK	GENMASK(23, 16)
 #define SNOR_PROTO_INST_SHIFT	16
@@ -200,6 +221,7 @@
 	SNOR_PROTO_1_2_2_DTR = SNOR_PROTO_DTR(1, 2, 2),
 	SNOR_PROTO_1_4_4_DTR = SNOR_PROTO_DTR(1, 4, 4),
 	SNOR_PROTO_1_8_8_DTR = SNOR_PROTO_DTR(1, 8, 8),
+	SNOR_PROTO_8_8_8_DTR = SNOR_PROTO_DTR(8, 8, 8),
 };
 
 static inline bool spi_nor_protocol_is_dtr(enum spi_nor_protocol proto)
@@ -247,8 +269,176 @@
 	SNOR_F_READY_XSR_RDY	= BIT(4),
 	SNOR_F_USE_CLSR		= BIT(5),
 	SNOR_F_BROKEN_RESET	= BIT(6),
+	SNOR_F_SOFT_RESET	= BIT(7),
+};
+
+struct spi_nor;
+
+/**
+ * struct spi_nor_hwcaps - Structure for describing the hardware capabilies
+ * supported by the SPI controller (bus master).
+ * @mask:		the bitmask listing all the supported hw capabilies
+ */
+struct spi_nor_hwcaps {
+	u32	mask;
+};
+
+/*
+ *(Fast) Read capabilities.
+ * MUST be ordered by priority: the higher bit position, the higher priority.
+ * As a matter of performances, it is relevant to use Octo SPI protocols first,
+ * then Quad SPI protocols before Dual SPI protocols, Fast Read and lastly
+ * (Slow) Read.
+ */
+#define SNOR_HWCAPS_READ_MASK		GENMASK(15, 0)
+#define SNOR_HWCAPS_READ		BIT(0)
+#define SNOR_HWCAPS_READ_FAST		BIT(1)
+#define SNOR_HWCAPS_READ_1_1_1_DTR	BIT(2)
+
+#define SNOR_HWCAPS_READ_DUAL		GENMASK(6, 3)
+#define SNOR_HWCAPS_READ_1_1_2		BIT(3)
+#define SNOR_HWCAPS_READ_1_2_2		BIT(4)
+#define SNOR_HWCAPS_READ_2_2_2		BIT(5)
+#define SNOR_HWCAPS_READ_1_2_2_DTR	BIT(6)
+
+#define SNOR_HWCAPS_READ_QUAD		GENMASK(10, 7)
+#define SNOR_HWCAPS_READ_1_1_4		BIT(7)
+#define SNOR_HWCAPS_READ_1_4_4		BIT(8)
+#define SNOR_HWCAPS_READ_4_4_4		BIT(9)
+#define SNOR_HWCAPS_READ_1_4_4_DTR	BIT(10)
+
+#define SNOR_HWCPAS_READ_OCTO		GENMASK(15, 11)
+#define SNOR_HWCAPS_READ_1_1_8		BIT(11)
+#define SNOR_HWCAPS_READ_1_8_8		BIT(12)
+#define SNOR_HWCAPS_READ_8_8_8		BIT(13)
+#define SNOR_HWCAPS_READ_1_8_8_DTR	BIT(14)
+#define SNOR_HWCAPS_READ_8_8_8_DTR	BIT(15)
+
+/*
+ * Page Program capabilities.
+ * MUST be ordered by priority: the higher bit position, the higher priority.
+ * Like (Fast) Read capabilities, Octo/Quad SPI protocols are preferred to the
+ * legacy SPI 1-1-1 protocol.
+ * Note that Dual Page Programs are not supported because there is no existing
+ * JEDEC/SFDP standard to define them. Also at this moment no SPI flash memory
+ * implements such commands.
+ */
+#define SNOR_HWCAPS_PP_MASK		GENMASK(23, 16)
+#define SNOR_HWCAPS_PP			BIT(16)
+
+#define SNOR_HWCAPS_PP_QUAD		GENMASK(19, 17)
+#define SNOR_HWCAPS_PP_1_1_4		BIT(17)
+#define SNOR_HWCAPS_PP_1_4_4		BIT(18)
+#define SNOR_HWCAPS_PP_4_4_4		BIT(19)
+
+#define SNOR_HWCAPS_PP_OCTO		GENMASK(23, 20)
+#define SNOR_HWCAPS_PP_1_1_8		BIT(20)
+#define SNOR_HWCAPS_PP_1_8_8		BIT(21)
+#define SNOR_HWCAPS_PP_8_8_8		BIT(22)
+#define SNOR_HWCAPS_PP_8_8_8_DTR	BIT(23)
+
+#define SNOR_HWCAPS_X_X_X	(SNOR_HWCAPS_READ_2_2_2 |	\
+				 SNOR_HWCAPS_READ_4_4_4 |	\
+				 SNOR_HWCAPS_READ_8_8_8 |	\
+				 SNOR_HWCAPS_PP_4_4_4 |		\
+				 SNOR_HWCAPS_PP_8_8_8)
+
+#define SNOR_HWCAPS_X_X_X_DTR	(SNOR_HWCAPS_READ_8_8_8_DTR |	\
+				 SNOR_HWCAPS_PP_8_8_8_DTR)
+
+#define SNOR_HWCAPS_DTR		(SNOR_HWCAPS_READ_1_1_1_DTR |	\
+				 SNOR_HWCAPS_READ_1_2_2_DTR |	\
+				 SNOR_HWCAPS_READ_1_4_4_DTR |	\
+				 SNOR_HWCAPS_READ_1_8_8_DTR)
+
+#define SNOR_HWCAPS_ALL		(SNOR_HWCAPS_READ_MASK |	\
+				 SNOR_HWCAPS_PP_MASK)
+
+struct spi_nor_read_command {
+	u8			num_mode_clocks;
+	u8			num_wait_states;
+	u8			opcode;
+	enum spi_nor_protocol	proto;
+};
+
+struct spi_nor_pp_command {
+	u8			opcode;
+	enum spi_nor_protocol	proto;
+};
+
+enum spi_nor_read_command_index {
+	SNOR_CMD_READ,
+	SNOR_CMD_READ_FAST,
+	SNOR_CMD_READ_1_1_1_DTR,
+
+	/* Dual SPI */
+	SNOR_CMD_READ_1_1_2,
+	SNOR_CMD_READ_1_2_2,
+	SNOR_CMD_READ_2_2_2,
+	SNOR_CMD_READ_1_2_2_DTR,
+
+	/* Quad SPI */
+	SNOR_CMD_READ_1_1_4,
+	SNOR_CMD_READ_1_4_4,
+	SNOR_CMD_READ_4_4_4,
+	SNOR_CMD_READ_1_4_4_DTR,
+
+	/* Octo SPI */
+	SNOR_CMD_READ_1_1_8,
+	SNOR_CMD_READ_1_8_8,
+	SNOR_CMD_READ_8_8_8,
+	SNOR_CMD_READ_1_8_8_DTR,
+	SNOR_CMD_READ_8_8_8_DTR,
+
+	SNOR_CMD_READ_MAX
+};
+
+enum spi_nor_pp_command_index {
+	SNOR_CMD_PP,
+
+	/* Quad SPI */
+	SNOR_CMD_PP_1_1_4,
+	SNOR_CMD_PP_1_4_4,
+	SNOR_CMD_PP_4_4_4,
+
+	/* Octo SPI */
+	SNOR_CMD_PP_1_1_8,
+	SNOR_CMD_PP_1_8_8,
+	SNOR_CMD_PP_8_8_8,
+	SNOR_CMD_PP_8_8_8_DTR,
+
+	SNOR_CMD_PP_MAX
 };
 
+struct spi_nor_flash_parameter {
+	u64				size;
+	u32				page_size;
+	u8				rdsr_dummy;
+	u8				rdsr_addr_nbytes;
+
+	struct spi_nor_hwcaps		hwcaps;
+	struct spi_nor_read_command	reads[SNOR_CMD_READ_MAX];
+	struct spi_nor_pp_command	page_programs[SNOR_CMD_PP_MAX];
+
+	int (*quad_enable)(struct spi_nor *nor);
+};
+
+/**
+ * enum spi_nor_cmd_ext - describes the command opcode extension in DTR mode
+ * @SPI_MEM_NOR_NONE: no extension. This is the default, and is used in Legacy
+ *		      SPI mode
+ * @SPI_MEM_NOR_REPEAT: the extension is same as the opcode
+ * @SPI_MEM_NOR_INVERT: the extension is the bitwise inverse of the opcode
+ * @SPI_MEM_NOR_HEX: the extension is any hex value. The command and opcode
+ *		     combine to form a 16-bit opcode.
+ */
+enum spi_nor_cmd_ext {
+	SPI_NOR_EXT_NONE = 0,
+	SPI_NOR_EXT_REPEAT,
+	SPI_NOR_EXT_INVERT,
+	SPI_NOR_EXT_HEX,
+};
+
 /**
  * struct flash_info - Forward declaration of a structure used internally by
  *		       spi_nor_scan()
@@ -279,6 +469,9 @@
  * @read_opcode:	the read opcode
  * @read_dummy:		the dummy needed by the read operation
  * @program_opcode:	the program opcode
+ * @rdsr_dummy		dummy cycles needed for Read Status Register command.
+ * @rdsr_addr_nbytes:	dummy address bytes needed for Read Status Register
+ *			command.
  * @bank_read_cmd:	Bank read cmd
  * @bank_write_cmd:	Bank write cmd
  * @bank_curr:		Current flash bank
@@ -288,6 +481,8 @@
  * @write_proto:	the SPI protocol for write operations
  * @reg_proto		the SPI protocol for read_reg/write_reg/erase operations
  * @cmd_buf:		used by the write_reg
+ * @cmd_ext_type:	the command opcode extension for DTR mode.
+ * @fixups:		flash-specific fixup hooks.
  * @prepare:		[OPTIONAL] do some preparations for the
  *			read/write/erase/lock/unlock operations
  * @unprepare:		[OPTIONAL] do some post work after the
@@ -304,6 +499,7 @@
  * @flash_is_locked:	[FLASH-SPECIFIC] check if a region of the SPI NOR is
  *			completely locked
  * @quad_enable:	[FLASH-SPECIFIC] enables SPI NOR quad mode
+ * @octal_dtr_enable:	[FLASH-SPECIFIC] enables SPI NOR octal DTR mode.
  * @priv:		the private data
  */
 struct spi_nor {
@@ -318,6 +514,8 @@
 	u8			read_opcode;
 	u8			read_dummy;
 	u8			program_opcode;
+	u8			rdsr_dummy;
+	u8			rdsr_addr_nbytes;
 #ifdef CONFIG_SPI_FLASH_BAR
 	u8			bank_read_cmd;
 	u8			bank_write_cmd;
@@ -329,7 +527,11 @@
 	bool			sst_write_second;
 	u32			flags;
 	u8			cmd_buf[SPI_NOR_MAX_CMD_SIZE];
+	enum spi_nor_cmd_ext	cmd_ext_type;
+	struct spi_nor_fixups	*fixups;
 
+	int (*setup)(struct spi_nor *nor, const struct flash_info *info,
+		     const struct spi_nor_flash_parameter *params);
 	int (*prepare)(struct spi_nor *nor, enum spi_nor_ops ops);
 	void (*unprepare)(struct spi_nor *nor, enum spi_nor_ops ops);
 	int (*read_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, int len);
@@ -345,6 +547,7 @@
 	int (*flash_unlock)(struct spi_nor *nor, loff_t ofs, uint64_t len);
 	int (*flash_is_locked)(struct spi_nor *nor, loff_t ofs, uint64_t len);
 	int (*quad_enable)(struct spi_nor *nor);
+	int (*octal_dtr_enable)(struct spi_nor *nor);
 
 	void *priv;
 /* Compatibility for spi_flash, remove once sf layer is merged with mtd */
@@ -369,67 +572,6 @@
 #endif /* __UBOOT__ */
 
 /**
- * struct spi_nor_hwcaps - Structure for describing the hardware capabilies
- * supported by the SPI controller (bus master).
- * @mask:		the bitmask listing all the supported hw capabilies
- */
-struct spi_nor_hwcaps {
-	u32	mask;
-};
-
-/*
- *(Fast) Read capabilities.
- * MUST be ordered by priority: the higher bit position, the higher priority.
- * As a matter of performances, it is relevant to use Octo SPI protocols first,
- * then Quad SPI protocols before Dual SPI protocols, Fast Read and lastly
- * (Slow) Read.
- */
-#define SNOR_HWCAPS_READ_MASK		GENMASK(14, 0)
-#define SNOR_HWCAPS_READ		BIT(0)
-#define SNOR_HWCAPS_READ_FAST		BIT(1)
-#define SNOR_HWCAPS_READ_1_1_1_DTR	BIT(2)
-
-#define SNOR_HWCAPS_READ_DUAL		GENMASK(6, 3)
-#define SNOR_HWCAPS_READ_1_1_2		BIT(3)
-#define SNOR_HWCAPS_READ_1_2_2		BIT(4)
-#define SNOR_HWCAPS_READ_2_2_2		BIT(5)
-#define SNOR_HWCAPS_READ_1_2_2_DTR	BIT(6)
-
-#define SNOR_HWCAPS_READ_QUAD		GENMASK(10, 7)
-#define SNOR_HWCAPS_READ_1_1_4		BIT(7)
-#define SNOR_HWCAPS_READ_1_4_4		BIT(8)
-#define SNOR_HWCAPS_READ_4_4_4		BIT(9)
-#define SNOR_HWCAPS_READ_1_4_4_DTR	BIT(10)
-
-#define SNOR_HWCPAS_READ_OCTO		GENMASK(14, 11)
-#define SNOR_HWCAPS_READ_1_1_8		BIT(11)
-#define SNOR_HWCAPS_READ_1_8_8		BIT(12)
-#define SNOR_HWCAPS_READ_8_8_8		BIT(13)
-#define SNOR_HWCAPS_READ_1_8_8_DTR	BIT(14)
-
-/*
- * Page Program capabilities.
- * MUST be ordered by priority: the higher bit position, the higher priority.
- * Like (Fast) Read capabilities, Octo/Quad SPI protocols are preferred to the
- * legacy SPI 1-1-1 protocol.
- * Note that Dual Page Programs are not supported because there is no existing
- * JEDEC/SFDP standard to define them. Also at this moment no SPI flash memory
- * implements such commands.
- */
-#define SNOR_HWCAPS_PP_MASK	GENMASK(22, 16)
-#define SNOR_HWCAPS_PP		BIT(16)
-
-#define SNOR_HWCAPS_PP_QUAD	GENMASK(19, 17)
-#define SNOR_HWCAPS_PP_1_1_4	BIT(17)
-#define SNOR_HWCAPS_PP_1_4_4	BIT(18)
-#define SNOR_HWCAPS_PP_4_4_4	BIT(19)
-
-#define SNOR_HWCAPS_PP_OCTO	GENMASK(22, 20)
-#define SNOR_HWCAPS_PP_1_1_8	BIT(20)
-#define SNOR_HWCAPS_PP_1_8_8	BIT(21)
-#define SNOR_HWCAPS_PP_8_8_8	BIT(22)
-
-/**
  * spi_nor_scan() - scan the SPI NOR
  * @nor:	the spi_nor structure
  *
@@ -441,4 +583,19 @@
  */
 int spi_nor_scan(struct spi_nor *nor);
 
+#if CONFIG_IS_ENABLED(SPI_FLASH_TINY)
+static inline int spi_nor_remove(struct spi_nor *nor)
+{
+	return 0;
+}
+#else
+/**
+ * spi_nor_remove() - perform cleanup before booting to the next stage
+ * @nor:	the spi_nor structure
+ *
+ * Return: 0 for success, -errno for failure.
+ */
+int spi_nor_remove(struct spi_nor *nor);
+#endif
+
 #endif
diff --git a/include/spi-mem.h b/include/spi-mem.h
index e354c38..32ffdc2 100644
--- a/include/spi-mem.h
+++ b/include/spi-mem.h
@@ -17,6 +17,7 @@
 	{							\
 		.buswidth = __buswidth,				\
 		.opcode = __opcode,				\
+		.nbytes = 1,					\
 	}
 
 #define SPI_MEM_OP_ADDR(__nbytes, __val, __buswidth)		\
@@ -69,8 +70,11 @@
 
 /**
  * struct spi_mem_op - describes a SPI memory operation
+ * @cmd.nbytes: number of opcode bytes (only 1 or 2 are valid). The opcode is
+ *		sent MSB-first.
  * @cmd.buswidth: number of IO lines used to transmit the command
  * @cmd.opcode: operation opcode
+ * @cmd.dtr: whether the command opcode should be sent in DTR mode or not
  * @addr.nbytes: number of address bytes to send. Can be zero if the operation
  *		 does not need to send an address
  * @addr.buswidth: number of IO lines used to transmit the address cycles
@@ -78,33 +82,41 @@
  *	      Note that only @addr.nbytes are taken into account in this
  *	      address value, so users should make sure the value fits in the
  *	      assigned number of bytes.
+ * @addr.dtr: whether the address should be sent in DTR mode or not
  * @dummy.nbytes: number of dummy bytes to send after an opcode or address. Can
  *		  be zero if the operation does not require dummy bytes
  * @dummy.buswidth: number of IO lanes used to transmit the dummy bytes
+ * @dummy.dtr: whether the dummy bytes should be sent in DTR mode or not
  * @data.buswidth: number of IO lanes used to send/receive the data
+ * @data.dtr: whether the data should be sent in DTR mode or not
  * @data.dir: direction of the transfer
  * @data.buf.in: input buffer
  * @data.buf.out: output buffer
  */
 struct spi_mem_op {
 	struct {
+		u8 nbytes;
 		u8 buswidth;
-		u8 opcode;
+		u8 dtr : 1;
+		u16 opcode;
 	} cmd;
 
 	struct {
 		u8 nbytes;
 		u8 buswidth;
+		u8 dtr : 1;
 		u64 val;
 	} addr;
 
 	struct {
 		u8 nbytes;
 		u8 buswidth;
+		u8 dtr : 1;
 	} dummy;
 
 	struct {
 		u8 buswidth;
+		u8 dtr : 1;
 		enum spi_mem_data_dir dir;
 		unsigned int nbytes;
 		/* buf.{in,out} must be DMA-able. */
@@ -237,6 +249,11 @@
 int spi_mem_adjust_op_size(struct spi_slave *slave, struct spi_mem_op *op);
 
 bool spi_mem_supports_op(struct spi_slave *slave, const struct spi_mem_op *op);
+bool spi_mem_dtr_supports_op(struct spi_slave *slave,
+			     const struct spi_mem_op *op);
+
+bool spi_mem_default_supports_op(struct spi_slave *slave,
+				 const struct spi_mem_op *op);
 
 int spi_mem_exec_op(struct spi_slave *slave, const struct spi_mem_op *op);