Merge branch 'master' of git://git.denx.de/u-boot-spi
diff --git a/arch/arm/cpu/armv7/am33xx/clock.c b/arch/arm/cpu/armv7/am33xx/clock.c
index ec7d468..595c951 100644
--- a/arch/arm/cpu/armv7/am33xx/clock.c
+++ b/arch/arm/cpu/armv7/am33xx/clock.c
@@ -144,6 +144,33 @@
 		wait_for_clk_enable(clkctrl_addr);
 }
 
+static inline void wait_for_clk_disable(u32 *clkctrl_addr)
+{
+	u32 clkctrl, idlest = MODULE_CLKCTRL_IDLEST_FULLY_FUNCTIONAL;
+	u32 bound = LDELAY;
+
+	while ((idlest != MODULE_CLKCTRL_IDLEST_DISABLED)) {
+		clkctrl = readl(clkctrl_addr);
+		idlest = (clkctrl & MODULE_CLKCTRL_IDLEST_MASK) >>
+			  MODULE_CLKCTRL_IDLEST_SHIFT;
+		if (--bound == 0) {
+			printf("Clock disable failed for 0x%p idlest 0x%x\n",
+			       clkctrl_addr, clkctrl);
+			 return;
+		}
+	}
+}
+static inline void disable_clock_module(u32 *const clkctrl_addr,
+					u32 wait_for_disable)
+{
+	clrsetbits_le32(clkctrl_addr, MODULE_CLKCTRL_MODULEMODE_MASK,
+			MODULE_CLKCTRL_MODULEMODE_SW_DISABLE <<
+			MODULE_CLKCTRL_MODULEMODE_SHIFT);
+	debug("Disable clock module - %p\n", clkctrl_addr);
+	if (wait_for_disable)
+		wait_for_clk_disable(clkctrl_addr);
+}
+
 static inline void enable_clock_domain(u32 *const clkctrl_reg, u32 enable_mode)
 {
 	clrsetbits_le32(clkctrl_reg, CD_CLKCTRL_CLKTRCTRL_MASK,
@@ -151,6 +178,14 @@
 	debug("Enable clock domain - %p\n", clkctrl_reg);
 }
 
+static inline void disable_clock_domain(u32 *const clkctrl_reg)
+{
+	clrsetbits_le32(clkctrl_reg, CD_CLKCTRL_CLKTRCTRL_MASK,
+			CD_CLKCTRL_CLKTRCTRL_SW_SLEEP <<
+			CD_CLKCTRL_CLKTRCTRL_SHIFT);
+	debug("Disable clock domain - %p\n", clkctrl_reg);
+}
+
 void do_enable_clocks(u32 *const *clk_domains,
 		      u32 *const *clk_modules_explicit_en, u8 wait_for_enable)
 {
@@ -170,6 +205,23 @@
 	};
 }
 
+void do_disable_clocks(u32 *const *clk_domains,
+			u32 *const *clk_modules_disable,
+			u8 wait_for_disable)
+{
+	u32 i, max = 100;
+
+
+	/* Clock modules that need to be put in SW_DISABLE */
+	for (i = 0; (i < max) && clk_modules_disable[i]; i++)
+		disable_clock_module(clk_modules_disable[i],
+				     wait_for_disable);
+
+	/* Put the clock domains in SW_SLEEP mode */
+	for (i = 0; (i < max) && clk_domains[i]; i++)
+		disable_clock_domain(clk_domains[i]);
+}
+
 /*
  * Before scaling up the clocks we need to have the PMIC scale up the
  * voltages first.  This will be dependent on which PMIC is in use
diff --git a/arch/arm/cpu/armv7/am33xx/clock_am43xx.c b/arch/arm/cpu/armv7/am33xx/clock_am43xx.c
index b639694..35c431e 100644
--- a/arch/arm/cpu/armv7/am33xx/clock_am43xx.c
+++ b/arch/arm/cpu/armv7/am33xx/clock_am43xx.c
@@ -135,3 +135,39 @@
 	/* For OPP100 the mac clock should be /5. */
 	writel(0x4, &cmdpll->clkselmacclk);
 }
+
+#ifdef CONFIG_TI_EDMA3
+void enable_edma3_clocks(void)
+{
+	u32 *const clk_domains_edma3[] = {
+		0
+	};
+
+	u32 *const clk_modules_explicit_en_edma3[] = {
+		&cmper->tpccclkctrl,
+		&cmper->tptc0clkctrl,
+		0
+	};
+
+	do_enable_clocks(clk_domains_edma3,
+			 clk_modules_explicit_en_edma3,
+			 1);
+}
+
+void disable_edma3_clocks(void)
+{
+	u32 *const clk_domains_edma3[] = {
+		0
+	};
+
+	u32 *const clk_modules_disable_edma3[] = {
+		&cmper->tpccclkctrl,
+		&cmper->tptc0clkctrl,
+		0
+	};
+
+	do_disable_clocks(clk_domains_edma3,
+			  clk_modules_disable_edma3,
+			  1);
+}
+#endif
diff --git a/arch/arm/cpu/armv7/omap-common/clocks-common.c b/arch/arm/cpu/armv7/omap-common/clocks-common.c
index c94a807..e28b795 100644
--- a/arch/arm/cpu/armv7/omap-common/clocks-common.c
+++ b/arch/arm/cpu/armv7/omap-common/clocks-common.c
@@ -648,6 +648,14 @@
 	debug("Enable clock domain - %x\n", clkctrl_reg);
 }
 
+static inline void disable_clock_domain(u32 const clkctrl_reg)
+{
+	clrsetbits_le32(clkctrl_reg, CD_CLKCTRL_CLKTRCTRL_MASK,
+			CD_CLKCTRL_CLKTRCTRL_SW_SLEEP <<
+			CD_CLKCTRL_CLKTRCTRL_SHIFT);
+	debug("Disable clock domain - %x\n", clkctrl_reg);
+}
+
 static inline void wait_for_clk_enable(u32 clkctrl_addr)
 {
 	u32 clkctrl, idlest = MODULE_CLKCTRL_IDLEST_DISABLED;
@@ -677,6 +685,34 @@
 		wait_for_clk_enable(clkctrl_addr);
 }
 
+static inline void wait_for_clk_disable(u32 clkctrl_addr)
+{
+	u32 clkctrl, idlest = MODULE_CLKCTRL_IDLEST_FULLY_FUNCTIONAL;
+	u32 bound = LDELAY;
+
+	while ((idlest != MODULE_CLKCTRL_IDLEST_DISABLED)) {
+		clkctrl = readl(clkctrl_addr);
+		idlest = (clkctrl & MODULE_CLKCTRL_IDLEST_MASK) >>
+			 MODULE_CLKCTRL_IDLEST_SHIFT;
+		if (--bound == 0) {
+			printf("Clock disable failed for 0x%x idlest 0x%x\n",
+			       clkctrl_addr, clkctrl);
+			return;
+		}
+	}
+}
+
+static inline void disable_clock_module(u32 const clkctrl_addr,
+					u32 wait_for_disable)
+{
+	clrsetbits_le32(clkctrl_addr, MODULE_CLKCTRL_MODULEMODE_MASK,
+			MODULE_CLKCTRL_MODULEMODE_SW_DISABLE <<
+			MODULE_CLKCTRL_MODULEMODE_SHIFT);
+	debug("Disable clock module - %x\n", clkctrl_addr);
+	if (wait_for_disable)
+		wait_for_clk_disable(clkctrl_addr);
+}
+
 void freq_update_core(void)
 {
 	u32 freq_config1 = 0;
@@ -800,6 +836,23 @@
 	}
 }
 
+void do_disable_clocks(u32 const *clk_domains,
+			    u32 const *clk_modules_disable,
+			    u8 wait_for_disable)
+{
+	u32 i, max = 100;
+
+
+	/* Clock modules that need to be put in SW_DISABLE */
+	for (i = 0; (i < max) && clk_modules_disable[i]; i++)
+		disable_clock_module(clk_modules_disable[i],
+				     wait_for_disable);
+
+	/* Put the clock domains in SW_SLEEP mode */
+	for (i = 0; (i < max) && clk_domains[i]; i++)
+		disable_clock_domain(clk_domains[i]);
+}
+
 void prcm_init(void)
 {
 	switch (omap_hw_init_context()) {
diff --git a/arch/arm/cpu/armv7/omap5/hw_data.c b/arch/arm/cpu/armv7/omap5/hw_data.c
index 3a723ca..33f92b7e 100644
--- a/arch/arm/cpu/armv7/omap5/hw_data.c
+++ b/arch/arm/cpu/armv7/omap5/hw_data.c
@@ -565,6 +565,47 @@
 			 1);
 }
 
+#ifdef CONFIG_TI_EDMA3
+void enable_edma3_clocks(void)
+{
+	u32 const clk_domains_edma3[] = {
+		0
+	};
+
+	u32 const clk_modules_hw_auto_edma3[] = {
+		(*prcm)->cm_l3main1_tptc1_clkctrl,
+		(*prcm)->cm_l3main1_tptc2_clkctrl,
+		0
+	};
+
+	u32 const clk_modules_explicit_en_edma3[] = {
+		0
+	};
+
+	do_enable_clocks(clk_domains_edma3,
+			 clk_modules_hw_auto_edma3,
+			 clk_modules_explicit_en_edma3,
+			 1);
+}
+
+void disable_edma3_clocks(void)
+{
+	u32 const clk_domains_edma3[] = {
+		0
+	};
+
+	u32 const clk_modules_disable_edma3[] = {
+		(*prcm)->cm_l3main1_tptc1_clkctrl,
+		(*prcm)->cm_l3main1_tptc2_clkctrl,
+		0
+	};
+
+	do_disable_clocks(clk_domains_edma3,
+			  clk_modules_disable_edma3,
+			  1);
+}
+#endif
+
 const struct ctrl_ioregs ioregs_omap5430 = {
 	.ctrl_ddrch = DDR_IO_I_34OHM_SR_FASTEST_WD_DQ_NO_PULL_DQS_PULL_DOWN,
 	.ctrl_lpddr2ch = DDR_IO_I_34OHM_SR_FASTEST_WD_CK_CKE_NCS_CA_PULL_DOWN,
diff --git a/arch/arm/cpu/armv7/omap5/prcm-regs.c b/arch/arm/cpu/armv7/omap5/prcm-regs.c
index cd51fe7..d01ce88 100644
--- a/arch/arm/cpu/armv7/omap5/prcm-regs.c
+++ b/arch/arm/cpu/armv7/omap5/prcm-regs.c
@@ -989,4 +989,8 @@
 
 	.prm_abbldo_mpu_setup			= 0x4AE07DDC,
 	.prm_abbldo_mpu_ctrl			= 0x4AE07DE0,
+
+	/* L3MAIN1 EDMA */
+	.cm_l3main1_tptc1_clkctrl               = 0x4a008778,
+	.cm_l3main1_tptc2_clkctrl               = 0x4a008780,
 };
diff --git a/arch/arm/include/asm/arch-am33xx/clock.h b/arch/arm/include/asm/arch-am33xx/clock.h
index 4af6b57..a6d2419 100644
--- a/arch/arm/include/asm/arch-am33xx/clock.h
+++ b/arch/arm/include/asm/arch-am33xx/clock.h
@@ -112,5 +112,6 @@
 void prcm_init(void);
 void enable_basic_clocks(void);
 void do_enable_clocks(u32 *const *, u32 *const *, u8);
+void do_disable_clocks(u32 *const *, u32 *const *, u8);
 
 #endif
diff --git a/arch/arm/include/asm/arch-omap5/hardware.h b/arch/arm/include/asm/arch-omap5/hardware.h
index f7011b4..a5bd600 100644
--- a/arch/arm/include/asm/arch-omap5/hardware.h
+++ b/arch/arm/include/asm/arch-omap5/hardware.h
@@ -23,4 +23,9 @@
 /* GPMC Base address */
 #define GPMC_BASE			0x50000000
 
+/* EDMA3 Base address for DRA7XX and AM57XX */
+#if defined(CONFIG_DRA7XX) || defined(CONFIG_AM57XX)
+#define EDMA3_BASE			0x43300000
+#endif
+
 #endif
diff --git a/arch/arm/include/asm/omap_common.h b/arch/arm/include/asm/omap_common.h
index 056affc..b67d4b6 100644
--- a/arch/arm/include/asm/omap_common.h
+++ b/arch/arm/include/asm/omap_common.h
@@ -349,6 +349,10 @@
 	/* IPU */
 	u32 cm_ipu_clkstctrl;
 	u32 cm_ipu_i2c5_clkctrl;
+
+	/* L3MAIN1 EDMA */
+	u32 cm_l3main1_tptc1_clkctrl;
+	u32 cm_l3main1_tptc2_clkctrl;
 };
 
 struct omap_sys_ctrl_regs {
@@ -575,6 +579,10 @@
 		      u32 const *clk_modules_explicit_en,
 		      u8 wait_for_enable);
 
+void do_disable_clocks(u32 const *clk_domains,
+		       u32 const *clk_modules_disable,
+		       u8 wait_for_disable);
+
 void setup_post_dividers(u32 const base,
 			const struct dpll_params *params);
 u32 omap_ddr_clk(void);
@@ -594,6 +602,9 @@
 
 void omap_smc1(u32 service, u32 val);
 
+void enable_edma3_clocks(void);
+void disable_edma3_clocks(void);
+
 /* ABB */
 #define OMAP_ABB_NOMINAL_OPP		0
 #define OMAP_ABB_FAST_OPP		1
diff --git a/arch/arm/include/asm/ti-common/ti-edma3.h b/arch/arm/include/asm/ti-common/ti-edma3.h
index 5adc1da..6a7a321 100644
--- a/arch/arm/include/asm/ti-common/ti-edma3.h
+++ b/arch/arm/include/asm/ti-common/ti-edma3.h
@@ -117,5 +117,7 @@
 void edma3_set_transfer_params(u32 base, int slot, int acnt,
 			       int bcnt, int ccnt, u16 bcnt_rld,
 			       enum edma3_sync_dimension sync_mode);
+void edma3_transfer(unsigned long edma3_base_addr, unsigned int
+		edma_slot_num, void *dst, void *src, size_t len);
 
 #endif
diff --git a/common/cmd_sf.c b/common/cmd_sf.c
index 3746e0d..ac7f5df 100644
--- a/common/cmd_sf.c
+++ b/common/cmd_sf.c
@@ -223,7 +223,7 @@
 
 	if (end - buf >= 200)
 		scale = (end - buf) / 100;
-	cmp_buf = malloc(flash->sector_size);
+	cmp_buf = memalign(ARCH_DMA_MINALIGN, flash->sector_size);
 	if (cmp_buf) {
 		ulong last_update = get_timer(0);
 
@@ -484,12 +484,12 @@
 	if (*argv[2] == 0 || *endp != 0)
 		return -1;
 
-	vbuf = malloc(len);
+	vbuf = memalign(ARCH_DMA_MINALIGN, len);
 	if (!vbuf) {
 		printf("Cannot allocate memory (%lu bytes)\n", len);
 		return 1;
 	}
-	buf = malloc(len);
+	buf = memalign(ARCH_DMA_MINALIGN, len);
 	if (!buf) {
 		free(vbuf);
 		printf("Cannot allocate memory (%lu bytes)\n", len);
diff --git a/common/env_sf.c b/common/env_sf.c
index e928f57..9409831 100644
--- a/common/env_sf.c
+++ b/common/env_sf.c
@@ -79,7 +79,7 @@
 	if (CONFIG_ENV_SECT_SIZE > CONFIG_ENV_SIZE) {
 		saved_size = CONFIG_ENV_SECT_SIZE - CONFIG_ENV_SIZE;
 		saved_offset = env_new_offset + CONFIG_ENV_SIZE;
-		saved_buffer = malloc(saved_size);
+		saved_buffer = memalign(ARCH_DMA_MINALIGN, saved_size);
 		if (!saved_buffer) {
 			ret = 1;
 			goto done;
@@ -142,9 +142,10 @@
 	env_t *tmp_env2 = NULL;
 	env_t *ep = NULL;
 
-	tmp_env1 = (env_t *)malloc(CONFIG_ENV_SIZE);
-	tmp_env2 = (env_t *)malloc(CONFIG_ENV_SIZE);
-
+	tmp_env1 = (env_t *)memalign(ARCH_DMA_MINALIGN,
+			CONFIG_ENV_SIZE);
+	tmp_env2 = (env_t *)memalign(ARCH_DMA_MINALIGN,
+			CONFIG_ENV_SIZE);
 	if (!tmp_env1 || !tmp_env2) {
 		set_default_env("!malloc() failed");
 		goto out;
@@ -295,7 +296,7 @@
 	int ret;
 	char *buf = NULL;
 
-	buf = (char *)malloc(CONFIG_ENV_SIZE);
+	buf = (char *)memalign(ARCH_DMA_MINALIGN, CONFIG_ENV_SIZE);
 	env_flash = spi_flash_probe(CONFIG_ENV_SPI_BUS, CONFIG_ENV_SPI_CS,
 			CONFIG_ENV_SPI_MAX_HZ, CONFIG_ENV_SPI_MODE);
 	if (!env_flash) {
diff --git a/drivers/dma/ti-edma3.c b/drivers/dma/ti-edma3.c
index 8184ded..d6a427f 100644
--- a/drivers/dma/ti-edma3.c
+++ b/drivers/dma/ti-edma3.c
@@ -382,3 +382,81 @@
 	/* Clear the channel map */
 	__raw_writel(0, base + EDMA3_QCHMAP(cfg->chnum));
 }
+
+void edma3_transfer(unsigned long edma3_base_addr, unsigned int
+		    edma_slot_num, void *dst, void *src, size_t len)
+{
+	struct edma3_slot_config        slot;
+	struct edma3_channel_config     edma_channel;
+	int                             b_cnt_value = 1;
+	int                             rem_bytes  = 0;
+	int                             a_cnt_value = len;
+	unsigned int                    addr = (unsigned int) (dst);
+	unsigned int                    max_acnt  = 0x7FFFU;
+
+	if (len > max_acnt) {
+		b_cnt_value = (len / max_acnt);
+		rem_bytes  = (len % max_acnt);
+		a_cnt_value = max_acnt;
+	}
+
+	slot.opt        = 0;
+	slot.src        = ((unsigned int) src);
+	slot.acnt       = a_cnt_value;
+	slot.bcnt       = b_cnt_value;
+	slot.ccnt       = 1;
+	slot.src_bidx   = a_cnt_value;
+	slot.dst_bidx   = a_cnt_value;
+	slot.src_cidx   = 0;
+	slot.dst_cidx   = 0;
+	slot.link       = EDMA3_PARSET_NULL_LINK;
+	slot.bcntrld    = 0;
+	slot.opt        = EDMA3_SLOPT_TRANS_COMP_INT_ENB |
+			  EDMA3_SLOPT_COMP_CODE(0) |
+			  EDMA3_SLOPT_STATIC | EDMA3_SLOPT_AB_SYNC;
+
+	edma3_slot_configure(edma3_base_addr, edma_slot_num, &slot);
+	edma_channel.slot = edma_slot_num;
+	edma_channel.chnum = 0;
+	edma_channel.complete_code = 0;
+	/* set event trigger to dst update */
+	edma_channel.trigger_slot_word = EDMA3_TWORD(dst);
+
+	qedma3_start(edma3_base_addr, &edma_channel);
+	edma3_set_dest_addr(edma3_base_addr, edma_channel.slot, addr);
+
+	while (edma3_check_for_transfer(edma3_base_addr, &edma_channel))
+		;
+	qedma3_stop(edma3_base_addr, &edma_channel);
+
+	if (rem_bytes != 0) {
+		slot.opt        = 0;
+		slot.src        =
+			(b_cnt_value * max_acnt) + ((unsigned int) src);
+		slot.acnt       = rem_bytes;
+		slot.bcnt       = 1;
+		slot.ccnt       = 1;
+		slot.src_bidx   = rem_bytes;
+		slot.dst_bidx   = rem_bytes;
+		slot.src_cidx   = 0;
+		slot.dst_cidx   = 0;
+		slot.link       = EDMA3_PARSET_NULL_LINK;
+		slot.bcntrld    = 0;
+		slot.opt        = EDMA3_SLOPT_TRANS_COMP_INT_ENB |
+				  EDMA3_SLOPT_COMP_CODE(0) |
+				  EDMA3_SLOPT_STATIC | EDMA3_SLOPT_AB_SYNC;
+		edma3_slot_configure(edma3_base_addr, edma_slot_num, &slot);
+		edma_channel.slot = edma_slot_num;
+		edma_channel.chnum = 0;
+		edma_channel.complete_code = 0;
+		/* set event trigger to dst update */
+		edma_channel.trigger_slot_word = EDMA3_TWORD(dst);
+
+		qedma3_start(edma3_base_addr, &edma_channel);
+		edma3_set_dest_addr(edma3_base_addr, edma_channel.slot, addr +
+				    (max_acnt * b_cnt_value));
+		while (edma3_check_for_transfer(edma3_base_addr, &edma_channel))
+			;
+		qedma3_stop(edma3_base_addr, &edma_channel);
+	}
+}
diff --git a/drivers/mtd/spi/Kconfig b/drivers/mtd/spi/Kconfig
index 8b730ff..3f7433c 100644
--- a/drivers/mtd/spi/Kconfig
+++ b/drivers/mtd/spi/Kconfig
@@ -86,6 +86,21 @@
 
 endif
 
+config SPI_FLASH_USE_4K_SECTORS
+	bool "Use small 4096 B erase sectors"
+	depends on SPI_FLASH
+	default y
+	help
+	  Many flash memories support erasing small (4096 B) sectors. Depending
+	  on the usage, this feature may provide a performance gain compared
+	  to erasing whole blocks (32/64 KiB).
+	  Changing a small part of the flash's contents is usually faster with
+	  small sectors. On the other hand, erasing should be faster when using
+	  a 64 KiB block instead of 16 × 4 KiB sectors.
+
+	  Please note that some tools/drivers/filesystems may not work with
+	  a 4096 B erase size (e.g. UBIFS requires 15 KiB as a minimum).
+
 config SPI_FLASH_DATAFLASH
 	bool "AT45xxx DataFlash support"
 	depends on SPI_FLASH && DM_SPI_FLASH
diff --git a/drivers/mtd/spi/sf_internal.h b/drivers/mtd/spi/sf_internal.h
index 9fb5557..9c95d56 100644
--- a/drivers/mtd/spi/sf_internal.h
+++ b/drivers/mtd/spi/sf_internal.h
@@ -37,7 +37,11 @@
 
 /* sf param flags */
 enum {
+#ifdef CONFIG_SPI_FLASH_USE_4K_SECTORS
 	SECT_4K		= 1 << 0,
+#else
+	SECT_4K		= 0 << 0,
+#endif
 	SECT_32K	= 1 << 1,
 	E_FSR		= 1 << 2,
 	SST_BP		= 1 << 3,
diff --git a/drivers/mtd/spi/sf_ops.c b/drivers/mtd/spi/sf_ops.c
index 38592f5..900ec1f 100644
--- a/drivers/mtd/spi/sf_ops.c
+++ b/drivers/mtd/spi/sf_ops.c
@@ -14,6 +14,7 @@
 #include <spi.h>
 #include <spi_flash.h>
 #include <watchdog.h>
+#include <linux/compiler.h>
 
 #include "sf_internal.h"
 
@@ -378,6 +379,11 @@
 	return ret;
 }
 
+void __weak spi_flash_copy_mmap(void *data, void *offset, size_t len)
+{
+	memcpy(data, offset, len);
+}
+
 int spi_flash_cmd_read_ops(struct spi_flash *flash, u32 offset,
 		size_t len, void *data)
 {
@@ -394,7 +400,7 @@
 			return ret;
 		}
 		spi_xfer(flash->spi, 0, NULL, NULL, SPI_XFER_MMAP);
-		memcpy(data, flash->memory_map + offset, len);
+		spi_flash_copy_mmap(data, flash->memory_map + offset, len);
 		spi_xfer(flash->spi, 0, NULL, NULL, SPI_XFER_MMAP_END);
 		spi_release_bus(flash->spi);
 		return 0;
diff --git a/drivers/spi/ti_qspi.c b/drivers/spi/ti_qspi.c
index 3356c0f..bd63db8 100644
--- a/drivers/spi/ti_qspi.c
+++ b/drivers/spi/ti_qspi.c
@@ -13,6 +13,8 @@
 #include <spi.h>
 #include <asm/gpio.h>
 #include <asm/omap_gpio.h>
+#include <asm/omap_common.h>
+#include <asm/ti-common/ti-edma3.h>
 
 /* ti qpsi register bit masks */
 #define QSPI_TIMEOUT                    2000000
@@ -106,7 +108,6 @@
 	slave->memory_map = (void *)MMAP_START_ADDR_DRA;
 #else
 	slave->memory_map = (void *)MMAP_START_ADDR_AM43x;
-	slave->op_mode_rx = 8;
 #endif
 
 #ifdef CONFIG_QSPI_QUAD_SUPPORT
@@ -114,6 +115,7 @@
 			QSPI_SETUP0_NUM_D_BYTES_8_BITS |
 			QSPI_SETUP0_READ_QUAD | QSPI_CMD_WRITE |
 			QSPI_NUM_DUMMY_BITS);
+	slave->op_mode_rx = SPI_OPM_RX_QOF;
 #else
 	memval |= QSPI_CMD_READ | QSPI_SETUP0_NUM_A_BYTES |
 			QSPI_SETUP0_NUM_D_BYTES_NO_BITS |
@@ -347,3 +349,26 @@
 
 	return 0;
 }
+
+/* TODO: control from sf layer to here through dm-spi */
+#ifdef CONFIG_TI_EDMA3
+void spi_flash_copy_mmap(void *data, void *offset, size_t len)
+{
+	unsigned int			addr = (unsigned int) (data);
+	unsigned int			edma_slot_num = 1;
+
+	/* Invalidate the area, so no writeback into the RAM races with DMA */
+	invalidate_dcache_range(addr, addr + roundup(len, ARCH_DMA_MINALIGN));
+
+	/* enable edma3 clocks */
+	enable_edma3_clocks();
+
+	/* Call the EDMA3 API to do the actual DMA transfer */
+	edma3_transfer(EDMA3_BASE, edma_slot_num, data, offset, len);
+
+	/* disable edma3 clocks */
+	disable_edma3_clocks();
+
+	*((unsigned int *)offset) += len;
+}
+#endif
diff --git a/include/configs/dra7xx_evm.h b/include/configs/dra7xx_evm.h
index 7499447..6e32de8 100644
--- a/include/configs/dra7xx_evm.h
+++ b/include/configs/dra7xx_evm.h
@@ -166,6 +166,8 @@
 
 /* SPI SPL */
 #define CONFIG_SPL_SPI_SUPPORT
+#define CONFIG_SPL_DMA_SUPPORT
+#define CONFIG_TI_EDMA3
 #define CONFIG_SPL_SPI_LOAD
 #define CONFIG_SPL_SPI_FLASH_SUPPORT
 #define CONFIG_SYS_SPI_U_BOOT_OFFS     0x40000
diff --git a/include/spi.h b/include/spi.h
index 1836236..51fdfd6 100644
--- a/include/spi.h
+++ b/include/spi.h
@@ -272,6 +272,9 @@
 int  spi_xfer(struct spi_slave *slave, unsigned int bitlen, const void *dout,
 		void *din, unsigned long flags);
 
+/* Copy memory mapped data */
+void spi_flash_copy_mmap(void *data, void *offset, size_t len);
+
 /**
  * Determine if a SPI chipselect is valid.
  * This function is provided by the board if the low-level SPI driver