arm64: versal: Add versal specific cadence ospi driver

Add a Cadence OSPI driver for the Versal platform. It implements the DMA
read operation that the cadence_qspi driver uses.

If the "cdns,is-dma" DT property is specified, the cadence_qspi driver
uses DMA for read operations. Because cadence_qspi_apb_dma_read() is
defined in the cadence_ospi_versal driver, add a weak definition of it to
the cadence_qspi driver so that builds without the Versal driver still
link.

Signed-off-by: T Karthik Reddy <t.karthik.reddy@xilinx.com>
Signed-off-by: Ashok Reddy Soma <ashok.reddy.soma@xilinx.com>
Link: https://lore.kernel.org/r/20220512100535.16364-3-ashok.reddy.soma@xilinx.com
Signed-off-by: Michal Simek <michal.simek@amd.com>
diff --git a/MAINTAINERS b/MAINTAINERS
index bfa3bfb..4b74866 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -640,6 +640,7 @@
 F:	arch/arm/mach-versal/
 F:	drivers/net/xilinx_axi_mrmac.*
 F:	drivers/soc/soc_xilinx_versal.c
+F:	drivers/spi/cadence_ospi_versal.c
 F:	drivers/watchdog/xilinx_wwdt.c
 N:	(?<!uni)versal
 
diff --git a/configs/xilinx_versal_virt_defconfig b/configs/xilinx_versal_virt_defconfig
index cb10394..c50ea96 100644
--- a/configs/xilinx_versal_virt_defconfig
+++ b/configs/xilinx_versal_virt_defconfig
@@ -106,6 +106,8 @@
 CONFIG_SOC_XILINX_VERSAL=y
 CONFIG_SPI=y
 CONFIG_DM_SPI=y
+CONFIG_CADENCE_QSPI=y
+CONFIG_CADENCE_OSPI_VERSAL=y
 CONFIG_ZYNQ_SPI=y
 CONFIG_USB=y
 CONFIG_DM_USB_GADGET=y
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index e48d72d..766d563 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -136,6 +136,14 @@
 	int "Cadence QSPI reference clock value in Hz"
 	depends on HAS_CQSPI_REF_CLK
 
+config CADENCE_OSPI_VERSAL
+	bool "Configure Versal OSPI"
+	depends on ARCH_VERSAL && CADENCE_QSPI
+	imply DM_GPIO
+	help
+	  Enable the Versal OSPI DMA operations, which the Cadence QSPI
+	  controller driver uses for OSPI flash reads.
+
 config CF_SPI
         bool "ColdFire SPI driver"
         help
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index 8755408..4de77c2 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -7,6 +7,7 @@
 ifdef CONFIG_$(SPL_TPL_)DM_SPI
 obj-y += spi-uclass.o
 obj-$(CONFIG_CADENCE_QSPI) += cadence_qspi.o cadence_qspi_apb.o
+obj-$(CONFIG_CADENCE_OSPI_VERSAL) += cadence_ospi_versal.o
 obj-$(CONFIG_SANDBOX) += spi-emul-uclass.o
 obj-$(CONFIG_SOFT_SPI) += soft_spi.o
 obj-$(CONFIG_SPI_MEM) += spi-mem.o
diff --git a/drivers/spi/cadence_ospi_versal.c b/drivers/spi/cadence_ospi_versal.c
new file mode 100644
index 0000000..4b13beb
--- /dev/null
+++ b/drivers/spi/cadence_ospi_versal.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * (C) Copyright 2018 Xilinx
+ *
+ * Cadence QSPI controller DMA operations
+ */
+
+#include <clk.h>
+#include <common.h>
+#include <memalign.h>
+#include <wait_bit.h>
+#include <asm/io.h>
+#include <asm/gpio.h>
+#include <asm/cache.h>
+#include <cpu_func.h>
+#include <zynqmp_firmware.h>
+#include <asm/arch/hardware.h>
+#include "cadence_qspi.h"
+#include <dt-bindings/power/xlnx-versal-power.h>
+
+#define CMD_4BYTE_READ  0x13
+#define CMD_4BYTE_FAST_READ  0x0C
+/* Read: multiple-of-4 part via DMA, 1-3 byte remainder via a flash command */
+int cadence_qspi_apb_dma_read(struct cadence_spi_plat *plat,
+			      const struct spi_mem_op *op)
+{
+	u32 reg, rx_rem, n_rx, bytes_to_dma, data;
+	u8 opcode, addr_bytes, *rxbuf, dummy_cycles;
+	int ret;	/* int: holds negative errno from helpers */
+
+	n_rx = op->data.nbytes;
+	rxbuf = op->data.buf.in;
+	rx_rem = n_rx % 4;
+	bytes_to_dma = n_rx - rx_rem;
+	/* Bulk part: DMA bytes_to_dma bytes from the flash into rxbuf */
+	if (bytes_to_dma) {
+		reg = readl(plat->regbase + CQSPI_REG_CONFIG);
+		reg |= CQSPI_REG_CONFIG_ENBL_DMA;
+		writel(reg, plat->regbase + CQSPI_REG_CONFIG);
+
+		writel(bytes_to_dma, plat->regbase + CQSPI_REG_INDIRECTRDBYTES);
+
+		writel(CQSPI_DFLT_INDIR_TRIG_ADDR_RANGE,
+		       plat->regbase + CQSPI_REG_INDIR_TRIG_ADDR_RANGE);
+		writel(CQSPI_DFLT_DMA_PERIPH_CFG,
+		       plat->regbase + CQSPI_REG_DMA_PERIPH_CFG);
+		writel((unsigned long)rxbuf, plat->regbase +
+		       CQSPI_DMA_DST_ADDR_REG); /* review: low 32 bits only? */
+		writel(plat->trigger_address, plat->regbase +
+		       CQSPI_DMA_SRC_RD_ADDR_REG);
+		writel(bytes_to_dma, plat->regbase + CQSPI_DMA_DST_SIZE_REG);
+		flush_dcache_range((unsigned long)rxbuf,
+				   (unsigned long)rxbuf + bytes_to_dma);
+		writel(CQSPI_DFLT_DST_CTRL_REG_VAL,
+		       plat->regbase + CQSPI_DMA_DST_CTRL_REG);
+
+		/* Start the indirect read transfer */
+		writel(CQSPI_REG_INDIRECTRD_START, plat->regbase +
+		       CQSPI_REG_INDIRECTRD);
+		/* Wait for the DMA to complete; bail out on its error code */
+		ret = cadence_qspi_apb_wait_for_dma_cmplt(plat);
+		if (ret)
+			return ret;
+
+		/* Clear indirect completion status */
+		writel(CQSPI_REG_INDIRECTRD_DONE, plat->regbase +
+		       CQSPI_REG_INDIRECTRD);
+		rxbuf += bytes_to_dma;
+	}
+	/* Tail: last rx_rem (1-3) bytes via one flash command, DMA off */
+	if (rx_rem) {
+		reg = readl(plat->regbase + CQSPI_REG_CONFIG);
+		reg &= ~CQSPI_REG_CONFIG_ENBL_DMA;
+		writel(reg, plat->regbase + CQSPI_REG_CONFIG);
+		/* Tail address = indirect start address + DMA'd length */
+		reg = readl(plat->regbase + CQSPI_REG_INDIRECTRDSTARTADDR);
+		reg += bytes_to_dma;
+		writel(reg, plat->regbase + CQSPI_REG_CMDADDRESS);
+
+		addr_bytes = readl(plat->regbase + CQSPI_REG_SIZE) &
+				   CQSPI_REG_SIZE_ADDRESS_MASK;
+		/* Opcode and dummy cycles are fixed here, not taken from op */
+		opcode = CMD_4BYTE_FAST_READ;
+		dummy_cycles = 8;
+		writel((dummy_cycles << CQSPI_REG_RD_INSTR_DUMMY_LSB) | opcode,
+		       plat->regbase + CQSPI_REG_RD_INSTR);
+
+		reg = opcode << CQSPI_REG_CMDCTRL_OPCODE_LSB;
+		reg |= (0x1 << CQSPI_REG_CMDCTRL_RD_EN_LSB);
+		reg |= (addr_bytes & CQSPI_REG_CMDCTRL_ADD_BYTES_MASK) <<
+			CQSPI_REG_CMDCTRL_ADD_BYTES_LSB;
+		reg |= (0x1 << CQSPI_REG_CMDCTRL_ADDR_EN_LSB);
+		dummy_cycles = (readl(plat->regbase + CQSPI_REG_RD_INSTR) >>
+				CQSPI_REG_RD_INSTR_DUMMY_LSB) &
+				CQSPI_REG_RD_INSTR_DUMMY_MASK;
+		reg |= (dummy_cycles & CQSPI_REG_CMDCTRL_DUMMY_MASK) <<
+			CQSPI_REG_CMDCTRL_DUMMY_LSB;
+		reg |= (((rx_rem - 1) & CQSPI_REG_CMDCTRL_RD_BYTES_MASK) <<
+			CQSPI_REG_CMDCTRL_RD_BYTES_LSB);
+		ret = cadence_qspi_apb_exec_flash_cmd(plat->regbase, reg);
+		if (ret)
+			return ret;
+		/* Copy only rx_rem bytes of the 32-bit read-data word */
+		data = readl(plat->regbase + CQSPI_REG_CMDREADDATALOWER);
+		memcpy(rxbuf, &data, rx_rem);
+	}
+
+	return 0;
+}
+
+/* Poll the DMA done status: 0 when set, -ETIMEDOUT once the budget is spent */
+int cadence_qspi_apb_wait_for_dma_cmplt(struct cadence_spi_plat *plat)
+{
+	u32 timeout = CQSPI_DMA_TIMEOUT;
+
+	while (!(readl(plat->regbase + CQSPI_DMA_DST_I_STS_REG) &
+		 CQSPI_DMA_DST_I_STS_DONE)) {
+		if (!timeout--) {	/* out of polls and still not done */
+			printf("DMA timeout\n");
+			return -ETIMEDOUT;
+		}
+		udelay(1);
+	}
+	writel(readl(plat->regbase + CQSPI_DMA_DST_I_STS_REG),
+	       plat->regbase + CQSPI_DMA_DST_I_STS_REG); /* likely W1C ack */
+	return 0;
+}
diff --git a/drivers/spi/cadence_qspi.c b/drivers/spi/cadence_qspi.c
index 7209bb4..f1d3050 100644
--- a/drivers/spi/cadence_qspi.c
+++ b/drivers/spi/cadence_qspi.c
@@ -27,6 +27,12 @@
 #define CQSPI_READ			2
 #define CQSPI_WRITE			3
 
+__weak int cadence_qspi_apb_dma_read(struct cadence_spi_plat *plat,
+				     const struct spi_mem_op *op)
+{
+	return 0;	/* weak default: performs no read, yet returns 0 */
+}
+
 static int cadence_spi_write_speed(struct udevice *bus, uint hz)
 {
 	struct cadence_spi_plat *plat = dev_get_plat(bus);
@@ -288,8 +294,12 @@
 		break;
 	case CQSPI_READ:
 		err = cadence_qspi_apb_read_setup(plat, op);
-		if (!err)
-			err = cadence_qspi_apb_read_execute(plat, op);
+		if (!err) {
+			if (plat->is_dma)
+				err = cadence_qspi_apb_dma_read(plat, op);
+			else
+				err = cadence_qspi_apb_read_execute(plat, op);
+		}
 		break;
 	case CQSPI_WRITE:
 		err = cadence_qspi_apb_write_setup(plat, op);
@@ -342,6 +352,8 @@
 	if (plat->ahbsize >= SZ_8M)
 		plat->use_dac_mode = true;
 
+	plat->is_dma = dev_read_bool(bus, "cdns,is-dma");
+
 	/* All other paramters are embedded in the child node */
 	subnode = dev_read_first_subnode(bus);
 	if (!ofnode_valid(subnode)) {
diff --git a/drivers/spi/cadence_qspi.h b/drivers/spi/cadence_qspi.h
index 4c21481..a201ed7 100644
--- a/drivers/spi/cadence_qspi.h
+++ b/drivers/spi/cadence_qspi.h
@@ -224,6 +224,7 @@
 	u8		addr_width;
 	u8		data_width;
 	bool		dtr;
+	bool            is_dma;
 };
 
 struct cadence_spi_priv {
@@ -278,5 +279,9 @@
 void cadence_qspi_apb_readdata_capture(void *reg_base,
 	unsigned int bypass, unsigned int delay);
 unsigned int cm_get_qspi_controller_clk_hz(void);
+int cadence_qspi_apb_dma_read(struct cadence_spi_plat *plat,
+			      const struct spi_mem_op *op);
+int cadence_qspi_apb_wait_for_dma_cmplt(struct cadence_spi_plat *plat);
+int cadence_qspi_apb_exec_flash_cmd(void *reg_base, unsigned int reg);
 
 #endif /* __CADENCE_QSPI_H__ */
diff --git a/drivers/spi/cadence_qspi_apb.c b/drivers/spi/cadence_qspi_apb.c
index ac8b1be..b11bd2d 100644
--- a/drivers/spi/cadence_qspi_apb.c
+++ b/drivers/spi/cadence_qspi_apb.c
@@ -336,8 +336,7 @@
 	cadence_qspi_apb_controller_enable(plat->regbase);
 }
 
-static int cadence_qspi_apb_exec_flash_cmd(void *reg_base,
-	unsigned int reg)
+int cadence_qspi_apb_exec_flash_cmd(void *reg_base, unsigned int reg)
 {
 	unsigned int retry = CQSPI_REG_RETRY;