spi: cadence_qspi_apb: Use 32 bit indirect read transaction when possible

According to Section 11.15.4.9.1 Indirect Read Controller of K2G SoC
TRM SPRUHY8D[1], the external master is only permitted to issue 32-bit
data interface reads until the last word of an indirect transfer.
So, make sure that QSPI indirect reads are 32-bit sized except for the
final read. If the rxbuf is unaligned, then use a bounce buffer so that
readsl() can be used instead of readsb() to avoid non-32-bit accesses.

[1] www.ti.com/lit/ug/spruhy8d/spruhy8d.pdf

Signed-off-by: Vignesh R <vigneshr@ti.com>
Reviewed-by: Marek Vasut <marex@denx.de>
Reviewed-by: Jagan Teki <jagan@openedev.com>
diff --git a/drivers/spi/cadence_qspi_apb.c b/drivers/spi/cadence_qspi_apb.c
index 5d5b6f0..e02f221 100644
--- a/drivers/spi/cadence_qspi_apb.c
+++ b/drivers/spi/cadence_qspi_apb.c
@@ -634,6 +634,8 @@
 {
 	unsigned int remaining = n_rx;
 	unsigned int bytes_to_read = 0;
+	struct bounce_buffer bb;
+	u8 *bb_rxbuf;
 	int ret;
 
 	writel(n_rx, plat->regbase + CQSPI_REG_INDIRECTRDBYTES);
@@ -642,6 +644,11 @@
 	writel(CQSPI_REG_INDIRECTRD_START,
 	       plat->regbase + CQSPI_REG_INDIRECTRD);
 
+	ret = bounce_buffer_start(&bb, (void *)rxbuf, n_rx, GEN_BB_WRITE);
+	if (ret)
+		return ret;
+	bb_rxbuf = bb.bounce_buffer;
+
 	while (remaining > 0) {
 		ret = cadence_qspi_wait_for_data(plat);
 		if (ret < 0) {
@@ -655,12 +662,13 @@
 			bytes_to_read *= CQSPI_FIFO_WIDTH;
 			bytes_to_read = bytes_to_read > remaining ?
 					remaining : bytes_to_read;
-			/* Handle non-4-byte aligned access to avoid data abort. */
-			if (((uintptr_t)rxbuf % 4) || (bytes_to_read % 4))
-				readsb(plat->ahbbase, rxbuf, bytes_to_read);
-			else
-				readsl(plat->ahbbase, rxbuf, bytes_to_read >> 2);
-			rxbuf += bytes_to_read;
+			readsl(plat->ahbbase, bb_rxbuf, bytes_to_read >> 2);
+			if (bytes_to_read % 4)
+				readsb(plat->ahbbase,
+				       bb_rxbuf + rounddown(bytes_to_read, 4),
+				       bytes_to_read % 4);
+
+			bb_rxbuf += bytes_to_read;
 			remaining -= bytes_to_read;
 			bytes_to_read = cadence_qspi_get_rd_sram_level(plat);
 		}
@@ -677,6 +685,7 @@
 	/* Clear indirect completion status */
 	writel(CQSPI_REG_INDIRECTRD_DONE,
 	       plat->regbase + CQSPI_REG_INDIRECTRD);
+	bounce_buffer_stop(&bb);
 
 	return 0;
 
@@ -684,6 +693,7 @@
 	/* Cancel the indirect read */
 	writel(CQSPI_REG_INDIRECTRD_CANCEL,
 	       plat->regbase + CQSPI_REG_INDIRECTRD);
+	bounce_buffer_stop(&bb);
 	return ret;
 }