spi: cadence_qspi: support FIFO width other than 4 bytes

This patch makes the code compatible with FIFO depths other than 4
bytes. It also simplify read/write FIFO loops.

Signed-off-by: Vikas Manocha <vikas.manocha@st.com>
Tested-by: Stefan Roese <sr@denx.de>
Reviewed-by: Jagannadh Teki <jteki@openedev.com>
diff --git a/drivers/spi/cadence_qspi_apb.c b/drivers/spi/cadence_qspi_apb.c
index cbf0d42..d053407 100644
--- a/drivers/spi/cadence_qspi_apb.c
+++ b/drivers/spi/cadence_qspi_apb.c
@@ -200,18 +200,16 @@
 	unsigned int *dest_ptr = (unsigned int *)dest;
 	unsigned int *src_ptr = (unsigned int *)src_ahb_addr;
 
-	while (remaining > 0) {
-		if (remaining >= CQSPI_FIFO_WIDTH) {
-			*dest_ptr = readl(src_ptr);
-			remaining -= CQSPI_FIFO_WIDTH;
-		} else {
-			/* dangling bytes */
-			temp = readl(src_ptr);
-			memcpy(dest_ptr, &temp, remaining);
-			break;
-		}
+	while (remaining >= sizeof(dest_ptr)) {
+		*dest_ptr = readl(src_ptr);
+		remaining -= sizeof(src_ptr);
 		dest_ptr++;
 	}
+	if (remaining) {
+		/* dangling bytes */
+		temp = readl(src_ptr);
+		memcpy(dest_ptr, &temp, remaining);
+	}
 
 	return;
 }
@@ -219,24 +217,26 @@
 static void cadence_qspi_apb_write_fifo_data(const void *dest_ahb_addr,
 	const void *src, unsigned int bytes)
 {
-	unsigned int temp;
+	unsigned int temp = 0;
+	int i;
 	int remaining = bytes;
 	unsigned int *dest_ptr = (unsigned int *)dest_ahb_addr;
 	unsigned int *src_ptr = (unsigned int *)src;
 
-	while (remaining > 0) {
-		if (remaining >= CQSPI_FIFO_WIDTH) {
-			writel(*src_ptr, dest_ptr);
-			remaining -= sizeof(unsigned int);
-		} else {
-			/* dangling bytes */
-			memcpy(&temp, src_ptr, remaining);
-			writel(temp, dest_ptr);
-			break;
-		}
-		src_ptr++;
+	while (remaining >= CQSPI_FIFO_WIDTH) {
+		for (i = CQSPI_FIFO_WIDTH/sizeof(src_ptr) - 1; i >= 0; i--)
+			writel(*(src_ptr+i), dest_ptr+i);
+		src_ptr += CQSPI_FIFO_WIDTH/sizeof(src_ptr);
+		remaining -= CQSPI_FIFO_WIDTH;
 	}
-
+	if (remaining) {
+		/* dangling bytes */
+		i = remaining/sizeof(dest_ptr);
+		memcpy(&temp, src_ptr+i, remaining % sizeof(dest_ptr));
+		writel(temp, dest_ptr+i);
+		for (--i; i >= 0; i--)
+			writel(*(src_ptr+i), dest_ptr+i);
+	}
 	return;
 }