Blackfin: bfin_spi: add optional DMA support

This moves the last piece from the old spi_flash driver to the new SPI
framework -- optional DMA RX support.  This typically cuts speeds by ~40%
at the cost of additional ~300 bytes.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
diff --git a/arch/blackfin/include/asm/dma.h b/arch/blackfin/include/asm/dma.h
new file mode 100644
index 0000000..21ff1cf
--- /dev/null
+++ b/arch/blackfin/include/asm/dma.h
@@ -0,0 +1,75 @@
+/*
+ * dma.h - Blackfin DMA defines/structures/etc...
+ *
+ * Copyright 2004-2008 Analog Devices Inc.
+ * Licensed under the GPL-2 or later.
+ */
+
+#ifndef _BLACKFIN_DMA_H_
+#define _BLACKFIN_DMA_H_
+
+#include <asm/mach-common/bits/dma.h>
+
+struct dmasg_large {
+	void *next_desc_addr;
+	unsigned long start_addr;
+	unsigned short cfg;
+	unsigned short x_count;
+	short x_modify;
+	unsigned short y_count;
+	short y_modify;
+} __attribute__((packed));
+
+struct dmasg {
+	unsigned long start_addr;
+	unsigned short cfg;
+	unsigned short x_count;
+	short x_modify;
+	unsigned short y_count;
+	short y_modify;
+} __attribute__((packed));
+
+struct dma_register {
+	void *next_desc_ptr;	/* DMA Next Descriptor Pointer register */
+	unsigned long start_addr;	/* DMA Start address  register */
+
+	unsigned short cfg;	/* DMA Configuration register */
+	unsigned short dummy1;	/* DMA Configuration register */
+
+	unsigned long reserved;
+
+	unsigned short x_count;	/* DMA x_count register */
+	unsigned short dummy2;
+
+	short x_modify;	/* DMA x_modify register */
+	unsigned short dummy3;
+
+	unsigned short y_count;	/* DMA y_count register */
+	unsigned short dummy4;
+
+	short y_modify;	/* DMA y_modify register */
+	unsigned short dummy5;
+
+	void *curr_desc_ptr;	/* DMA Current Descriptor Pointer
+					   register */
+	unsigned long curr_addr_ptr;	/* DMA Current Address Pointer
+						   register */
+	unsigned short irq_status;	/* DMA irq status register */
+	unsigned short dummy6;
+
+	unsigned short peripheral_map;	/* DMA peripheral map register */
+	unsigned short dummy7;
+
+	unsigned short curr_x_count;	/* DMA Current x-count register */
+	unsigned short dummy8;
+
+	unsigned long reserved2;
+
+	unsigned short curr_y_count;	/* DMA Current y-count register */
+	unsigned short dummy9;
+
+	unsigned long reserved3;
+
+};
+
+#endif
diff --git a/drivers/spi/bfin_spi.c b/drivers/spi/bfin_spi.c
index 556b97a..d7e1474 100644
--- a/drivers/spi/bfin_spi.c
+++ b/drivers/spi/bfin_spi.c
@@ -1,7 +1,7 @@
 /*
  * Driver for Blackfin On-Chip SPI device
  *
- * Copyright (c) 2005-2008 Analog Devices Inc.
+ * Copyright (c) 2005-2010 Analog Devices Inc.
  *
  * Licensed under the GPL-2 or later.
  */
@@ -13,6 +13,7 @@
 #include <spi.h>
 
 #include <asm/blackfin.h>
+#include <asm/dma.h>
 #include <asm/gpio.h>
 #include <asm/portmux.h>
 #include <asm/mach-common/bits/spi.h>
@@ -237,10 +238,131 @@
 	SSYNC();
 }
 
+#ifdef __ADSPBF54x__
+# define SPI_DMA_BASE DMA4_NEXT_DESC_PTR
+#elif defined(__ADSPBF533__) || defined(__ADSPBF532__) || defined(__ADSPBF531__) || \
+      defined(__ADSPBF538__) || defined(__ADSPBF539__)
+# define SPI_DMA_BASE DMA5_NEXT_DESC_PTR
+#elif defined(__ADSPBF561__)
+# define SPI_DMA_BASE DMA2_4_NEXT_DESC_PTR
+#elif defined(__ADSPBF537__) || defined(__ADSPBF536__) || defined(__ADSPBF534__) || \
+      defined(__ADSPBF52x__) || defined(__ADSPBF51x__)
+# define SPI_DMA_BASE DMA7_NEXT_DESC_PTR
+#else
+# error "Please provide SPI DMA channel defines"
+#endif
+static volatile struct dma_register *dma = (void *)SPI_DMA_BASE;
+
 #ifndef CONFIG_BFIN_SPI_IDLE_VAL
 # define CONFIG_BFIN_SPI_IDLE_VAL 0xff
 #endif
 
+#ifdef CONFIG_BFIN_SPI_NO_DMA
+# define SPI_DMA 0
+#else
+# define SPI_DMA 1
+#endif
+
+static int spi_dma_xfer(struct bfin_spi_slave *bss, const u8 *tx, u8 *rx,
+			uint bytes)
+{
+	int ret = -1;
+	u16 ndsize, spi_config, dma_config;
+	struct dmasg dmasg[2];
+	const u8 *buf;
+
+	if (tx) {
+		debug("%s: doing half duplex TX\n", __func__);
+		buf = tx;
+		spi_config = TDBR_DMA;
+		dma_config = 0;
+	} else {
+		debug("%s: doing half duplex RX\n", __func__);
+		buf = rx;
+		spi_config = RDBR_DMA;
+		dma_config = WNR;
+	}
+
+	dmasg[0].start_addr = (unsigned long)buf;
+	dmasg[0].x_modify = 1;
+	dma_config |= WDSIZE_8 | DMAEN;
+	if (bytes <= 65536) {
+		blackfin_dcache_flush_invalidate_range(buf, buf + bytes);
+		ndsize = NDSIZE_5;
+		dmasg[0].cfg = NDSIZE_0 | dma_config | FLOW_STOP | DI_EN;
+		dmasg[0].x_count = bytes;
+	} else {
+		blackfin_dcache_flush_invalidate_range(buf, buf + 65536 - 1);
+		ndsize = NDSIZE_7;
+		dmasg[0].cfg = NDSIZE_5 | dma_config | FLOW_ARRAY | DMA2D;
+		dmasg[0].x_count = 0;	/* 2^16 */
+		dmasg[0].y_count = bytes >> 16;	/* count / 2^16 */
+		dmasg[0].y_modify = 1;
+		dmasg[1].start_addr = (unsigned long)(buf + (bytes & ~0xFFFF));
+		dmasg[1].cfg = NDSIZE_0 | dma_config | FLOW_STOP | DI_EN;
+		dmasg[1].x_count = bytes & 0xFFFF; /* count % 2^16 */
+		dmasg[1].x_modify = 1;
+	}
+
+	dma->cfg = 0;
+	dma->irq_status = DMA_DONE | DMA_ERR;
+	dma->curr_desc_ptr = dmasg;
+	write_SPI_CTL(bss, (bss->ctl & ~TDBR_CORE));
+	write_SPI_STAT(bss, -1);
+	SSYNC();
+
+	write_SPI_TDBR(bss, CONFIG_BFIN_SPI_IDLE_VAL);
+	dma->cfg = ndsize | FLOW_ARRAY | DMAEN;
+	write_SPI_CTL(bss, (bss->ctl & ~TDBR_CORE) | spi_config);
+	SSYNC();
+
+	/*
+	 * We already invalidated the first 64k,
+	 * now while we just wait invalidate the remaining part.
+	 * Its not likely that the DMA is going to overtake
+	 */
+	if (bytes > 65536)
+		blackfin_dcache_flush_invalidate_range(buf + 65536, buf + bytes);
+
+	while (!(dma->irq_status & DMA_DONE))
+		if (ctrlc())
+			goto done;
+
+	dma->cfg = 0;
+
+	ret = 0;
+ done:
+	write_SPI_CTL(bss, bss->ctl);
+	return ret;
+}
+
+static int spi_pio_xfer(struct bfin_spi_slave *bss, const u8 *tx, u8 *rx,
+			uint bytes)
+{
+	/* todo: take advantage of hardware fifos  */
+	while (bytes--) {
+		u8 value = (tx ? *tx++ : CONFIG_BFIN_SPI_IDLE_VAL);
+		debug("%s: tx:%x ", __func__, value);
+		write_SPI_TDBR(bss, value);
+		SSYNC();
+		while ((read_SPI_STAT(bss) & TXS))
+			if (ctrlc())
+				return -1;
+		while (!(read_SPI_STAT(bss) & SPIF))
+			if (ctrlc())
+				return -1;
+		while (!(read_SPI_STAT(bss) & RXS))
+			if (ctrlc())
+				return -1;
+		value = read_SPI_RDBR(bss);
+		if (rx)
+			*rx++ = value;
+		debug("rx:%x\n", value);
+	}
+
+	return 0;
+}
+
 int spi_xfer(struct spi_slave *slave, unsigned int bitlen, const void *dout,
 		void *din, unsigned long flags)
 {
@@ -265,32 +387,11 @@
 	if (flags & SPI_XFER_BEGIN)
 		spi_cs_activate(slave);
 
-	/* todo: take advantage of hardware fifos and setup RX dma */
-	while (bytes--) {
-		u8 value = (tx ? *tx++ : CONFIG_BFIN_SPI_IDLE_VAL);
-		debug("%s: tx:%x ", __func__, value);
-		write_SPI_TDBR(bss, value);
-		SSYNC();
-		while ((read_SPI_STAT(bss) & TXS))
-			if (ctrlc()) {
-				ret = -1;
-				goto done;
-			}
-		while (!(read_SPI_STAT(bss) & SPIF))
-			if (ctrlc()) {
-				ret = -1;
-				goto done;
-			}
-		while (!(read_SPI_STAT(bss) & RXS))
-			if (ctrlc()) {
-				ret = -1;
-				goto done;
-			}
-		value = read_SPI_RDBR(bss);
-		if (rx)
-			*rx++ = value;
-		debug("rx:%x\n", value);
-	}
+	/* TX DMA doesn't work quite right */
+	if (SPI_DMA && bytes > 6 && (!tx /*|| !rx*/))
+		ret = spi_dma_xfer(bss, tx, rx, bytes);
+	else
+		ret = spi_pio_xfer(bss, tx, rx, bytes);
 
  done:
 	if (flags & SPI_XFER_END)