usb:udc:samsung: Zero copy approach for data passed to Samsung's UDC driver
The Samsung's UDC driver is not anymore copying data from USB requests to
aligned internal buffers. Now it works directly in data allocated in the
upper layers like UMS, DFU, THOR.
This change is possible since those gadgets now must take care to allocate
buffers aligned to cache line (CONFIG_SYS_CACHELINE_SIZE).
This can be achieved by using DEFINE_CACHE_ALIGN_BUFFER() or
ALLOC_CACHE_ALIGN_BUFFER() macros. Those take care to allocate buffer
aligned to cache line in both starting address and its size.
Sometimes it is enough to just use memalign() with size being a
multiplication of cache line size.
Test condition
- test HW + measurement: Trats - Exynos4210 rev.1
- test HW Trats2 - Exynos4412 rev.1
400 MiB compressed rootfs image download with `thor 0 mmc 0`
Measurement:
Transmission speed: 27.04 MiB/s
Signed-off-by: Lukasz Majewski <l.majewski@samsung.com>
Cc: Marek Vasut <marex@denx.de>
diff --git a/drivers/usb/gadget/s3c_udc_otg.c b/drivers/usb/gadget/s3c_udc_otg.c
index ba17a04..63d4487 100644
--- a/drivers/usb/gadget/s3c_udc_otg.c
+++ b/drivers/usb/gadget/s3c_udc_otg.c
@@ -843,7 +843,7 @@
int s3c_udc_probe(struct s3c_plat_otg_data *pdata)
{
struct s3c_udc *dev = &memory;
- int retval = 0, i;
+ int retval = 0;
debug("%s: %p\n", __func__, pdata);
@@ -864,16 +864,15 @@
the_controller = dev;
- for (i = 0; i < S3C_MAX_ENDPOINTS+1; i++) {
- dev->dma_buf[i] = memalign(CONFIG_SYS_CACHELINE_SIZE,
- DMA_BUFFER_SIZE);
- dev->dma_addr[i] = (dma_addr_t) dev->dma_buf[i];
- invalidate_dcache_range((unsigned long) dev->dma_buf[i],
- (unsigned long) (dev->dma_buf[i]
- + DMA_BUFFER_SIZE));
+ usb_ctrl = memalign(CONFIG_SYS_CACHELINE_SIZE,
+ ROUND(sizeof(struct usb_ctrlrequest),
+ CONFIG_SYS_CACHELINE_SIZE));
+ if (!usb_ctrl) {
+ error("No memory available for UDC!\n");
+ return -ENOMEM;
}
- usb_ctrl = dev->dma_buf[0];
- usb_ctrl_dma_addr = dev->dma_addr[0];
+
+ usb_ctrl_dma_addr = (dma_addr_t) usb_ctrl;
udc_reinit(dev);
diff --git a/drivers/usb/gadget/s3c_udc_otg_xfer_dma.c b/drivers/usb/gadget/s3c_udc_otg_xfer_dma.c
index 7c7176b..06dfeed 100644
--- a/drivers/usb/gadget/s3c_udc_otg_xfer_dma.c
+++ b/drivers/usb/gadget/s3c_udc_otg_xfer_dma.c
@@ -110,8 +110,7 @@
ctrl = readl(®->out_endp[ep_num].doepctl);
- writel(the_controller->dma_addr[ep_index(ep)+1],
- ®->out_endp[ep_num].doepdma);
+ writel((unsigned int) ep->dma_buf, ®->out_endp[ep_num].doepdma);
writel(DOEPT_SIZ_PKT_CNT(pktcnt) | DOEPT_SIZ_XFER_SIZE(length),
®->out_endp[ep_num].doeptsiz);
writel(DEPCTL_EPENA|DEPCTL_CNAK|ctrl, ®->out_endp[ep_num].doepctl);
@@ -134,7 +133,6 @@
u32 *buf, ctrl = 0;
u32 length, pktcnt;
u32 ep_num = ep_index(ep);
- u32 *p = the_controller->dma_buf[ep_index(ep)+1];
buf = req->req.buf + req->req.actual;
length = req->req.length - req->req.actual;
@@ -144,10 +142,10 @@
ep->len = length;
ep->dma_buf = buf;
- memcpy(p, ep->dma_buf, length);
- flush_dcache_range((unsigned long) p ,
- (unsigned long) p + DMA_BUFFER_SIZE);
+ flush_dcache_range((unsigned long) ep->dma_buf,
+ (unsigned long) ep->dma_buf +
+ ROUND(ep->len, CONFIG_SYS_CACHELINE_SIZE));
if (length == 0)
pktcnt = 1;
@@ -160,8 +158,7 @@
while (readl(®->grstctl) & TX_FIFO_FLUSH)
;
- writel(the_controller->dma_addr[ep_index(ep)+1],
- ®->in_endp[ep_num].diepdma);
+ writel((unsigned long) ep->dma_buf, ®->in_endp[ep_num].diepdma);
writel(DIEPT_SIZ_PKT_CNT(pktcnt) | DIEPT_SIZ_XFER_SIZE(length),
®->in_endp[ep_num].dieptsiz);
@@ -194,7 +191,6 @@
struct s3c_ep *ep = &dev->ep[ep_num];
struct s3c_request *req = NULL;
u32 ep_tsr = 0, xfer_size = 0, is_short = 0;
- u32 *p = the_controller->dma_buf[ep_index(ep)+1];
if (list_empty(&ep->queue)) {
debug_cond(DEBUG_OUT_EP != 0,
@@ -214,10 +210,23 @@
xfer_size = ep->len - xfer_size;
- invalidate_dcache_range((unsigned long) p,
- (unsigned long) p + DMA_BUFFER_SIZE);
-
- memcpy(ep->dma_buf, p, ep->len);
+ /*
+ * NOTE:
+ *
+ * Please be careful with proper buffer allocation for USB request,
+ * which needs to be aligned to CONFIG_SYS_CACHELINE_SIZE, not only
+ * with starting address, but also its size shall be a cache line
+ * multiplication.
+ *
+ * This will prevent from corruption of data allocated immediatelly
+ * before or after the buffer.
+ *
+ * For armv7, the cache_v7.c provides proper code to emit "ERROR"
+ * message to warn users.
+ */
+ invalidate_dcache_range((unsigned long) ep->dma_buf,
+ (unsigned long) ep->dma_buf +
+ ROUND(xfer_size, CONFIG_SYS_CACHELINE_SIZE));
req->req.actual += min(xfer_size, req->req.length - req->req.actual);
is_short = (xfer_size < ep->ep.maxpacket);
@@ -711,19 +720,14 @@
int s3c_fifo_read(struct s3c_ep *ep, u32 *cp, int max)
{
- u32 bytes;
-
- bytes = sizeof(struct usb_ctrlrequest);
-
- invalidate_dcache_range((unsigned long) ep->dev->dma_buf[ep_index(ep)],
- (unsigned long) ep->dev->dma_buf[ep_index(ep)]
- + DMA_BUFFER_SIZE);
+ invalidate_dcache_range((unsigned long)cp, (unsigned long)cp +
+ ROUND(max, CONFIG_SYS_CACHELINE_SIZE));
debug_cond(DEBUG_EP0 != 0,
- "%s: bytes=%d, ep_index=%d %p\n", __func__,
- bytes, ep_index(ep), ep->dev->dma_buf[ep_index(ep)]);
+ "%s: bytes=%d, ep_index=%d 0x%p\n", __func__,
+ max, ep_index(ep), cp);
- return bytes;
+ return max;
}
/**
@@ -855,14 +859,12 @@
return 1;
}
-u16 g_status;
-
int s3c_udc_get_status(struct s3c_udc *dev,
struct usb_ctrlrequest *crq)
{
u8 ep_num = crq->wIndex & 0x7F;
+ u16 g_status = 0;
u32 ep_ctrl;
- u32 *p = the_controller->dma_buf[1];
debug_cond(DEBUG_SETUP != 0,
"%s: *** USB_REQ_GET_STATUS\n", __func__);
@@ -900,12 +902,13 @@
return 1;
}
- memcpy(p, &g_status, sizeof(g_status));
+ memcpy(usb_ctrl, &g_status, sizeof(g_status));
- flush_dcache_range((unsigned long) p,
- (unsigned long) p + DMA_BUFFER_SIZE);
+ flush_dcache_range((unsigned long) usb_ctrl,
+ (unsigned long) usb_ctrl +
+ ROUND(sizeof(g_status), CONFIG_SYS_CACHELINE_SIZE));
- writel(the_controller->dma_addr[1], ®->in_endp[EP0_CON].diepdma);
+ writel(usb_ctrl_dma_addr, ®->in_endp[EP0_CON].diepdma);
writel(DIEPT_SIZ_PKT_CNT(1) | DIEPT_SIZ_XFER_SIZE(2),
®->in_endp[EP0_CON].dieptsiz);
diff --git a/include/usb/s3c_udc.h b/include/usb/s3c_udc.h
index 734c6cd..6dead2f 100644
--- a/include/usb/s3c_udc.h
+++ b/include/usb/s3c_udc.h
@@ -19,7 +19,7 @@
/*-------------------------------------------------------------------------*/
/* DMA bounce buffer size, 16K is enough even for mass storage */
-#define DMA_BUFFER_SIZE (4096*4)
+#define DMA_BUFFER_SIZE (16*SZ_1K)
#define EP0_FIFO_SIZE 64
#define EP_FIFO_SIZE 512
@@ -81,9 +81,6 @@
struct s3c_plat_otg_data *pdata;
- void *dma_buf[S3C_MAX_ENDPOINTS+1];
- dma_addr_t dma_addr[S3C_MAX_ENDPOINTS+1];
-
int ep0state;
struct s3c_ep ep[S3C_MAX_ENDPOINTS];