From patchwork Fri Jul 20 11:26:24 2012 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: =?utf-8?q?Beno=C3=AEt_Th=C3=A9baudeau?= X-Patchwork-Id: 172204 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from theia.denx.de (theia.denx.de [85.214.87.163]) by ozlabs.org (Postfix) with ESMTP id 65A952C0391 for ; Fri, 20 Jul 2012 21:21:42 +1000 (EST) Received: from localhost (localhost [127.0.0.1]) by theia.denx.de (Postfix) with ESMTP id 5F363280EA; Fri, 20 Jul 2012 13:21:38 +0200 (CEST) X-Virus-Scanned: Debian amavisd-new at theia.denx.de Received: from theia.denx.de ([127.0.0.1]) by localhost (theia.denx.de [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id sZ-3tkkZSAUb; Fri, 20 Jul 2012 13:21:38 +0200 (CEST) Received: from theia.denx.de (localhost [127.0.0.1]) by theia.denx.de (Postfix) with ESMTP id 1BD2E280E4; Fri, 20 Jul 2012 13:21:37 +0200 (CEST) Received: from localhost (localhost [127.0.0.1]) by theia.denx.de (Postfix) with ESMTP id B6C4F280E4 for ; Fri, 20 Jul 2012 13:21:34 +0200 (CEST) X-Virus-Scanned: Debian amavisd-new at theia.denx.de Received: from theia.denx.de ([127.0.0.1]) by localhost (theia.denx.de [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id IAPlpYStO769 for ; Fri, 20 Jul 2012 13:21:31 +0200 (CEST) X-policyd-weight: NOT_IN_SBL_XBL_SPAMHAUS=-1.5 NOT_IN_SPAMCOP=-1.5 NOT_IN_BL_NJABL=-1.5 (only DNSBL check requested) Received: from zose-mta11.web4all.fr (zose-mta-11.w4a.fr [178.33.204.86]) by theia.denx.de (Postfix) with ESMTP id 2F870280E3 for ; Fri, 20 Jul 2012 13:21:28 +0200 (CEST) Received: from localhost (localhost [127.0.0.1]) by zose-mta11.web4all.fr (Postfix) with ESMTP id C0A0546019; Fri, 20 Jul 2012 13:26:18 +0200 (CEST) X-Virus-Scanned: amavisd-new at zose1.web4all.fr Received: from zose-mta11.web4all.fr ([127.0.0.1]) by localhost (zose-mta11.web4all.fr [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id F2U3zgMWCfAd; Fri, 20 Jul 2012 13:26:18 +0200 (CEST) Received: from zose-store12.web4all.fr (zose-store12.web4all.fr [178.33.204.49]) by zose-mta11.web4all.fr (Postfix) with ESMTP id 0E1CF46001; Fri, 20 Jul 2012 13:26:18 +0200 (CEST) Date: Fri, 20 Jul 2012 13:26:24 +0200 (CEST) From: =?utf-8?Q?Beno=C3=AEt_Th=C3=A9baudeau?= To: u-boot@lists.denx.de Message-ID: <1024949320.326453.1342783584348.JavaMail.root@advansee.com> In-Reply-To: <1857491712.292135.1342729024425.JavaMail.root@advansee.com> MIME-Version: 1.0 X-Originating-IP: [88.188.188.98] X-Mailer: Zimbra 7.2.0_GA_2669 (ZimbraWebClient - FF3.0 (Win)/7.2.0_GA_2669) Cc: Marek Vasut , Ilya Yanok Subject: [U-Boot] [PATCH v2 2/5] ehci-hcd: Boost transfer speed X-BeenThere: u-boot@lists.denx.de X-Mailman-Version: 2.1.11 Precedence: list List-Id: U-Boot discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: u-boot-bounces@lists.denx.de Errors-To: u-boot-bounces@lists.denx.de This patch takes advantage of the hardware EHCI qTD queuing mechanism to avoid software overhead and to make transfers as fast as possible. The only drawback is a call to memalign. However, this is fast compared to the transfer timings, and the heap size to allocate is small, e.g. a little bit more than 100 kB for a transfer length of 65535 packets of 512 bytes. Tested on i.MX25 and i.MX35. In my test conditions, the speedup was about 15x using page-aligned buffers, which is really appreciable when accessing large files. Signed-off-by: Benoît Thébaudeau Cc: Marek Vasut Cc: Ilya Yanok Cc: Stefan Herbrechtsmeier --- Changes for v2: - Use DIV_ROUND_UP to make code more readable. .../drivers/usb/host/ehci-hcd.c | 92 ++++++++++++++------ 1 file changed, 63 insertions(+), 29 deletions(-) diff --git u-boot-usb-1b4bd0e.orig/drivers/usb/host/ehci-hcd.c u-boot-usb-1b4bd0e/drivers/usb/host/ehci-hcd.c index 5b3b906..cf9ab92 100644 --- u-boot-usb-1b4bd0e.orig/drivers/usb/host/ehci-hcd.c +++ u-boot-usb-1b4bd0e/drivers/usb/host/ehci-hcd.c @@ -208,7 +208,8 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, int length, struct devrequest *req) { ALLOC_ALIGN_BUFFER(struct QH, qh, 1, USB_DMA_MINALIGN); - ALLOC_ALIGN_BUFFER(struct qTD, qtd, 3, USB_DMA_MINALIGN); + struct qTD *qtd; + int qtd_count = 0; int qtd_counter = 0; volatile struct qTD *vtd; @@ -229,8 +230,23 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, le16_to_cpu(req->value), le16_to_cpu(req->value), le16_to_cpu(req->index)); + if (req != NULL) /* SETUP + ACK */ + qtd_count += 1 + 1; + if (length > 0 || req == NULL) { /* buffer */ + if ((uint32_t)buffer & 4095) /* page-unaligned */ + qtd_count += DIV_ROUND_UP(((uint32_t)buffer & 4095) + + length, (QT_BUFFER_CNT - 1) * 4096); + else /* page-aligned */ + qtd_count += DIV_ROUND_UP(length, QT_BUFFER_CNT * 4096); + } + qtd = memalign(USB_DMA_MINALIGN, qtd_count * sizeof(struct qTD)); + if (qtd == NULL) { + printf("unable to allocate TDs\n"); + return -1; + } + memset(qh, 0, sizeof(struct QH)); - memset(qtd, 0, 3 * sizeof(*qtd)); + memset(qtd, 0, qtd_count * sizeof(*qtd)); toggle = usb_gettoggle(dev, usb_pipeendpoint(pipe), usb_pipeout(pipe)); @@ -291,31 +307,46 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, } if (length > 0 || req == NULL) { - /* - * Setup request qTD (3.5 in ehci-r10.pdf) - * - * qt_next ................ 03-00 H - * qt_altnext ............. 07-04 H - * qt_token ............... 0B-08 H - * - * [ buffer, buffer_hi ] loaded with "buffer". - */ - qtd[qtd_counter].qt_next = cpu_to_hc32(QT_NEXT_TERMINATE); - qtd[qtd_counter].qt_altnext = cpu_to_hc32(QT_NEXT_TERMINATE); - token = (toggle << 31) | - (length << 16) | - ((req == NULL ? 1 : 0) << 15) | - (0 << 12) | - (3 << 10) | - ((usb_pipein(pipe) ? 1 : 0) << 8) | (0x80 << 0); - qtd[qtd_counter].qt_token = cpu_to_hc32(token); - if (ehci_td_buffer(&qtd[qtd_counter], buffer, length) != 0) { - printf("unable construct DATA td\n"); - goto fail; - } - /* Update previous qTD! */ - *tdp = cpu_to_hc32((uint32_t)&qtd[qtd_counter]); - tdp = &qtd[qtd_counter++].qt_next; + uint8_t *buf_ptr = buffer; + int left_length = length; + + do { + int xfr_bytes = min(left_length, + (QT_BUFFER_CNT * 4096 - + ((uint32_t)buf_ptr & 4095)) & + ~4095); + + /* + * Setup request qTD (3.5 in ehci-r10.pdf) + * + * qt_next ................ 03-00 H + * qt_altnext ............. 07-04 H + * qt_token ............... 0B-08 H + * + * [ buffer, buffer_hi ] loaded with "buffer". + */ + qtd[qtd_counter].qt_next = + cpu_to_hc32(QT_NEXT_TERMINATE); + qtd[qtd_counter].qt_altnext = + cpu_to_hc32(QT_NEXT_TERMINATE); + token = (toggle << 31) | + (xfr_bytes << 16) | + ((req == NULL ? 1 : 0) << 15) | + (0 << 12) | + (3 << 10) | + ((usb_pipein(pipe) ? 1 : 0) << 8) | (0x80 << 0); + qtd[qtd_counter].qt_token = cpu_to_hc32(token); + if (ehci_td_buffer(&qtd[qtd_counter], buf_ptr, + xfr_bytes) != 0) { + printf("unable construct DATA td\n"); + goto fail; + } + /* Update previous qTD! */ + *tdp = cpu_to_hc32((uint32_t)&qtd[qtd_counter]); + tdp = &qtd[qtd_counter++].qt_next; + buf_ptr += xfr_bytes; + left_length -= xfr_bytes; + } while (left_length > 0); } if (req != NULL) { @@ -346,7 +377,8 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, flush_dcache_range((uint32_t)qh_list, ALIGN_END_ADDR(struct QH, qh_list, 1)); flush_dcache_range((uint32_t)qh, ALIGN_END_ADDR(struct QH, qh, 1)); - flush_dcache_range((uint32_t)qtd, ALIGN_END_ADDR(struct qTD, qtd, 3)); + flush_dcache_range((uint32_t)qtd, + ALIGN_END_ADDR(struct qTD, qtd, qtd_count)); /* Set async. queue head pointer. */ ehci_writel(&hcor->or_asynclistaddr, (uint32_t)qh_list); @@ -377,7 +409,7 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, invalidate_dcache_range((uint32_t)qh, ALIGN_END_ADDR(struct QH, qh, 1)); invalidate_dcache_range((uint32_t)qtd, - ALIGN_END_ADDR(struct qTD, qtd, 3)); + ALIGN_END_ADDR(struct qTD, qtd, qtd_count)); token = hc32_to_cpu(vtd->qt_token); if (!(token & 0x80)) @@ -450,9 +482,11 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, ehci_readl(&hcor->or_portsc[1])); } + free(qtd); return (dev->status != USB_ST_NOT_PROC) ? 0 : -1; fail: + free(qtd); return -1; }