Patchwork [U-Boot,v8] usb: align buffers at cacheline

login
register
mail settings
Submitter Puneet Saxena
Date March 5, 2012, 2:46 p.m.
Message ID <1330958781-20863-1-git-send-email-puneets@nvidia.com>
Download mbox | patch
Permalink /patch/144694/
State Changes Requested
Delegated to: Marek Vasut
Headers show

Comments

Puneet Saxena - March 5, 2012, 2:46 p.m.
As DMA expects the buffers to be equal and larger then
cache lines, This aligns buffers at cacheline.

Signed-off-by: Puneet Saxena <puneets@nvidia.com>
---

Changes for V7:
    - Trivial change, missed removing memcpy. Removed now.
Changes for V8:
    - Corrected "setup_packet" allocation using "ALLOC_CACHE_ALIGN_BUFFER". 

 common/cmd_usb.c            |    3 +-
 common/usb.c                |   49 ++++++++++++++++++-----------------
 common/usb_storage.c        |   59 ++++++++++++++++++++----------------------
 disk/part_dos.c             |    2 +-
 drivers/usb/host/ehci-hcd.c |    8 ++++++
 include/scsi.h              |    4 ++-
 include/usb.h               |    4 ++-
 7 files changed, 70 insertions(+), 59 deletions(-)
Marek Vasut - March 5, 2012, 3:35 p.m.
Dear Puneet Saxena,

I replaced the old patch with this one. Thanks!

> As DMA expects the buffers to be equal and larger then
> cache lines, This aligns buffers at cacheline.
> 
> Signed-off-by: Puneet Saxena <puneets@nvidia.com>
> ---
> 
> Changes for V7:
>     - Trivial change, missed removing memcpy. Removed now.
> Changes for V8:
>     - Corrected "setup_packet" allocation using "ALLOC_CACHE_ALIGN_BUFFER".
> 
>  common/cmd_usb.c            |    3 +-
>  common/usb.c                |   49 ++++++++++++++++++-----------------
>  common/usb_storage.c        |   59
> ++++++++++++++++++++---------------------- disk/part_dos.c             |  
>  2 +-
>  drivers/usb/host/ehci-hcd.c |    8 ++++++
>  include/scsi.h              |    4 ++-
>  include/usb.h               |    4 ++-
>  7 files changed, 70 insertions(+), 59 deletions(-)
> 
> diff --git a/common/cmd_usb.c b/common/cmd_usb.c
> index 320667f..bca9d94 100644
> --- a/common/cmd_usb.c
> +++ b/common/cmd_usb.c
> @@ -150,7 +150,8 @@ void usb_display_class_sub(unsigned char dclass,
> unsigned char subclass,
> 
>  void usb_display_string(struct usb_device *dev, int index)
>  {
> -	char buffer[256];
> +	ALLOC_CACHE_ALIGN_BUFFER(char, buffer, 256);
> +
>  	if (index != 0) {
>  		if (usb_string(dev, index, &buffer[0], 256) > 0)
>  			printf("String: \"%s\"", buffer);
> diff --git a/common/usb.c b/common/usb.c
> index 6e21ae2..e3db7bc 100644
> --- a/common/usb.c
> +++ b/common/usb.c
> @@ -73,7 +73,6 @@ static struct usb_device usb_dev[USB_MAX_DEVICE];
>  static int dev_index;
>  static int running;
>  static int asynch_allowed;
> -static struct devrequest setup_packet;
> 
>  char usb_started; /* flag for the started/stopped USB status */
> 
> @@ -185,23 +184,24 @@ int usb_control_msg(struct usb_device *dev, unsigned
> int pipe, unsigned short value, unsigned short index,
>  			void *data, unsigned short size, int timeout)
>  {
> +	ALLOC_CACHE_ALIGN_BUFFER(struct devrequest, setup_packet, 1);
>  	if ((timeout == 0) && (!asynch_allowed)) {
>  		/* request for a asynch control pipe is not allowed */
>  		return -1;
>  	}
> 
>  	/* set setup command */
> -	setup_packet.requesttype = requesttype;
> -	setup_packet.request = request;
> -	setup_packet.value = cpu_to_le16(value);
> -	setup_packet.index = cpu_to_le16(index);
> -	setup_packet.length = cpu_to_le16(size);
> +	setup_packet->requesttype = requesttype;
> +	setup_packet->request = request;
> +	setup_packet->value = cpu_to_le16(value);
> +	setup_packet->index = cpu_to_le16(index);
> +	setup_packet->length = cpu_to_le16(size);
>  	USB_PRINTF("usb_control_msg: request: 0x%X, requesttype: 0x%X, " \
>  		   "value 0x%X index 0x%X length 0x%X\n",
>  		   request, requesttype, value, index, size);
>  	dev->status = USB_ST_NOT_PROC; /*not yet processed */
> 
> -	submit_control_msg(dev, pipe, data, size, &setup_packet);
> +	submit_control_msg(dev, pipe, data, size, setup_packet);
>  	if (timeout == 0)
>  		return (int)size;
> 
> @@ -698,7 +698,7 @@ static int usb_string_sub(struct usb_device *dev,
> unsigned int langid, */
>  int usb_string(struct usb_device *dev, int index, char *buf, size_t size)
>  {
> -	unsigned char mybuf[USB_BUFSIZ];
> +	ALLOC_CACHE_ALIGN_BUFFER(unsigned char, mybuf, USB_BUFSIZ);
>  	unsigned char *tbuf;
>  	int err;
>  	unsigned int u, idx;
> @@ -798,7 +798,7 @@ int usb_new_device(struct usb_device *dev)
>  {
>  	int addr, err;
>  	int tmp;
> -	unsigned char tmpbuf[USB_BUFSIZ];
> +	ALLOC_CACHE_ALIGN_BUFFER(unsigned char, tmpbuf, USB_BUFSIZ);
> 
>  	/* We still haven't set the Address yet */
>  	addr = dev->devnum;
> @@ -925,8 +925,8 @@ int usb_new_device(struct usb_device *dev)
>  	le16_to_cpus(&dev->descriptor.idProduct);
>  	le16_to_cpus(&dev->descriptor.bcdDevice);
>  	/* only support for one config for now */
> -	usb_get_configuration_no(dev, &tmpbuf[0], 0);
> -	usb_parse_config(dev, &tmpbuf[0], 0);
> +	usb_get_configuration_no(dev, tmpbuf, 0);
> +	usb_parse_config(dev, tmpbuf, 0);
>  	usb_set_maxpacket(dev);
>  	/* we set the default configuration here */
>  	if (usb_set_configuration(dev, dev->config.desc.bConfigurationValue)) {
> @@ -1080,7 +1080,7 @@ static int hub_port_reset(struct usb_device *dev, int
> port, unsigned short *portstat)
>  {
>  	int tries;
> -	struct usb_port_status portsts;
> +	ALLOC_CACHE_ALIGN_BUFFER(struct usb_port_status, portsts, 1);
>  	unsigned short portstatus, portchange;
> 
>  	USB_HUB_PRINTF("hub_port_reset: resetting port %d...\n", port);
> @@ -1089,13 +1089,13 @@ static int hub_port_reset(struct usb_device *dev,
> int port, usb_set_port_feature(dev, port + 1, USB_PORT_FEAT_RESET);
>  		wait_ms(200);
> 
> -		if (usb_get_port_status(dev, port + 1, &portsts) < 0) {
> +		if (usb_get_port_status(dev, port + 1, portsts) < 0) {
>  			USB_HUB_PRINTF("get_port_status failed status %lX\n",
>  					dev->status);
>  			return -1;
>  		}
> -		portstatus = le16_to_cpu(portsts.wPortStatus);
> -		portchange = le16_to_cpu(portsts.wPortChange);
> +		portstatus = le16_to_cpu(portsts->wPortStatus);
> +		portchange = le16_to_cpu(portsts->wPortChange);
> 
>  		USB_HUB_PRINTF("portstatus %x, change %x, %s\n",
>  				portstatus, portchange,
> @@ -1133,19 +1133,19 @@ static int hub_port_reset(struct usb_device *dev,
> int port, void usb_hub_port_connect_change(struct usb_device *dev, int
> port) {
>  	struct usb_device *usb;
> -	struct usb_port_status portsts;
> +	ALLOC_CACHE_ALIGN_BUFFER(struct usb_port_status, portsts, 1);
>  	unsigned short portstatus;
> 
>  	/* Check status */
> -	if (usb_get_port_status(dev, port + 1, &portsts) < 0) {
> +	if (usb_get_port_status(dev, port + 1, portsts) < 0) {
>  		USB_HUB_PRINTF("get_port_status failed\n");
>  		return;
>  	}
> 
> -	portstatus = le16_to_cpu(portsts.wPortStatus);
> +	portstatus = le16_to_cpu(portsts->wPortStatus);
>  	USB_HUB_PRINTF("portstatus %x, change %x, %s\n",
>  			portstatus,
> -			le16_to_cpu(portsts.wPortChange),
> +			le16_to_cpu(portsts->wPortChange),
>  			portspeed(portstatus));
> 
>  	/* Clear the connection change status */
> @@ -1194,7 +1194,8 @@ void usb_hub_port_connect_change(struct usb_device
> *dev, int port) int usb_hub_configure(struct usb_device *dev)
>  {
>  	int i;
> -	unsigned char buffer[USB_BUFSIZ], *bitmap;
> +	ALLOC_CACHE_ALIGN_BUFFER(unsigned char, buffer, USB_BUFSIZ);
> +	unsigned char *bitmap;
>  	struct usb_hub_descriptor *descriptor;
>  	struct usb_hub_device *hub;
>  #ifdef USB_HUB_DEBUG
> @@ -1316,16 +1317,16 @@ int usb_hub_configure(struct usb_device *dev)
>  	usb_hub_power_on(hub);
> 
>  	for (i = 0; i < dev->maxchild; i++) {
> -		struct usb_port_status portsts;
> +		ALLOC_CACHE_ALIGN_BUFFER(struct usb_port_status, portsts, 1);
>  		unsigned short portstatus, portchange;
> 
> -		if (usb_get_port_status(dev, i + 1, &portsts) < 0) {
> +		if (usb_get_port_status(dev, i + 1, portsts) < 0) {
>  			USB_HUB_PRINTF("get_port_status failed\n");
>  			continue;
>  		}
> 
> -		portstatus = le16_to_cpu(portsts.wPortStatus);
> -		portchange = le16_to_cpu(portsts.wPortChange);
> +		portstatus = le16_to_cpu(portsts->wPortStatus);
> +		portchange = le16_to_cpu(portsts->wPortChange);
>  		USB_HUB_PRINTF("Port %d Status %X Change %X\n",
>  				i + 1, portstatus, portchange);
> 
> diff --git a/common/usb_storage.c b/common/usb_storage.c
> index de84c8d..88ca390 100644
> --- a/common/usb_storage.c
> +++ b/common/usb_storage.c
> @@ -79,8 +79,7 @@ static const unsigned char us_direction[256/8] = {
>  };
>  #define US_DIRECTION(x) ((us_direction[x>>3] >> (x & 7)) & 1)
> 
> -static unsigned char usb_stor_buf[512];
> -static ccb usb_ccb;
> +static ccb usb_ccb __attribute__((aligned(ARCH_DMA_MINALIGN)));
> 
>  /*
>   * CBI style
> @@ -210,17 +209,17 @@ int usb_stor_info(void)
>  static unsigned int usb_get_max_lun(struct us_data *us)
>  {
>  	int len;
> -	unsigned char result;
> +	ALLOC_CACHE_ALIGN_BUFFER(unsigned char, result, 1);
>  	len = usb_control_msg(us->pusb_dev,
>  			      usb_rcvctrlpipe(us->pusb_dev, 0),
>  			      US_BBB_GET_MAX_LUN,
>  			      USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN,
>  			      0, us->ifnum,
> -			      &result, sizeof(result),
> +			      result, sizeof(char),
>  			      USB_CNTL_TIMEOUT * 5);
>  	USB_STOR_PRINTF("Get Max LUN -> len = %i, result = %i\n",
> -			len, (int) result);
> -	return (len > 0) ? result : 0;
> +			len, (int) *result);
> +	return (len > 0) ? *result : 0;
>  }
> 
>  /*************************************************************************
> ****** @@ -233,9 +232,6 @@ int usb_stor_scan(int mode)
>  	unsigned char i;
>  	struct usb_device *dev;
> 
> -	/* GJ */
> -	memset(usb_stor_buf, 0, sizeof(usb_stor_buf));
> -
>  	if (mode == 1)
>  		printf("       scanning bus for storage devices... ");
> 
> @@ -499,7 +495,7 @@ int usb_stor_BBB_comdat(ccb *srb, struct us_data *us)
>  	int actlen;
>  	int dir_in;
>  	unsigned int pipe;
> -	umass_bbb_cbw_t cbw;
> +	ALLOC_CACHE_ALIGN_BUFFER(umass_bbb_cbw_t, cbw, 1);
> 
>  	dir_in = US_DIRECTION(srb->cmd[0]);
> 
> @@ -522,16 +518,16 @@ int usb_stor_BBB_comdat(ccb *srb, struct us_data *us)
>  	/* always OUT to the ep */
>  	pipe = usb_sndbulkpipe(us->pusb_dev, us->ep_out);
> 
> -	cbw.dCBWSignature = cpu_to_le32(CBWSIGNATURE);
> -	cbw.dCBWTag = cpu_to_le32(CBWTag++);
> -	cbw.dCBWDataTransferLength = cpu_to_le32(srb->datalen);
> -	cbw.bCBWFlags = (dir_in ? CBWFLAGS_IN : CBWFLAGS_OUT);
> -	cbw.bCBWLUN = srb->lun;
> -	cbw.bCDBLength = srb->cmdlen;
> +	cbw->dCBWSignature = cpu_to_le32(CBWSIGNATURE);
> +	cbw->dCBWTag = cpu_to_le32(CBWTag++);
> +	cbw->dCBWDataTransferLength = cpu_to_le32(srb->datalen);
> +	cbw->bCBWFlags = (dir_in ? CBWFLAGS_IN : CBWFLAGS_OUT);
> +	cbw->bCBWLUN = srb->lun;
> +	cbw->bCDBLength = srb->cmdlen;
>  	/* copy the command data into the CBW command data buffer */
>  	/* DST SRC LEN!!! */
> -	memcpy(cbw.CBWCDB, srb->cmd, srb->cmdlen);
> -	result = usb_bulk_msg(us->pusb_dev, pipe, &cbw, UMASS_BBB_CBW_SIZE,
> +	memcpy(cbw->CBWCDB, srb->cmd, srb->cmdlen);
> +	result = usb_bulk_msg(us->pusb_dev, pipe, cbw, UMASS_BBB_CBW_SIZE,
>  			      &actlen, USB_CNTL_TIMEOUT * 5);
>  	if (result < 0)
>  		USB_STOR_PRINTF("usb_stor_BBB_comdat:usb_bulk_msg error\n");
> @@ -675,7 +671,7 @@ int usb_stor_BBB_transport(ccb *srb, struct us_data
> *us) int dir_in;
>  	int actlen, data_actlen;
>  	unsigned int pipe, pipein, pipeout;
> -	umass_bbb_csw_t csw;
> +	ALLOC_CACHE_ALIGN_BUFFER(umass_bbb_csw_t, csw, 1);
>  #ifdef BBB_XPORT_TRACE
>  	unsigned char *ptr;
>  	int index;
> @@ -733,7 +729,7 @@ st:
>  	retry = 0;
>  again:
>  	USB_STOR_PRINTF("STATUS phase\n");
> -	result = usb_bulk_msg(us->pusb_dev, pipein, &csw, UMASS_BBB_CSW_SIZE,
> +	result = usb_bulk_msg(us->pusb_dev, pipein, csw, UMASS_BBB_CSW_SIZE,
>  				&actlen, USB_CNTL_TIMEOUT*5);
> 
>  	/* special handling of STALL in STATUS phase */
> @@ -753,28 +749,28 @@ again:
>  		return USB_STOR_TRANSPORT_FAILED;
>  	}
>  #ifdef BBB_XPORT_TRACE
> -	ptr = (unsigned char *)&csw;
> +	ptr = (unsigned char *)csw;
>  	for (index = 0; index < UMASS_BBB_CSW_SIZE; index++)
>  		printf("ptr[%d] %#x ", index, ptr[index]);
>  	printf("\n");
>  #endif
>  	/* misuse pipe to get the residue */
> -	pipe = le32_to_cpu(csw.dCSWDataResidue);
> +	pipe = le32_to_cpu(csw->dCSWDataResidue);
>  	if (pipe == 0 && srb->datalen != 0 && srb->datalen - data_actlen != 0)
>  		pipe = srb->datalen - data_actlen;
> -	if (CSWSIGNATURE != le32_to_cpu(csw.dCSWSignature)) {
> +	if (CSWSIGNATURE != le32_to_cpu(csw->dCSWSignature)) {
>  		USB_STOR_PRINTF("!CSWSIGNATURE\n");
>  		usb_stor_BBB_reset(us);
>  		return USB_STOR_TRANSPORT_FAILED;
> -	} else if ((CBWTag - 1) != le32_to_cpu(csw.dCSWTag)) {
> +	} else if ((CBWTag - 1) != le32_to_cpu(csw->dCSWTag)) {
>  		USB_STOR_PRINTF("!Tag\n");
>  		usb_stor_BBB_reset(us);
>  		return USB_STOR_TRANSPORT_FAILED;
> -	} else if (csw.bCSWStatus > CSWSTATUS_PHASE) {
> +	} else if (csw->bCSWStatus > CSWSTATUS_PHASE) {
>  		USB_STOR_PRINTF(">PHASE\n");
>  		usb_stor_BBB_reset(us);
>  		return USB_STOR_TRANSPORT_FAILED;
> -	} else if (csw.bCSWStatus == CSWSTATUS_PHASE) {
> +	} else if (csw->bCSWStatus == CSWSTATUS_PHASE) {
>  		USB_STOR_PRINTF("=PHASE\n");
>  		usb_stor_BBB_reset(us);
>  		return USB_STOR_TRANSPORT_FAILED;
> @@ -782,7 +778,7 @@ again:
>  		USB_STOR_PRINTF("transferred %dB instead of %ldB\n",
>  			data_actlen, srb->datalen);
>  		return USB_STOR_TRANSPORT_FAILED;
> -	} else if (csw.bCSWStatus == CSWSTATUS_FAILED) {
> +	} else if (csw->bCSWStatus == CSWSTATUS_FAILED) {
>  		USB_STOR_PRINTF("FAILED\n");
>  		return USB_STOR_TRANSPORT_FAILED;
>  	}
> @@ -1343,7 +1339,8 @@ int usb_stor_get_info(struct usb_device *dev, struct
> us_data *ss, block_dev_desc_t *dev_desc)
>  {
>  	unsigned char perq, modi;
> -	unsigned long cap[2];
> +	ALLOC_CACHE_ALIGN_BUFFER(unsigned long, cap, 2);
> +	ALLOC_CACHE_ALIGN_BUFFER(unsigned char, usb_stor_buf, 36);
>  	unsigned long *capacity, *blksz;
>  	ccb *pccb = &usb_ccb;
> 
> @@ -1367,9 +1364,9 @@ int usb_stor_get_info(struct usb_device *dev, struct
> us_data *ss, /* drive is removable */
>  		dev_desc->removable = 1;
>  	}
> -	memcpy(&dev_desc->vendor[0], &usb_stor_buf[8], 8);
> -	memcpy(&dev_desc->product[0], &usb_stor_buf[16], 16);
> -	memcpy(&dev_desc->revision[0], &usb_stor_buf[32], 4);
> +	memcpy(&dev_desc->vendor[0], (const void *) &usb_stor_buf[8], 8);
> +	memcpy(&dev_desc->product[0], (const void *) &usb_stor_buf[16], 16);
> +	memcpy(&dev_desc->revision[0], (const void *) &usb_stor_buf[32], 4);
>  	dev_desc->vendor[8] = 0;
>  	dev_desc->product[16] = 0;
>  	dev_desc->revision[4] = 0;
> diff --git a/disk/part_dos.c b/disk/part_dos.c
> index b5bcb37..70211ee 100644
> --- a/disk/part_dos.c
> +++ b/disk/part_dos.c
> @@ -87,7 +87,7 @@ static int test_block_type(unsigned char *buffer)
> 
>  int test_part_dos (block_dev_desc_t *dev_desc)
>  {
> -	unsigned char buffer[dev_desc->blksz];
> +	ALLOC_CACHE_ALIGN_BUFFER(unsigned char, buffer, dev_desc->blksz);
> 
>  	if ((dev_desc->block_read(dev_desc->dev, 0, 1, (ulong *) buffer) != 1) 
||
>  	    (buffer[DOS_PART_MAGIC_OFFSET + 0] != 0x55) ||
> diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
> index d893b2a..eb5220b 100644
> --- a/drivers/usb/host/ehci-hcd.c
> +++ b/drivers/usb/host/ehci-hcd.c
> @@ -120,6 +120,14 @@ static struct descriptor {
>   */
>  static void flush_invalidate(u32 addr, int size, int flush)
>  {
> +	/*
> +	 * Size is the bytes actually moved during transaction,
> +	 * which may not equal to the cache line. This results
> +	 * stop address passed for invalidating cache may not be aligned.
> +	 * Therfore making size as multiple of cache line size.
> +	 */
> +	size = ALIGN(size, ARCH_DMA_MINALIGN);
> +
>  	if (flush)
>  		flush_dcache_range(addr, addr + size);
>  	else
> diff --git a/include/scsi.h b/include/scsi.h
> index c52759c..89ae45f 100644
> --- a/include/scsi.h
> +++ b/include/scsi.h
> @@ -26,7 +26,9 @@
> 
>  typedef struct SCSI_cmd_block{
>  	unsigned char		cmd[16];					
/* command				   */
> -	unsigned char		sense_buf[64];		/* for request sense */
> +	/* for request sense */
> +	unsigned char		sense_buf[64]
> +		__attribute__((aligned(ARCH_DMA_MINALIGN)));
>  	unsigned char		status;						
/* SCSI Status			 */
>  	unsigned char		target;						
/* Target ID				 */
>  	unsigned char		lun;							
/* Target LUN        */
> diff --git a/include/usb.h b/include/usb.h
> index 06170cd..5f4f110 100644
> --- a/include/usb.h
> +++ b/include/usb.h
> @@ -109,7 +109,9 @@ struct usb_device {
>  	int epmaxpacketout[16];		/* OUTput endpoint specific maximums */
> 
>  	int configno;			/* selected config number */
> -	struct usb_device_descriptor descriptor; /* Device Descriptor */
> +	/* Device Descriptor */
> +	struct usb_device_descriptor descriptor
> +		__attribute__((aligned(ARCH_DMA_MINALIGN)));
>  	struct usb_config config; /* config descriptor */
> 
>  	int have_langid;		/* whether string_langid is valid yet */
Simon Glass - March 5, 2012, 6:18 p.m.
Hi Puneet,

On Mon, Mar 5, 2012 at 6:46 AM, Puneet Saxena <puneets@nvidia.com> wrote:
> As DMA expects the buffers to be equal and larger then
> cache lines, This aligns buffers at cacheline.
>
> Signed-off-by: Puneet Saxena <puneets@nvidia.com>

Tested on Seaboard:

Tested-by: Simon Glass <sjg@chromium.org>
Acked-by: Simon Glass <sjg@chromium.org>

I do still see a few alignment warnings, but only a tidy fraction of
what we had. Do you see these?

Tegra2 (SeaBoard) # usb start
(Re)start USB...
USB:   Register 10011 NbrPorts 1
USB EHCI 1.00
scanning bus for devices... ERROR: v7_dcache_inval_range - start
address is not aligned - 0x3fbed6f2
ERROR: v7_dcache_inval_range - stop address is not aligned - 0x3fbed732
ERROR: v7_dcache_inval_range - start address is not aligned - 0x3fbed484
ERROR: v7_dcache_inval_range - stop address is not aligned - 0x3fbed584
ERROR: v7_dcache_inval_range - start address is not aligned - 0x3fbed492
ERROR: v7_dcache_inval_range - stop address is not aligned - 0x3fbed592
ERROR: v7_dcache_inval_range - start address is not aligned - 0x3fbed49e
ERROR: v7_dcache_inval_range - stop address is not aligned - 0x3fbed59e
ERROR: v7_dcache_inval_range - start address is not aligned - 0x3fbed4a2
ERROR: v7_dcache_inval_range - stop address is not aligned - 0x3fbed582
2 USB Device(s) found
       scanning bus for storage devices... 1 Storage Device(s) found


Regards,
Simon
Marek Vasut - March 6, 2012, 12:36 a.m.
Dear Simon Glass,

> Hi Puneet,
> 
> On Mon, Mar 5, 2012 at 6:46 AM, Puneet Saxena <puneets@nvidia.com> wrote:
> > As DMA expects the buffers to be equal and larger then
> > cache lines, This aligns buffers at cacheline.
> > 
> > Signed-off-by: Puneet Saxena <puneets@nvidia.com>
> 
> Tested on Seaboard:
> 
> Tested-by: Simon Glass <sjg@chromium.org>
> Acked-by: Simon Glass <sjg@chromium.org>
> 

Guys, I'm very happy that we finally got it here. But the USB framework changed 
in mainline recently a bit (usb.c was split to usb.c and usb_hub.c). Puneet, can 
you please adapt your patch and do one last loop?

Thanks in advance!

Best regards,
Marek Vasut
Marek Vasut - March 6, 2012, 12:39 a.m.
> Dear Simon Glass,
> 
> > Hi Puneet,
> > 
> > On Mon, Mar 5, 2012 at 6:46 AM, Puneet Saxena <puneets@nvidia.com> wrote:
> > > As DMA expects the buffers to be equal and larger then
> > > cache lines, This aligns buffers at cacheline.
> > > 
> > > Signed-off-by: Puneet Saxena <puneets@nvidia.com>
> > 
> > Tested on Seaboard:
> > 
> > Tested-by: Simon Glass <sjg@chromium.org>
> > Acked-by: Simon Glass <sjg@chromium.org>
> 
> Guys, I'm very happy that we finally got it here. But the USB framework
> changed in mainline recently a bit (usb.c was split to usb.c and
> usb_hub.c). Puneet, can you please adapt your patch and do one last loop?

Adapt your patch to this tree: git://git.denx.de/u-boot-usb.git master branch.

> 
> Thanks in advance!
> 
> Best regards,
> Marek Vasut

Best regards,
Marek Vasut
Mike Frysinger - March 6, 2012, 3:07 a.m.
On Monday 05 March 2012 09:46:21 Puneet Saxena wrote:
> As DMA expects the buffers to be equal and larger then
> cache lines, This aligns buffers at cacheline.

i don't think this statement is true.  DMA doesn't care about alignment (well, 
some do, but it's not related to cache lines but rather some other restriction 
in the peripheral DMA itself).  what does matter is that cache operations 
operate on cache lines and not individual bytes.  hence the core arm code was 
updated to warn when someone told it to invalidate X bytes but the hardware 
literally could not, so it had to invalidate X + Y bytes.

> --- a/drivers/usb/host/ehci-hcd.c
> +++ b/drivers/usb/host/ehci-hcd.c
>
>  static void flush_invalidate(u32 addr, int size, int flush)
>  {
> +	/*
> +	 * Size is the bytes actually moved during transaction,
> +	 * which may not equal to the cache line. This results
> +	 * stop address passed for invalidating cache may not be aligned.
> +	 * Therfore making size as multiple of cache line size.
> +	 */
> +	size = ALIGN(size, ARCH_DMA_MINALIGN);
> +
>  	if (flush)
>  		flush_dcache_range(addr, addr + size);
>  	else

i think this is wrong and merely hides the errors from higher up instead of 
fixing them.  the point of the warning was to tell you that the code was 
invalidating *too many* bytes.  this code still invalidates too many bytes 
without any justification as for why it's OK to do here.  further, this code 
path only matters to the invalidation logic, not the flush logic.
-mike
Puneet Saxena - March 6, 2012, 7 a.m.
Hi Simon,
I do see only first warning on all the devices and rest of the warnings
on a few mass storage device.

My patch fixing these warnings is not accepted. Please see below link 
for further info -

http://lists.denx.de/pipermail/u-boot/2012-March/119404.html

IMO, these warnings spew when we expects some info e.g. device 
descriptor, manf id, prod id... from the device.
The root cause of the issue is some race condition in H/w due to which, 
even though we receive "STD_ASS"(Async sequence status) as 0
still transfer descriptor token is not updated.

Thanx & Regards,
Puneet

On Monday 05 March 2012 11:48 PM, Simon Glass wrote:
> Hi Puneet,
>
> On Mon, Mar 5, 2012 at 6:46 AM, Puneet Saxena<puneets@nvidia.com>  wrote:
>> As DMA expects the buffers to be equal and larger then
>> cache lines, This aligns buffers at cacheline.
>>
>> Signed-off-by: Puneet Saxena<puneets@nvidia.com>
> Tested on Seaboard:
>
> Tested-by: Simon Glass<sjg@chromium.org>
> Acked-by: Simon Glass<sjg@chromium.org>
>
> I do still see a few alignment warnings, but only a tidy fraction of
> what we had. Do you see these?
>
> Tegra2 (SeaBoard) # usb start
> (Re)start USB...
> USB:   Register 10011 NbrPorts 1
> USB EHCI 1.00
> scanning bus for devices... ERROR: v7_dcache_inval_range - start
> address is not aligned - 0x3fbed6f2
> ERROR: v7_dcache_inval_range - stop address is not aligned - 0x3fbed732
> ERROR: v7_dcache_inval_range - start address is not aligned - 0x3fbed484
> ERROR: v7_dcache_inval_range - stop address is not aligned - 0x3fbed584
> ERROR: v7_dcache_inval_range - start address is not aligned - 0x3fbed492
> ERROR: v7_dcache_inval_range - stop address is not aligned - 0x3fbed592
> ERROR: v7_dcache_inval_range - start address is not aligned - 0x3fbed49e
> ERROR: v7_dcache_inval_range - stop address is not aligned - 0x3fbed59e
> ERROR: v7_dcache_inval_range - start address is not aligned - 0x3fbed4a2
> ERROR: v7_dcache_inval_range - stop address is not aligned - 0x3fbed582
> 2 USB Device(s) found
>         scanning bus for storage devices... 1 Storage Device(s) found
>
>
> Regards,
> Simon


-----------------------------------------------------------------------------------
This email message is for the sole use of the intended recipient(s) and may contain
confidential information.  Any unauthorized review, use, disclosure or distribution
is prohibited.  If you are not the intended recipient, please contact the sender by
reply email and destroy all copies of the original message.
-----------------------------------------------------------------------------------
Marek Vasut - March 6, 2012, 8:22 a.m.
Dear Puneet Saxena,

> Hi Simon,
> I do see only first warning on all the devices and rest of the warnings
> on a few mass storage device.
> 
> My patch fixing these warnings is not accepted. Please see below link
> for further info -
> 
> http://lists.denx.de/pipermail/u-boot/2012-March/119404.html
> 
> IMO, these warnings spew when we expects some info e.g. device
> descriptor, manf id, prod id... from the device.
> The root cause of the issue is some race condition in H/w due to which,
> even though we receive "STD_ASS"(Async sequence status) as 0
> still transfer descriptor token is not updated.
> 
> Thanx & Regards,
> Puneet

I'd really love to accept the patch, but I can see it exposes even worse issue, 
so I'd be really happy to fix the root cause of the problem instead of putting 
layers of hack-arounds on top of it.

Best regards,
Marek Vasut
Puneet Saxena - March 7, 2012, 7:12 a.m.
Hi Mike,
On Tuesday 06 March 2012 08:37 AM, Mike Frysinger wrote:
> * PGP Signed by an unknown key
>
> On Monday 05 March 2012 09:46:21 Puneet Saxena wrote:
>> As DMA expects the buffers to be equal and larger then
>> cache lines, This aligns buffers at cacheline.
> i don't think this statement is true.  DMA doesn't care about alignment (well,
> some do, but it's not related to cache lines but rather some other restriction
> in the peripheral DMA itself).  what does matter is that cache operations
> operate on cache lines and not individual bytes.  hence the core arm code was
> updated to warn when someone told it to invalidate X bytes but the hardware
> literally could not, so it had to invalidate X + Y bytes.
>
Agreed, Will update the commit message in next patchset.
>> --- a/drivers/usb/host/ehci-hcd.c
>> +++ b/drivers/usb/host/ehci-hcd.c
>>
>>   static void flush_invalidate(u32 addr, int size, int flush)
>>   {
>> +	/*
>> +	 * Size is the bytes actually moved during transaction,
>> +	 * which may not equal to the cache line. This results
>> +	 * stop address passed for invalidating cache may not be aligned.
>> +	 * Therfore making size as multiple of cache line size.
>> +	 */
>> +	size = ALIGN(size, ARCH_DMA_MINALIGN);
>> +
>>   	if (flush)
>>   		flush_dcache_range(addr, addr + size);
>>   	else
> i think this is wrong and merely hides the errors from higher up instead of
> fixing them.  the point of the warning was to tell you that the code was
> invalidating *too many* bytes.  this code still invalidates too many bytes
> without any justification as for why it's OK to do here.  further, this code
> path only matters to the invalidation logic, not the flush logic.
> -mike
>
The sole purpose of this patch to remove the warnings as start/stop 
address sent for invalidating
is unaligned. Without this patch code works fine but with lots of 
spew...Which we don't want and discussed
in earlier thread which Simon posted. Please have a look on following link.

As I understood, you agree that we need to align start/stop buffer 
address and also agree that
to align stop address we need to align size as start address is already 
aligned.
Now, "why its OK to do here"?
We could have aligned the size in two places, cache_qtd() and cache_qh() 
but then we need to place alignment check
at all the places where size is passed. So I thought better Aligning at 
flush_invalidate() and "ALIGN" macro does not
increase the size if size is already aligned.


> * Unknown Key
> * 0xE837F581
Thanx & Regards,
Puneet

-----------------------------------------------------------------------------------
This email message is for the sole use of the intended recipient(s) and may contain
confidential information.  Any unauthorized review, use, disclosure or distribution
is prohibited.  If you are not the intended recipient, please contact the sender by
reply email and destroy all copies of the original message.
-----------------------------------------------------------------------------------
Puneet Saxena - March 7, 2012, 9:20 a.m.
Hi Mike,
Forgot to write the link in below mail.
Here is the link: 
http://lists.denx.de/pipermail/u-boot/2011-December/112138.html
Do you want me to show a warning where I am making size as cacheline size?

Thanx & Regards,,
Puneet
On Wednesday 07 March 2012 12:42 PM, puneets wrote:
> Hi Mike,
> On Tuesday 06 March 2012 08:37 AM, Mike Frysinger wrote:
>> * PGP Signed by an unknown key
>>
>> On Monday 05 March 2012 09:46:21 Puneet Saxena wrote:
>>> As DMA expects the buffers to be equal and larger then
>>> cache lines, This aligns buffers at cacheline.
>> i don't think this statement is true.  DMA doesn't care about alignment (well,
>> some do, but it's not related to cache lines but rather some other restriction
>> in the peripheral DMA itself).  what does matter is that cache operations
>> operate on cache lines and not individual bytes.  hence the core arm code was
>> updated to warn when someone told it to invalidate X bytes but the hardware
>> literally could not, so it had to invalidate X + Y bytes.
>>
> Agreed, Will update the commit message in next patchset.
>>> --- a/drivers/usb/host/ehci-hcd.c
>>> +++ b/drivers/usb/host/ehci-hcd.c
>>>
>>>    static void flush_invalidate(u32 addr, int size, int flush)
>>>    {
>>> +	/*
>>> +	 * Size is the bytes actually moved during transaction,
>>> +	 * which may not equal to the cache line. This results
>>> +	 * stop address passed for invalidating cache may not be aligned.
>>> +	 * Therfore making size as multiple of cache line size.
>>> +	 */
>>> +	size = ALIGN(size, ARCH_DMA_MINALIGN);
>>> +
>>>    	if (flush)
>>>    		flush_dcache_range(addr, addr + size);
>>>    	else
>> i think this is wrong and merely hides the errors from higher up instead of
>> fixing them.  the point of the warning was to tell you that the code was
>> invalidating *too many* bytes.  this code still invalidates too many bytes
>> without any justification as for why it's OK to do here.  further, this code
>> path only matters to the invalidation logic, not the flush logic.
>> -mike
>>
> The sole purpose of this patch to remove the warnings as start/stop
> address sent for invalidating
> is unaligned. Without this patch code works fine but with lots of
> spew...Which we don't want and discussed
> in earlier thread which Simon posted. Please have a look on following link.
>
> As I understood, you agree that we need to align start/stop buffer
> address and also agree that
> to align stop address we need to align size as start address is already
> aligned.
> Now, "why its OK to do here"?
> We could have aligned the size in two places, cache_qtd() and cache_qh()
> but then we need to place alignment check
> at all the places where size is passed. So I thought better Aligning at
> flush_invalidate() and "ALIGN" macro does not
> increase the size if size is already aligned.
>
>
>> * Unknown Key
>> * 0xE837F581
> Thanx&  Regards,
> Puneet


-----------------------------------------------------------------------------------
This email message is for the sole use of the intended recipient(s) and may contain
confidential information.  Any unauthorized review, use, disclosure or distribution
is prohibited.  If you are not the intended recipient, please contact the sender by
reply email and destroy all copies of the original message.
-----------------------------------------------------------------------------------
Marek Vasut - March 7, 2012, 10:06 p.m.
Dear puneets,

> Hi Mike,
> 
> On Tuesday 06 March 2012 08:37 AM, Mike Frysinger wrote:
> > * PGP Signed by an unknown key
> > 
> > On Monday 05 March 2012 09:46:21 Puneet Saxena wrote:
> >> As DMA expects the buffers to be equal and larger then
> >> cache lines, This aligns buffers at cacheline.
> > 
> > i don't think this statement is true.  DMA doesn't care about alignment
> > (well, some do, but it's not related to cache lines but rather some
> > other restriction in the peripheral DMA itself).  what does matter is
> > that cache operations operate on cache lines and not individual bytes. 
> > hence the core arm code was updated to warn when someone told it to
> > invalidate X bytes but the hardware literally could not, so it had to
> > invalidate X + Y bytes.
> 
> Agreed, Will update the commit message in next patchset.
> 
> >> --- a/drivers/usb/host/ehci-hcd.c
> >> +++ b/drivers/usb/host/ehci-hcd.c
> >> 
> >>   static void flush_invalidate(u32 addr, int size, int flush)
> >>   {
> >> 
> >> +	/*
> >> +	 * Size is the bytes actually moved during transaction,
> >> +	 * which may not equal to the cache line. This results
> >> +	 * stop address passed for invalidating cache may not be aligned.
> >> +	 * Therfore making size as multiple of cache line size.
> >> +	 */
> >> +	size = ALIGN(size, ARCH_DMA_MINALIGN);
> >> +
> >> 
> >>   	if (flush)
> >>   	
> >>   		flush_dcache_range(addr, addr + size);
> >>   	
> >>   	else
> > 
> > i think this is wrong and merely hides the errors from higher up instead
> > of fixing them.  the point of the warning was to tell you that the code
> > was invalidating *too many* bytes.  this code still invalidates too many
> > bytes without any justification as for why it's OK to do here.  further,
> > this code path only matters to the invalidation logic, not the flush
> > logic. -mike
> 
> The sole purpose of this patch to remove the warnings as start/stop
> address sent for invalidating
> is unaligned. Without this patch code works fine but with lots of
> spew...Which we don't want and discussed
> in earlier thread which Simon posted. Please have a look on following link.
> 
> As I understood, you agree that we need to align start/stop buffer
> address and also agree that
> to align stop address we need to align size as start address is already
> aligned.
> Now, "why its OK to do here"?
> We could have aligned the size in two places, cache_qtd() and cache_qh()
> but then we need to place alignment check
> at all the places where size is passed. So I thought better Aligning at
> flush_invalidate() and "ALIGN" macro does not
> increase the size if size is already aligned.

Actually I have to agree with Mike here. Can you please remove that ALIGN() (and 
all others you might have added)? If it does spew, that's ok and it tells us 
something is wrong in the USB core subsystem. Such stuff can be fixed in 
subsequent patch.

Best regards,
Marek Vasut
Puneet Saxena - March 8, 2012, 11:21 a.m.
Hi Marek,
On Thursday 08 March 2012 03:36 AM, Marek Vasut wrote:
> Dear puneets,
>
>> Hi Mike,
>>
>> On Tuesday 06 March 2012 08:37 AM, Mike Frysinger wrote:
>>> * PGP Signed by an unknown key
>>>
>>> On Monday 05 March 2012 09:46:21 Puneet Saxena wrote:
>>>> As DMA expects the buffers to be equal and larger then
>>>> cache lines, This aligns buffers at cacheline.
>>> i don't think this statement is true.  DMA doesn't care about alignment
>>> (well, some do, but it's not related to cache lines but rather some
>>> other restriction in the peripheral DMA itself).  what does matter is
>>> that cache operations operate on cache lines and not individual bytes.
>>> hence the core arm code was updated to warn when someone told it to
>>> invalidate X bytes but the hardware literally could not, so it had to
>>> invalidate X + Y bytes.
>> Agreed, Will update the commit message in next patchset.
>>
>>>> --- a/drivers/usb/host/ehci-hcd.c
>>>> +++ b/drivers/usb/host/ehci-hcd.c
>>>>
>>>>    static void flush_invalidate(u32 addr, int size, int flush)
>>>>    {
>>>>
>>>> +	/*
>>>> +	 * Size is the bytes actually moved during transaction,
>>>> +	 * which may not equal to the cache line. This results
>>>> +	 * stop address passed for invalidating cache may not be aligned.
>>>> +	 * Therfore making size as multiple of cache line size.
>>>> +	 */
>>>> +	size = ALIGN(size, ARCH_DMA_MINALIGN);
>>>> +
>>>>
>>>>    	if (flush)
>>>>    	
>>>>    		flush_dcache_range(addr, addr + size);
>>>>    	
>>>>    	else
>>> i think this is wrong and merely hides the errors from higher up instead
>>> of fixing them.  the point of the warning was to tell you that the code
>>> was invalidating *too many* bytes.  this code still invalidates too many
>>> bytes without any justification as for why it's OK to do here.  further,
>>> this code path only matters to the invalidation logic, not the flush
>>> logic. -mike
>> The sole purpose of this patch to remove the warnings as start/stop
>> address sent for invalidating
>> is unaligned. Without this patch code works fine but with lots of
>> spew...Which we don't want and discussed
>> in earlier thread which Simon posted. Please have a look on following link.
>>
>> As I understood, you agree that we need to align start/stop buffer
>> address and also agree that
>> to align stop address we need to align size as start address is already
>> aligned.
>> Now, "why its OK to do here"?
>> We could have aligned the size in two places, cache_qtd() and cache_qh()
>> but then we need to place alignment check
>> at all the places where size is passed. So I thought better Aligning at
>> flush_invalidate() and "ALIGN" macro does not
>> increase the size if size is already aligned.
> Actually I have to agree with Mike here. Can you please remove that ALIGN() (and
> all others you might have added)? If it does spew, that's ok and it tells us
> something is wrong in the USB core subsystem. Such stuff can be fixed in
> subsequent patch.
>
Sorry, I could not understand "(and all others you might have added)".
Do you want me remove any HACK in the patch which is using ALIGN or 
making stop address
aligned? The patch has only the above line to make stop address align 
and rest of the code makes
start address align. Just to confirm, you are fine with the start 
address alignment code in the patch?

> Best regards,
> Marek Vasut
Thanx & Regards,
Puneet

-----------------------------------------------------------------------------------
This email message is for the sole use of the intended recipient(s) and may contain
confidential information.  Any unauthorized review, use, disclosure or distribution
is prohibited.  If you are not the intended recipient, please contact the sender by
reply email and destroy all copies of the original message.
-----------------------------------------------------------------------------------
Marek Vasut - March 8, 2012, 2:12 p.m.
Dear puneets,

> Hi Marek,
> 
> On Thursday 08 March 2012 03:36 AM, Marek Vasut wrote:
> > Dear puneets,
> > 
> >> Hi Mike,
> >> 
> >> On Tuesday 06 March 2012 08:37 AM, Mike Frysinger wrote:
> >>> * PGP Signed by an unknown key
> >>> 
> >>> On Monday 05 March 2012 09:46:21 Puneet Saxena wrote:
> >>>> As DMA expects the buffers to be equal and larger then
> >>>> cache lines, This aligns buffers at cacheline.
> >>> 
> >>> i don't think this statement is true.  DMA doesn't care about alignment
> >>> (well, some do, but it's not related to cache lines but rather some
> >>> other restriction in the peripheral DMA itself).  what does matter is
> >>> that cache operations operate on cache lines and not individual bytes.
> >>> hence the core arm code was updated to warn when someone told it to
> >>> invalidate X bytes but the hardware literally could not, so it had to
> >>> invalidate X + Y bytes.
> >> 
> >> Agreed, Will update the commit message in next patchset.
> >> 
> >>>> --- a/drivers/usb/host/ehci-hcd.c
> >>>> +++ b/drivers/usb/host/ehci-hcd.c
> >>>> 
> >>>>    static void flush_invalidate(u32 addr, int size, int flush)
> >>>>    {
> >>>> 
> >>>> +	/*
> >>>> +	 * Size is the bytes actually moved during transaction,
> >>>> +	 * which may not equal to the cache line. This results
> >>>> +	 * stop address passed for invalidating cache may not be 
aligned.
> >>>> +	 * Therfore making size as multiple of cache line size.
> >>>> +	 */
> >>>> +	size = ALIGN(size, ARCH_DMA_MINALIGN);
> >>>> +
> >>>> 
> >>>>    	if (flush)
> >>>>    	
> >>>>    		flush_dcache_range(addr, addr + size);
> >>>>    	
> >>>>    	else
> >>> 
> >>> i think this is wrong and merely hides the errors from higher up
> >>> instead of fixing them.  the point of the warning was to tell you that
> >>> the code was invalidating *too many* bytes.  this code still
> >>> invalidates too many bytes without any justification as for why it's
> >>> OK to do here.  further, this code path only matters to the
> >>> invalidation logic, not the flush logic. -mike
> >> 
> >> The sole purpose of this patch to remove the warnings as start/stop
> >> address sent for invalidating
> >> is unaligned. Without this patch code works fine but with lots of
> >> spew...Which we don't want and discussed
> >> in earlier thread which Simon posted. Please have a look on following
> >> link.
> >> 
> >> As I understood, you agree that we need to align start/stop buffer
> >> address and also agree that
> >> to align stop address we need to align size as start address is already
> >> aligned.
> >> Now, "why its OK to do here"?
> >> We could have aligned the size in two places, cache_qtd() and cache_qh()
> >> but then we need to place alignment check
> >> at all the places where size is passed. So I thought better Aligning at
> >> flush_invalidate() and "ALIGN" macro does not
> >> increase the size if size is already aligned.
> > 
> > Actually I have to agree with Mike here. Can you please remove that
> > ALIGN() (and all others you might have added)? If it does spew, that's
> > ok and it tells us something is wrong in the USB core subsystem. Such
> > stuff can be fixed in subsequent patch.
> 
> Sorry, I could not understand "(and all others you might have added)".
> Do you want me remove any HACK in the patch which is using ALIGN or
> making stop address

No, only such hacks where it's certain they will either invalidate or flush some 
areas that weren't allocated for them, like this ALIGN you did here. This can 
cause trouble that will be very hard to find.

> aligned? The patch has only the above line to make stop address align
> and rest of the code makes
> start address align. Just to confirm, you are fine with the start
> address alignment code in the patch?

The start address alignment you do also aligns the end to the cacheline, doesn't 
it? (at least that's what I believe the macro is supposed to do).

> 
> > Best regards,
> > Marek Vasut
> 
> Thanx & Regards,
> Puneet
> 
> ---------------------------------------------------------------------------
> -------- This email message is for the sole use of the intended
> recipient(s) and may contain confidential information.  Any unauthorized
> review, use, disclosure or distribution is prohibited.  If you are not the
> intended recipient, please contact the sender by reply email and destroy
> all copies of the original message.
> ---------------------------------------------------------------------------
> --------

Best regards,
Marek Vasut
Puneet Saxena - March 9, 2012, 6:15 a.m.
Hi Marek,
On Thursday 08 March 2012 07:42 PM, Marek Vasut wrote:
> Dear puneets,
>
>> Hi Marek,
>>
>> On Thursday 08 March 2012 03:36 AM, Marek Vasut wrote:
>>> Dear puneets,
>>>
>>>> Hi Mike,
>>>>
>>>> On Tuesday 06 March 2012 08:37 AM, Mike Frysinger wrote:
>>>>> * PGP Signed by an unknown key
>>>>>
>>>>> On Monday 05 March 2012 09:46:21 Puneet Saxena wrote:
>>>>>> As DMA expects the buffers to be equal and larger then
>>>>>> cache lines, This aligns buffers at cacheline.
>>>>> i don't think this statement is true.  DMA doesn't care about alignment
>>>>> (well, some do, but it's not related to cache lines but rather some
>>>>> other restriction in the peripheral DMA itself).  what does matter is
>>>>> that cache operations operate on cache lines and not individual bytes.
>>>>> hence the core arm code was updated to warn when someone told it to
>>>>> invalidate X bytes but the hardware literally could not, so it had to
>>>>> invalidate X + Y bytes.
>>>> Agreed, Will update the commit message in next patchset.
>>>>
>>>>>> --- a/drivers/usb/host/ehci-hcd.c
>>>>>> +++ b/drivers/usb/host/ehci-hcd.c
>>>>>>
>>>>>>     static void flush_invalidate(u32 addr, int size, int flush)
>>>>>>     {
>>>>>>
>>>>>> +	/*
>>>>>> +	 * Size is the bytes actually moved during transaction,
>>>>>> +	 * which may not equal to the cache line. This results
>>>>>> +	 * stop address passed for invalidating cache may not be
> aligned.
>>>>>> +	 * Therfore making size as multiple of cache line size.
>>>>>> +	 */
>>>>>> +	size = ALIGN(size, ARCH_DMA_MINALIGN);
>>>>>> +
>>>>>>
>>>>>>     	if (flush)
>>>>>>     	
>>>>>>     		flush_dcache_range(addr, addr + size);
>>>>>>     	
>>>>>>     	else
>>>>> i think this is wrong and merely hides the errors from higher up
>>>>> instead of fixing them.  the point of the warning was to tell you that
>>>>> the code was invalidating *too many* bytes.  this code still
>>>>> invalidates too many bytes without any justification as for why it's
>>>>> OK to do here.  further, this code path only matters to the
>>>>> invalidation logic, not the flush logic. -mike
>>>> The sole purpose of this patch to remove the warnings as start/stop
>>>> address sent for invalidating
>>>> is unaligned. Without this patch code works fine but with lots of
>>>> spew...Which we don't want and discussed
>>>> in earlier thread which Simon posted. Please have a look on following
>>>> link.
>>>>
>>>> As I understood, you agree that we need to align start/stop buffer
>>>> address and also agree that
>>>> to align stop address we need to align size as start address is already
>>>> aligned.
>>>> Now, "why its OK to do here"?
>>>> We could have aligned the size in two places, cache_qtd() and cache_qh()
>>>> but then we need to place alignment check
>>>> at all the places where size is passed. So I thought better Aligning at
>>>> flush_invalidate() and "ALIGN" macro does not
>>>> increase the size if size is already aligned.
>>> Actually I have to agree with Mike here. Can you please remove that
>>> ALIGN() (and all others you might have added)? If it does spew, that's
>>> ok and it tells us something is wrong in the USB core subsystem. Such
>>> stuff can be fixed in subsequent patch.
>> Sorry, I could not understand "(and all others you might have added)".
>> Do you want me remove any HACK in the patch which is using ALIGN or
>> making stop address
> No, only such hacks where it's certain they will either invalidate or flush some
> areas that weren't allocated for them, like this ALIGN you did here. This can
> cause trouble that will be very hard to find.
>
>> aligned? The patch has only the above line to make stop address align
>> and rest of the code makes
>> start address align. Just to confirm, you are fine with the start
>> address alignment code in the patch?
> The start address alignment you do also aligns the end to the cacheline, doesn't
> it? (at least that's what I believe the macro is supposed to do).
>
Yes, start address alignment also aligns start address at the cache 
line. So, removing
stop address alignment code as depicted above, should make this patch 
acceptable?

Best regards,
>>> Marek Vasut
>> Thanx&  Regards,
>> Puneet
>>
>> ---------------------------------------------------------------------------
>> -------- This email message is for the sole use of the intended
>> recipient(s) and may contain confidential information.  Any unauthorized
>> review, use, disclosure or distribution is prohibited.  If you are not the
>> intended recipient, please contact the sender by reply email and destroy
>> all copies of the original message.
>> ---------------------------------------------------------------------------
>> --------
> Best regards,
> Marek Vasut
Thanx & Regards,
Puneet
Marek Vasut - March 9, 2012, 12:03 p.m.
Dear puneets,

> Hi Marek,
> 
> On Thursday 08 March 2012 07:42 PM, Marek Vasut wrote:
> > Dear puneets,
> > 
> >> Hi Marek,
> >> 
> >> On Thursday 08 March 2012 03:36 AM, Marek Vasut wrote:
> >>> Dear puneets,
> >>> 
> >>>> Hi Mike,
> >>>> 
> >>>> On Tuesday 06 March 2012 08:37 AM, Mike Frysinger wrote:
> >>>>> * PGP Signed by an unknown key
> >>>>> 
> >>>>> On Monday 05 March 2012 09:46:21 Puneet Saxena wrote:
> >>>>>> As DMA expects the buffers to be equal and larger then
> >>>>>> cache lines, This aligns buffers at cacheline.
> >>>>> 
> >>>>> i don't think this statement is true.  DMA doesn't care about
> >>>>> alignment (well, some do, but it's not related to cache lines but
> >>>>> rather some other restriction in the peripheral DMA itself).  what
> >>>>> does matter is that cache operations operate on cache lines and not
> >>>>> individual bytes. hence the core arm code was updated to warn when
> >>>>> someone told it to invalidate X bytes but the hardware literally
> >>>>> could not, so it had to invalidate X + Y bytes.
> >>>> 
> >>>> Agreed, Will update the commit message in next patchset.
> >>>> 
> >>>>>> --- a/drivers/usb/host/ehci-hcd.c
> >>>>>> +++ b/drivers/usb/host/ehci-hcd.c
> >>>>>> 
> >>>>>>     static void flush_invalidate(u32 addr, int size, int flush)
> >>>>>>     {
> >>>>>> 
> >>>>>> +	/*
> >>>>>> +	 * Size is the bytes actually moved during transaction,
> >>>>>> +	 * which may not equal to the cache line. This results
> >>>>>> +	 * stop address passed for invalidating cache may not be
> > 
> > aligned.
> > 
> >>>>>> +	 * Therfore making size as multiple of cache line size.
> >>>>>> +	 */
> >>>>>> +	size = ALIGN(size, ARCH_DMA_MINALIGN);
> >>>>>> +
> >>>>>> 
> >>>>>>     	if (flush)
> >>>>>>     	
> >>>>>>     		flush_dcache_range(addr, addr + size);
> >>>>>>     	
> >>>>>>     	else
> >>>>> 
> >>>>> i think this is wrong and merely hides the errors from higher up
> >>>>> instead of fixing them.  the point of the warning was to tell you
> >>>>> that the code was invalidating *too many* bytes.  this code still
> >>>>> invalidates too many bytes without any justification as for why it's
> >>>>> OK to do here.  further, this code path only matters to the
> >>>>> invalidation logic, not the flush logic. -mike
> >>>> 
> >>>> The sole purpose of this patch to remove the warnings as start/stop
> >>>> address sent for invalidating
> >>>> is unaligned. Without this patch code works fine but with lots of
> >>>> spew...Which we don't want and discussed
> >>>> in earlier thread which Simon posted. Please have a look on following
> >>>> link.
> >>>> 
> >>>> As I understood, you agree that we need to align start/stop buffer
> >>>> address and also agree that
> >>>> to align stop address we need to align size as start address is
> >>>> already aligned.
> >>>> Now, "why its OK to do here"?
> >>>> We could have aligned the size in two places, cache_qtd() and
> >>>> cache_qh() but then we need to place alignment check
> >>>> at all the places where size is passed. So I thought better Aligning
> >>>> at flush_invalidate() and "ALIGN" macro does not
> >>>> increase the size if size is already aligned.
> >>> 
> >>> Actually I have to agree with Mike here. Can you please remove that
> >>> ALIGN() (and all others you might have added)? If it does spew, that's
> >>> ok and it tells us something is wrong in the USB core subsystem. Such
> >>> stuff can be fixed in subsequent patch.
> >> 
> >> Sorry, I could not understand "(and all others you might have added)".
> >> Do you want me remove any HACK in the patch which is using ALIGN or
> >> making stop address
> > 
> > No, only such hacks where it's certain they will either invalidate or
> > flush some areas that weren't allocated for them, like this ALIGN you
> > did here. This can cause trouble that will be very hard to find.
> > 
> >> aligned? The patch has only the above line to make stop address align
> >> and rest of the code makes
> >> start address align. Just to confirm, you are fine with the start
> >> address alignment code in the patch?
> > 
> > The start address alignment you do also aligns the end to the cacheline,
> > doesn't it? (at least that's what I believe the macro is supposed to
> > do).
> 
> Yes, start address alignment also aligns start address at the cache
> line. So, removing
> stop address alignment code as depicted above, should make this patch
> acceptable?
> 
Puneet, it's not a problem of acceptability, it's a problem of breaking uboot 
;-) Acceptability goes only after it we are sure doesn't break anything. The 
cache stuff is really fragile so we need to be very careful, please understand 
and let's figure this out together, I feel we're really close.

And yes, I believe removing such dangerous ALIGN() calls will be the last fix 
necessary, unless something other pops up.

Best regards,
Marek Vasut
Mike Frysinger - March 11, 2012, 2:35 a.m.
On Wednesday 07 March 2012 02:12:22 puneets wrote:
> On Tuesday 06 March 2012 08:37 AM, Mike Frysinger wrote:
> >> --- a/drivers/usb/host/ehci-hcd.c
> >> +++ b/drivers/usb/host/ehci-hcd.c
> >> 
> >>   static void flush_invalidate(u32 addr, int size, int flush)
> >>   {
> >> +	/*
> >> +	 * Size is the bytes actually moved during transaction,
> >> +	 * which may not equal to the cache line. This results
> >> +	 * stop address passed for invalidating cache may not be aligned.
> >> +	 * Therfore making size as multiple of cache line size.
> >> +	 */
> >> +	size = ALIGN(size, ARCH_DMA_MINALIGN);
> >> +
> >>   	if (flush)
> >>   		flush_dcache_range(addr, addr + size);
> >>   	else
> > 
> > i think this is wrong and merely hides the errors from higher up instead
> > of fixing them.  the point of the warning was to tell you that the code
> > was invalidating *too many* bytes.  this code still invalidates too many
> > bytes without any justification as for why it's OK to do here.  further,
> > this code path only matters to the invalidation logic, not the flush
> > logic.
> 
> The sole purpose of this patch to remove the warnings as start/stop
> address sent for invalidating
> is unaligned. Without this patch code works fine but with lots of
> spew...Which we don't want and discussed
> in earlier thread which Simon posted. Please have a look on following link.
> 
> As I understood, you agree that we need to align start/stop buffer
> address and also agree that
> to align stop address we need to align size as start address is already
> aligned.
> Now, "why its OK to do here"?
> We could have aligned the size in two places, cache_qtd() and cache_qh()
> but then we need to place alignment check
> at all the places where size is passed. So I thought better Aligning at
> flush_invalidate() and "ALIGN" macro does not
> increase the size if size is already aligned.

i think you missed my point.  consider a func which has local vars like so:
	int i;
	char buf[1024];
	int k;

and let's say you're running on a core that has a cache line size of 32 bytes 
(which is fairly common).  if you execute a data cache invalid insn, the 
smallest region it can invalidate is 32 bytes.  doesn't matter if you only 
want to invalidate a buffer of 8 bytes ... everything else around it gets 
invalidated as well.

now, in the aforementioned stack, if it starts off aligned nicely at a 32 byte 
boundary, the integer "i" will share a cache line with the first 28 bytes of 
buffer "buf", and the integer "k" will share a cache line with the last 4 bytes 
of the buffer "buf".  (let's ignore what might or might not happen based on gcc 
since this example can trivially be expanded to structure layout.)

the trouble is when you attempt to invalidate the contents of "buf".  if the 
cache is in writeback mode (which means you could have changes in the cache 
which are not reflected in external RAM), then invalidating buf will also 
discard values that might be in "i" or "k".  this is why Simon put a warning 
in the core data cache invalidate function.  if the cache were in writethrough 
mode (which also tends to be the default), then most likely things would work 
fine and no one would notice.  or if the data cache was merely flushed, things 
would work, but at a decrease in performance: you'd be flushing cache lines to 
external memory that you know will be overwritten by a following transaction 
-- most likely DMA from a peripheral such as the USB controller, and you'd be 
flushing objects that the DMA wouldn't be touching, so they'd have to get 
refetched from external RAM ("i" and "k" in my example above).

simply rounding the address down to the start of the cache line and the length 
up to a multiple of a cache line to keep the core code from issuing the 
warning doesn't fix the problem i describe above.  you actually get the worst 
of both worlds -- silent runtime misbehavior when extra memory gets 
invalidated.

perhaps the warning in the core code could be dropped and all your changes in 
fringe code obsoleted (such as these USB patches): when it detects that an 
address is starting on an unaligned boundary, *flush* that line first, and then 
let it be invalidated.  accordingly, when the end length is on an unaligned 
boundary, do the same flush-then-invalidate step.  this should also make things 
work without a (significant) loss in performance.  if anything, i suspect the 
overhead of doing runtime buffer size calculations and manually aligning 
pointers (which is what ALLOC_CACHE_ALIGN_BUFFER does) is a wash compared to 
partially flushing cache lines in the core ...

Simon: what do you think of this last idea ?
-mike
Marek Vasut - March 14, 2012, 2:05 a.m.
Dear Mike Frysinger,

> On Wednesday 07 March 2012 02:12:22 puneets wrote:
> > On Tuesday 06 March 2012 08:37 AM, Mike Frysinger wrote:
> > >> --- a/drivers/usb/host/ehci-hcd.c
> > >> +++ b/drivers/usb/host/ehci-hcd.c
> > >> 
> > >>   static void flush_invalidate(u32 addr, int size, int flush)
> > >>   {
> > >> 
> > >> +	/*
> > >> +	 * Size is the bytes actually moved during transaction,
> > >> +	 * which may not equal to the cache line. This results
> > >> +	 * stop address passed for invalidating cache may not be 
aligned.
> > >> +	 * Therfore making size as multiple of cache line size.
> > >> +	 */
> > >> +	size = ALIGN(size, ARCH_DMA_MINALIGN);
> > >> +
> > >> 
> > >>   	if (flush)
> > >>   	
> > >>   		flush_dcache_range(addr, addr + size);
> > >>   	
> > >>   	else
> > > 
> > > i think this is wrong and merely hides the errors from higher up
> > > instead of fixing them.  the point of the warning was to tell you that
> > > the code was invalidating *too many* bytes.  this code still
> > > invalidates too many bytes without any justification as for why it's
> > > OK to do here.  further, this code path only matters to the
> > > invalidation logic, not the flush logic.
> > 
> > The sole purpose of this patch to remove the warnings as start/stop
> > address sent for invalidating
> > is unaligned. Without this patch code works fine but with lots of
> > spew...Which we don't want and discussed
> > in earlier thread which Simon posted. Please have a look on following
> > link.
> > 
> > As I understood, you agree that we need to align start/stop buffer
> > address and also agree that
> > to align stop address we need to align size as start address is already
> > aligned.
> > Now, "why its OK to do here"?
> > We could have aligned the size in two places, cache_qtd() and cache_qh()
> > but then we need to place alignment check
> > at all the places where size is passed. So I thought better Aligning at
> > flush_invalidate() and "ALIGN" macro does not
> > increase the size if size is already aligned.
> 
> i think you missed my point.  consider a func which has local vars like so:
> 	int i;
> 	char buf[1024];
> 	int k;
> 
> and let's say you're running on a core that has a cache line size of 32
> bytes (which is fairly common).  if you execute a data cache invalid insn,
> the smallest region it can invalidate is 32 bytes.  doesn't matter if you
> only want to invalidate a buffer of 8 bytes ... everything else around it
> gets invalidated as well.
> 
> now, in the aforementioned stack, if it starts off aligned nicely at a 32
> byte boundary, the integer "i" will share a cache line with the first 28
> bytes of buffer "buf", and the integer "k" will share a cache line with
> the last 4 bytes of the buffer "buf".  (let's ignore what might or might
> not happen based on gcc since this example can trivially be expanded to
> structure layout.)
> 
> the trouble is when you attempt to invalidate the contents of "buf".  if
> the cache is in writeback mode (which means you could have changes in the
> cache which are not reflected in external RAM), then invalidating buf will
> also discard values that might be in "i" or "k".  this is why Simon put a
> warning in the core data cache invalidate function.  if the cache were in
> writethrough mode (which also tends to be the default), then most likely
> things would work fine and no one would notice.  or if the data cache was
> merely flushed, things would work, but at a decrease in performance: you'd
> be flushing cache lines to external memory that you know will be
> overwritten by a following transaction -- most likely DMA from a
> peripheral such as the USB controller, and you'd be flushing objects that
> the DMA wouldn't be touching, so they'd have to get refetched from
> external RAM ("i" and "k" in my example above).
> 
> simply rounding the address down to the start of the cache line and the
> length up to a multiple of a cache line to keep the core code from issuing
> the warning doesn't fix the problem i describe above.  you actually get
> the worst of both worlds -- silent runtime misbehavior when extra memory
> gets invalidated.
> 
> perhaps the warning in the core code could be dropped and all your changes
> in fringe code obsoleted (such as these USB patches): when it detects that
> an address is starting on an unaligned boundary, *flush* that line first,
> and then let it be invalidated.  accordingly, when the end length is on an
> unaligned boundary, do the same flush-then-invalidate step.  this should
> also make things work without a (significant) loss in performance.  if
> anything, i suspect the overhead of doing runtime buffer size calculations
> and manually aligning pointers (which is what ALLOC_CACHE_ALIGN_BUFFER
> does) is a wash compared to partially flushing cache lines in the core ...
> 
> Simon: what do you think of this last idea ?
> -mike

Did we get anywhere with this?

Best regards,
Marek Vasut

Patch

diff --git a/common/cmd_usb.c b/common/cmd_usb.c
index 320667f..bca9d94 100644
--- a/common/cmd_usb.c
+++ b/common/cmd_usb.c
@@ -150,7 +150,8 @@  void usb_display_class_sub(unsigned char dclass, unsigned char subclass,
 
 void usb_display_string(struct usb_device *dev, int index)
 {
-	char buffer[256];
+	ALLOC_CACHE_ALIGN_BUFFER(char, buffer, 256);
+
 	if (index != 0) {
 		if (usb_string(dev, index, &buffer[0], 256) > 0)
 			printf("String: \"%s\"", buffer);
diff --git a/common/usb.c b/common/usb.c
index 6e21ae2..e3db7bc 100644
--- a/common/usb.c
+++ b/common/usb.c
@@ -73,7 +73,6 @@  static struct usb_device usb_dev[USB_MAX_DEVICE];
 static int dev_index;
 static int running;
 static int asynch_allowed;
-static struct devrequest setup_packet;
 
 char usb_started; /* flag for the started/stopped USB status */
 
@@ -185,23 +184,24 @@  int usb_control_msg(struct usb_device *dev, unsigned int pipe,
 			unsigned short value, unsigned short index,
 			void *data, unsigned short size, int timeout)
 {
+	ALLOC_CACHE_ALIGN_BUFFER(struct devrequest, setup_packet, 1);
 	if ((timeout == 0) && (!asynch_allowed)) {
 		/* request for a asynch control pipe is not allowed */
 		return -1;
 	}
 
 	/* set setup command */
-	setup_packet.requesttype = requesttype;
-	setup_packet.request = request;
-	setup_packet.value = cpu_to_le16(value);
-	setup_packet.index = cpu_to_le16(index);
-	setup_packet.length = cpu_to_le16(size);
+	setup_packet->requesttype = requesttype;
+	setup_packet->request = request;
+	setup_packet->value = cpu_to_le16(value);
+	setup_packet->index = cpu_to_le16(index);
+	setup_packet->length = cpu_to_le16(size);
 	USB_PRINTF("usb_control_msg: request: 0x%X, requesttype: 0x%X, " \
 		   "value 0x%X index 0x%X length 0x%X\n",
 		   request, requesttype, value, index, size);
 	dev->status = USB_ST_NOT_PROC; /*not yet processed */
 
-	submit_control_msg(dev, pipe, data, size, &setup_packet);
+	submit_control_msg(dev, pipe, data, size, setup_packet);
 	if (timeout == 0)
 		return (int)size;
 
@@ -698,7 +698,7 @@  static int usb_string_sub(struct usb_device *dev, unsigned int langid,
  */
 int usb_string(struct usb_device *dev, int index, char *buf, size_t size)
 {
-	unsigned char mybuf[USB_BUFSIZ];
+	ALLOC_CACHE_ALIGN_BUFFER(unsigned char, mybuf, USB_BUFSIZ);
 	unsigned char *tbuf;
 	int err;
 	unsigned int u, idx;
@@ -798,7 +798,7 @@  int usb_new_device(struct usb_device *dev)
 {
 	int addr, err;
 	int tmp;
-	unsigned char tmpbuf[USB_BUFSIZ];
+	ALLOC_CACHE_ALIGN_BUFFER(unsigned char, tmpbuf, USB_BUFSIZ);
 
 	/* We still haven't set the Address yet */
 	addr = dev->devnum;
@@ -925,8 +925,8 @@  int usb_new_device(struct usb_device *dev)
 	le16_to_cpus(&dev->descriptor.idProduct);
 	le16_to_cpus(&dev->descriptor.bcdDevice);
 	/* only support for one config for now */
-	usb_get_configuration_no(dev, &tmpbuf[0], 0);
-	usb_parse_config(dev, &tmpbuf[0], 0);
+	usb_get_configuration_no(dev, tmpbuf, 0);
+	usb_parse_config(dev, tmpbuf, 0);
 	usb_set_maxpacket(dev);
 	/* we set the default configuration here */
 	if (usb_set_configuration(dev, dev->config.desc.bConfigurationValue)) {
@@ -1080,7 +1080,7 @@  static int hub_port_reset(struct usb_device *dev, int port,
 			unsigned short *portstat)
 {
 	int tries;
-	struct usb_port_status portsts;
+	ALLOC_CACHE_ALIGN_BUFFER(struct usb_port_status, portsts, 1);
 	unsigned short portstatus, portchange;
 
 	USB_HUB_PRINTF("hub_port_reset: resetting port %d...\n", port);
@@ -1089,13 +1089,13 @@  static int hub_port_reset(struct usb_device *dev, int port,
 		usb_set_port_feature(dev, port + 1, USB_PORT_FEAT_RESET);
 		wait_ms(200);
 
-		if (usb_get_port_status(dev, port + 1, &portsts) < 0) {
+		if (usb_get_port_status(dev, port + 1, portsts) < 0) {
 			USB_HUB_PRINTF("get_port_status failed status %lX\n",
 					dev->status);
 			return -1;
 		}
-		portstatus = le16_to_cpu(portsts.wPortStatus);
-		portchange = le16_to_cpu(portsts.wPortChange);
+		portstatus = le16_to_cpu(portsts->wPortStatus);
+		portchange = le16_to_cpu(portsts->wPortChange);
 
 		USB_HUB_PRINTF("portstatus %x, change %x, %s\n",
 				portstatus, portchange,
@@ -1133,19 +1133,19 @@  static int hub_port_reset(struct usb_device *dev, int port,
 void usb_hub_port_connect_change(struct usb_device *dev, int port)
 {
 	struct usb_device *usb;
-	struct usb_port_status portsts;
+	ALLOC_CACHE_ALIGN_BUFFER(struct usb_port_status, portsts, 1);
 	unsigned short portstatus;
 
 	/* Check status */
-	if (usb_get_port_status(dev, port + 1, &portsts) < 0) {
+	if (usb_get_port_status(dev, port + 1, portsts) < 0) {
 		USB_HUB_PRINTF("get_port_status failed\n");
 		return;
 	}
 
-	portstatus = le16_to_cpu(portsts.wPortStatus);
+	portstatus = le16_to_cpu(portsts->wPortStatus);
 	USB_HUB_PRINTF("portstatus %x, change %x, %s\n",
 			portstatus,
-			le16_to_cpu(portsts.wPortChange),
+			le16_to_cpu(portsts->wPortChange),
 			portspeed(portstatus));
 
 	/* Clear the connection change status */
@@ -1194,7 +1194,8 @@  void usb_hub_port_connect_change(struct usb_device *dev, int port)
 int usb_hub_configure(struct usb_device *dev)
 {
 	int i;
-	unsigned char buffer[USB_BUFSIZ], *bitmap;
+	ALLOC_CACHE_ALIGN_BUFFER(unsigned char, buffer, USB_BUFSIZ);
+	unsigned char *bitmap;
 	struct usb_hub_descriptor *descriptor;
 	struct usb_hub_device *hub;
 #ifdef USB_HUB_DEBUG
@@ -1316,16 +1317,16 @@  int usb_hub_configure(struct usb_device *dev)
 	usb_hub_power_on(hub);
 
 	for (i = 0; i < dev->maxchild; i++) {
-		struct usb_port_status portsts;
+		ALLOC_CACHE_ALIGN_BUFFER(struct usb_port_status, portsts, 1);
 		unsigned short portstatus, portchange;
 
-		if (usb_get_port_status(dev, i + 1, &portsts) < 0) {
+		if (usb_get_port_status(dev, i + 1, portsts) < 0) {
 			USB_HUB_PRINTF("get_port_status failed\n");
 			continue;
 		}
 
-		portstatus = le16_to_cpu(portsts.wPortStatus);
-		portchange = le16_to_cpu(portsts.wPortChange);
+		portstatus = le16_to_cpu(portsts->wPortStatus);
+		portchange = le16_to_cpu(portsts->wPortChange);
 		USB_HUB_PRINTF("Port %d Status %X Change %X\n",
 				i + 1, portstatus, portchange);
 
diff --git a/common/usb_storage.c b/common/usb_storage.c
index de84c8d..88ca390 100644
--- a/common/usb_storage.c
+++ b/common/usb_storage.c
@@ -79,8 +79,7 @@  static const unsigned char us_direction[256/8] = {
 };
 #define US_DIRECTION(x) ((us_direction[x>>3] >> (x & 7)) & 1)
 
-static unsigned char usb_stor_buf[512];
-static ccb usb_ccb;
+static ccb usb_ccb __attribute__((aligned(ARCH_DMA_MINALIGN)));
 
 /*
  * CBI style
@@ -210,17 +209,17 @@  int usb_stor_info(void)
 static unsigned int usb_get_max_lun(struct us_data *us)
 {
 	int len;
-	unsigned char result;
+	ALLOC_CACHE_ALIGN_BUFFER(unsigned char, result, 1);
 	len = usb_control_msg(us->pusb_dev,
 			      usb_rcvctrlpipe(us->pusb_dev, 0),
 			      US_BBB_GET_MAX_LUN,
 			      USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN,
 			      0, us->ifnum,
-			      &result, sizeof(result),
+			      result, sizeof(char),
 			      USB_CNTL_TIMEOUT * 5);
 	USB_STOR_PRINTF("Get Max LUN -> len = %i, result = %i\n",
-			len, (int) result);
-	return (len > 0) ? result : 0;
+			len, (int) *result);
+	return (len > 0) ? *result : 0;
 }
 
 /*******************************************************************************
@@ -233,9 +232,6 @@  int usb_stor_scan(int mode)
 	unsigned char i;
 	struct usb_device *dev;
 
-	/* GJ */
-	memset(usb_stor_buf, 0, sizeof(usb_stor_buf));
-
 	if (mode == 1)
 		printf("       scanning bus for storage devices... ");
 
@@ -499,7 +495,7 @@  int usb_stor_BBB_comdat(ccb *srb, struct us_data *us)
 	int actlen;
 	int dir_in;
 	unsigned int pipe;
-	umass_bbb_cbw_t cbw;
+	ALLOC_CACHE_ALIGN_BUFFER(umass_bbb_cbw_t, cbw, 1);
 
 	dir_in = US_DIRECTION(srb->cmd[0]);
 
@@ -522,16 +518,16 @@  int usb_stor_BBB_comdat(ccb *srb, struct us_data *us)
 	/* always OUT to the ep */
 	pipe = usb_sndbulkpipe(us->pusb_dev, us->ep_out);
 
-	cbw.dCBWSignature = cpu_to_le32(CBWSIGNATURE);
-	cbw.dCBWTag = cpu_to_le32(CBWTag++);
-	cbw.dCBWDataTransferLength = cpu_to_le32(srb->datalen);
-	cbw.bCBWFlags = (dir_in ? CBWFLAGS_IN : CBWFLAGS_OUT);
-	cbw.bCBWLUN = srb->lun;
-	cbw.bCDBLength = srb->cmdlen;
+	cbw->dCBWSignature = cpu_to_le32(CBWSIGNATURE);
+	cbw->dCBWTag = cpu_to_le32(CBWTag++);
+	cbw->dCBWDataTransferLength = cpu_to_le32(srb->datalen);
+	cbw->bCBWFlags = (dir_in ? CBWFLAGS_IN : CBWFLAGS_OUT);
+	cbw->bCBWLUN = srb->lun;
+	cbw->bCDBLength = srb->cmdlen;
 	/* copy the command data into the CBW command data buffer */
 	/* DST SRC LEN!!! */
-	memcpy(cbw.CBWCDB, srb->cmd, srb->cmdlen);
-	result = usb_bulk_msg(us->pusb_dev, pipe, &cbw, UMASS_BBB_CBW_SIZE,
+	memcpy(cbw->CBWCDB, srb->cmd, srb->cmdlen);
+	result = usb_bulk_msg(us->pusb_dev, pipe, cbw, UMASS_BBB_CBW_SIZE,
 			      &actlen, USB_CNTL_TIMEOUT * 5);
 	if (result < 0)
 		USB_STOR_PRINTF("usb_stor_BBB_comdat:usb_bulk_msg error\n");
@@ -675,7 +671,7 @@  int usb_stor_BBB_transport(ccb *srb, struct us_data *us)
 	int dir_in;
 	int actlen, data_actlen;
 	unsigned int pipe, pipein, pipeout;
-	umass_bbb_csw_t csw;
+	ALLOC_CACHE_ALIGN_BUFFER(umass_bbb_csw_t, csw, 1);
 #ifdef BBB_XPORT_TRACE
 	unsigned char *ptr;
 	int index;
@@ -733,7 +729,7 @@  st:
 	retry = 0;
 again:
 	USB_STOR_PRINTF("STATUS phase\n");
-	result = usb_bulk_msg(us->pusb_dev, pipein, &csw, UMASS_BBB_CSW_SIZE,
+	result = usb_bulk_msg(us->pusb_dev, pipein, csw, UMASS_BBB_CSW_SIZE,
 				&actlen, USB_CNTL_TIMEOUT*5);
 
 	/* special handling of STALL in STATUS phase */
@@ -753,28 +749,28 @@  again:
 		return USB_STOR_TRANSPORT_FAILED;
 	}
 #ifdef BBB_XPORT_TRACE
-	ptr = (unsigned char *)&csw;
+	ptr = (unsigned char *)csw;
 	for (index = 0; index < UMASS_BBB_CSW_SIZE; index++)
 		printf("ptr[%d] %#x ", index, ptr[index]);
 	printf("\n");
 #endif
 	/* misuse pipe to get the residue */
-	pipe = le32_to_cpu(csw.dCSWDataResidue);
+	pipe = le32_to_cpu(csw->dCSWDataResidue);
 	if (pipe == 0 && srb->datalen != 0 && srb->datalen - data_actlen != 0)
 		pipe = srb->datalen - data_actlen;
-	if (CSWSIGNATURE != le32_to_cpu(csw.dCSWSignature)) {
+	if (CSWSIGNATURE != le32_to_cpu(csw->dCSWSignature)) {
 		USB_STOR_PRINTF("!CSWSIGNATURE\n");
 		usb_stor_BBB_reset(us);
 		return USB_STOR_TRANSPORT_FAILED;
-	} else if ((CBWTag - 1) != le32_to_cpu(csw.dCSWTag)) {
+	} else if ((CBWTag - 1) != le32_to_cpu(csw->dCSWTag)) {
 		USB_STOR_PRINTF("!Tag\n");
 		usb_stor_BBB_reset(us);
 		return USB_STOR_TRANSPORT_FAILED;
-	} else if (csw.bCSWStatus > CSWSTATUS_PHASE) {
+	} else if (csw->bCSWStatus > CSWSTATUS_PHASE) {
 		USB_STOR_PRINTF(">PHASE\n");
 		usb_stor_BBB_reset(us);
 		return USB_STOR_TRANSPORT_FAILED;
-	} else if (csw.bCSWStatus == CSWSTATUS_PHASE) {
+	} else if (csw->bCSWStatus == CSWSTATUS_PHASE) {
 		USB_STOR_PRINTF("=PHASE\n");
 		usb_stor_BBB_reset(us);
 		return USB_STOR_TRANSPORT_FAILED;
@@ -782,7 +778,7 @@  again:
 		USB_STOR_PRINTF("transferred %dB instead of %ldB\n",
 			data_actlen, srb->datalen);
 		return USB_STOR_TRANSPORT_FAILED;
-	} else if (csw.bCSWStatus == CSWSTATUS_FAILED) {
+	} else if (csw->bCSWStatus == CSWSTATUS_FAILED) {
 		USB_STOR_PRINTF("FAILED\n");
 		return USB_STOR_TRANSPORT_FAILED;
 	}
@@ -1343,7 +1339,8 @@  int usb_stor_get_info(struct usb_device *dev, struct us_data *ss,
 		      block_dev_desc_t *dev_desc)
 {
 	unsigned char perq, modi;
-	unsigned long cap[2];
+	ALLOC_CACHE_ALIGN_BUFFER(unsigned long, cap, 2);
+	ALLOC_CACHE_ALIGN_BUFFER(unsigned char, usb_stor_buf, 36);
 	unsigned long *capacity, *blksz;
 	ccb *pccb = &usb_ccb;
 
@@ -1367,9 +1364,9 @@  int usb_stor_get_info(struct usb_device *dev, struct us_data *ss,
 		/* drive is removable */
 		dev_desc->removable = 1;
 	}
-	memcpy(&dev_desc->vendor[0], &usb_stor_buf[8], 8);
-	memcpy(&dev_desc->product[0], &usb_stor_buf[16], 16);
-	memcpy(&dev_desc->revision[0], &usb_stor_buf[32], 4);
+	memcpy(&dev_desc->vendor[0], (const void *) &usb_stor_buf[8], 8);
+	memcpy(&dev_desc->product[0], (const void *) &usb_stor_buf[16], 16);
+	memcpy(&dev_desc->revision[0], (const void *) &usb_stor_buf[32], 4);
 	dev_desc->vendor[8] = 0;
 	dev_desc->product[16] = 0;
 	dev_desc->revision[4] = 0;
diff --git a/disk/part_dos.c b/disk/part_dos.c
index b5bcb37..70211ee 100644
--- a/disk/part_dos.c
+++ b/disk/part_dos.c
@@ -87,7 +87,7 @@  static int test_block_type(unsigned char *buffer)
 
 int test_part_dos (block_dev_desc_t *dev_desc)
 {
-	unsigned char buffer[dev_desc->blksz];
+	ALLOC_CACHE_ALIGN_BUFFER(unsigned char, buffer, dev_desc->blksz);
 
 	if ((dev_desc->block_read(dev_desc->dev, 0, 1, (ulong *) buffer) != 1) ||
 	    (buffer[DOS_PART_MAGIC_OFFSET + 0] != 0x55) ||
diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index d893b2a..eb5220b 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -120,6 +120,14 @@  static struct descriptor {
  */
 static void flush_invalidate(u32 addr, int size, int flush)
 {
+	/*
+	 * Size is the bytes actually moved during transaction,
+	 * which may not equal to the cache line. This results
+	 * stop address passed for invalidating cache may not be aligned.
+	 * Therfore making size as multiple of cache line size.
+	 */
+	size = ALIGN(size, ARCH_DMA_MINALIGN);
+
 	if (flush)
 		flush_dcache_range(addr, addr + size);
 	else
diff --git a/include/scsi.h b/include/scsi.h
index c52759c..89ae45f 100644
--- a/include/scsi.h
+++ b/include/scsi.h
@@ -26,7 +26,9 @@ 
 
 typedef struct SCSI_cmd_block{
 	unsigned char		cmd[16];					/* command				   */
-	unsigned char		sense_buf[64];		/* for request sense */
+	/* for request sense */
+	unsigned char		sense_buf[64]
+		__attribute__((aligned(ARCH_DMA_MINALIGN)));
 	unsigned char		status;						/* SCSI Status			 */
 	unsigned char		target;						/* Target ID				 */
 	unsigned char		lun;							/* Target LUN        */
diff --git a/include/usb.h b/include/usb.h
index 06170cd..5f4f110 100644
--- a/include/usb.h
+++ b/include/usb.h
@@ -109,7 +109,9 @@  struct usb_device {
 	int epmaxpacketout[16];		/* OUTput endpoint specific maximums */
 
 	int configno;			/* selected config number */
-	struct usb_device_descriptor descriptor; /* Device Descriptor */
+	/* Device Descriptor */
+	struct usb_device_descriptor descriptor
+		__attribute__((aligned(ARCH_DMA_MINALIGN)));
 	struct usb_config config; /* config descriptor */
 
 	int have_langid;		/* whether string_langid is valid yet */