Patchwork [U-Boot,v2,1/2] usb: align buffers at cacheline

login
register
mail settings
Submitter Puneet Saxena
Date Feb. 27, 2012, 3:36 p.m.
Message ID <1330356992-21297-1-git-send-email-puneets@nvidia.com>
Download mbox | patch
Permalink /patch/143212/
State Superseded
Delegated to: Marek Vasut
Headers show

Comments

Puneet Saxena - Feb. 27, 2012, 3:36 p.m.
As DMA expects the buffers to be equal and larger then
cache lines, This aligns buffers at cacheline.

Signed-off-by: Puneet Saxena <puneets@nvidia.com>
Signed-off-by: Jim Lin <jilin@nvidia.com>
---

Changes for V2:
    - Use "ARCH_DMA_MINALIGN" directly  
    - Use "ALIGN" to align size as cacheline 
    - Removed headers from usb.h
    - Send 8 bytes of device descriptor size to read 
      Max packet size 
scsi.h header is needed to avoid extra memcpy from local buffer 
to global buffer.

 common/cmd_usb.c            |    3 +-
 common/usb.c                |   61 ++++++++++++++++++++++++------------------
 common/usb_storage.c        |   59 +++++++++++++++++++----------------------
 disk/part_dos.c             |    2 +-
 drivers/usb/host/ehci-hcd.c |    8 +++++
 include/scsi.h              |    4 ++-
 6 files changed, 77 insertions(+), 60 deletions(-)
Marek Vasut - Feb. 27, 2012, 4:49 p.m.
> As DMA expects the buffers to be equal and larger then
> cache lines, This aligns buffers at cacheline.
> 
> Signed-off-by: Puneet Saxena <puneets@nvidia.com>
> Signed-off-by: Jim Lin <jilin@nvidia.com>
> ---
> 

First of all, thanks for this patch, it really helps. But, there are a few 
things that concern me.

> Changes for V2:
>     - Use "ARCH_DMA_MINALIGN" directly
>     - Use "ALIGN" to align size as cacheline
>     - Removed headers from usb.h
>     - Send 8 bytes of device descriptor size to read
>       Max packet size
> scsi.h header is needed to avoid extra memcpy from local buffer
> to global buffer.
> 
>  common/cmd_usb.c            |    3 +-
>  common/usb.c                |   61
> ++++++++++++++++++++++++------------------ common/usb_storage.c        |  
> 59 +++++++++++++++++++---------------------- disk/part_dos.c             |
>    2 +-
>  drivers/usb/host/ehci-hcd.c |    8 +++++
>  include/scsi.h              |    4 ++-
>  6 files changed, 77 insertions(+), 60 deletions(-)
> 
> diff --git a/common/cmd_usb.c b/common/cmd_usb.c
> index 320667f..bca9d94 100644
> --- a/common/cmd_usb.c
> +++ b/common/cmd_usb.c
> @@ -150,7 +150,8 @@ void usb_display_class_sub(unsigned char dclass,
> unsigned char subclass,
> 
>  void usb_display_string(struct usb_device *dev, int index)
>  {
> -	char buffer[256];
> +	ALLOC_CACHE_ALIGN_BUFFER(char, buffer, 256);

Why not memalign(), do you want to allocate on stack so badly ?

> +
>  	if (index != 0) {
>  		if (usb_string(dev, index, &buffer[0], 256) > 0)
>  			printf("String: \"%s\"", buffer);
> diff --git a/common/usb.c b/common/usb.c
> index 63a11c8..2279459 100644
> --- a/common/usb.c
> +++ b/common/usb.c
> @@ -73,7 +73,6 @@ static struct usb_device usb_dev[USB_MAX_DEVICE];
>  static int dev_index;
>  static int running;
>  static int asynch_allowed;
> -static struct devrequest setup_packet;
> 
>  char usb_started; /* flag for the started/stopped USB status */
> 
> @@ -185,23 +184,25 @@ int usb_control_msg(struct usb_device *dev, unsigned
> int pipe, unsigned short value, unsigned short index,
>  			void *data, unsigned short size, int timeout)
>  {
> +	ALLOC_CACHE_ALIGN_BUFFER(struct devrequest, setup_packet,
> +		sizeof(struct devrequest));

DTTO, btw does the setup packet need to be aligned too ?

>  	if ((timeout == 0) && (!asynch_allowed)) {
>  		/* request for a asynch control pipe is not allowed */
>  		return -1;
>  	}
> 
>  	/* set setup command */
> -	setup_packet.requesttype = requesttype;
> -	setup_packet.request = request;
> -	setup_packet.value = cpu_to_le16(value);
> -	setup_packet.index = cpu_to_le16(index);
> -	setup_packet.length = cpu_to_le16(size);
> +	setup_packet->requesttype = requesttype;
> +	setup_packet->request = request;
> +	setup_packet->value = cpu_to_le16(value);
> +	setup_packet->index = cpu_to_le16(index);
> +	setup_packet->length = cpu_to_le16(size);
>  	USB_PRINTF("usb_control_msg: request: 0x%X, requesttype: 0x%X, " \
>  		   "value 0x%X index 0x%X length 0x%X\n",
>  		   request, requesttype, value, index, size);
>  	dev->status = USB_ST_NOT_PROC; /*not yet processed */
> 
> -	submit_control_msg(dev, pipe, data, size, &setup_packet);
> +	submit_control_msg(dev, pipe, data, size, setup_packet);
>  	if (timeout == 0)
>  		return (int)size;
> 
> @@ -694,7 +695,7 @@ static int usb_string_sub(struct usb_device *dev,
> unsigned int langid, */
>  int usb_string(struct usb_device *dev, int index, char *buf, size_t size)
>  {
> -	unsigned char mybuf[USB_BUFSIZ];
> +	ALLOC_CACHE_ALIGN_BUFFER(unsigned char, mybuf, USB_BUFSIZ);
>  	unsigned char *tbuf;
>  	int err;
>  	unsigned int u, idx;
> @@ -794,7 +795,7 @@ int usb_new_device(struct usb_device *dev)
>  {
>  	int addr, err;
>  	int tmp;
> -	unsigned char tmpbuf[USB_BUFSIZ];
> +	ALLOC_CACHE_ALIGN_BUFFER(unsigned char, tmpbuf, USB_BUFSIZ);

Why not simply memalign() them all?
> 
>  	/* We still haven't set the Address yet */
>  	addr = dev->devnum;
> @@ -842,7 +843,10 @@ int usb_new_device(struct usb_device *dev)
>  	dev->epmaxpacketin[0] = 64;
>  	dev->epmaxpacketout[0] = 64;
> 
> -	err = usb_get_descriptor(dev, USB_DT_DEVICE, 0, desc, 64);
> +	desc->bMaxPacketSize0 = 0;
> +	/*8 bytes of the descriptor to read Max packet size*/
> +	err = usb_get_descriptor(dev, USB_DT_DEVICE, 0, desc,
> +			8);
>  	if (err < 0) {
>  		USB_PRINTF("usb_new_device: usb_get_descriptor() failed\n");
>  		return 1;
> @@ -905,7 +909,7 @@ int usb_new_device(struct usb_device *dev)
>  	tmp = sizeof(dev->descriptor);
> 
>  	err = usb_get_descriptor(dev, USB_DT_DEVICE, 0,
> -				 &dev->descriptor, sizeof(dev->descriptor));
> +				 desc, sizeof(dev->descriptor));
>  	if (err < tmp) {
>  		if (err < 0)
>  			printf("unable to get device descriptor (error=%d)\n",
> @@ -915,14 +919,18 @@ int usb_new_device(struct usb_device *dev)
>  				"(expected %i, got %i)\n", tmp, err);
>  		return 1;
>  	}
> +	dev->descriptor.bcdUSB = desc->bcdUSB;
> +	dev->descriptor.idVendor = desc->idVendor;
> +	dev->descriptor.idProduct = desc->idProduct;
> +	dev->descriptor.bcdDevice = desc->bcdDevice;

Isn't the above change unrelated?

>  	/* correct le values */
>  	le16_to_cpus(&dev->descriptor.bcdUSB);
>  	le16_to_cpus(&dev->descriptor.idVendor);
>  	le16_to_cpus(&dev->descriptor.idProduct);
>  	le16_to_cpus(&dev->descriptor.bcdDevice);
>  	/* only support for one config for now */
> -	usb_get_configuration_no(dev, &tmpbuf[0], 0);
> -	usb_parse_config(dev, &tmpbuf[0], 0);
> +	usb_get_configuration_no(dev, tmpbuf, 0);
> +	usb_parse_config(dev, tmpbuf, 0);
>  	usb_set_maxpacket(dev);
>  	/* we set the default configuration here */
>  	if (usb_set_configuration(dev, dev->config.desc.bConfigurationValue)) {
> @@ -1076,7 +1084,7 @@ static int hub_port_reset(struct usb_device *dev, int
> port, unsigned short *portstat)
>  {
>  	int tries;
> -	struct usb_port_status portsts;
> +	ALLOC_CACHE_ALIGN_BUFFER(struct usb_port_status, portsts, 1);

Looking at all this stuff, it's already allocated on stack. Ok, I already 
outlined first option -- memalign(), but that'd really put quite a strain on the 
memory allocator -- the other option (and maybe even better) is to use 
__attribute__((aligned)), which will allow the compiler to reorder data 
allocated on the stack so the array you create is aligned and not much space 
around on the stack is wasted. Apparently, ALLOC_CACHE_ALIGN_BUFFER() wastes a 
lot of space on the stack.

>  	unsigned short portstatus, portchange;
> 
>  	USB_HUB_PRINTF("hub_port_reset: resetting port %d...\n", port);
> @@ -1085,13 +1093,13 @@ static int hub_port_reset(struct usb_device *dev,
> int port, usb_set_port_feature(dev, port + 1, USB_PORT_FEAT_RESET);
>  		wait_ms(200);
> 
> -		if (usb_get_port_status(dev, port + 1, &portsts) < 0) {
> +		if (usb_get_port_status(dev, port + 1, portsts) < 0) {
>  			USB_HUB_PRINTF("get_port_status failed status %lX\n",
>  					dev->status);
>  			return -1;
>  		}
> -		portstatus = le16_to_cpu(portsts.wPortStatus);
> -		portchange = le16_to_cpu(portsts.wPortChange);
> +		portstatus = le16_to_cpu(portsts->wPortStatus);
> +		portchange = le16_to_cpu(portsts->wPortChange);
> 
>  		USB_HUB_PRINTF("portstatus %x, change %x, %s\n",
>  				portstatus, portchange,
> @@ -1129,19 +1137,19 @@ static int hub_port_reset(struct usb_device *dev,
> int port, void usb_hub_port_connect_change(struct usb_device *dev, int
> port) {
>  	struct usb_device *usb;
> -	struct usb_port_status portsts;
> +	ALLOC_CACHE_ALIGN_BUFFER(struct usb_port_status, portsts, 1);
>  	unsigned short portstatus;
> 
>  	/* Check status */
> -	if (usb_get_port_status(dev, port + 1, &portsts) < 0) {
> +	if (usb_get_port_status(dev, port + 1, portsts) < 0) {
>  		USB_HUB_PRINTF("get_port_status failed\n");
>  		return;
>  	}
> 
> -	portstatus = le16_to_cpu(portsts.wPortStatus);
> +	portstatus = le16_to_cpu(portsts->wPortStatus);
>  	USB_HUB_PRINTF("portstatus %x, change %x, %s\n",
>  			portstatus,
> -			le16_to_cpu(portsts.wPortChange),
> +			le16_to_cpu(portsts->wPortChange),
>  			portspeed(portstatus));
> 
>  	/* Clear the connection change status */
> @@ -1190,7 +1198,8 @@ void usb_hub_port_connect_change(struct usb_device
> *dev, int port) int usb_hub_configure(struct usb_device *dev)
>  {
>  	int i;
> -	unsigned char buffer[USB_BUFSIZ], *bitmap;
> +	ALLOC_CACHE_ALIGN_BUFFER(unsigned char, buffer, USB_BUFSIZ);
> +	unsigned char *bitmap;
>  	struct usb_hub_descriptor *descriptor;
>  	struct usb_hub_device *hub;
>  #ifdef USB_HUB_DEBUG
> @@ -1312,16 +1321,16 @@ int usb_hub_configure(struct usb_device *dev)
>  	usb_hub_power_on(hub);
> 
>  	for (i = 0; i < dev->maxchild; i++) {
> -		struct usb_port_status portsts;
> +		ALLOC_CACHE_ALIGN_BUFFER(struct usb_port_status, portsts, 1);
>  		unsigned short portstatus, portchange;
> 
> -		if (usb_get_port_status(dev, i + 1, &portsts) < 0) {
> +		if (usb_get_port_status(dev, i + 1, portsts) < 0) {
>  			USB_HUB_PRINTF("get_port_status failed\n");
>  			continue;
>  		}
> 
> -		portstatus = le16_to_cpu(portsts.wPortStatus);
> -		portchange = le16_to_cpu(portsts.wPortChange);
> +		portstatus = le16_to_cpu(portsts->wPortStatus);
> +		portchange = le16_to_cpu(portsts->wPortChange);
>  		USB_HUB_PRINTF("Port %d Status %X Change %X\n",
>  				i + 1, portstatus, portchange);
> 
> diff --git a/common/usb_storage.c b/common/usb_storage.c
> index de84c8d..88ca390 100644
> --- a/common/usb_storage.c
> +++ b/common/usb_storage.c
> @@ -79,8 +79,7 @@ static const unsigned char us_direction[256/8] = {
>  };
>  #define US_DIRECTION(x) ((us_direction[x>>3] >> (x & 7)) & 1)
> 
> -static unsigned char usb_stor_buf[512];
> -static ccb usb_ccb;
> +static ccb usb_ccb __attribute__((aligned(ARCH_DMA_MINALIGN)));
> 
>  /*
>   * CBI style
> @@ -210,17 +209,17 @@ int usb_stor_info(void)
>  static unsigned int usb_get_max_lun(struct us_data *us)
>  {
>  	int len;
> -	unsigned char result;
> +	ALLOC_CACHE_ALIGN_BUFFER(unsigned char, result, 1);
>  	len = usb_control_msg(us->pusb_dev,
>  			      usb_rcvctrlpipe(us->pusb_dev, 0),
>  			      US_BBB_GET_MAX_LUN,
>  			      USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN,
>  			      0, us->ifnum,
> -			      &result, sizeof(result),
> +			      result, sizeof(char),
>  			      USB_CNTL_TIMEOUT * 5);
>  	USB_STOR_PRINTF("Get Max LUN -> len = %i, result = %i\n",
> -			len, (int) result);
> -	return (len > 0) ? result : 0;
> +			len, (int) *result);
> +	return (len > 0) ? *result : 0;
>  }
> 
>  /*************************************************************************
> ****** @@ -233,9 +232,6 @@ int usb_stor_scan(int mode)
>  	unsigned char i;
>  	struct usb_device *dev;
> 
> -	/* GJ */
> -	memset(usb_stor_buf, 0, sizeof(usb_stor_buf));
> -
>  	if (mode == 1)
>  		printf("       scanning bus for storage devices... ");
> 
> @@ -499,7 +495,7 @@ int usb_stor_BBB_comdat(ccb *srb, struct us_data *us)
>  	int actlen;
>  	int dir_in;
>  	unsigned int pipe;
> -	umass_bbb_cbw_t cbw;
> +	ALLOC_CACHE_ALIGN_BUFFER(umass_bbb_cbw_t, cbw, 1);
> 
>  	dir_in = US_DIRECTION(srb->cmd[0]);
> 
> @@ -522,16 +518,16 @@ int usb_stor_BBB_comdat(ccb *srb, struct us_data *us)
>  	/* always OUT to the ep */
>  	pipe = usb_sndbulkpipe(us->pusb_dev, us->ep_out);
> 
> -	cbw.dCBWSignature = cpu_to_le32(CBWSIGNATURE);
> -	cbw.dCBWTag = cpu_to_le32(CBWTag++);
> -	cbw.dCBWDataTransferLength = cpu_to_le32(srb->datalen);
> -	cbw.bCBWFlags = (dir_in ? CBWFLAGS_IN : CBWFLAGS_OUT);
> -	cbw.bCBWLUN = srb->lun;
> -	cbw.bCDBLength = srb->cmdlen;
> +	cbw->dCBWSignature = cpu_to_le32(CBWSIGNATURE);
> +	cbw->dCBWTag = cpu_to_le32(CBWTag++);
> +	cbw->dCBWDataTransferLength = cpu_to_le32(srb->datalen);
> +	cbw->bCBWFlags = (dir_in ? CBWFLAGS_IN : CBWFLAGS_OUT);
> +	cbw->bCBWLUN = srb->lun;
> +	cbw->bCDBLength = srb->cmdlen;
>  	/* copy the command data into the CBW command data buffer */
>  	/* DST SRC LEN!!! */
> -	memcpy(cbw.CBWCDB, srb->cmd, srb->cmdlen);
> -	result = usb_bulk_msg(us->pusb_dev, pipe, &cbw, UMASS_BBB_CBW_SIZE,
> +	memcpy(cbw->CBWCDB, srb->cmd, srb->cmdlen);
> +	result = usb_bulk_msg(us->pusb_dev, pipe, cbw, UMASS_BBB_CBW_SIZE,
>  			      &actlen, USB_CNTL_TIMEOUT * 5);
>  	if (result < 0)
>  		USB_STOR_PRINTF("usb_stor_BBB_comdat:usb_bulk_msg error\n");
> @@ -675,7 +671,7 @@ int usb_stor_BBB_transport(ccb *srb, struct us_data
> *us) int dir_in;
>  	int actlen, data_actlen;
>  	unsigned int pipe, pipein, pipeout;
> -	umass_bbb_csw_t csw;
> +	ALLOC_CACHE_ALIGN_BUFFER(umass_bbb_csw_t, csw, 1);
>  #ifdef BBB_XPORT_TRACE
>  	unsigned char *ptr;
>  	int index;
> @@ -733,7 +729,7 @@ st:
>  	retry = 0;
>  again:
>  	USB_STOR_PRINTF("STATUS phase\n");
> -	result = usb_bulk_msg(us->pusb_dev, pipein, &csw, UMASS_BBB_CSW_SIZE,
> +	result = usb_bulk_msg(us->pusb_dev, pipein, csw, UMASS_BBB_CSW_SIZE,
>  				&actlen, USB_CNTL_TIMEOUT*5);
> 
>  	/* special handling of STALL in STATUS phase */
> @@ -753,28 +749,28 @@ again:
>  		return USB_STOR_TRANSPORT_FAILED;
>  	}
>  #ifdef BBB_XPORT_TRACE
> -	ptr = (unsigned char *)&csw;
> +	ptr = (unsigned char *)csw;
>  	for (index = 0; index < UMASS_BBB_CSW_SIZE; index++)
>  		printf("ptr[%d] %#x ", index, ptr[index]);
>  	printf("\n");
>  #endif
>  	/* misuse pipe to get the residue */
> -	pipe = le32_to_cpu(csw.dCSWDataResidue);
> +	pipe = le32_to_cpu(csw->dCSWDataResidue);
>  	if (pipe == 0 && srb->datalen != 0 && srb->datalen - data_actlen != 0)
>  		pipe = srb->datalen - data_actlen;
> -	if (CSWSIGNATURE != le32_to_cpu(csw.dCSWSignature)) {
> +	if (CSWSIGNATURE != le32_to_cpu(csw->dCSWSignature)) {
>  		USB_STOR_PRINTF("!CSWSIGNATURE\n");
>  		usb_stor_BBB_reset(us);
>  		return USB_STOR_TRANSPORT_FAILED;
> -	} else if ((CBWTag - 1) != le32_to_cpu(csw.dCSWTag)) {
> +	} else if ((CBWTag - 1) != le32_to_cpu(csw->dCSWTag)) {
>  		USB_STOR_PRINTF("!Tag\n");
>  		usb_stor_BBB_reset(us);
>  		return USB_STOR_TRANSPORT_FAILED;
> -	} else if (csw.bCSWStatus > CSWSTATUS_PHASE) {
> +	} else if (csw->bCSWStatus > CSWSTATUS_PHASE) {
>  		USB_STOR_PRINTF(">PHASE\n");
>  		usb_stor_BBB_reset(us);
>  		return USB_STOR_TRANSPORT_FAILED;
> -	} else if (csw.bCSWStatus == CSWSTATUS_PHASE) {
> +	} else if (csw->bCSWStatus == CSWSTATUS_PHASE) {
>  		USB_STOR_PRINTF("=PHASE\n");
>  		usb_stor_BBB_reset(us);
>  		return USB_STOR_TRANSPORT_FAILED;
> @@ -782,7 +778,7 @@ again:
>  		USB_STOR_PRINTF("transferred %dB instead of %ldB\n",
>  			data_actlen, srb->datalen);
>  		return USB_STOR_TRANSPORT_FAILED;
> -	} else if (csw.bCSWStatus == CSWSTATUS_FAILED) {
> +	} else if (csw->bCSWStatus == CSWSTATUS_FAILED) {
>  		USB_STOR_PRINTF("FAILED\n");
>  		return USB_STOR_TRANSPORT_FAILED;
>  	}
> @@ -1343,7 +1339,8 @@ int usb_stor_get_info(struct usb_device *dev, struct
> us_data *ss, block_dev_desc_t *dev_desc)
>  {
>  	unsigned char perq, modi;
> -	unsigned long cap[2];
> +	ALLOC_CACHE_ALIGN_BUFFER(unsigned long, cap, 2);
> +	ALLOC_CACHE_ALIGN_BUFFER(unsigned char, usb_stor_buf, 36);
>  	unsigned long *capacity, *blksz;
>  	ccb *pccb = &usb_ccb;
> 
> @@ -1367,9 +1364,9 @@ int usb_stor_get_info(struct usb_device *dev, struct
> us_data *ss, /* drive is removable */
>  		dev_desc->removable = 1;
>  	}
> -	memcpy(&dev_desc->vendor[0], &usb_stor_buf[8], 8);
> -	memcpy(&dev_desc->product[0], &usb_stor_buf[16], 16);
> -	memcpy(&dev_desc->revision[0], &usb_stor_buf[32], 4);
> +	memcpy(&dev_desc->vendor[0], (const void *) &usb_stor_buf[8], 8);
> +	memcpy(&dev_desc->product[0], (const void *) &usb_stor_buf[16], 16);
> +	memcpy(&dev_desc->revision[0], (const void *) &usb_stor_buf[32], 4);
>  	dev_desc->vendor[8] = 0;
>  	dev_desc->product[16] = 0;
>  	dev_desc->revision[4] = 0;
> diff --git a/disk/part_dos.c b/disk/part_dos.c
> index b5bcb37..70211ee 100644
> --- a/disk/part_dos.c
> +++ b/disk/part_dos.c
> @@ -87,7 +87,7 @@ static int test_block_type(unsigned char *buffer)
> 
>  int test_part_dos (block_dev_desc_t *dev_desc)
>  {
> -	unsigned char buffer[dev_desc->blksz];
> +	ALLOC_CACHE_ALIGN_BUFFER(unsigned char, buffer, dev_desc->blksz);
> 
>  	if ((dev_desc->block_read(dev_desc->dev, 0, 1, (ulong *) buffer) != 1) 
||
>  	    (buffer[DOS_PART_MAGIC_OFFSET + 0] != 0x55) ||
> diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
> index d893b2a..eb5220b 100644
> --- a/drivers/usb/host/ehci-hcd.c
> +++ b/drivers/usb/host/ehci-hcd.c
> @@ -120,6 +120,14 @@ static struct descriptor {
>   */
>  static void flush_invalidate(u32 addr, int size, int flush)
>  {
> +	/*
> +	 * Size is the bytes actually moved during transaction,
> +	 * which may not equal to the cache line. This results
> +	 * stop address passed for invalidating cache may not be aligned.
> +	 * Therfore making size as multiple of cache line size.
> +	 */
> +	size = ALIGN(size, ARCH_DMA_MINALIGN);
> +
>  	if (flush)
>  		flush_dcache_range(addr, addr + size);
>  	else
> diff --git a/include/scsi.h b/include/scsi.h
> index c52759c..89ae45f 100644
> --- a/include/scsi.h
> +++ b/include/scsi.h
> @@ -26,7 +26,9 @@
> 
>  typedef struct SCSI_cmd_block{
>  	unsigned char		cmd[16];					
/* command				   */
> -	unsigned char		sense_buf[64];		/* for request sense */
> +	/* for request sense */
> +	unsigned char		sense_buf[64]
> +		__attribute__((aligned(ARCH_DMA_MINALIGN)));
>  	unsigned char		status;						
/* SCSI Status			 */
>  	unsigned char		target;						
/* Target ID				 */
>  	unsigned char		lun;							
/* Target LUN        */

Thanks!
Simon Glass - Feb. 27, 2012, 5:03 p.m.
Hi Marek,

On Mon, Feb 27, 2012 at 8:49 AM, Marek Vasut <marex@denx.de> wrote:
>> As DMA expects the buffers to be equal and larger then
>> cache lines, This aligns buffers at cacheline.
>>
>> Signed-off-by: Puneet Saxena <puneets@nvidia.com>
>> Signed-off-by: Jim Lin <jilin@nvidia.com>
>> ---
>>
>
> First of all, thanks for this patch, it really helps. But, there are a few
> things that concern me.
>
>> Changes for V2:
>>     - Use "ARCH_DMA_MINALIGN" directly
>>     - Use "ALIGN" to align size as cacheline
>>     - Removed headers from usb.h
>>     - Send 8 bytes of device descriptor size to read
>>       Max packet size
>> scsi.h header is needed to avoid extra memcpy from local buffer
>> to global buffer.
>>
>>  common/cmd_usb.c            |    3 +-
>>  common/usb.c                |   61
>> ++++++++++++++++++++++++------------------ common/usb_storage.c        |
>> 59 +++++++++++++++++++---------------------- disk/part_dos.c             |
>>    2 +-
>>  drivers/usb/host/ehci-hcd.c |    8 +++++
>>  include/scsi.h              |    4 ++-
>>  6 files changed, 77 insertions(+), 60 deletions(-)
>>
>> diff --git a/common/cmd_usb.c b/common/cmd_usb.c
>> index 320667f..bca9d94 100644
>> --- a/common/cmd_usb.c
>> +++ b/common/cmd_usb.c
>> @@ -150,7 +150,8 @@ void usb_display_class_sub(unsigned char dclass,
>> unsigned char subclass,
>>
>>  void usb_display_string(struct usb_device *dev, int index)
>>  {
>> -     char buffer[256];
>> +     ALLOC_CACHE_ALIGN_BUFFER(char, buffer, 256);
>
> Why not memalign(), do you want to allocate on stack so badly ?

This issue came up with MMC and there was a strong request not to
sprinkle the code with malloc. In fact the macro above was devised at
some cost just to solve this problem.

[snip]
>>  {
>>       int tries;
>> -     struct usb_port_status portsts;
>> +     ALLOC_CACHE_ALIGN_BUFFER(struct usb_port_status, portsts, 1);
>
> Looking at all this stuff, it's already allocated on stack. Ok, I already
> outlined first option -- memalign(), but that'd really put quite a strain on the
> memory allocator -- the other option (and maybe even better) is to use
> __attribute__((aligned)), which will allow the compiler to reorder data
> allocated on the stack so the array you create is aligned and not much space
> around on the stack is wasted. Apparently, ALLOC_CACHE_ALIGN_BUFFER() wastes a
> lot of space on the stack.

There was quite a bit of discussion about this related to MMC. I think
the current solution is the result of that discussion...

Regards,
Simon
Marek Vasut - Feb. 27, 2012, 5:11 p.m.
> Hi Marek,
> 
> On Mon, Feb 27, 2012 at 8:49 AM, Marek Vasut <marex@denx.de> wrote:
> >> As DMA expects the buffers to be equal and larger then
> >> cache lines, This aligns buffers at cacheline.
> >> 
> >> Signed-off-by: Puneet Saxena <puneets@nvidia.com>
> >> Signed-off-by: Jim Lin <jilin@nvidia.com>
> >> ---
> > 
> > First of all, thanks for this patch, it really helps. But, there are a
> > few things that concern me.
> > 
> >> Changes for V2:
> >>     - Use "ARCH_DMA_MINALIGN" directly
> >>     - Use "ALIGN" to align size as cacheline
> >>     - Removed headers from usb.h
> >>     - Send 8 bytes of device descriptor size to read
> >>       Max packet size
> >> scsi.h header is needed to avoid extra memcpy from local buffer
> >> to global buffer.
> >> 
> >>  common/cmd_usb.c            |    3 +-
> >>  common/usb.c                |   61
> >> ++++++++++++++++++++++++------------------ common/usb_storage.c        |
> >> 59 +++++++++++++++++++---------------------- disk/part_dos.c            
> >> | 2 +-
> >>  drivers/usb/host/ehci-hcd.c |    8 +++++
> >>  include/scsi.h              |    4 ++-
> >>  6 files changed, 77 insertions(+), 60 deletions(-)
> >> 
> >> diff --git a/common/cmd_usb.c b/common/cmd_usb.c
> >> index 320667f..bca9d94 100644
> >> --- a/common/cmd_usb.c
> >> +++ b/common/cmd_usb.c
> >> @@ -150,7 +150,8 @@ void usb_display_class_sub(unsigned char dclass,
> >> unsigned char subclass,
> >> 
> >>  void usb_display_string(struct usb_device *dev, int index)
> >>  {
> >> -     char buffer[256];
> >> +     ALLOC_CACHE_ALIGN_BUFFER(char, buffer, 256);
> > 
> > Why not memalign(), do you want to allocate on stack so badly ?
> 
> This issue came up with MMC and there was a strong request not to
> sprinkle the code with malloc. In fact the macro above was devised at
> some cost just to solve this problem.

This is really weird solution :-(
> 
> [snip]
> 
> >>  {
> >>       int tries;
> >> -     struct usb_port_status portsts;
> >> +     ALLOC_CACHE_ALIGN_BUFFER(struct usb_port_status, portsts, 1);
> > 
> > Looking at all this stuff, it's already allocated on stack. Ok, I already
> > outlined first option -- memalign(), but that'd really put quite a strain
> > on the memory allocator -- the other option (and maybe even better) is
> > to use __attribute__((aligned)), which will allow the compiler to
> > reorder data allocated on the stack so the array you create is aligned
> > and not much space around on the stack is wasted. Apparently,
> > ALLOC_CACHE_ALIGN_BUFFER() wastes a lot of space on the stack.
> 
> There was quite a bit of discussion about this related to MMC. I think
> the current solution is the result of that discussion...

Can you please point me to such discussion?

Thanks!
Simon Glass - Feb. 27, 2012, 5:27 p.m.
Hi Marek,

On Mon, Feb 27, 2012 at 9:11 AM, Marek Vasut <marex@denx.de> wrote:
>> Hi Marek,
>>
>> On Mon, Feb 27, 2012 at 8:49 AM, Marek Vasut <marex@denx.de> wrote:
>> >> As DMA expects the buffers to be equal and larger then
>> >> cache lines, This aligns buffers at cacheline.
>> >>
>> >> Signed-off-by: Puneet Saxena <puneets@nvidia.com>
>> >> Signed-off-by: Jim Lin <jilin@nvidia.com>
>> >> ---
>> >
>> > First of all, thanks for this patch, it really helps. But, there are a
>> > few things that concern me.
>> >
>> >> Changes for V2:
>> >>     - Use "ARCH_DMA_MINALIGN" directly
>> >>     - Use "ALIGN" to align size as cacheline
>> >>     - Removed headers from usb.h
>> >>     - Send 8 bytes of device descriptor size to read
>> >>       Max packet size
>> >> scsi.h header is needed to avoid extra memcpy from local buffer
>> >> to global buffer.
>> >>
>> >>  common/cmd_usb.c            |    3 +-
>> >>  common/usb.c                |   61
>> >> ++++++++++++++++++++++++------------------ common/usb_storage.c        |
>> >> 59 +++++++++++++++++++---------------------- disk/part_dos.c
>> >> | 2 +-
>> >>  drivers/usb/host/ehci-hcd.c |    8 +++++
>> >>  include/scsi.h              |    4 ++-
>> >>  6 files changed, 77 insertions(+), 60 deletions(-)
>> >>
>> >> diff --git a/common/cmd_usb.c b/common/cmd_usb.c
>> >> index 320667f..bca9d94 100644
>> >> --- a/common/cmd_usb.c
>> >> +++ b/common/cmd_usb.c
>> >> @@ -150,7 +150,8 @@ void usb_display_class_sub(unsigned char dclass,
>> >> unsigned char subclass,
>> >>
>> >>  void usb_display_string(struct usb_device *dev, int index)
>> >>  {
>> >> -     char buffer[256];
>> >> +     ALLOC_CACHE_ALIGN_BUFFER(char, buffer, 256);
>> >
>> > Why not memalign(), do you want to allocate on stack so badly ?
>>
>> This issue came up with MMC and there was a strong request not to
>> sprinkle the code with malloc. In fact the macro above was devised at
>> some cost just to solve this problem.
>
> This is really weird solution :-(

It's not pretty under the hood but it solves the problem well for MMC.
Apart from the alignment of buffers, it is as efficient as the
original code.

>>
>> [snip]
>>
>> >>  {
>> >>       int tries;
>> >> -     struct usb_port_status portsts;
>> >> +     ALLOC_CACHE_ALIGN_BUFFER(struct usb_port_status, portsts, 1);
>> >
>> > Looking at all this stuff, it's already allocated on stack. Ok, I already
>> > outlined first option -- memalign(), but that'd really put quite a strain
>> > on the memory allocator -- the other option (and maybe even better) is
>> > to use __attribute__((aligned)), which will allow the compiler to
>> > reorder data allocated on the stack so the array you create is aligned
>> > and not much space around on the stack is wasted. Apparently,
>> > ALLOC_CACHE_ALIGN_BUFFER() wastes a lot of space on the stack.
>>
>> There was quite a bit of discussion about this related to MMC. I think
>> the current solution is the result of that discussion...
>
> Can you please point me to such discussion?

Start here perhaps and look at the whole thread:

http://lists.denx.de/pipermail/u-boot/2011-August/099152.html

>
> Thanks!

Regards,
Simon
Puneet Saxena - Feb. 28, 2012, 9:34 a.m.
Hi Marek,
IMO, Simon has already mentioned the reason of using 
ALLOC_CACHE_ALIGN_BUFFER,
Please find below my explanation about other doubts.
On Monday 27 February 2012 10:19 PM, Marek Vasut wrote:
>> As DMA expects the buffers to be equal and larger then
>> cache lines, This aligns buffers at cacheline.
>>
>> Signed-off-by: Puneet Saxena<puneets@nvidia.com>
>> Signed-off-by: Jim Lin<jilin@nvidia.com>
>> ---
>>
> First of all, thanks for this patch, it really helps. But, there are a few
> things that concern me.
>
>> Changes for V2:
>>      - Use "ARCH_DMA_MINALIGN" directly
>>      - Use "ALIGN" to align size as cacheline
>>      - Removed headers from usb.h
>>      - Send 8 bytes of device descriptor size to read
>>        Max packet size
>> scsi.h header is needed to avoid extra memcpy from local buffer
>> to global buffer.
>>
>>   common/cmd_usb.c            |    3 +-
>>   common/usb.c                |   61
>> ++++++++++++++++++++++++------------------ common/usb_storage.c        |
>> 59 +++++++++++++++++++---------------------- disk/part_dos.c             |
>>     2 +-
>>   drivers/usb/host/ehci-hcd.c |    8 +++++
>>   include/scsi.h              |    4 ++-
>>   6 files changed, 77 insertions(+), 60 deletions(-)
>>
>> diff --git a/common/cmd_usb.c b/common/cmd_usb.c
>> index 320667f..bca9d94 100644
>> --- a/common/cmd_usb.c
>> +++ b/common/cmd_usb.c
>> @@ -150,7 +150,8 @@ void usb_display_class_sub(unsigned char dclass,
>> unsigned char subclass,
>>
>>   void usb_display_string(struct usb_device *dev, int index)
>>   {
>> -     char buffer[256];
>> +     ALLOC_CACHE_ALIGN_BUFFER(char, buffer, 256);
> Why not memalign(), do you want to allocate on stack so badly ?
>
>> +
>>        if (index != 0) {
>>                if (usb_string(dev, index,&buffer[0], 256)>  0)
>>                        printf("String: \"%s\"", buffer);
>> diff --git a/common/usb.c b/common/usb.c
>> index 63a11c8..2279459 100644
>> --- a/common/usb.c
>> +++ b/common/usb.c
>> @@ -73,7 +73,6 @@ static struct usb_device usb_dev[USB_MAX_DEVICE];
>>   static int dev_index;
>>   static int running;
>>   static int asynch_allowed;
>> -static struct devrequest setup_packet;
>>
>>   char usb_started; /* flag for the started/stopped USB status */
>>
>> @@ -185,23 +184,25 @@ int usb_control_msg(struct usb_device *dev, unsigned
>> int pipe, unsigned short value, unsigned short index,
>>                        void *data, unsigned short size, int timeout)
>>   {
>> +     ALLOC_CACHE_ALIGN_BUFFER(struct devrequest, setup_packet,
>> +             sizeof(struct devrequest));
> DTTO, btw does the setup packet need to be aligned too ?
>
We need to align setup packet as it's used for constructing SETUP 
Transfer descriptor buffer.
Plz see the code - Ehci-hcd.c (drivers\usb\host) Line No - 402.

>>        if ((timeout == 0)&&  (!asynch_allowed)) {
>>                /* request for a asynch control pipe is not allowed */
>>                return -1;
>>        }
>>
>>        /* set setup command */
>> -     setup_packet.requesttype = requesttype;
>> -     setup_packet.request = request;
>> -     setup_packet.value = cpu_to_le16(value);
>> -     setup_packet.index = cpu_to_le16(index);
>> -     setup_packet.length = cpu_to_le16(size);
>> +     setup_packet->requesttype = requesttype;
>> +     setup_packet->request = request;
>> +     setup_packet->value = cpu_to_le16(value);
>> +     setup_packet->index = cpu_to_le16(index);
>> +     setup_packet->length = cpu_to_le16(size);
>>        USB_PRINTF("usb_control_msg: request: 0x%X, requesttype: 0x%X, " \
>>                   "value 0x%X index 0x%X length 0x%X\n",
>>                   request, requesttype, value, index, size);
>>        dev->status = USB_ST_NOT_PROC; /*not yet processed */
>>
>> -     submit_control_msg(dev, pipe, data, size,&setup_packet);
>> +     submit_control_msg(dev, pipe, data, size, setup_packet);
>>        if (timeout == 0)
>>                return (int)size;
>>
>> @@ -694,7 +695,7 @@ static int usb_string_sub(struct usb_device *dev,
>> unsigned int langid, */
>>   int usb_string(struct usb_device *dev, int index, char *buf, size_t size)
>>   {
>> -     unsigned char mybuf[USB_BUFSIZ];
>> +     ALLOC_CACHE_ALIGN_BUFFER(unsigned char, mybuf, USB_BUFSIZ);
>>        unsigned char *tbuf;
>>        int err;
>>        unsigned int u, idx;
>> @@ -794,7 +795,7 @@ int usb_new_device(struct usb_device *dev)
>>   {
>>        int addr, err;
>>        int tmp;
>> -     unsigned char tmpbuf[USB_BUFSIZ];
>> +     ALLOC_CACHE_ALIGN_BUFFER(unsigned char, tmpbuf, USB_BUFSIZ);
> Why not simply memalign() them all?
>>        /* We still haven't set the Address yet */
>>        addr = dev->devnum;
>> @@ -842,7 +843,10 @@ int usb_new_device(struct usb_device *dev)
>>        dev->epmaxpacketin[0] = 64;
>>        dev->epmaxpacketout[0] = 64;
>>
>> -     err = usb_get_descriptor(dev, USB_DT_DEVICE, 0, desc, 64);
>> +     desc->bMaxPacketSize0 = 0;
>> +     /*8 bytes of the descriptor to read Max packet size*/
>> +     err = usb_get_descriptor(dev, USB_DT_DEVICE, 0, desc,
>> +                     8);
>>        if (err<  0) {
>>                USB_PRINTF("usb_new_device: usb_get_descriptor() failed\n");
>>                return 1;
>> @@ -905,7 +909,7 @@ int usb_new_device(struct usb_device *dev)
>>        tmp = sizeof(dev->descriptor);
>>
>>        err = usb_get_descriptor(dev, USB_DT_DEVICE, 0,
>> -&dev->descriptor, sizeof(dev->descriptor));
>> +                              desc, sizeof(dev->descriptor));
>>        if (err<  tmp) {
>>                if (err<  0)
>>                        printf("unable to get device descriptor (error=%d)\n",
>> @@ -915,14 +919,18 @@ int usb_new_device(struct usb_device *dev)
>>                                "(expected %i, got %i)\n", tmp, err);
>>                return 1;
>>        }
>> +     dev->descriptor.bcdUSB = desc->bcdUSB;
>> +     dev->descriptor.idVendor = desc->idVendor;
>> +     dev->descriptor.idProduct = desc->idProduct;
>> +     dev->descriptor.bcdDevice = desc->bcdDevice;
> Isn't the above change unrelated?
>
Agreed. Its not useful here at least. Will incorporate in next patch
>>        /* correct le values */
>>        le16_to_cpus(&dev->descriptor.bcdUSB);
>>        le16_to_cpus(&dev->descriptor.idVendor);
>>        le16_to_cpus(&dev->descriptor.idProduct);
>>        le16_to_cpus(&dev->descriptor.bcdDevice);
>>        /* only support for one config for now */
>> -     usb_get_configuration_no(dev,&tmpbuf[0], 0);
>> -     usb_parse_config(dev,&tmpbuf[0], 0);
>> +     usb_get_configuration_no(dev, tmpbuf, 0);
>> +     usb_parse_config(dev, tmpbuf, 0);
>>        usb_set_maxpacket(dev);
>>        /* we set the default configuration here */
>>        if (usb_set_configuration(dev, dev->config.desc.bConfigurationValue)) {
>> @@ -1076,7 +1084,7 @@ static int hub_port_reset(struct usb_device *dev, int
>> port, unsigned short *portstat)
>>   {
>>        int tries;
>> -     struct usb_port_status portsts;
>> +     ALLOC_CACHE_ALIGN_BUFFER(struct usb_port_status, portsts, 1);
> Looking at all this stuff, it's already allocated on stack. Ok, I already
> outlined first option -- memalign(), but that'd really put quite a strain on the
> memory allocator -- the other option (and maybe even better) is to use
> __attribute__((aligned)), which will allow the compiler to reorder data
> allocated on the stack so the array you create is aligned and not much space
> around on the stack is wasted. Apparently, ALLOC_CACHE_ALIGN_BUFFER() wastes a
> lot of space on the stack.
>
>>        unsigned short portstatus, portchange;
>>
>>        USB_HUB_PRINTF("hub_port_reset: resetting port %d...\n", port);
>> @@ -1085,13 +1093,13 @@ static int hub_port_reset(struct usb_device *dev,
>> int port, usb_set_port_feature(dev, port + 1, USB_PORT_FEAT_RESET);
>>                wait_ms(200);
>>
>> -             if (usb_get_port_status(dev, port + 1,&portsts)<  0) {
>> +             if (usb_get_port_status(dev, port + 1, portsts)<  0) {
>>                        USB_HUB_PRINTF("get_port_status failed status %lX\n",
>>                                        dev->status);
>>                        return -1;
>>                }
>> -             portstatus = le16_to_cpu(portsts.wPortStatus);
>> -             portchange = le16_to_cpu(portsts.wPortChange);
>> +             portstatus = le16_to_cpu(portsts->wPortStatus);
>> +             portchange = le16_to_cpu(portsts->wPortChange);
>>
>>                USB_HUB_PRINTF("portstatus %x, change %x, %s\n",
>>                                portstatus, portchange,
>> @@ -1129,19 +1137,19 @@ static int hub_port_reset(struct usb_device *dev,
>> int port, void usb_hub_port_connect_change(struct usb_device *dev, int
>> port) {
>>        struct usb_device *usb;
>> -     struct usb_port_status portsts;
>> +     ALLOC_CACHE_ALIGN_BUFFER(struct usb_port_status, portsts, 1);
>>        unsigned short portstatus;
>>
>>        /* Check status */
>> -     if (usb_get_port_status(dev, port + 1,&portsts)<  0) {
>> +     if (usb_get_port_status(dev, port + 1, portsts)<  0) {
>>                USB_HUB_PRINTF("get_port_status failed\n");
>>                return;
>>        }
>>
>> -     portstatus = le16_to_cpu(portsts.wPortStatus);
>> +     portstatus = le16_to_cpu(portsts->wPortStatus);
>>        USB_HUB_PRINTF("portstatus %x, change %x, %s\n",
>>                        portstatus,
>> -                     le16_to_cpu(portsts.wPortChange),
>> +                     le16_to_cpu(portsts->wPortChange),
>>                        portspeed(portstatus));
>>
>>        /* Clear the connection change status */
>> @@ -1190,7 +1198,8 @@ void usb_hub_port_connect_change(struct usb_device
>> *dev, int port) int usb_hub_configure(struct usb_device *dev)
>>   {
>>        int i;
>> -     unsigned char buffer[USB_BUFSIZ], *bitmap;
>> +     ALLOC_CACHE_ALIGN_BUFFER(unsigned char, buffer, USB_BUFSIZ);
>> +     unsigned char *bitmap;
>>        struct usb_hub_descriptor *descriptor;
>>        struct usb_hub_device *hub;
>>   #ifdef USB_HUB_DEBUG
>> @@ -1312,16 +1321,16 @@ int usb_hub_configure(struct usb_device *dev)
>>        usb_hub_power_on(hub);
>>
>>        for (i = 0; i<  dev->maxchild; i++) {
>> -             struct usb_port_status portsts;
>> +             ALLOC_CACHE_ALIGN_BUFFER(struct usb_port_status, portsts, 1);
>>                unsigned short portstatus, portchange;
>>
>> -             if (usb_get_port_status(dev, i + 1,&portsts)<  0) {
>> +             if (usb_get_port_status(dev, i + 1, portsts)<  0) {
>>                        USB_HUB_PRINTF("get_port_status failed\n");
>>                        continue;
>>                }
>>
>> -             portstatus = le16_to_cpu(portsts.wPortStatus);
>> -             portchange = le16_to_cpu(portsts.wPortChange);
>> +             portstatus = le16_to_cpu(portsts->wPortStatus);
>> +             portchange = le16_to_cpu(portsts->wPortChange);
>>                USB_HUB_PRINTF("Port %d Status %X Change %X\n",
>>                                i + 1, portstatus, portchange);
>>
>> diff --git a/common/usb_storage.c b/common/usb_storage.c
>> index de84c8d..88ca390 100644
>> --- a/common/usb_storage.c
>> +++ b/common/usb_storage.c
>> @@ -79,8 +79,7 @@ static const unsigned char us_direction[256/8] = {
>>   };
>>   #define US_DIRECTION(x) ((us_direction[x>>3]>>  (x&  7))&  1)
>>
>> -static unsigned char usb_stor_buf[512];
>> -static ccb usb_ccb;
>> +static ccb usb_ccb __attribute__((aligned(ARCH_DMA_MINALIGN)));
>>
>>   /*
>>    * CBI style
>> @@ -210,17 +209,17 @@ int usb_stor_info(void)
>>   static unsigned int usb_get_max_lun(struct us_data *us)
>>   {
>>        int len;
>> -     unsigned char result;
>> +     ALLOC_CACHE_ALIGN_BUFFER(unsigned char, result, 1);
>>        len = usb_control_msg(us->pusb_dev,
>>                              usb_rcvctrlpipe(us->pusb_dev, 0),
>>                              US_BBB_GET_MAX_LUN,
>>                              USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN,
>>                              0, us->ifnum,
>> -&result, sizeof(result),
>> +                           result, sizeof(char),
>>                              USB_CNTL_TIMEOUT * 5);
>>        USB_STOR_PRINTF("Get Max LUN ->  len = %i, result = %i\n",
>> -                     len, (int) result);
>> -     return (len>  0) ? result : 0;
>> +                     len, (int) *result);
>> +     return (len>  0) ? *result : 0;
>>   }
>>
>>   /*************************************************************************
>> ****** @@ -233,9 +232,6 @@ int usb_stor_scan(int mode)
>>        unsigned char i;
>>        struct usb_device *dev;
>>
>> -     /* GJ */
>> -     memset(usb_stor_buf, 0, sizeof(usb_stor_buf));
>> -
>>        if (mode == 1)
>>                printf("       scanning bus for storage devices... ");
>>
>> @@ -499,7 +495,7 @@ int usb_stor_BBB_comdat(ccb *srb, struct us_data *us)
>>        int actlen;
>>        int dir_in;
>>        unsigned int pipe;
>> -     umass_bbb_cbw_t cbw;
>> +     ALLOC_CACHE_ALIGN_BUFFER(umass_bbb_cbw_t, cbw, 1);
>>
>>        dir_in = US_DIRECTION(srb->cmd[0]);
>>
>> @@ -522,16 +518,16 @@ int usb_stor_BBB_comdat(ccb *srb, struct us_data *us)
>>        /* always OUT to the ep */
>>        pipe = usb_sndbulkpipe(us->pusb_dev, us->ep_out);
>>
>> -     cbw.dCBWSignature = cpu_to_le32(CBWSIGNATURE);
>> -     cbw.dCBWTag = cpu_to_le32(CBWTag++);
>> -     cbw.dCBWDataTransferLength = cpu_to_le32(srb->datalen);
>> -     cbw.bCBWFlags = (dir_in ? CBWFLAGS_IN : CBWFLAGS_OUT);
>> -     cbw.bCBWLUN = srb->lun;
>> -     cbw.bCDBLength = srb->cmdlen;
>> +     cbw->dCBWSignature = cpu_to_le32(CBWSIGNATURE);
>> +     cbw->dCBWTag = cpu_to_le32(CBWTag++);
>> +     cbw->dCBWDataTransferLength = cpu_to_le32(srb->datalen);
>> +     cbw->bCBWFlags = (dir_in ? CBWFLAGS_IN : CBWFLAGS_OUT);
>> +     cbw->bCBWLUN = srb->lun;
>> +     cbw->bCDBLength = srb->cmdlen;
>>        /* copy the command data into the CBW command data buffer */
>>        /* DST SRC LEN!!! */
>> -     memcpy(cbw.CBWCDB, srb->cmd, srb->cmdlen);
>> -     result = usb_bulk_msg(us->pusb_dev, pipe,&cbw, UMASS_BBB_CBW_SIZE,
>> +     memcpy(cbw->CBWCDB, srb->cmd, srb->cmdlen);
>> +     result = usb_bulk_msg(us->pusb_dev, pipe, cbw, UMASS_BBB_CBW_SIZE,
>>                              &actlen, USB_CNTL_TIMEOUT * 5);
>>        if (result<  0)
>>                USB_STOR_PRINTF("usb_stor_BBB_comdat:usb_bulk_msg error\n");
>> @@ -675,7 +671,7 @@ int usb_stor_BBB_transport(ccb *srb, struct us_data
>> *us) int dir_in;
>>        int actlen, data_actlen;
>>        unsigned int pipe, pipein, pipeout;
>> -     umass_bbb_csw_t csw;
>> +     ALLOC_CACHE_ALIGN_BUFFER(umass_bbb_csw_t, csw, 1);
>>   #ifdef BBB_XPORT_TRACE
>>        unsigned char *ptr;
>>        int index;
>> @@ -733,7 +729,7 @@ st:
>>        retry = 0;
>>   again:
>>        USB_STOR_PRINTF("STATUS phase\n");
>> -     result = usb_bulk_msg(us->pusb_dev, pipein,&csw, UMASS_BBB_CSW_SIZE,
>> +     result = usb_bulk_msg(us->pusb_dev, pipein, csw, UMASS_BBB_CSW_SIZE,
>>                                &actlen, USB_CNTL_TIMEOUT*5);
>>
>>        /* special handling of STALL in STATUS phase */
>> @@ -753,28 +749,28 @@ again:
>>                return USB_STOR_TRANSPORT_FAILED;
>>        }
>>   #ifdef BBB_XPORT_TRACE
>> -     ptr = (unsigned char *)&csw;
>> +     ptr = (unsigned char *)csw;
>>        for (index = 0; index<  UMASS_BBB_CSW_SIZE; index++)
>>                printf("ptr[%d] %#x ", index, ptr[index]);
>>        printf("\n");
>>   #endif
>>        /* misuse pipe to get the residue */
>> -     pipe = le32_to_cpu(csw.dCSWDataResidue);
>> +     pipe = le32_to_cpu(csw->dCSWDataResidue);
>>        if (pipe == 0&&  srb->datalen != 0&&  srb->datalen - data_actlen != 0)
>>                pipe = srb->datalen - data_actlen;
>> -     if (CSWSIGNATURE != le32_to_cpu(csw.dCSWSignature)) {
>> +     if (CSWSIGNATURE != le32_to_cpu(csw->dCSWSignature)) {
>>                USB_STOR_PRINTF("!CSWSIGNATURE\n");
>>                usb_stor_BBB_reset(us);
>>                return USB_STOR_TRANSPORT_FAILED;
>> -     } else if ((CBWTag - 1) != le32_to_cpu(csw.dCSWTag)) {
>> +     } else if ((CBWTag - 1) != le32_to_cpu(csw->dCSWTag)) {
>>                USB_STOR_PRINTF("!Tag\n");
>>                usb_stor_BBB_reset(us);
>>                return USB_STOR_TRANSPORT_FAILED;
>> -     } else if (csw.bCSWStatus>  CSWSTATUS_PHASE) {
>> +     } else if (csw->bCSWStatus>  CSWSTATUS_PHASE) {
>>                USB_STOR_PRINTF(">PHASE\n");
>>                usb_stor_BBB_reset(us);
>>                return USB_STOR_TRANSPORT_FAILED;
>> -     } else if (csw.bCSWStatus == CSWSTATUS_PHASE) {
>> +     } else if (csw->bCSWStatus == CSWSTATUS_PHASE) {
>>                USB_STOR_PRINTF("=PHASE\n");
>>                usb_stor_BBB_reset(us);
>>                return USB_STOR_TRANSPORT_FAILED;
>> @@ -782,7 +778,7 @@ again:
>>                USB_STOR_PRINTF("transferred %dB instead of %ldB\n",
>>                        data_actlen, srb->datalen);
>>                return USB_STOR_TRANSPORT_FAILED;
>> -     } else if (csw.bCSWStatus == CSWSTATUS_FAILED) {
>> +     } else if (csw->bCSWStatus == CSWSTATUS_FAILED) {
>>                USB_STOR_PRINTF("FAILED\n");
>>                return USB_STOR_TRANSPORT_FAILED;
>>        }
>> @@ -1343,7 +1339,8 @@ int usb_stor_get_info(struct usb_device *dev, struct
>> us_data *ss, block_dev_desc_t *dev_desc)
>>   {
>>        unsigned char perq, modi;
>> -     unsigned long cap[2];
>> +     ALLOC_CACHE_ALIGN_BUFFER(unsigned long, cap, 2);
>> +     ALLOC_CACHE_ALIGN_BUFFER(unsigned char, usb_stor_buf, 36);
>>        unsigned long *capacity, *blksz;
>>        ccb *pccb =&usb_ccb;
>>
>> @@ -1367,9 +1364,9 @@ int usb_stor_get_info(struct usb_device *dev, struct
>> us_data *ss, /* drive is removable */
>>                dev_desc->removable = 1;
>>        }
>> -     memcpy(&dev_desc->vendor[0],&usb_stor_buf[8], 8);
>> -     memcpy(&dev_desc->product[0],&usb_stor_buf[16], 16);
>> -     memcpy(&dev_desc->revision[0],&usb_stor_buf[32], 4);
>> +     memcpy(&dev_desc->vendor[0], (const void *)&usb_stor_buf[8], 8);
>> +     memcpy(&dev_desc->product[0], (const void *)&usb_stor_buf[16], 16);
>> +     memcpy(&dev_desc->revision[0], (const void *)&usb_stor_buf[32], 4);
>>        dev_desc->vendor[8] = 0;
>>        dev_desc->product[16] = 0;
>>        dev_desc->revision[4] = 0;
>> diff --git a/disk/part_dos.c b/disk/part_dos.c
>> index b5bcb37..70211ee 100644
>> --- a/disk/part_dos.c
>> +++ b/disk/part_dos.c
>> @@ -87,7 +87,7 @@ static int test_block_type(unsigned char *buffer)
>>
>>   int test_part_dos (block_dev_desc_t *dev_desc)
>>   {
>> -     unsigned char buffer[dev_desc->blksz];
>> +     ALLOC_CACHE_ALIGN_BUFFER(unsigned char, buffer, dev_desc->blksz);
>>
>>        if ((dev_desc->block_read(dev_desc->dev, 0, 1, (ulong *) buffer) != 1)
> ||
>>            (buffer[DOS_PART_MAGIC_OFFSET + 0] != 0x55) ||
>> diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
>> index d893b2a..eb5220b 100644
>> --- a/drivers/usb/host/ehci-hcd.c
>> +++ b/drivers/usb/host/ehci-hcd.c
>> @@ -120,6 +120,14 @@ static struct descriptor {
>>    */
>>   static void flush_invalidate(u32 addr, int size, int flush)
>>   {
>> +     /*
>> +      * Size is the bytes actually moved during transaction,
>> +      * which may not equal to the cache line. This results
>> +      * stop address passed for invalidating cache may not be aligned.
>> +      * Therfore making size as multiple of cache line size.
>> +      */
>> +     size = ALIGN(size, ARCH_DMA_MINALIGN);
>> +
>>        if (flush)
>>                flush_dcache_range(addr, addr + size);
>>        else
>> diff --git a/include/scsi.h b/include/scsi.h
>> index c52759c..89ae45f 100644
>> --- a/include/scsi.h
>> +++ b/include/scsi.h
>> @@ -26,7 +26,9 @@
>>
>>   typedef struct SCSI_cmd_block{
>>        unsigned char           cmd[16];
> /* command                                 */
>> -     unsigned char           sense_buf[64];          /* for request sense */
>> +     /* for request sense */
>> +     unsigned char           sense_buf[64]
>> +             __attribute__((aligned(ARCH_DMA_MINALIGN)));
>>        unsigned char           status;
> /* SCSI Status                   */
>>        unsigned char           target;
> /* Target ID                             */
>>        unsigned char           lun;
> /* Target LUN        */
>
> Thanks!
Thanx,
Puneet

-----------------------------------------------------------------------------------
This email message is for the sole use of the intended recipient(s) and may contain
confidential information.  Any unauthorized review, use, disclosure or distribution
is prohibited.  If you are not the intended recipient, please contact the sender by
reply email and destroy all copies of the original message.
-----------------------------------------------------------------------------------
Marek Vasut - Feb. 29, 2012, 9:38 p.m.
> Hi Marek,
> IMO, Simon has already mentioned the reason of using
> ALLOC_CACHE_ALIGN_BUFFER,
> Please find below my explanation about other doubts.
> 
> On Monday 27 February 2012 10:19 PM, Marek Vasut wrote:
> >> As DMA expects the buffers to be equal and larger then
> >> cache lines, This aligns buffers at cacheline.
> >> 
> >> Signed-off-by: Puneet Saxena<puneets@nvidia.com>
> >> Signed-off-by: Jim Lin<jilin@nvidia.com>
> >> ---
> > 
> > First of all, thanks for this patch, it really helps. But, there are a
> > few things that concern me.
> > 
> >> Changes for V2:
> >>      - Use "ARCH_DMA_MINALIGN" directly
> >>      - Use "ALIGN" to align size as cacheline
> >>      - Removed headers from usb.h
> >>      - Send 8 bytes of device descriptor size to read
> >>      
> >>        Max packet size
> >> 
> >> scsi.h header is needed to avoid extra memcpy from local buffer
> >> to global buffer.
> >> 
> >>   common/cmd_usb.c            |    3 +-
> >>   common/usb.c                |   61
> >> 
> >> ++++++++++++++++++++++++------------------ common/usb_storage.c        |
> >> 59 +++++++++++++++++++---------------------- disk/part_dos.c            
> >> |
> >> 
> >>     2 +-
> >>   
> >>   drivers/usb/host/ehci-hcd.c |    8 +++++
> >>   include/scsi.h              |    4 ++-
> >>   6 files changed, 77 insertions(+), 60 deletions(-)
> >> 
> >> diff --git a/common/cmd_usb.c b/common/cmd_usb.c

I see, thanks for explaining!

M

Patch

diff --git a/common/cmd_usb.c b/common/cmd_usb.c
index 320667f..bca9d94 100644
--- a/common/cmd_usb.c
+++ b/common/cmd_usb.c
@@ -150,7 +150,8 @@  void usb_display_class_sub(unsigned char dclass, unsigned char subclass,
 
 void usb_display_string(struct usb_device *dev, int index)
 {
-	char buffer[256];
+	ALLOC_CACHE_ALIGN_BUFFER(char, buffer, 256);
+
 	if (index != 0) {
 		if (usb_string(dev, index, &buffer[0], 256) > 0)
 			printf("String: \"%s\"", buffer);
diff --git a/common/usb.c b/common/usb.c
index 63a11c8..2279459 100644
--- a/common/usb.c
+++ b/common/usb.c
@@ -73,7 +73,6 @@  static struct usb_device usb_dev[USB_MAX_DEVICE];
 static int dev_index;
 static int running;
 static int asynch_allowed;
-static struct devrequest setup_packet;
 
 char usb_started; /* flag for the started/stopped USB status */
 
@@ -185,23 +184,25 @@  int usb_control_msg(struct usb_device *dev, unsigned int pipe,
 			unsigned short value, unsigned short index,
 			void *data, unsigned short size, int timeout)
 {
+	ALLOC_CACHE_ALIGN_BUFFER(struct devrequest, setup_packet,
+		sizeof(struct devrequest));
 	if ((timeout == 0) && (!asynch_allowed)) {
 		/* request for a asynch control pipe is not allowed */
 		return -1;
 	}
 
 	/* set setup command */
-	setup_packet.requesttype = requesttype;
-	setup_packet.request = request;
-	setup_packet.value = cpu_to_le16(value);
-	setup_packet.index = cpu_to_le16(index);
-	setup_packet.length = cpu_to_le16(size);
+	setup_packet->requesttype = requesttype;
+	setup_packet->request = request;
+	setup_packet->value = cpu_to_le16(value);
+	setup_packet->index = cpu_to_le16(index);
+	setup_packet->length = cpu_to_le16(size);
 	USB_PRINTF("usb_control_msg: request: 0x%X, requesttype: 0x%X, " \
 		   "value 0x%X index 0x%X length 0x%X\n",
 		   request, requesttype, value, index, size);
 	dev->status = USB_ST_NOT_PROC; /*not yet processed */
 
-	submit_control_msg(dev, pipe, data, size, &setup_packet);
+	submit_control_msg(dev, pipe, data, size, setup_packet);
 	if (timeout == 0)
 		return (int)size;
 
@@ -694,7 +695,7 @@  static int usb_string_sub(struct usb_device *dev, unsigned int langid,
  */
 int usb_string(struct usb_device *dev, int index, char *buf, size_t size)
 {
-	unsigned char mybuf[USB_BUFSIZ];
+	ALLOC_CACHE_ALIGN_BUFFER(unsigned char, mybuf, USB_BUFSIZ);
 	unsigned char *tbuf;
 	int err;
 	unsigned int u, idx;
@@ -794,7 +795,7 @@  int usb_new_device(struct usb_device *dev)
 {
 	int addr, err;
 	int tmp;
-	unsigned char tmpbuf[USB_BUFSIZ];
+	ALLOC_CACHE_ALIGN_BUFFER(unsigned char, tmpbuf, USB_BUFSIZ);
 
 	/* We still haven't set the Address yet */
 	addr = dev->devnum;
@@ -842,7 +843,10 @@  int usb_new_device(struct usb_device *dev)
 	dev->epmaxpacketin[0] = 64;
 	dev->epmaxpacketout[0] = 64;
 
-	err = usb_get_descriptor(dev, USB_DT_DEVICE, 0, desc, 64);
+	desc->bMaxPacketSize0 = 0;
+	/*8 bytes of the descriptor to read Max packet size*/
+	err = usb_get_descriptor(dev, USB_DT_DEVICE, 0, desc,
+			8);
 	if (err < 0) {
 		USB_PRINTF("usb_new_device: usb_get_descriptor() failed\n");
 		return 1;
@@ -905,7 +909,7 @@  int usb_new_device(struct usb_device *dev)
 	tmp = sizeof(dev->descriptor);
 
 	err = usb_get_descriptor(dev, USB_DT_DEVICE, 0,
-				 &dev->descriptor, sizeof(dev->descriptor));
+				 desc, sizeof(dev->descriptor));
 	if (err < tmp) {
 		if (err < 0)
 			printf("unable to get device descriptor (error=%d)\n",
@@ -915,14 +919,18 @@  int usb_new_device(struct usb_device *dev)
 				"(expected %i, got %i)\n", tmp, err);
 		return 1;
 	}
+	dev->descriptor.bcdUSB = desc->bcdUSB;
+	dev->descriptor.idVendor = desc->idVendor;
+	dev->descriptor.idProduct = desc->idProduct;
+	dev->descriptor.bcdDevice = desc->bcdDevice;
 	/* correct le values */
 	le16_to_cpus(&dev->descriptor.bcdUSB);
 	le16_to_cpus(&dev->descriptor.idVendor);
 	le16_to_cpus(&dev->descriptor.idProduct);
 	le16_to_cpus(&dev->descriptor.bcdDevice);
 	/* only support for one config for now */
-	usb_get_configuration_no(dev, &tmpbuf[0], 0);
-	usb_parse_config(dev, &tmpbuf[0], 0);
+	usb_get_configuration_no(dev, tmpbuf, 0);
+	usb_parse_config(dev, tmpbuf, 0);
 	usb_set_maxpacket(dev);
 	/* we set the default configuration here */
 	if (usb_set_configuration(dev, dev->config.desc.bConfigurationValue)) {
@@ -1076,7 +1084,7 @@  static int hub_port_reset(struct usb_device *dev, int port,
 			unsigned short *portstat)
 {
 	int tries;
-	struct usb_port_status portsts;
+	ALLOC_CACHE_ALIGN_BUFFER(struct usb_port_status, portsts, 1);
 	unsigned short portstatus, portchange;
 
 	USB_HUB_PRINTF("hub_port_reset: resetting port %d...\n", port);
@@ -1085,13 +1093,13 @@  static int hub_port_reset(struct usb_device *dev, int port,
 		usb_set_port_feature(dev, port + 1, USB_PORT_FEAT_RESET);
 		wait_ms(200);
 
-		if (usb_get_port_status(dev, port + 1, &portsts) < 0) {
+		if (usb_get_port_status(dev, port + 1, portsts) < 0) {
 			USB_HUB_PRINTF("get_port_status failed status %lX\n",
 					dev->status);
 			return -1;
 		}
-		portstatus = le16_to_cpu(portsts.wPortStatus);
-		portchange = le16_to_cpu(portsts.wPortChange);
+		portstatus = le16_to_cpu(portsts->wPortStatus);
+		portchange = le16_to_cpu(portsts->wPortChange);
 
 		USB_HUB_PRINTF("portstatus %x, change %x, %s\n",
 				portstatus, portchange,
@@ -1129,19 +1137,19 @@  static int hub_port_reset(struct usb_device *dev, int port,
 void usb_hub_port_connect_change(struct usb_device *dev, int port)
 {
 	struct usb_device *usb;
-	struct usb_port_status portsts;
+	ALLOC_CACHE_ALIGN_BUFFER(struct usb_port_status, portsts, 1);
 	unsigned short portstatus;
 
 	/* Check status */
-	if (usb_get_port_status(dev, port + 1, &portsts) < 0) {
+	if (usb_get_port_status(dev, port + 1, portsts) < 0) {
 		USB_HUB_PRINTF("get_port_status failed\n");
 		return;
 	}
 
-	portstatus = le16_to_cpu(portsts.wPortStatus);
+	portstatus = le16_to_cpu(portsts->wPortStatus);
 	USB_HUB_PRINTF("portstatus %x, change %x, %s\n",
 			portstatus,
-			le16_to_cpu(portsts.wPortChange),
+			le16_to_cpu(portsts->wPortChange),
 			portspeed(portstatus));
 
 	/* Clear the connection change status */
@@ -1190,7 +1198,8 @@  void usb_hub_port_connect_change(struct usb_device *dev, int port)
 int usb_hub_configure(struct usb_device *dev)
 {
 	int i;
-	unsigned char buffer[USB_BUFSIZ], *bitmap;
+	ALLOC_CACHE_ALIGN_BUFFER(unsigned char, buffer, USB_BUFSIZ);
+	unsigned char *bitmap;
 	struct usb_hub_descriptor *descriptor;
 	struct usb_hub_device *hub;
 #ifdef USB_HUB_DEBUG
@@ -1312,16 +1321,16 @@  int usb_hub_configure(struct usb_device *dev)
 	usb_hub_power_on(hub);
 
 	for (i = 0; i < dev->maxchild; i++) {
-		struct usb_port_status portsts;
+		ALLOC_CACHE_ALIGN_BUFFER(struct usb_port_status, portsts, 1);
 		unsigned short portstatus, portchange;
 
-		if (usb_get_port_status(dev, i + 1, &portsts) < 0) {
+		if (usb_get_port_status(dev, i + 1, portsts) < 0) {
 			USB_HUB_PRINTF("get_port_status failed\n");
 			continue;
 		}
 
-		portstatus = le16_to_cpu(portsts.wPortStatus);
-		portchange = le16_to_cpu(portsts.wPortChange);
+		portstatus = le16_to_cpu(portsts->wPortStatus);
+		portchange = le16_to_cpu(portsts->wPortChange);
 		USB_HUB_PRINTF("Port %d Status %X Change %X\n",
 				i + 1, portstatus, portchange);
 
diff --git a/common/usb_storage.c b/common/usb_storage.c
index de84c8d..88ca390 100644
--- a/common/usb_storage.c
+++ b/common/usb_storage.c
@@ -79,8 +79,7 @@  static const unsigned char us_direction[256/8] = {
 };
 #define US_DIRECTION(x) ((us_direction[x>>3] >> (x & 7)) & 1)
 
-static unsigned char usb_stor_buf[512];
-static ccb usb_ccb;
+static ccb usb_ccb __attribute__((aligned(ARCH_DMA_MINALIGN)));
 
 /*
  * CBI style
@@ -210,17 +209,17 @@  int usb_stor_info(void)
 static unsigned int usb_get_max_lun(struct us_data *us)
 {
 	int len;
-	unsigned char result;
+	ALLOC_CACHE_ALIGN_BUFFER(unsigned char, result, 1);
 	len = usb_control_msg(us->pusb_dev,
 			      usb_rcvctrlpipe(us->pusb_dev, 0),
 			      US_BBB_GET_MAX_LUN,
 			      USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN,
 			      0, us->ifnum,
-			      &result, sizeof(result),
+			      result, sizeof(char),
 			      USB_CNTL_TIMEOUT * 5);
 	USB_STOR_PRINTF("Get Max LUN -> len = %i, result = %i\n",
-			len, (int) result);
-	return (len > 0) ? result : 0;
+			len, (int) *result);
+	return (len > 0) ? *result : 0;
 }
 
 /*******************************************************************************
@@ -233,9 +232,6 @@  int usb_stor_scan(int mode)
 	unsigned char i;
 	struct usb_device *dev;
 
-	/* GJ */
-	memset(usb_stor_buf, 0, sizeof(usb_stor_buf));
-
 	if (mode == 1)
 		printf("       scanning bus for storage devices... ");
 
@@ -499,7 +495,7 @@  int usb_stor_BBB_comdat(ccb *srb, struct us_data *us)
 	int actlen;
 	int dir_in;
 	unsigned int pipe;
-	umass_bbb_cbw_t cbw;
+	ALLOC_CACHE_ALIGN_BUFFER(umass_bbb_cbw_t, cbw, 1);
 
 	dir_in = US_DIRECTION(srb->cmd[0]);
 
@@ -522,16 +518,16 @@  int usb_stor_BBB_comdat(ccb *srb, struct us_data *us)
 	/* always OUT to the ep */
 	pipe = usb_sndbulkpipe(us->pusb_dev, us->ep_out);
 
-	cbw.dCBWSignature = cpu_to_le32(CBWSIGNATURE);
-	cbw.dCBWTag = cpu_to_le32(CBWTag++);
-	cbw.dCBWDataTransferLength = cpu_to_le32(srb->datalen);
-	cbw.bCBWFlags = (dir_in ? CBWFLAGS_IN : CBWFLAGS_OUT);
-	cbw.bCBWLUN = srb->lun;
-	cbw.bCDBLength = srb->cmdlen;
+	cbw->dCBWSignature = cpu_to_le32(CBWSIGNATURE);
+	cbw->dCBWTag = cpu_to_le32(CBWTag++);
+	cbw->dCBWDataTransferLength = cpu_to_le32(srb->datalen);
+	cbw->bCBWFlags = (dir_in ? CBWFLAGS_IN : CBWFLAGS_OUT);
+	cbw->bCBWLUN = srb->lun;
+	cbw->bCDBLength = srb->cmdlen;
 	/* copy the command data into the CBW command data buffer */
 	/* DST SRC LEN!!! */
-	memcpy(cbw.CBWCDB, srb->cmd, srb->cmdlen);
-	result = usb_bulk_msg(us->pusb_dev, pipe, &cbw, UMASS_BBB_CBW_SIZE,
+	memcpy(cbw->CBWCDB, srb->cmd, srb->cmdlen);
+	result = usb_bulk_msg(us->pusb_dev, pipe, cbw, UMASS_BBB_CBW_SIZE,
 			      &actlen, USB_CNTL_TIMEOUT * 5);
 	if (result < 0)
 		USB_STOR_PRINTF("usb_stor_BBB_comdat:usb_bulk_msg error\n");
@@ -675,7 +671,7 @@  int usb_stor_BBB_transport(ccb *srb, struct us_data *us)
 	int dir_in;
 	int actlen, data_actlen;
 	unsigned int pipe, pipein, pipeout;
-	umass_bbb_csw_t csw;
+	ALLOC_CACHE_ALIGN_BUFFER(umass_bbb_csw_t, csw, 1);
 #ifdef BBB_XPORT_TRACE
 	unsigned char *ptr;
 	int index;
@@ -733,7 +729,7 @@  st:
 	retry = 0;
 again:
 	USB_STOR_PRINTF("STATUS phase\n");
-	result = usb_bulk_msg(us->pusb_dev, pipein, &csw, UMASS_BBB_CSW_SIZE,
+	result = usb_bulk_msg(us->pusb_dev, pipein, csw, UMASS_BBB_CSW_SIZE,
 				&actlen, USB_CNTL_TIMEOUT*5);
 
 	/* special handling of STALL in STATUS phase */
@@ -753,28 +749,28 @@  again:
 		return USB_STOR_TRANSPORT_FAILED;
 	}
 #ifdef BBB_XPORT_TRACE
-	ptr = (unsigned char *)&csw;
+	ptr = (unsigned char *)csw;
 	for (index = 0; index < UMASS_BBB_CSW_SIZE; index++)
 		printf("ptr[%d] %#x ", index, ptr[index]);
 	printf("\n");
 #endif
 	/* misuse pipe to get the residue */
-	pipe = le32_to_cpu(csw.dCSWDataResidue);
+	pipe = le32_to_cpu(csw->dCSWDataResidue);
 	if (pipe == 0 && srb->datalen != 0 && srb->datalen - data_actlen != 0)
 		pipe = srb->datalen - data_actlen;
-	if (CSWSIGNATURE != le32_to_cpu(csw.dCSWSignature)) {
+	if (CSWSIGNATURE != le32_to_cpu(csw->dCSWSignature)) {
 		USB_STOR_PRINTF("!CSWSIGNATURE\n");
 		usb_stor_BBB_reset(us);
 		return USB_STOR_TRANSPORT_FAILED;
-	} else if ((CBWTag - 1) != le32_to_cpu(csw.dCSWTag)) {
+	} else if ((CBWTag - 1) != le32_to_cpu(csw->dCSWTag)) {
 		USB_STOR_PRINTF("!Tag\n");
 		usb_stor_BBB_reset(us);
 		return USB_STOR_TRANSPORT_FAILED;
-	} else if (csw.bCSWStatus > CSWSTATUS_PHASE) {
+	} else if (csw->bCSWStatus > CSWSTATUS_PHASE) {
 		USB_STOR_PRINTF(">PHASE\n");
 		usb_stor_BBB_reset(us);
 		return USB_STOR_TRANSPORT_FAILED;
-	} else if (csw.bCSWStatus == CSWSTATUS_PHASE) {
+	} else if (csw->bCSWStatus == CSWSTATUS_PHASE) {
 		USB_STOR_PRINTF("=PHASE\n");
 		usb_stor_BBB_reset(us);
 		return USB_STOR_TRANSPORT_FAILED;
@@ -782,7 +778,7 @@  again:
 		USB_STOR_PRINTF("transferred %dB instead of %ldB\n",
 			data_actlen, srb->datalen);
 		return USB_STOR_TRANSPORT_FAILED;
-	} else if (csw.bCSWStatus == CSWSTATUS_FAILED) {
+	} else if (csw->bCSWStatus == CSWSTATUS_FAILED) {
 		USB_STOR_PRINTF("FAILED\n");
 		return USB_STOR_TRANSPORT_FAILED;
 	}
@@ -1343,7 +1339,8 @@  int usb_stor_get_info(struct usb_device *dev, struct us_data *ss,
 		      block_dev_desc_t *dev_desc)
 {
 	unsigned char perq, modi;
-	unsigned long cap[2];
+	ALLOC_CACHE_ALIGN_BUFFER(unsigned long, cap, 2);
+	ALLOC_CACHE_ALIGN_BUFFER(unsigned char, usb_stor_buf, 36);
 	unsigned long *capacity, *blksz;
 	ccb *pccb = &usb_ccb;
 
@@ -1367,9 +1364,9 @@  int usb_stor_get_info(struct usb_device *dev, struct us_data *ss,
 		/* drive is removable */
 		dev_desc->removable = 1;
 	}
-	memcpy(&dev_desc->vendor[0], &usb_stor_buf[8], 8);
-	memcpy(&dev_desc->product[0], &usb_stor_buf[16], 16);
-	memcpy(&dev_desc->revision[0], &usb_stor_buf[32], 4);
+	memcpy(&dev_desc->vendor[0], (const void *) &usb_stor_buf[8], 8);
+	memcpy(&dev_desc->product[0], (const void *) &usb_stor_buf[16], 16);
+	memcpy(&dev_desc->revision[0], (const void *) &usb_stor_buf[32], 4);
 	dev_desc->vendor[8] = 0;
 	dev_desc->product[16] = 0;
 	dev_desc->revision[4] = 0;
diff --git a/disk/part_dos.c b/disk/part_dos.c
index b5bcb37..70211ee 100644
--- a/disk/part_dos.c
+++ b/disk/part_dos.c
@@ -87,7 +87,7 @@  static int test_block_type(unsigned char *buffer)
 
 int test_part_dos (block_dev_desc_t *dev_desc)
 {
-	unsigned char buffer[dev_desc->blksz];
+	ALLOC_CACHE_ALIGN_BUFFER(unsigned char, buffer, dev_desc->blksz);
 
 	if ((dev_desc->block_read(dev_desc->dev, 0, 1, (ulong *) buffer) != 1) ||
 	    (buffer[DOS_PART_MAGIC_OFFSET + 0] != 0x55) ||
diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index d893b2a..eb5220b 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -120,6 +120,14 @@  static struct descriptor {
  */
 static void flush_invalidate(u32 addr, int size, int flush)
 {
+	/*
+	 * Size is the bytes actually moved during transaction,
+	 * which may not equal to the cache line. This results
+	 * stop address passed for invalidating cache may not be aligned.
+	 * Therfore making size as multiple of cache line size.
+	 */
+	size = ALIGN(size, ARCH_DMA_MINALIGN);
+
 	if (flush)
 		flush_dcache_range(addr, addr + size);
 	else
diff --git a/include/scsi.h b/include/scsi.h
index c52759c..89ae45f 100644
--- a/include/scsi.h
+++ b/include/scsi.h
@@ -26,7 +26,9 @@ 
 
 typedef struct SCSI_cmd_block{
 	unsigned char		cmd[16];					/* command				   */
-	unsigned char		sense_buf[64];		/* for request sense */
+	/* for request sense */
+	unsigned char		sense_buf[64]
+		__attribute__((aligned(ARCH_DMA_MINALIGN)));
 	unsigned char		status;						/* SCSI Status			 */
 	unsigned char		target;						/* Target ID				 */
 	unsigned char		lun;							/* Target LUN        */