Patchwork Oneiric SRU: xHCI: AMD isoc link TRB chain bit quirk

login
register
mail settings
Submitter Tim Gardner
Date Oct. 13, 2011, 4:42 p.m.
Message ID <20111013164228.C2250F88CF@lochsa.rtg.net>
Download mbox | patch
Permalink /patch/119568/
State New
Headers show

Comments

Tim Gardner - Oct. 13, 2011, 4:42 p.m.
From 931a2ec0f2c74498f861c75c44f973c615285b11 Mon Sep 17 00:00:00 2001
From: Andiry Xu <andiry.xu@amd.com>
Date: Fri, 23 Sep 2011 14:19:54 -0700
Subject: [PATCH] UBUNTU: SAUCE: xHCI: AMD isoc link TRB chain bit quirk

BugLink: http://bugs.launchpad.net/bugs/872811

Setting the chain (CH) bit in the link TRB of isochronous transfer rings
is required by AMD 0.96 xHCI host controller to successfully transverse
multi-TRB TD that span through different memory segments.

When a Missed Service Error event occurs, if the chain bit is not set in
the link TRB and the host skips TDs which just across a link TRB, the
host may falsely recognize the link TRB as a normal TRB. You can see
this may cause big trouble - the host does not jump to the right address
which is pointed by the link TRB, but continue fetching the memory which
is after the link TRB address, which may not even belong to the host,
and the result cannot be predicted.

This causes some big problems. Without the former patch I sent: "xHCI:
prevent infinite loop when processing MSE event", the system may hang.
With that patch applied, system does not hang, but the host still access
wrong memory address and isoc transfer will fail. With this patch,
isochronous transfer works as expected.

This patch should be applied to kernels as old as 2.6.36, which was when
the first isochronous support was added for the xHCI host controller.

Signed-off-by: Andiry Xu <andiry.xu@amd.com>
Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Cc: stable@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

cherry-picked from git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git usb-next
Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
---
 drivers/usb/host/xhci-mem.c  |   32 ++++++++++++++-----------
 drivers/usb/host/xhci-pci.c  |    3 ++
 drivers/usb/host/xhci-ring.c |   53 +++++++++++++++++++++++-------------------
 drivers/usb/host/xhci.h      |    1 +
 4 files changed, 51 insertions(+), 38 deletions(-)
Leann Ogasawara - Oct. 13, 2011, 7:43 p.m.
On Thu, 2011-10-13 at 10:42 -0600, Tim Gardner wrote:
> From 931a2ec0f2c74498f861c75c44f973c615285b11 Mon Sep 17 00:00:00 2001
> From: Andiry Xu <andiry.xu@amd.com>
> Date: Fri, 23 Sep 2011 14:19:54 -0700
> Subject: [PATCH] UBUNTU: SAUCE: xHCI: AMD isoc link TRB chain bit quirk
> 
> BugLink: http://bugs.launchpad.net/bugs/872811
> 
> Setting the chain (CH) bit in the link TRB of isochronous transfer rings
> is required by AMD 0.96 xHCI host controller to successfully transverse
> multi-TRB TD that span through different memory segments.
> 
> When a Missed Service Error event occurs, if the chain bit is not set in
> the link TRB and the host skips TDs which just across a link TRB, the
> host may falsely recognize the link TRB as a normal TRB. You can see
> this may cause big trouble - the host does not jump to the right address
> which is pointed by the link TRB, but continue fetching the memory which
> is after the link TRB address, which may not even belong to the host,
> and the result cannot be predicted.
> 
> This causes some big problems. Without the former patch I sent: "xHCI:
> prevent infinite loop when processing MSE event", the system may hang.
> With that patch applied, system does not hang, but the host still access
> wrong memory address and isoc transfer will fail. With this patch,
> isochronous transfer works as expected.
> 
> This patch should be applied to kernels as old as 2.6.36, which was when
> the first isochronous support was added for the xHCI host controller.
> 
> Signed-off-by: Andiry Xu <andiry.xu@amd.com>
> Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
> Cc: stable@kernel.org
> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
> 
> cherry-picked from git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git usb-next
> Signed-off-by: Tim Gardner <tim.gardner@canonical.com>

Acked-by: Leann Ogasawara <leann.ogasawara@canonical.com>

> ---
>  drivers/usb/host/xhci-mem.c  |   32 ++++++++++++++-----------
>  drivers/usb/host/xhci-pci.c  |    3 ++
>  drivers/usb/host/xhci-ring.c |   53 +++++++++++++++++++++++-------------------
>  drivers/usb/host/xhci.h      |    1 +
>  4 files changed, 51 insertions(+), 38 deletions(-)
> 
> diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
> index fcb7f7e..2d671af 100644
> --- a/drivers/usb/host/xhci-mem.c
> +++ b/drivers/usb/host/xhci-mem.c
> @@ -81,7 +81,7 @@ static void xhci_segment_free(struct xhci_hcd *xhci, struct xhci_segment *seg)
>   * related flags, such as End TRB, Toggle Cycle, and no snoop.
>   */
>  static void xhci_link_segments(struct xhci_hcd *xhci, struct xhci_segment *prev,
> -		struct xhci_segment *next, bool link_trbs)
> +		struct xhci_segment *next, bool link_trbs, bool isoc)
>  {
>  	u32 val;
>  
> @@ -97,7 +97,9 @@ static void xhci_link_segments(struct xhci_hcd *xhci, struct xhci_segment *prev,
>  		val &= ~TRB_TYPE_BITMASK;
>  		val |= TRB_TYPE(TRB_LINK);
>  		/* Always set the chain bit with 0.95 hardware */
> -		if (xhci_link_trb_quirk(xhci))
> +		/* Set chain bit for isoc rings on AMD 0.96 host */
> +		if (xhci_link_trb_quirk(xhci) ||
> +				(isoc && (xhci->quirks & XHCI_AMD_0x96_HOST)))
>  			val |= TRB_CHAIN;
>  		prev->trbs[TRBS_PER_SEGMENT-1].link.control = cpu_to_le32(val);
>  	}
> @@ -152,7 +154,7 @@ static void xhci_initialize_ring_info(struct xhci_ring *ring)
>   * See section 4.9.1 and figures 15 and 16.
>   */
>  static struct xhci_ring *xhci_ring_alloc(struct xhci_hcd *xhci,
> -		unsigned int num_segs, bool link_trbs, gfp_t flags)
> +		unsigned int num_segs, bool link_trbs, bool isoc, gfp_t flags)
>  {
>  	struct xhci_ring	*ring;
>  	struct xhci_segment	*prev;
> @@ -178,12 +180,12 @@ static struct xhci_ring *xhci_ring_alloc(struct xhci_hcd *xhci,
>  		next = xhci_segment_alloc(xhci, flags);
>  		if (!next)
>  			goto fail;
> -		xhci_link_segments(xhci, prev, next, link_trbs);
> +		xhci_link_segments(xhci, prev, next, link_trbs, isoc);
>  
>  		prev = next;
>  		num_segs--;
>  	}
> -	xhci_link_segments(xhci, prev, ring->first_seg, link_trbs);
> +	xhci_link_segments(xhci, prev, ring->first_seg, link_trbs, isoc);
>  
>  	if (link_trbs) {
>  		/* See section 4.9.2.1 and 6.4.4.1 */
> @@ -229,14 +231,14 @@ void xhci_free_or_cache_endpoint_ring(struct xhci_hcd *xhci,
>   * pointers to the beginning of the ring.
>   */
>  static void xhci_reinit_cached_ring(struct xhci_hcd *xhci,
> -		struct xhci_ring *ring)
> +		struct xhci_ring *ring, bool isoc)
>  {
>  	struct xhci_segment	*seg = ring->first_seg;
>  	do {
>  		memset(seg->trbs, 0,
>  				sizeof(union xhci_trb)*TRBS_PER_SEGMENT);
>  		/* All endpoint rings have link TRBs */
> -		xhci_link_segments(xhci, seg, seg->next, 1);
> +		xhci_link_segments(xhci, seg, seg->next, 1, isoc);
>  		seg = seg->next;
>  	} while (seg != ring->first_seg);
>  	xhci_initialize_ring_info(ring);
> @@ -540,7 +542,7 @@ struct xhci_stream_info *xhci_alloc_stream_info(struct xhci_hcd *xhci,
>  	 */
>  	for (cur_stream = 1; cur_stream < num_streams; cur_stream++) {
>  		stream_info->stream_rings[cur_stream] =
> -			xhci_ring_alloc(xhci, 1, true, mem_flags);
> +			xhci_ring_alloc(xhci, 1, true, false, mem_flags);
>  		cur_ring = stream_info->stream_rings[cur_stream];
>  		if (!cur_ring)
>  			goto cleanup_rings;
> @@ -765,7 +767,7 @@ int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id,
>  	}
>  
>  	/* Allocate endpoint 0 ring */
> -	dev->eps[0].ring = xhci_ring_alloc(xhci, 1, true, flags);
> +	dev->eps[0].ring = xhci_ring_alloc(xhci, 1, true, false, flags);
>  	if (!dev->eps[0].ring)
>  		goto fail;
>  
> @@ -1175,10 +1177,10 @@ int xhci_endpoint_init(struct xhci_hcd *xhci,
>  	 */
>  	if (usb_endpoint_xfer_isoc(&ep->desc))
>  		virt_dev->eps[ep_index].new_ring =
> -			xhci_ring_alloc(xhci, 8, true, mem_flags);
> +			xhci_ring_alloc(xhci, 8, true, true, mem_flags);
>  	else
>  		virt_dev->eps[ep_index].new_ring =
> -			xhci_ring_alloc(xhci, 1, true, mem_flags);
> +			xhci_ring_alloc(xhci, 1, true, false, mem_flags);
>  	if (!virt_dev->eps[ep_index].new_ring) {
>  		/* Attempt to use the ring cache */
>  		if (virt_dev->num_rings_cached == 0)
> @@ -1187,7 +1189,8 @@ int xhci_endpoint_init(struct xhci_hcd *xhci,
>  			virt_dev->ring_cache[virt_dev->num_rings_cached];
>  		virt_dev->ring_cache[virt_dev->num_rings_cached] = NULL;
>  		virt_dev->num_rings_cached--;
> -		xhci_reinit_cached_ring(xhci, virt_dev->eps[ep_index].new_ring);
> +		xhci_reinit_cached_ring(xhci, virt_dev->eps[ep_index].new_ring,
> +			usb_endpoint_xfer_isoc(&ep->desc) ? true : false);
>  	}
>  	virt_dev->eps[ep_index].skip = false;
>  	ep_ring = virt_dev->eps[ep_index].new_ring;
> @@ -2001,7 +2004,7 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags)
>  		goto fail;
>  
>  	/* Set up the command ring to have one segments for now. */
> -	xhci->cmd_ring = xhci_ring_alloc(xhci, 1, true, flags);
> +	xhci->cmd_ring = xhci_ring_alloc(xhci, 1, true, false, flags);
>  	if (!xhci->cmd_ring)
>  		goto fail;
>  	xhci_dbg(xhci, "Allocated command ring at %p\n", xhci->cmd_ring);
> @@ -2032,7 +2035,8 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags)
>  	 * the event ring segment table (ERST).  Section 4.9.3.
>  	 */
>  	xhci_dbg(xhci, "// Allocating event ring\n");
> -	xhci->event_ring = xhci_ring_alloc(xhci, ERST_NUM_SEGS, false, flags);
> +	xhci->event_ring = xhci_ring_alloc(xhci, ERST_NUM_SEGS, false, false,
> +						flags);
>  	if (!xhci->event_ring)
>  		goto fail;
>  	if (xhci_check_trb_in_td_math(xhci, flags) < 0)
> diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
> index cb16de2..50e7156 100644
> --- a/drivers/usb/host/xhci-pci.c
> +++ b/drivers/usb/host/xhci-pci.c
> @@ -128,6 +128,9 @@ static int xhci_pci_setup(struct usb_hcd *hcd)
>  	if (pdev->vendor == PCI_VENDOR_ID_NEC)
>  		xhci->quirks |= XHCI_NEC_HOST;
>  
> +	if (pdev->vendor == PCI_VENDOR_ID_AMD && xhci->hci_version == 0x96)
> +		xhci->quirks |= XHCI_AMD_0x96_HOST;
> +
>  	/* AMD PLL quirk */
>  	if (pdev->vendor == PCI_VENDOR_ID_AMD && usb_amd_find_chipset_info())
>  		xhci->quirks |= XHCI_AMD_PLL_FIX;
> diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
> index d0871ea..e64bd6f 100644
> --- a/drivers/usb/host/xhci-ring.c
> +++ b/drivers/usb/host/xhci-ring.c
> @@ -187,7 +187,7 @@ static void inc_deq(struct xhci_hcd *xhci, struct xhci_ring *ring, bool consumer
>   *			prepare_transfer()?
>   */
>  static void inc_enq(struct xhci_hcd *xhci, struct xhci_ring *ring,
> -		bool consumer, bool more_trbs_coming)
> +		bool consumer, bool more_trbs_coming, bool isoc)
>  {
>  	u32 chain;
>  	union xhci_trb *next;
> @@ -214,11 +214,13 @@ static void inc_enq(struct xhci_hcd *xhci, struct xhci_ring *ring,
>  				if (!chain && !more_trbs_coming)
>  					break;
>  
> -				/* If we're not dealing with 0.95 hardware,
> +				/* If we're not dealing with 0.95 hardware or
> +				 * isoc rings on AMD 0.96 host,
>  				 * carry over the chain bit of the previous TRB
>  				 * (which may mean the chain bit is cleared).
>  				 */
> -				if (!xhci_link_trb_quirk(xhci)) {
> +				if (!(isoc && (xhci->quirks & XHCI_AMD_0x96_HOST))
> +						&& !xhci_link_trb_quirk(xhci)) {
>  					next->link.control &=
>  						cpu_to_le32(~TRB_CHAIN);
>  					next->link.control |=
> @@ -2398,7 +2400,7 @@ irqreturn_t xhci_msi_irq(int irq, struct usb_hcd *hcd)
>   *			prepare_transfer()?
>   */
>  static void queue_trb(struct xhci_hcd *xhci, struct xhci_ring *ring,
> -		bool consumer, bool more_trbs_coming,
> +		bool consumer, bool more_trbs_coming, bool isoc,
>  		u32 field1, u32 field2, u32 field3, u32 field4)
>  {
>  	struct xhci_generic_trb *trb;
> @@ -2408,7 +2410,7 @@ static void queue_trb(struct xhci_hcd *xhci, struct xhci_ring *ring,
>  	trb->field[1] = cpu_to_le32(field2);
>  	trb->field[2] = cpu_to_le32(field3);
>  	trb->field[3] = cpu_to_le32(field4);
> -	inc_enq(xhci, ring, consumer, more_trbs_coming);
> +	inc_enq(xhci, ring, consumer, more_trbs_coming, isoc);
>  }
>  
>  /*
> @@ -2416,7 +2418,7 @@ static void queue_trb(struct xhci_hcd *xhci, struct xhci_ring *ring,
>   * FIXME allocate segments if the ring is full.
>   */
>  static int prepare_ring(struct xhci_hcd *xhci, struct xhci_ring *ep_ring,
> -		u32 ep_state, unsigned int num_trbs, gfp_t mem_flags)
> +		u32 ep_state, unsigned int num_trbs, bool isoc, gfp_t mem_flags)
>  {
>  	/* Make sure the endpoint has been added to xHC schedule */
>  	switch (ep_state) {
> @@ -2458,10 +2460,11 @@ static int prepare_ring(struct xhci_hcd *xhci, struct xhci_ring *ep_ring,
>  		next = ring->enqueue;
>  
>  		while (last_trb(xhci, ring, ring->enq_seg, next)) {
> -			/* If we're not dealing with 0.95 hardware,
> -			 * clear the chain bit.
> +			/* If we're not dealing with 0.95 hardware or isoc rings
> +			 * on AMD 0.96 host, clear the chain bit.
>  			 */
> -			if (!xhci_link_trb_quirk(xhci))
> +			if (!xhci_link_trb_quirk(xhci) && !(isoc &&
> +					(xhci->quirks & XHCI_AMD_0x96_HOST)))
>  				next->link.control &= cpu_to_le32(~TRB_CHAIN);
>  			else
>  				next->link.control |= cpu_to_le32(TRB_CHAIN);
> @@ -2494,6 +2497,7 @@ static int prepare_transfer(struct xhci_hcd *xhci,
>  		unsigned int num_trbs,
>  		struct urb *urb,
>  		unsigned int td_index,
> +		bool isoc,
>  		gfp_t mem_flags)
>  {
>  	int ret;
> @@ -2511,7 +2515,7 @@ static int prepare_transfer(struct xhci_hcd *xhci,
>  
>  	ret = prepare_ring(xhci, ep_ring,
>  			   le32_to_cpu(ep_ctx->ep_info) & EP_STATE_MASK,
> -			   num_trbs, mem_flags);
> +			   num_trbs, isoc, mem_flags);
>  	if (ret)
>  		return ret;
>  
> @@ -2734,7 +2738,7 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>  
>  	trb_buff_len = prepare_transfer(xhci, xhci->devs[slot_id],
>  			ep_index, urb->stream_id,
> -			num_trbs, urb, 0, mem_flags);
> +			num_trbs, urb, 0, false, mem_flags);
>  	if (trb_buff_len < 0)
>  		return trb_buff_len;
>  
> @@ -2829,7 +2833,7 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>  			more_trbs_coming = true;
>  		else
>  			more_trbs_coming = false;
> -		queue_trb(xhci, ep_ring, false, more_trbs_coming,
> +		queue_trb(xhci, ep_ring, false, more_trbs_coming, false,
>  				lower_32_bits(addr),
>  				upper_32_bits(addr),
>  				length_field,
> @@ -2920,7 +2924,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>  
>  	ret = prepare_transfer(xhci, xhci->devs[slot_id],
>  			ep_index, urb->stream_id,
> -			num_trbs, urb, 0, mem_flags);
> +			num_trbs, urb, 0, false, mem_flags);
>  	if (ret < 0)
>  		return ret;
>  
> @@ -2992,7 +2996,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>  			more_trbs_coming = true;
>  		else
>  			more_trbs_coming = false;
> -		queue_trb(xhci, ep_ring, false, more_trbs_coming,
> +		queue_trb(xhci, ep_ring, false, more_trbs_coming, false,
>  				lower_32_bits(addr),
>  				upper_32_bits(addr),
>  				length_field,
> @@ -3052,7 +3056,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>  		num_trbs++;
>  	ret = prepare_transfer(xhci, xhci->devs[slot_id],
>  			ep_index, urb->stream_id,
> -			num_trbs, urb, 0, mem_flags);
> +			num_trbs, urb, 0, false, mem_flags);
>  	if (ret < 0)
>  		return ret;
>  
> @@ -3085,7 +3089,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>  		}
>  	}
>  
> -	queue_trb(xhci, ep_ring, false, true,
> +	queue_trb(xhci, ep_ring, false, true, false,
>  		  setup->bRequestType | setup->bRequest << 8 | le16_to_cpu(setup->wValue) << 16,
>  		  le16_to_cpu(setup->wIndex) | le16_to_cpu(setup->wLength) << 16,
>  		  TRB_LEN(8) | TRB_INTR_TARGET(0),
> @@ -3105,7 +3109,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>  	if (urb->transfer_buffer_length > 0) {
>  		if (setup->bRequestType & USB_DIR_IN)
>  			field |= TRB_DIR_IN;
> -		queue_trb(xhci, ep_ring, false, true,
> +		queue_trb(xhci, ep_ring, false, true, false,
>  				lower_32_bits(urb->transfer_dma),
>  				upper_32_bits(urb->transfer_dma),
>  				length_field,
> @@ -3121,7 +3125,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>  		field = 0;
>  	else
>  		field = TRB_DIR_IN;
> -	queue_trb(xhci, ep_ring, false, false,
> +	queue_trb(xhci, ep_ring, false, false, false,
>  			0,
>  			0,
>  			TRB_INTR_TARGET(0),
> @@ -3270,7 +3274,8 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>  		trbs_per_td = count_isoc_trbs_needed(xhci, urb, i);
>  
>  		ret = prepare_transfer(xhci, xhci->devs[slot_id], ep_index,
> -				urb->stream_id, trbs_per_td, urb, i, mem_flags);
> +				urb->stream_id, trbs_per_td, urb, i, true,
> +				mem_flags);
>  		if (ret < 0) {
>  			if (i == 0)
>  				return ret;
> @@ -3340,7 +3345,7 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>  				remainder |
>  				TRB_INTR_TARGET(0);
>  
> -			queue_trb(xhci, ep_ring, false, more_trbs_coming,
> +			queue_trb(xhci, ep_ring, false, more_trbs_coming, true,
>  				lower_32_bits(addr),
>  				upper_32_bits(addr),
>  				length_field,
> @@ -3422,7 +3427,7 @@ int xhci_queue_isoc_tx_prepare(struct xhci_hcd *xhci, gfp_t mem_flags,
>  	 * Do not insert any td of the urb to the ring if the check failed.
>  	 */
>  	ret = prepare_ring(xhci, ep_ring, le32_to_cpu(ep_ctx->ep_info) & EP_STATE_MASK,
> -			   num_trbs, mem_flags);
> +			   num_trbs, true, mem_flags);
>  	if (ret)
>  		return ret;
>  
> @@ -3481,7 +3486,7 @@ static int queue_command(struct xhci_hcd *xhci, u32 field1, u32 field2,
>  		reserved_trbs++;
>  
>  	ret = prepare_ring(xhci, xhci->cmd_ring, EP_STATE_RUNNING,
> -			reserved_trbs, GFP_ATOMIC);
> +			reserved_trbs, false, GFP_ATOMIC);
>  	if (ret < 0) {
>  		xhci_err(xhci, "ERR: No room for command on command ring\n");
>  		if (command_must_succeed)
> @@ -3489,8 +3494,8 @@ static int queue_command(struct xhci_hcd *xhci, u32 field1, u32 field2,
>  					"unfailable commands failed.\n");
>  		return ret;
>  	}
> -	queue_trb(xhci, xhci->cmd_ring, false, false, field1, field2, field3,
> -			field4 | xhci->cmd_ring->cycle_state);
> +	queue_trb(xhci, xhci->cmd_ring, false, false, false, field1, field2,
> +			field3,	field4 | xhci->cmd_ring->cycle_state);
>  	return 0;
>  }
>  
> diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
> index d8bbf5c..8a98416 100644
> --- a/drivers/usb/host/xhci.h
> +++ b/drivers/usb/host/xhci.h
> @@ -1311,6 +1311,7 @@ struct xhci_hcd {
>  #define XHCI_EP_LIMIT_QUIRK	(1 << 5)
>  #define XHCI_BROKEN_MSI		(1 << 6)
>  #define XHCI_RESET_ON_RESUME	(1 << 7)
> +#define XHCI_AMD_0x96_HOST	(1 << 9)
>  	unsigned int		num_active_eps;
>  	unsigned int		limit_active_eps;
>  	/* There are two roothubs to keep track of bus suspend info for */
> -- 
> 1.7.0.4
> 
>
Andy Whitcroft - Oct. 14, 2011, 3:01 p.m.
On Thu, Oct 13, 2011 at 10:42:28AM -0600, Tim Gardner wrote:
> From 931a2ec0f2c74498f861c75c44f973c615285b11 Mon Sep 17 00:00:00 2001
> From: Andiry Xu <andiry.xu@amd.com>
> Date: Fri, 23 Sep 2011 14:19:54 -0700
> Subject: [PATCH] UBUNTU: SAUCE: xHCI: AMD isoc link TRB chain bit quirk
> 
> BugLink: http://bugs.launchpad.net/bugs/872811
> 
> Setting the chain (CH) bit in the link TRB of isochronous transfer rings
> is required by AMD 0.96 xHCI host controller to successfully transverse
> multi-TRB TD that span through different memory segments.
> 
> When a Missed Service Error event occurs, if the chain bit is not set in
> the link TRB and the host skips TDs which just across a link TRB, the
> host may falsely recognize the link TRB as a normal TRB. You can see
> this may cause big trouble - the host does not jump to the right address
> which is pointed by the link TRB, but continue fetching the memory which
> is after the link TRB address, which may not even belong to the host,
> and the result cannot be predicted.
> 
> This causes some big problems. Without the former patch I sent: "xHCI:
> prevent infinite loop when processing MSE event", the system may hang.
> With that patch applied, system does not hang, but the host still access
> wrong memory address and isoc transfer will fail. With this patch,
> isochronous transfer works as expected.
> 
> This patch should be applied to kernels as old as 2.6.36, which was when
> the first isochronous support was added for the xHCI host controller.
> 
> Signed-off-by: Andiry Xu <andiry.xu@amd.com>
> Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
> Cc: stable@kernel.org
> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

Are we not expecting this via stable?  I assume we'll drop this when
that arrives.

> cherry-picked from git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git usb-next
> Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
> ---
>  drivers/usb/host/xhci-mem.c  |   32 ++++++++++++++-----------
>  drivers/usb/host/xhci-pci.c  |    3 ++
>  drivers/usb/host/xhci-ring.c |   53 +++++++++++++++++++++++-------------------
>  drivers/usb/host/xhci.h      |    1 +
>  4 files changed, 51 insertions(+), 38 deletions(-)
> 
> diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
> index fcb7f7e..2d671af 100644
> --- a/drivers/usb/host/xhci-mem.c
> +++ b/drivers/usb/host/xhci-mem.c
> @@ -81,7 +81,7 @@ static void xhci_segment_free(struct xhci_hcd *xhci, struct xhci_segment *seg)
>   * related flags, such as End TRB, Toggle Cycle, and no snoop.
>   */
>  static void xhci_link_segments(struct xhci_hcd *xhci, struct xhci_segment *prev,
> -		struct xhci_segment *next, bool link_trbs)
> +		struct xhci_segment *next, bool link_trbs, bool isoc)
>  {
>  	u32 val;
>  
> @@ -97,7 +97,9 @@ static void xhci_link_segments(struct xhci_hcd *xhci, struct xhci_segment *prev,
>  		val &= ~TRB_TYPE_BITMASK;
>  		val |= TRB_TYPE(TRB_LINK);
>  		/* Always set the chain bit with 0.95 hardware */
> -		if (xhci_link_trb_quirk(xhci))
> +		/* Set chain bit for isoc rings on AMD 0.96 host */
> +		if (xhci_link_trb_quirk(xhci) ||
> +				(isoc && (xhci->quirks & XHCI_AMD_0x96_HOST)))
>  			val |= TRB_CHAIN;
>  		prev->trbs[TRBS_PER_SEGMENT-1].link.control = cpu_to_le32(val);
>  	}
> @@ -152,7 +154,7 @@ static void xhci_initialize_ring_info(struct xhci_ring *ring)
>   * See section 4.9.1 and figures 15 and 16.
>   */
>  static struct xhci_ring *xhci_ring_alloc(struct xhci_hcd *xhci,
> -		unsigned int num_segs, bool link_trbs, gfp_t flags)
> +		unsigned int num_segs, bool link_trbs, bool isoc, gfp_t flags)
>  {
>  	struct xhci_ring	*ring;
>  	struct xhci_segment	*prev;
> @@ -178,12 +180,12 @@ static struct xhci_ring *xhci_ring_alloc(struct xhci_hcd *xhci,
>  		next = xhci_segment_alloc(xhci, flags);
>  		if (!next)
>  			goto fail;
> -		xhci_link_segments(xhci, prev, next, link_trbs);
> +		xhci_link_segments(xhci, prev, next, link_trbs, isoc);
>  
>  		prev = next;
>  		num_segs--;
>  	}
> -	xhci_link_segments(xhci, prev, ring->first_seg, link_trbs);
> +	xhci_link_segments(xhci, prev, ring->first_seg, link_trbs, isoc);
>  
>  	if (link_trbs) {
>  		/* See section 4.9.2.1 and 6.4.4.1 */
> @@ -229,14 +231,14 @@ void xhci_free_or_cache_endpoint_ring(struct xhci_hcd *xhci,
>   * pointers to the beginning of the ring.
>   */
>  static void xhci_reinit_cached_ring(struct xhci_hcd *xhci,
> -		struct xhci_ring *ring)
> +		struct xhci_ring *ring, bool isoc)
>  {
>  	struct xhci_segment	*seg = ring->first_seg;
>  	do {
>  		memset(seg->trbs, 0,
>  				sizeof(union xhci_trb)*TRBS_PER_SEGMENT);
>  		/* All endpoint rings have link TRBs */
> -		xhci_link_segments(xhci, seg, seg->next, 1);
> +		xhci_link_segments(xhci, seg, seg->next, 1, isoc);
>  		seg = seg->next;
>  	} while (seg != ring->first_seg);
>  	xhci_initialize_ring_info(ring);
> @@ -540,7 +542,7 @@ struct xhci_stream_info *xhci_alloc_stream_info(struct xhci_hcd *xhci,
>  	 */
>  	for (cur_stream = 1; cur_stream < num_streams; cur_stream++) {
>  		stream_info->stream_rings[cur_stream] =
> -			xhci_ring_alloc(xhci, 1, true, mem_flags);
> +			xhci_ring_alloc(xhci, 1, true, false, mem_flags);
>  		cur_ring = stream_info->stream_rings[cur_stream];
>  		if (!cur_ring)
>  			goto cleanup_rings;
> @@ -765,7 +767,7 @@ int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id,
>  	}
>  
>  	/* Allocate endpoint 0 ring */
> -	dev->eps[0].ring = xhci_ring_alloc(xhci, 1, true, flags);
> +	dev->eps[0].ring = xhci_ring_alloc(xhci, 1, true, false, flags);
>  	if (!dev->eps[0].ring)
>  		goto fail;
>  
> @@ -1175,10 +1177,10 @@ int xhci_endpoint_init(struct xhci_hcd *xhci,
>  	 */
>  	if (usb_endpoint_xfer_isoc(&ep->desc))
>  		virt_dev->eps[ep_index].new_ring =
> -			xhci_ring_alloc(xhci, 8, true, mem_flags);
> +			xhci_ring_alloc(xhci, 8, true, true, mem_flags);
>  	else
>  		virt_dev->eps[ep_index].new_ring =
> -			xhci_ring_alloc(xhci, 1, true, mem_flags);
> +			xhci_ring_alloc(xhci, 1, true, false, mem_flags);
>  	if (!virt_dev->eps[ep_index].new_ring) {
>  		/* Attempt to use the ring cache */
>  		if (virt_dev->num_rings_cached == 0)
> @@ -1187,7 +1189,8 @@ int xhci_endpoint_init(struct xhci_hcd *xhci,
>  			virt_dev->ring_cache[virt_dev->num_rings_cached];
>  		virt_dev->ring_cache[virt_dev->num_rings_cached] = NULL;
>  		virt_dev->num_rings_cached--;
> -		xhci_reinit_cached_ring(xhci, virt_dev->eps[ep_index].new_ring);
> +		xhci_reinit_cached_ring(xhci, virt_dev->eps[ep_index].new_ring,
> +			usb_endpoint_xfer_isoc(&ep->desc) ? true : false);
>  	}
>  	virt_dev->eps[ep_index].skip = false;
>  	ep_ring = virt_dev->eps[ep_index].new_ring;
> @@ -2001,7 +2004,7 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags)
>  		goto fail;
>  
>  	/* Set up the command ring to have one segments for now. */
> -	xhci->cmd_ring = xhci_ring_alloc(xhci, 1, true, flags);
> +	xhci->cmd_ring = xhci_ring_alloc(xhci, 1, true, false, flags);
>  	if (!xhci->cmd_ring)
>  		goto fail;
>  	xhci_dbg(xhci, "Allocated command ring at %p\n", xhci->cmd_ring);
> @@ -2032,7 +2035,8 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags)
>  	 * the event ring segment table (ERST).  Section 4.9.3.
>  	 */
>  	xhci_dbg(xhci, "// Allocating event ring\n");
> -	xhci->event_ring = xhci_ring_alloc(xhci, ERST_NUM_SEGS, false, flags);
> +	xhci->event_ring = xhci_ring_alloc(xhci, ERST_NUM_SEGS, false, false,
> +						flags);
>  	if (!xhci->event_ring)
>  		goto fail;
>  	if (xhci_check_trb_in_td_math(xhci, flags) < 0)
> diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
> index cb16de2..50e7156 100644
> --- a/drivers/usb/host/xhci-pci.c
> +++ b/drivers/usb/host/xhci-pci.c
> @@ -128,6 +128,9 @@ static int xhci_pci_setup(struct usb_hcd *hcd)
>  	if (pdev->vendor == PCI_VENDOR_ID_NEC)
>  		xhci->quirks |= XHCI_NEC_HOST;
>  
> +	if (pdev->vendor == PCI_VENDOR_ID_AMD && xhci->hci_version == 0x96)
> +		xhci->quirks |= XHCI_AMD_0x96_HOST;
> +
>  	/* AMD PLL quirk */
>  	if (pdev->vendor == PCI_VENDOR_ID_AMD && usb_amd_find_chipset_info())
>  		xhci->quirks |= XHCI_AMD_PLL_FIX;
> diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
> index d0871ea..e64bd6f 100644
> --- a/drivers/usb/host/xhci-ring.c
> +++ b/drivers/usb/host/xhci-ring.c
> @@ -187,7 +187,7 @@ static void inc_deq(struct xhci_hcd *xhci, struct xhci_ring *ring, bool consumer
>   *			prepare_transfer()?
>   */
>  static void inc_enq(struct xhci_hcd *xhci, struct xhci_ring *ring,
> -		bool consumer, bool more_trbs_coming)
> +		bool consumer, bool more_trbs_coming, bool isoc)
>  {
>  	u32 chain;
>  	union xhci_trb *next;
> @@ -214,11 +214,13 @@ static void inc_enq(struct xhci_hcd *xhci, struct xhci_ring *ring,
>  				if (!chain && !more_trbs_coming)
>  					break;
>  
> -				/* If we're not dealing with 0.95 hardware,
> +				/* If we're not dealing with 0.95 hardware or
> +				 * isoc rings on AMD 0.96 host,
>  				 * carry over the chain bit of the previous TRB
>  				 * (which may mean the chain bit is cleared).
>  				 */
> -				if (!xhci_link_trb_quirk(xhci)) {
> +				if (!(isoc && (xhci->quirks & XHCI_AMD_0x96_HOST))
> +						&& !xhci_link_trb_quirk(xhci)) {
>  					next->link.control &=
>  						cpu_to_le32(~TRB_CHAIN);
>  					next->link.control |=
> @@ -2398,7 +2400,7 @@ irqreturn_t xhci_msi_irq(int irq, struct usb_hcd *hcd)
>   *			prepare_transfer()?
>   */
>  static void queue_trb(struct xhci_hcd *xhci, struct xhci_ring *ring,
> -		bool consumer, bool more_trbs_coming,
> +		bool consumer, bool more_trbs_coming, bool isoc,
>  		u32 field1, u32 field2, u32 field3, u32 field4)
>  {
>  	struct xhci_generic_trb *trb;
> @@ -2408,7 +2410,7 @@ static void queue_trb(struct xhci_hcd *xhci, struct xhci_ring *ring,
>  	trb->field[1] = cpu_to_le32(field2);
>  	trb->field[2] = cpu_to_le32(field3);
>  	trb->field[3] = cpu_to_le32(field4);
> -	inc_enq(xhci, ring, consumer, more_trbs_coming);
> +	inc_enq(xhci, ring, consumer, more_trbs_coming, isoc);
>  }
>  
>  /*
> @@ -2416,7 +2418,7 @@ static void queue_trb(struct xhci_hcd *xhci, struct xhci_ring *ring,
>   * FIXME allocate segments if the ring is full.
>   */
>  static int prepare_ring(struct xhci_hcd *xhci, struct xhci_ring *ep_ring,
> -		u32 ep_state, unsigned int num_trbs, gfp_t mem_flags)
> +		u32 ep_state, unsigned int num_trbs, bool isoc, gfp_t mem_flags)
>  {
>  	/* Make sure the endpoint has been added to xHC schedule */
>  	switch (ep_state) {
> @@ -2458,10 +2460,11 @@ static int prepare_ring(struct xhci_hcd *xhci, struct xhci_ring *ep_ring,
>  		next = ring->enqueue;
>  
>  		while (last_trb(xhci, ring, ring->enq_seg, next)) {
> -			/* If we're not dealing with 0.95 hardware,
> -			 * clear the chain bit.
> +			/* If we're not dealing with 0.95 hardware or isoc rings
> +			 * on AMD 0.96 host, clear the chain bit.
>  			 */
> -			if (!xhci_link_trb_quirk(xhci))
> +			if (!xhci_link_trb_quirk(xhci) && !(isoc &&
> +					(xhci->quirks & XHCI_AMD_0x96_HOST)))
>  				next->link.control &= cpu_to_le32(~TRB_CHAIN);
>  			else
>  				next->link.control |= cpu_to_le32(TRB_CHAIN);
> @@ -2494,6 +2497,7 @@ static int prepare_transfer(struct xhci_hcd *xhci,
>  		unsigned int num_trbs,
>  		struct urb *urb,
>  		unsigned int td_index,
> +		bool isoc,
>  		gfp_t mem_flags)
>  {
>  	int ret;
> @@ -2511,7 +2515,7 @@ static int prepare_transfer(struct xhci_hcd *xhci,
>  
>  	ret = prepare_ring(xhci, ep_ring,
>  			   le32_to_cpu(ep_ctx->ep_info) & EP_STATE_MASK,
> -			   num_trbs, mem_flags);
> +			   num_trbs, isoc, mem_flags);
>  	if (ret)
>  		return ret;
>  
> @@ -2734,7 +2738,7 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>  
>  	trb_buff_len = prepare_transfer(xhci, xhci->devs[slot_id],
>  			ep_index, urb->stream_id,
> -			num_trbs, urb, 0, mem_flags);
> +			num_trbs, urb, 0, false, mem_flags);
>  	if (trb_buff_len < 0)
>  		return trb_buff_len;
>  
> @@ -2829,7 +2833,7 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>  			more_trbs_coming = true;
>  		else
>  			more_trbs_coming = false;
> -		queue_trb(xhci, ep_ring, false, more_trbs_coming,
> +		queue_trb(xhci, ep_ring, false, more_trbs_coming, false,
>  				lower_32_bits(addr),
>  				upper_32_bits(addr),
>  				length_field,
> @@ -2920,7 +2924,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>  
>  	ret = prepare_transfer(xhci, xhci->devs[slot_id],
>  			ep_index, urb->stream_id,
> -			num_trbs, urb, 0, mem_flags);
> +			num_trbs, urb, 0, false, mem_flags);
>  	if (ret < 0)
>  		return ret;
>  
> @@ -2992,7 +2996,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>  			more_trbs_coming = true;
>  		else
>  			more_trbs_coming = false;
> -		queue_trb(xhci, ep_ring, false, more_trbs_coming,
> +		queue_trb(xhci, ep_ring, false, more_trbs_coming, false,
>  				lower_32_bits(addr),
>  				upper_32_bits(addr),
>  				length_field,
> @@ -3052,7 +3056,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>  		num_trbs++;
>  	ret = prepare_transfer(xhci, xhci->devs[slot_id],
>  			ep_index, urb->stream_id,
> -			num_trbs, urb, 0, mem_flags);
> +			num_trbs, urb, 0, false, mem_flags);
>  	if (ret < 0)
>  		return ret;
>  
> @@ -3085,7 +3089,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>  		}
>  	}
>  
> -	queue_trb(xhci, ep_ring, false, true,
> +	queue_trb(xhci, ep_ring, false, true, false,
>  		  setup->bRequestType | setup->bRequest << 8 | le16_to_cpu(setup->wValue) << 16,
>  		  le16_to_cpu(setup->wIndex) | le16_to_cpu(setup->wLength) << 16,
>  		  TRB_LEN(8) | TRB_INTR_TARGET(0),
> @@ -3105,7 +3109,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>  	if (urb->transfer_buffer_length > 0) {
>  		if (setup->bRequestType & USB_DIR_IN)
>  			field |= TRB_DIR_IN;
> -		queue_trb(xhci, ep_ring, false, true,
> +		queue_trb(xhci, ep_ring, false, true, false,
>  				lower_32_bits(urb->transfer_dma),
>  				upper_32_bits(urb->transfer_dma),
>  				length_field,
> @@ -3121,7 +3125,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>  		field = 0;
>  	else
>  		field = TRB_DIR_IN;
> -	queue_trb(xhci, ep_ring, false, false,
> +	queue_trb(xhci, ep_ring, false, false, false,
>  			0,
>  			0,
>  			TRB_INTR_TARGET(0),
> @@ -3270,7 +3274,8 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>  		trbs_per_td = count_isoc_trbs_needed(xhci, urb, i);
>  
>  		ret = prepare_transfer(xhci, xhci->devs[slot_id], ep_index,
> -				urb->stream_id, trbs_per_td, urb, i, mem_flags);
> +				urb->stream_id, trbs_per_td, urb, i, true,
> +				mem_flags);
>  		if (ret < 0) {
>  			if (i == 0)
>  				return ret;
> @@ -3340,7 +3345,7 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>  				remainder |
>  				TRB_INTR_TARGET(0);
>  
> -			queue_trb(xhci, ep_ring, false, more_trbs_coming,
> +			queue_trb(xhci, ep_ring, false, more_trbs_coming, true,
>  				lower_32_bits(addr),
>  				upper_32_bits(addr),
>  				length_field,
> @@ -3422,7 +3427,7 @@ int xhci_queue_isoc_tx_prepare(struct xhci_hcd *xhci, gfp_t mem_flags,
>  	 * Do not insert any td of the urb to the ring if the check failed.
>  	 */
>  	ret = prepare_ring(xhci, ep_ring, le32_to_cpu(ep_ctx->ep_info) & EP_STATE_MASK,
> -			   num_trbs, mem_flags);
> +			   num_trbs, true, mem_flags);
>  	if (ret)
>  		return ret;
>  
> @@ -3481,7 +3486,7 @@ static int queue_command(struct xhci_hcd *xhci, u32 field1, u32 field2,
>  		reserved_trbs++;
>  
>  	ret = prepare_ring(xhci, xhci->cmd_ring, EP_STATE_RUNNING,
> -			reserved_trbs, GFP_ATOMIC);
> +			reserved_trbs, false, GFP_ATOMIC);
>  	if (ret < 0) {
>  		xhci_err(xhci, "ERR: No room for command on command ring\n");
>  		if (command_must_succeed)
> @@ -3489,8 +3494,8 @@ static int queue_command(struct xhci_hcd *xhci, u32 field1, u32 field2,
>  					"unfailable commands failed.\n");
>  		return ret;
>  	}
> -	queue_trb(xhci, xhci->cmd_ring, false, false, field1, field2, field3,
> -			field4 | xhci->cmd_ring->cycle_state);
> +	queue_trb(xhci, xhci->cmd_ring, false, false, false, field1, field2,
> +			field3,	field4 | xhci->cmd_ring->cycle_state);
>  	return 0;
>  }
>  
> diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
> index d8bbf5c..8a98416 100644
> --- a/drivers/usb/host/xhci.h
> +++ b/drivers/usb/host/xhci.h
> @@ -1311,6 +1311,7 @@ struct xhci_hcd {
>  #define XHCI_EP_LIMIT_QUIRK	(1 << 5)
>  #define XHCI_BROKEN_MSI		(1 << 6)
>  #define XHCI_RESET_ON_RESUME	(1 << 7)
> +#define XHCI_AMD_0x96_HOST	(1 << 9)
>  	unsigned int		num_active_eps;
>  	unsigned int		limit_active_eps;
>  	/* There are two roothubs to keep track of bus suspend info for */

If its going to stable it sounds suitable for us.  Sounds like it could
cause all sorts of randomness on AMD h/w.  Therefore:

Acked-by: Andy Whitcroft <apw@canonical.com>

-apw
Tim Gardner - Oct. 14, 2011, 3:04 p.m.
On 10/14/2011 04:01 PM, Andy Whitcroft wrote:
> On Thu, Oct 13, 2011 at 10:42:28AM -0600, Tim Gardner wrote:
>>  From 931a2ec0f2c74498f861c75c44f973c615285b11 Mon Sep 17 00:00:00 2001
>> From: Andiry Xu<andiry.xu@amd.com>
>> Date: Fri, 23 Sep 2011 14:19:54 -0700
>> Subject: [PATCH] UBUNTU: SAUCE: xHCI: AMD isoc link TRB chain bit quirk
>>
>> BugLink: http://bugs.launchpad.net/bugs/872811
>>
>> Setting the chain (CH) bit in the link TRB of isochronous transfer rings
>> is required by AMD 0.96 xHCI host controller to successfully transverse
>> multi-TRB TD that span through different memory segments.
>>
>> When a Missed Service Error event occurs, if the chain bit is not set in
>> the link TRB and the host skips TDs which just across a link TRB, the
>> host may falsely recognize the link TRB as a normal TRB. You can see
>> this may cause big trouble - the host does not jump to the right address
>> which is pointed by the link TRB, but continue fetching the memory which
>> is after the link TRB address, which may not even belong to the host,
>> and the result cannot be predicted.
>>
>> This causes some big problems. Without the former patch I sent: "xHCI:
>> prevent infinite loop when processing MSE event", the system may hang.
>> With that patch applied, system does not hang, but the host still access
>> wrong memory address and isoc transfer will fail. With this patch,
>> isochronous transfer works as expected.
>>
>> This patch should be applied to kernels as old as 2.6.36, which was when
>> the first isochronous support was added for the xHCI host controller.
>>
>> Signed-off-by: Andiry Xu<andiry.xu@amd.com>
>> Signed-off-by: Sarah Sharp<sarah.a.sharp@linux.intel.com>
>> Cc: stable@kernel.org
>> Signed-off-by: Greg Kroah-Hartman<gregkh@suse.de>
>
> Are we not expecting this via stable?  I assume we'll drop this when
> that arrives.
>
>> cherry-picked from git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git usb-next
>> Signed-off-by: Tim Gardner<tim.gardner@canonical.com>
>> ---
>>   drivers/usb/host/xhci-mem.c  |   32 ++++++++++++++-----------
>>   drivers/usb/host/xhci-pci.c  |    3 ++
>>   drivers/usb/host/xhci-ring.c |   53 +++++++++++++++++++++++-------------------
>>   drivers/usb/host/xhci.h      |    1 +
>>   4 files changed, 51 insertions(+), 38 deletions(-)
>>
>> diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
>> index fcb7f7e..2d671af 100644
>> --- a/drivers/usb/host/xhci-mem.c
>> +++ b/drivers/usb/host/xhci-mem.c
>> @@ -81,7 +81,7 @@ static void xhci_segment_free(struct xhci_hcd *xhci, struct xhci_segment *seg)
>>    * related flags, such as End TRB, Toggle Cycle, and no snoop.
>>    */
>>   static void xhci_link_segments(struct xhci_hcd *xhci, struct xhci_segment *prev,
>> -		struct xhci_segment *next, bool link_trbs)
>> +		struct xhci_segment *next, bool link_trbs, bool isoc)
>>   {
>>   	u32 val;
>>
>> @@ -97,7 +97,9 @@ static void xhci_link_segments(struct xhci_hcd *xhci, struct xhci_segment *prev,
>>   		val&= ~TRB_TYPE_BITMASK;
>>   		val |= TRB_TYPE(TRB_LINK);
>>   		/* Always set the chain bit with 0.95 hardware */
>> -		if (xhci_link_trb_quirk(xhci))
>> +		/* Set chain bit for isoc rings on AMD 0.96 host */
>> +		if (xhci_link_trb_quirk(xhci) ||
>> +				(isoc&&  (xhci->quirks&  XHCI_AMD_0x96_HOST)))
>>   			val |= TRB_CHAIN;
>>   		prev->trbs[TRBS_PER_SEGMENT-1].link.control = cpu_to_le32(val);
>>   	}
>> @@ -152,7 +154,7 @@ static void xhci_initialize_ring_info(struct xhci_ring *ring)
>>    * See section 4.9.1 and figures 15 and 16.
>>    */
>>   static struct xhci_ring *xhci_ring_alloc(struct xhci_hcd *xhci,
>> -		unsigned int num_segs, bool link_trbs, gfp_t flags)
>> +		unsigned int num_segs, bool link_trbs, bool isoc, gfp_t flags)
>>   {
>>   	struct xhci_ring	*ring;
>>   	struct xhci_segment	*prev;
>> @@ -178,12 +180,12 @@ static struct xhci_ring *xhci_ring_alloc(struct xhci_hcd *xhci,
>>   		next = xhci_segment_alloc(xhci, flags);
>>   		if (!next)
>>   			goto fail;
>> -		xhci_link_segments(xhci, prev, next, link_trbs);
>> +		xhci_link_segments(xhci, prev, next, link_trbs, isoc);
>>
>>   		prev = next;
>>   		num_segs--;
>>   	}
>> -	xhci_link_segments(xhci, prev, ring->first_seg, link_trbs);
>> +	xhci_link_segments(xhci, prev, ring->first_seg, link_trbs, isoc);
>>
>>   	if (link_trbs) {
>>   		/* See section 4.9.2.1 and 6.4.4.1 */
>> @@ -229,14 +231,14 @@ void xhci_free_or_cache_endpoint_ring(struct xhci_hcd *xhci,
>>    * pointers to the beginning of the ring.
>>    */
>>   static void xhci_reinit_cached_ring(struct xhci_hcd *xhci,
>> -		struct xhci_ring *ring)
>> +		struct xhci_ring *ring, bool isoc)
>>   {
>>   	struct xhci_segment	*seg = ring->first_seg;
>>   	do {
>>   		memset(seg->trbs, 0,
>>   				sizeof(union xhci_trb)*TRBS_PER_SEGMENT);
>>   		/* All endpoint rings have link TRBs */
>> -		xhci_link_segments(xhci, seg, seg->next, 1);
>> +		xhci_link_segments(xhci, seg, seg->next, 1, isoc);
>>   		seg = seg->next;
>>   	} while (seg != ring->first_seg);
>>   	xhci_initialize_ring_info(ring);
>> @@ -540,7 +542,7 @@ struct xhci_stream_info *xhci_alloc_stream_info(struct xhci_hcd *xhci,
>>   	 */
>>   	for (cur_stream = 1; cur_stream<  num_streams; cur_stream++) {
>>   		stream_info->stream_rings[cur_stream] =
>> -			xhci_ring_alloc(xhci, 1, true, mem_flags);
>> +			xhci_ring_alloc(xhci, 1, true, false, mem_flags);
>>   		cur_ring = stream_info->stream_rings[cur_stream];
>>   		if (!cur_ring)
>>   			goto cleanup_rings;
>> @@ -765,7 +767,7 @@ int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id,
>>   	}
>>
>>   	/* Allocate endpoint 0 ring */
>> -	dev->eps[0].ring = xhci_ring_alloc(xhci, 1, true, flags);
>> +	dev->eps[0].ring = xhci_ring_alloc(xhci, 1, true, false, flags);
>>   	if (!dev->eps[0].ring)
>>   		goto fail;
>>
>> @@ -1175,10 +1177,10 @@ int xhci_endpoint_init(struct xhci_hcd *xhci,
>>   	 */
>>   	if (usb_endpoint_xfer_isoc(&ep->desc))
>>   		virt_dev->eps[ep_index].new_ring =
>> -			xhci_ring_alloc(xhci, 8, true, mem_flags);
>> +			xhci_ring_alloc(xhci, 8, true, true, mem_flags);
>>   	else
>>   		virt_dev->eps[ep_index].new_ring =
>> -			xhci_ring_alloc(xhci, 1, true, mem_flags);
>> +			xhci_ring_alloc(xhci, 1, true, false, mem_flags);
>>   	if (!virt_dev->eps[ep_index].new_ring) {
>>   		/* Attempt to use the ring cache */
>>   		if (virt_dev->num_rings_cached == 0)
>> @@ -1187,7 +1189,8 @@ int xhci_endpoint_init(struct xhci_hcd *xhci,
>>   			virt_dev->ring_cache[virt_dev->num_rings_cached];
>>   		virt_dev->ring_cache[virt_dev->num_rings_cached] = NULL;
>>   		virt_dev->num_rings_cached--;
>> -		xhci_reinit_cached_ring(xhci, virt_dev->eps[ep_index].new_ring);
>> +		xhci_reinit_cached_ring(xhci, virt_dev->eps[ep_index].new_ring,
>> +			usb_endpoint_xfer_isoc(&ep->desc) ? true : false);
>>   	}
>>   	virt_dev->eps[ep_index].skip = false;
>>   	ep_ring = virt_dev->eps[ep_index].new_ring;
>> @@ -2001,7 +2004,7 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags)
>>   		goto fail;
>>
>>   	/* Set up the command ring to have one segments for now. */
>> -	xhci->cmd_ring = xhci_ring_alloc(xhci, 1, true, flags);
>> +	xhci->cmd_ring = xhci_ring_alloc(xhci, 1, true, false, flags);
>>   	if (!xhci->cmd_ring)
>>   		goto fail;
>>   	xhci_dbg(xhci, "Allocated command ring at %p\n", xhci->cmd_ring);
>> @@ -2032,7 +2035,8 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags)
>>   	 * the event ring segment table (ERST).  Section 4.9.3.
>>   	 */
>>   	xhci_dbg(xhci, "// Allocating event ring\n");
>> -	xhci->event_ring = xhci_ring_alloc(xhci, ERST_NUM_SEGS, false, flags);
>> +	xhci->event_ring = xhci_ring_alloc(xhci, ERST_NUM_SEGS, false, false,
>> +						flags);
>>   	if (!xhci->event_ring)
>>   		goto fail;
>>   	if (xhci_check_trb_in_td_math(xhci, flags)<  0)
>> diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
>> index cb16de2..50e7156 100644
>> --- a/drivers/usb/host/xhci-pci.c
>> +++ b/drivers/usb/host/xhci-pci.c
>> @@ -128,6 +128,9 @@ static int xhci_pci_setup(struct usb_hcd *hcd)
>>   	if (pdev->vendor == PCI_VENDOR_ID_NEC)
>>   		xhci->quirks |= XHCI_NEC_HOST;
>>
>> +	if (pdev->vendor == PCI_VENDOR_ID_AMD&&  xhci->hci_version == 0x96)
>> +		xhci->quirks |= XHCI_AMD_0x96_HOST;
>> +
>>   	/* AMD PLL quirk */
>>   	if (pdev->vendor == PCI_VENDOR_ID_AMD&&  usb_amd_find_chipset_info())
>>   		xhci->quirks |= XHCI_AMD_PLL_FIX;
>> diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
>> index d0871ea..e64bd6f 100644
>> --- a/drivers/usb/host/xhci-ring.c
>> +++ b/drivers/usb/host/xhci-ring.c
>> @@ -187,7 +187,7 @@ static void inc_deq(struct xhci_hcd *xhci, struct xhci_ring *ring, bool consumer
>>    *			prepare_transfer()?
>>    */
>>   static void inc_enq(struct xhci_hcd *xhci, struct xhci_ring *ring,
>> -		bool consumer, bool more_trbs_coming)
>> +		bool consumer, bool more_trbs_coming, bool isoc)
>>   {
>>   	u32 chain;
>>   	union xhci_trb *next;
>> @@ -214,11 +214,13 @@ static void inc_enq(struct xhci_hcd *xhci, struct xhci_ring *ring,
>>   				if (!chain&&  !more_trbs_coming)
>>   					break;
>>
>> -				/* If we're not dealing with 0.95 hardware,
>> +				/* If we're not dealing with 0.95 hardware or
>> +				 * isoc rings on AMD 0.96 host,
>>   				 * carry over the chain bit of the previous TRB
>>   				 * (which may mean the chain bit is cleared).
>>   				 */
>> -				if (!xhci_link_trb_quirk(xhci)) {
>> +				if (!(isoc&&  (xhci->quirks&  XHCI_AMD_0x96_HOST))
>> +						&&  !xhci_link_trb_quirk(xhci)) {
>>   					next->link.control&=
>>   						cpu_to_le32(~TRB_CHAIN);
>>   					next->link.control |=
>> @@ -2398,7 +2400,7 @@ irqreturn_t xhci_msi_irq(int irq, struct usb_hcd *hcd)
>>    *			prepare_transfer()?
>>    */
>>   static void queue_trb(struct xhci_hcd *xhci, struct xhci_ring *ring,
>> -		bool consumer, bool more_trbs_coming,
>> +		bool consumer, bool more_trbs_coming, bool isoc,
>>   		u32 field1, u32 field2, u32 field3, u32 field4)
>>   {
>>   	struct xhci_generic_trb *trb;
>> @@ -2408,7 +2410,7 @@ static void queue_trb(struct xhci_hcd *xhci, struct xhci_ring *ring,
>>   	trb->field[1] = cpu_to_le32(field2);
>>   	trb->field[2] = cpu_to_le32(field3);
>>   	trb->field[3] = cpu_to_le32(field4);
>> -	inc_enq(xhci, ring, consumer, more_trbs_coming);
>> +	inc_enq(xhci, ring, consumer, more_trbs_coming, isoc);
>>   }
>>
>>   /*
>> @@ -2416,7 +2418,7 @@ static void queue_trb(struct xhci_hcd *xhci, struct xhci_ring *ring,
>>    * FIXME allocate segments if the ring is full.
>>    */
>>   static int prepare_ring(struct xhci_hcd *xhci, struct xhci_ring *ep_ring,
>> -		u32 ep_state, unsigned int num_trbs, gfp_t mem_flags)
>> +		u32 ep_state, unsigned int num_trbs, bool isoc, gfp_t mem_flags)
>>   {
>>   	/* Make sure the endpoint has been added to xHC schedule */
>>   	switch (ep_state) {
>> @@ -2458,10 +2460,11 @@ static int prepare_ring(struct xhci_hcd *xhci, struct xhci_ring *ep_ring,
>>   		next = ring->enqueue;
>>
>>   		while (last_trb(xhci, ring, ring->enq_seg, next)) {
>> -			/* If we're not dealing with 0.95 hardware,
>> -			 * clear the chain bit.
>> +			/* If we're not dealing with 0.95 hardware or isoc rings
>> +			 * on AMD 0.96 host, clear the chain bit.
>>   			 */
>> -			if (!xhci_link_trb_quirk(xhci))
>> +			if (!xhci_link_trb_quirk(xhci)&&  !(isoc&&
>> +					(xhci->quirks&  XHCI_AMD_0x96_HOST)))
>>   				next->link.control&= cpu_to_le32(~TRB_CHAIN);
>>   			else
>>   				next->link.control |= cpu_to_le32(TRB_CHAIN);
>> @@ -2494,6 +2497,7 @@ static int prepare_transfer(struct xhci_hcd *xhci,
>>   		unsigned int num_trbs,
>>   		struct urb *urb,
>>   		unsigned int td_index,
>> +		bool isoc,
>>   		gfp_t mem_flags)
>>   {
>>   	int ret;
>> @@ -2511,7 +2515,7 @@ static int prepare_transfer(struct xhci_hcd *xhci,
>>
>>   	ret = prepare_ring(xhci, ep_ring,
>>   			   le32_to_cpu(ep_ctx->ep_info)&  EP_STATE_MASK,
>> -			   num_trbs, mem_flags);
>> +			   num_trbs, isoc, mem_flags);
>>   	if (ret)
>>   		return ret;
>>
>> @@ -2734,7 +2738,7 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>>
>>   	trb_buff_len = prepare_transfer(xhci, xhci->devs[slot_id],
>>   			ep_index, urb->stream_id,
>> -			num_trbs, urb, 0, mem_flags);
>> +			num_trbs, urb, 0, false, mem_flags);
>>   	if (trb_buff_len<  0)
>>   		return trb_buff_len;
>>
>> @@ -2829,7 +2833,7 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>>   			more_trbs_coming = true;
>>   		else
>>   			more_trbs_coming = false;
>> -		queue_trb(xhci, ep_ring, false, more_trbs_coming,
>> +		queue_trb(xhci, ep_ring, false, more_trbs_coming, false,
>>   				lower_32_bits(addr),
>>   				upper_32_bits(addr),
>>   				length_field,
>> @@ -2920,7 +2924,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>>
>>   	ret = prepare_transfer(xhci, xhci->devs[slot_id],
>>   			ep_index, urb->stream_id,
>> -			num_trbs, urb, 0, mem_flags);
>> +			num_trbs, urb, 0, false, mem_flags);
>>   	if (ret<  0)
>>   		return ret;
>>
>> @@ -2992,7 +2996,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>>   			more_trbs_coming = true;
>>   		else
>>   			more_trbs_coming = false;
>> -		queue_trb(xhci, ep_ring, false, more_trbs_coming,
>> +		queue_trb(xhci, ep_ring, false, more_trbs_coming, false,
>>   				lower_32_bits(addr),
>>   				upper_32_bits(addr),
>>   				length_field,
>> @@ -3052,7 +3056,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>>   		num_trbs++;
>>   	ret = prepare_transfer(xhci, xhci->devs[slot_id],
>>   			ep_index, urb->stream_id,
>> -			num_trbs, urb, 0, mem_flags);
>> +			num_trbs, urb, 0, false, mem_flags);
>>   	if (ret<  0)
>>   		return ret;
>>
>> @@ -3085,7 +3089,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>>   		}
>>   	}
>>
>> -	queue_trb(xhci, ep_ring, false, true,
>> +	queue_trb(xhci, ep_ring, false, true, false,
>>   		  setup->bRequestType | setup->bRequest<<  8 | le16_to_cpu(setup->wValue)<<  16,
>>   		  le16_to_cpu(setup->wIndex) | le16_to_cpu(setup->wLength)<<  16,
>>   		  TRB_LEN(8) | TRB_INTR_TARGET(0),
>> @@ -3105,7 +3109,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>>   	if (urb->transfer_buffer_length>  0) {
>>   		if (setup->bRequestType&  USB_DIR_IN)
>>   			field |= TRB_DIR_IN;
>> -		queue_trb(xhci, ep_ring, false, true,
>> +		queue_trb(xhci, ep_ring, false, true, false,
>>   				lower_32_bits(urb->transfer_dma),
>>   				upper_32_bits(urb->transfer_dma),
>>   				length_field,
>> @@ -3121,7 +3125,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>>   		field = 0;
>>   	else
>>   		field = TRB_DIR_IN;
>> -	queue_trb(xhci, ep_ring, false, false,
>> +	queue_trb(xhci, ep_ring, false, false, false,
>>   			0,
>>   			0,
>>   			TRB_INTR_TARGET(0),
>> @@ -3270,7 +3274,8 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>>   		trbs_per_td = count_isoc_trbs_needed(xhci, urb, i);
>>
>>   		ret = prepare_transfer(xhci, xhci->devs[slot_id], ep_index,
>> -				urb->stream_id, trbs_per_td, urb, i, mem_flags);
>> +				urb->stream_id, trbs_per_td, urb, i, true,
>> +				mem_flags);
>>   		if (ret<  0) {
>>   			if (i == 0)
>>   				return ret;
>> @@ -3340,7 +3345,7 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>>   				remainder |
>>   				TRB_INTR_TARGET(0);
>>
>> -			queue_trb(xhci, ep_ring, false, more_trbs_coming,
>> +			queue_trb(xhci, ep_ring, false, more_trbs_coming, true,
>>   				lower_32_bits(addr),
>>   				upper_32_bits(addr),
>>   				length_field,
>> @@ -3422,7 +3427,7 @@ int xhci_queue_isoc_tx_prepare(struct xhci_hcd *xhci, gfp_t mem_flags,
>>   	 * Do not insert any td of the urb to the ring if the check failed.
>>   	 */
>>   	ret = prepare_ring(xhci, ep_ring, le32_to_cpu(ep_ctx->ep_info)&  EP_STATE_MASK,
>> -			   num_trbs, mem_flags);
>> +			   num_trbs, true, mem_flags);
>>   	if (ret)
>>   		return ret;
>>
>> @@ -3481,7 +3486,7 @@ static int queue_command(struct xhci_hcd *xhci, u32 field1, u32 field2,
>>   		reserved_trbs++;
>>
>>   	ret = prepare_ring(xhci, xhci->cmd_ring, EP_STATE_RUNNING,
>> -			reserved_trbs, GFP_ATOMIC);
>> +			reserved_trbs, false, GFP_ATOMIC);
>>   	if (ret<  0) {
>>   		xhci_err(xhci, "ERR: No room for command on command ring\n");
>>   		if (command_must_succeed)
>> @@ -3489,8 +3494,8 @@ static int queue_command(struct xhci_hcd *xhci, u32 field1, u32 field2,
>>   					"unfailable commands failed.\n");
>>   		return ret;
>>   	}
>> -	queue_trb(xhci, xhci->cmd_ring, false, false, field1, field2, field3,
>> -			field4 | xhci->cmd_ring->cycle_state);
>> +	queue_trb(xhci, xhci->cmd_ring, false, false, false, field1, field2,
>> +			field3,	field4 | xhci->cmd_ring->cycle_state);
>>   	return 0;
>>   }
>>
>> diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
>> index d8bbf5c..8a98416 100644
>> --- a/drivers/usb/host/xhci.h
>> +++ b/drivers/usb/host/xhci.h
>> @@ -1311,6 +1311,7 @@ struct xhci_hcd {
>>   #define XHCI_EP_LIMIT_QUIRK	(1<<  5)
>>   #define XHCI_BROKEN_MSI		(1<<  6)
>>   #define XHCI_RESET_ON_RESUME	(1<<  7)
>> +#define XHCI_AMD_0x96_HOST	(1<<  9)
>>   	unsigned int		num_active_eps;
>>   	unsigned int		limit_active_eps;
>>   	/* There are two roothubs to keep track of bus suspend info for */
>
> If its going to stable it sounds suitable for us.  Sounds like it could
> cause all sorts of randomness on AMD h/w.  Therefore:
>
> Acked-by: Andy Whitcroft<apw@canonical.com>
>
> -apw
>

I do expect this patch to get dropped in favour of what comes down from 
stable. In the meantime, the opportunity for corruption seemed such that 
we ought to get this in our kernel ASAP. Its possible that the carnage 
could be wider spread then just AMD.

rtg
Tim Gardner - Oct. 15, 2011, 10:51 a.m.
On 10/13/2011 05:42 PM, Tim Gardner wrote:
>  From 931a2ec0f2c74498f861c75c44f973c615285b11 Mon Sep 17 00:00:00 2001
> From: Andiry Xu<andiry.xu@amd.com>
> Date: Fri, 23 Sep 2011 14:19:54 -0700
> Subject: [PATCH] UBUNTU: SAUCE: xHCI: AMD isoc link TRB chain bit quirk
>
> BugLink: http://bugs.launchpad.net/bugs/872811
>
> Setting the chain (CH) bit in the link TRB of isochronous transfer rings
> is required by AMD 0.96 xHCI host controller to successfully transverse
> multi-TRB TD that span through different memory segments.
>
> When a Missed Service Error event occurs, if the chain bit is not set in
> the link TRB and the host skips TDs which just across a link TRB, the
> host may falsely recognize the link TRB as a normal TRB. You can see
> this may cause big trouble - the host does not jump to the right address
> which is pointed by the link TRB, but continue fetching the memory which
> is after the link TRB address, which may not even belong to the host,
> and the result cannot be predicted.
>
> This causes some big problems. Without the former patch I sent: "xHCI:
> prevent infinite loop when processing MSE event", the system may hang.
> With that patch applied, system does not hang, but the host still access
> wrong memory address and isoc transfer will fail. With this patch,
> isochronous transfer works as expected.
>
> This patch should be applied to kernels as old as 2.6.36, which was when
> the first isochronous support was added for the xHCI host controller.
>
> Signed-off-by: Andiry Xu<andiry.xu@amd.com>
> Signed-off-by: Sarah Sharp<sarah.a.sharp@linux.intel.com>
> Cc: stable@kernel.org
> Signed-off-by: Greg Kroah-Hartman<gregkh@suse.de>
>
> cherry-picked from git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git usb-next
> Signed-off-by: Tim Gardner<tim.gardner@canonical.com>
> ---
>   drivers/usb/host/xhci-mem.c  |   32 ++++++++++++++-----------
>   drivers/usb/host/xhci-pci.c  |    3 ++
>   drivers/usb/host/xhci-ring.c |   53 +++++++++++++++++++++++-------------------
>   drivers/usb/host/xhci.h      |    1 +
>   4 files changed, 51 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
> index fcb7f7e..2d671af 100644
> --- a/drivers/usb/host/xhci-mem.c
> +++ b/drivers/usb/host/xhci-mem.c
> @@ -81,7 +81,7 @@ static void xhci_segment_free(struct xhci_hcd *xhci, struct xhci_segment *seg)
>    * related flags, such as End TRB, Toggle Cycle, and no snoop.
>    */
>   static void xhci_link_segments(struct xhci_hcd *xhci, struct xhci_segment *prev,
> -		struct xhci_segment *next, bool link_trbs)
> +		struct xhci_segment *next, bool link_trbs, bool isoc)
>   {
>   	u32 val;
>
> @@ -97,7 +97,9 @@ static void xhci_link_segments(struct xhci_hcd *xhci, struct xhci_segment *prev,
>   		val&= ~TRB_TYPE_BITMASK;
>   		val |= TRB_TYPE(TRB_LINK);
>   		/* Always set the chain bit with 0.95 hardware */
> -		if (xhci_link_trb_quirk(xhci))
> +		/* Set chain bit for isoc rings on AMD 0.96 host */
> +		if (xhci_link_trb_quirk(xhci) ||
> +				(isoc&&  (xhci->quirks&  XHCI_AMD_0x96_HOST)))
>   			val |= TRB_CHAIN;
>   		prev->trbs[TRBS_PER_SEGMENT-1].link.control = cpu_to_le32(val);
>   	}
> @@ -152,7 +154,7 @@ static void xhci_initialize_ring_info(struct xhci_ring *ring)
>    * See section 4.9.1 and figures 15 and 16.
>    */
>   static struct xhci_ring *xhci_ring_alloc(struct xhci_hcd *xhci,
> -		unsigned int num_segs, bool link_trbs, gfp_t flags)
> +		unsigned int num_segs, bool link_trbs, bool isoc, gfp_t flags)
>   {
>   	struct xhci_ring	*ring;
>   	struct xhci_segment	*prev;
> @@ -178,12 +180,12 @@ static struct xhci_ring *xhci_ring_alloc(struct xhci_hcd *xhci,
>   		next = xhci_segment_alloc(xhci, flags);
>   		if (!next)
>   			goto fail;
> -		xhci_link_segments(xhci, prev, next, link_trbs);
> +		xhci_link_segments(xhci, prev, next, link_trbs, isoc);
>
>   		prev = next;
>   		num_segs--;
>   	}
> -	xhci_link_segments(xhci, prev, ring->first_seg, link_trbs);
> +	xhci_link_segments(xhci, prev, ring->first_seg, link_trbs, isoc);
>
>   	if (link_trbs) {
>   		/* See section 4.9.2.1 and 6.4.4.1 */
> @@ -229,14 +231,14 @@ void xhci_free_or_cache_endpoint_ring(struct xhci_hcd *xhci,
>    * pointers to the beginning of the ring.
>    */
>   static void xhci_reinit_cached_ring(struct xhci_hcd *xhci,
> -		struct xhci_ring *ring)
> +		struct xhci_ring *ring, bool isoc)
>   {
>   	struct xhci_segment	*seg = ring->first_seg;
>   	do {
>   		memset(seg->trbs, 0,
>   				sizeof(union xhci_trb)*TRBS_PER_SEGMENT);
>   		/* All endpoint rings have link TRBs */
> -		xhci_link_segments(xhci, seg, seg->next, 1);
> +		xhci_link_segments(xhci, seg, seg->next, 1, isoc);
>   		seg = seg->next;
>   	} while (seg != ring->first_seg);
>   	xhci_initialize_ring_info(ring);
> @@ -540,7 +542,7 @@ struct xhci_stream_info *xhci_alloc_stream_info(struct xhci_hcd *xhci,
>   	 */
>   	for (cur_stream = 1; cur_stream<  num_streams; cur_stream++) {
>   		stream_info->stream_rings[cur_stream] =
> -			xhci_ring_alloc(xhci, 1, true, mem_flags);
> +			xhci_ring_alloc(xhci, 1, true, false, mem_flags);
>   		cur_ring = stream_info->stream_rings[cur_stream];
>   		if (!cur_ring)
>   			goto cleanup_rings;
> @@ -765,7 +767,7 @@ int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id,
>   	}
>
>   	/* Allocate endpoint 0 ring */
> -	dev->eps[0].ring = xhci_ring_alloc(xhci, 1, true, flags);
> +	dev->eps[0].ring = xhci_ring_alloc(xhci, 1, true, false, flags);
>   	if (!dev->eps[0].ring)
>   		goto fail;
>
> @@ -1175,10 +1177,10 @@ int xhci_endpoint_init(struct xhci_hcd *xhci,
>   	 */
>   	if (usb_endpoint_xfer_isoc(&ep->desc))
>   		virt_dev->eps[ep_index].new_ring =
> -			xhci_ring_alloc(xhci, 8, true, mem_flags);
> +			xhci_ring_alloc(xhci, 8, true, true, mem_flags);
>   	else
>   		virt_dev->eps[ep_index].new_ring =
> -			xhci_ring_alloc(xhci, 1, true, mem_flags);
> +			xhci_ring_alloc(xhci, 1, true, false, mem_flags);
>   	if (!virt_dev->eps[ep_index].new_ring) {
>   		/* Attempt to use the ring cache */
>   		if (virt_dev->num_rings_cached == 0)
> @@ -1187,7 +1189,8 @@ int xhci_endpoint_init(struct xhci_hcd *xhci,
>   			virt_dev->ring_cache[virt_dev->num_rings_cached];
>   		virt_dev->ring_cache[virt_dev->num_rings_cached] = NULL;
>   		virt_dev->num_rings_cached--;
> -		xhci_reinit_cached_ring(xhci, virt_dev->eps[ep_index].new_ring);
> +		xhci_reinit_cached_ring(xhci, virt_dev->eps[ep_index].new_ring,
> +			usb_endpoint_xfer_isoc(&ep->desc) ? true : false);
>   	}
>   	virt_dev->eps[ep_index].skip = false;
>   	ep_ring = virt_dev->eps[ep_index].new_ring;
> @@ -2001,7 +2004,7 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags)
>   		goto fail;
>
>   	/* Set up the command ring to have one segments for now. */
> -	xhci->cmd_ring = xhci_ring_alloc(xhci, 1, true, flags);
> +	xhci->cmd_ring = xhci_ring_alloc(xhci, 1, true, false, flags);
>   	if (!xhci->cmd_ring)
>   		goto fail;
>   	xhci_dbg(xhci, "Allocated command ring at %p\n", xhci->cmd_ring);
> @@ -2032,7 +2035,8 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags)
>   	 * the event ring segment table (ERST).  Section 4.9.3.
>   	 */
>   	xhci_dbg(xhci, "// Allocating event ring\n");
> -	xhci->event_ring = xhci_ring_alloc(xhci, ERST_NUM_SEGS, false, flags);
> +	xhci->event_ring = xhci_ring_alloc(xhci, ERST_NUM_SEGS, false, false,
> +						flags);
>   	if (!xhci->event_ring)
>   		goto fail;
>   	if (xhci_check_trb_in_td_math(xhci, flags)<  0)
> diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
> index cb16de2..50e7156 100644
> --- a/drivers/usb/host/xhci-pci.c
> +++ b/drivers/usb/host/xhci-pci.c
> @@ -128,6 +128,9 @@ static int xhci_pci_setup(struct usb_hcd *hcd)
>   	if (pdev->vendor == PCI_VENDOR_ID_NEC)
>   		xhci->quirks |= XHCI_NEC_HOST;
>
> +	if (pdev->vendor == PCI_VENDOR_ID_AMD&&  xhci->hci_version == 0x96)
> +		xhci->quirks |= XHCI_AMD_0x96_HOST;
> +
>   	/* AMD PLL quirk */
>   	if (pdev->vendor == PCI_VENDOR_ID_AMD&&  usb_amd_find_chipset_info())
>   		xhci->quirks |= XHCI_AMD_PLL_FIX;
> diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
> index d0871ea..e64bd6f 100644
> --- a/drivers/usb/host/xhci-ring.c
> +++ b/drivers/usb/host/xhci-ring.c
> @@ -187,7 +187,7 @@ static void inc_deq(struct xhci_hcd *xhci, struct xhci_ring *ring, bool consumer
>    *			prepare_transfer()?
>    */
>   static void inc_enq(struct xhci_hcd *xhci, struct xhci_ring *ring,
> -		bool consumer, bool more_trbs_coming)
> +		bool consumer, bool more_trbs_coming, bool isoc)
>   {
>   	u32 chain;
>   	union xhci_trb *next;
> @@ -214,11 +214,13 @@ static void inc_enq(struct xhci_hcd *xhci, struct xhci_ring *ring,
>   				if (!chain&&  !more_trbs_coming)
>   					break;
>
> -				/* If we're not dealing with 0.95 hardware,
> +				/* If we're not dealing with 0.95 hardware or
> +				 * isoc rings on AMD 0.96 host,
>   				 * carry over the chain bit of the previous TRB
>   				 * (which may mean the chain bit is cleared).
>   				 */
> -				if (!xhci_link_trb_quirk(xhci)) {
> +				if (!(isoc&&  (xhci->quirks&  XHCI_AMD_0x96_HOST))
> +						&&  !xhci_link_trb_quirk(xhci)) {
>   					next->link.control&=
>   						cpu_to_le32(~TRB_CHAIN);
>   					next->link.control |=
> @@ -2398,7 +2400,7 @@ irqreturn_t xhci_msi_irq(int irq, struct usb_hcd *hcd)
>    *			prepare_transfer()?
>    */
>   static void queue_trb(struct xhci_hcd *xhci, struct xhci_ring *ring,
> -		bool consumer, bool more_trbs_coming,
> +		bool consumer, bool more_trbs_coming, bool isoc,
>   		u32 field1, u32 field2, u32 field3, u32 field4)
>   {
>   	struct xhci_generic_trb *trb;
> @@ -2408,7 +2410,7 @@ static void queue_trb(struct xhci_hcd *xhci, struct xhci_ring *ring,
>   	trb->field[1] = cpu_to_le32(field2);
>   	trb->field[2] = cpu_to_le32(field3);
>   	trb->field[3] = cpu_to_le32(field4);
> -	inc_enq(xhci, ring, consumer, more_trbs_coming);
> +	inc_enq(xhci, ring, consumer, more_trbs_coming, isoc);
>   }
>
>   /*
> @@ -2416,7 +2418,7 @@ static void queue_trb(struct xhci_hcd *xhci, struct xhci_ring *ring,
>    * FIXME allocate segments if the ring is full.
>    */
>   static int prepare_ring(struct xhci_hcd *xhci, struct xhci_ring *ep_ring,
> -		u32 ep_state, unsigned int num_trbs, gfp_t mem_flags)
> +		u32 ep_state, unsigned int num_trbs, bool isoc, gfp_t mem_flags)
>   {
>   	/* Make sure the endpoint has been added to xHC schedule */
>   	switch (ep_state) {
> @@ -2458,10 +2460,11 @@ static int prepare_ring(struct xhci_hcd *xhci, struct xhci_ring *ep_ring,
>   		next = ring->enqueue;
>
>   		while (last_trb(xhci, ring, ring->enq_seg, next)) {
> -			/* If we're not dealing with 0.95 hardware,
> -			 * clear the chain bit.
> +			/* If we're not dealing with 0.95 hardware or isoc rings
> +			 * on AMD 0.96 host, clear the chain bit.
>   			 */
> -			if (!xhci_link_trb_quirk(xhci))
> +			if (!xhci_link_trb_quirk(xhci)&&  !(isoc&&
> +					(xhci->quirks&  XHCI_AMD_0x96_HOST)))
>   				next->link.control&= cpu_to_le32(~TRB_CHAIN);
>   			else
>   				next->link.control |= cpu_to_le32(TRB_CHAIN);
> @@ -2494,6 +2497,7 @@ static int prepare_transfer(struct xhci_hcd *xhci,
>   		unsigned int num_trbs,
>   		struct urb *urb,
>   		unsigned int td_index,
> +		bool isoc,
>   		gfp_t mem_flags)
>   {
>   	int ret;
> @@ -2511,7 +2515,7 @@ static int prepare_transfer(struct xhci_hcd *xhci,
>
>   	ret = prepare_ring(xhci, ep_ring,
>   			   le32_to_cpu(ep_ctx->ep_info)&  EP_STATE_MASK,
> -			   num_trbs, mem_flags);
> +			   num_trbs, isoc, mem_flags);
>   	if (ret)
>   		return ret;
>
> @@ -2734,7 +2738,7 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>
>   	trb_buff_len = prepare_transfer(xhci, xhci->devs[slot_id],
>   			ep_index, urb->stream_id,
> -			num_trbs, urb, 0, mem_flags);
> +			num_trbs, urb, 0, false, mem_flags);
>   	if (trb_buff_len<  0)
>   		return trb_buff_len;
>
> @@ -2829,7 +2833,7 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>   			more_trbs_coming = true;
>   		else
>   			more_trbs_coming = false;
> -		queue_trb(xhci, ep_ring, false, more_trbs_coming,
> +		queue_trb(xhci, ep_ring, false, more_trbs_coming, false,
>   				lower_32_bits(addr),
>   				upper_32_bits(addr),
>   				length_field,
> @@ -2920,7 +2924,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>
>   	ret = prepare_transfer(xhci, xhci->devs[slot_id],
>   			ep_index, urb->stream_id,
> -			num_trbs, urb, 0, mem_flags);
> +			num_trbs, urb, 0, false, mem_flags);
>   	if (ret<  0)
>   		return ret;
>
> @@ -2992,7 +2996,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>   			more_trbs_coming = true;
>   		else
>   			more_trbs_coming = false;
> -		queue_trb(xhci, ep_ring, false, more_trbs_coming,
> +		queue_trb(xhci, ep_ring, false, more_trbs_coming, false,
>   				lower_32_bits(addr),
>   				upper_32_bits(addr),
>   				length_field,
> @@ -3052,7 +3056,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>   		num_trbs++;
>   	ret = prepare_transfer(xhci, xhci->devs[slot_id],
>   			ep_index, urb->stream_id,
> -			num_trbs, urb, 0, mem_flags);
> +			num_trbs, urb, 0, false, mem_flags);
>   	if (ret<  0)
>   		return ret;
>
> @@ -3085,7 +3089,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>   		}
>   	}
>
> -	queue_trb(xhci, ep_ring, false, true,
> +	queue_trb(xhci, ep_ring, false, true, false,
>   		  setup->bRequestType | setup->bRequest<<  8 | le16_to_cpu(setup->wValue)<<  16,
>   		  le16_to_cpu(setup->wIndex) | le16_to_cpu(setup->wLength)<<  16,
>   		  TRB_LEN(8) | TRB_INTR_TARGET(0),
> @@ -3105,7 +3109,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>   	if (urb->transfer_buffer_length>  0) {
>   		if (setup->bRequestType&  USB_DIR_IN)
>   			field |= TRB_DIR_IN;
> -		queue_trb(xhci, ep_ring, false, true,
> +		queue_trb(xhci, ep_ring, false, true, false,
>   				lower_32_bits(urb->transfer_dma),
>   				upper_32_bits(urb->transfer_dma),
>   				length_field,
> @@ -3121,7 +3125,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>   		field = 0;
>   	else
>   		field = TRB_DIR_IN;
> -	queue_trb(xhci, ep_ring, false, false,
> +	queue_trb(xhci, ep_ring, false, false, false,
>   			0,
>   			0,
>   			TRB_INTR_TARGET(0),
> @@ -3270,7 +3274,8 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>   		trbs_per_td = count_isoc_trbs_needed(xhci, urb, i);
>
>   		ret = prepare_transfer(xhci, xhci->devs[slot_id], ep_index,
> -				urb->stream_id, trbs_per_td, urb, i, mem_flags);
> +				urb->stream_id, trbs_per_td, urb, i, true,
> +				mem_flags);
>   		if (ret<  0) {
>   			if (i == 0)
>   				return ret;
> @@ -3340,7 +3345,7 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
>   				remainder |
>   				TRB_INTR_TARGET(0);
>
> -			queue_trb(xhci, ep_ring, false, more_trbs_coming,
> +			queue_trb(xhci, ep_ring, false, more_trbs_coming, true,
>   				lower_32_bits(addr),
>   				upper_32_bits(addr),
>   				length_field,
> @@ -3422,7 +3427,7 @@ int xhci_queue_isoc_tx_prepare(struct xhci_hcd *xhci, gfp_t mem_flags,
>   	 * Do not insert any td of the urb to the ring if the check failed.
>   	 */
>   	ret = prepare_ring(xhci, ep_ring, le32_to_cpu(ep_ctx->ep_info)&  EP_STATE_MASK,
> -			   num_trbs, mem_flags);
> +			   num_trbs, true, mem_flags);
>   	if (ret)
>   		return ret;
>
> @@ -3481,7 +3486,7 @@ static int queue_command(struct xhci_hcd *xhci, u32 field1, u32 field2,
>   		reserved_trbs++;
>
>   	ret = prepare_ring(xhci, xhci->cmd_ring, EP_STATE_RUNNING,
> -			reserved_trbs, GFP_ATOMIC);
> +			reserved_trbs, false, GFP_ATOMIC);
>   	if (ret<  0) {
>   		xhci_err(xhci, "ERR: No room for command on command ring\n");
>   		if (command_must_succeed)
> @@ -3489,8 +3494,8 @@ static int queue_command(struct xhci_hcd *xhci, u32 field1, u32 field2,
>   					"unfailable commands failed.\n");
>   		return ret;
>   	}
> -	queue_trb(xhci, xhci->cmd_ring, false, false, field1, field2, field3,
> -			field4 | xhci->cmd_ring->cycle_state);
> +	queue_trb(xhci, xhci->cmd_ring, false, false, false, field1, field2,
> +			field3,	field4 | xhci->cmd_ring->cycle_state);
>   	return 0;
>   }
>
> diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
> index d8bbf5c..8a98416 100644
> --- a/drivers/usb/host/xhci.h
> +++ b/drivers/usb/host/xhci.h
> @@ -1311,6 +1311,7 @@ struct xhci_hcd {
>   #define XHCI_EP_LIMIT_QUIRK	(1<<  5)
>   #define XHCI_BROKEN_MSI		(1<<  6)
>   #define XHCI_RESET_ON_RESUME	(1<<  7)
> +#define XHCI_AMD_0x96_HOST	(1<<  9)
>   	unsigned int		num_active_eps;
>   	unsigned int		limit_active_eps;
>   	/* There are two roothubs to keep track of bus suspend info for */

Patch

diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index fcb7f7e..2d671af 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -81,7 +81,7 @@  static void xhci_segment_free(struct xhci_hcd *xhci, struct xhci_segment *seg)
  * related flags, such as End TRB, Toggle Cycle, and no snoop.
  */
 static void xhci_link_segments(struct xhci_hcd *xhci, struct xhci_segment *prev,
-		struct xhci_segment *next, bool link_trbs)
+		struct xhci_segment *next, bool link_trbs, bool isoc)
 {
 	u32 val;
 
@@ -97,7 +97,9 @@  static void xhci_link_segments(struct xhci_hcd *xhci, struct xhci_segment *prev,
 		val &= ~TRB_TYPE_BITMASK;
 		val |= TRB_TYPE(TRB_LINK);
 		/* Always set the chain bit with 0.95 hardware */
-		if (xhci_link_trb_quirk(xhci))
+		/* Set chain bit for isoc rings on AMD 0.96 host */
+		if (xhci_link_trb_quirk(xhci) ||
+				(isoc && (xhci->quirks & XHCI_AMD_0x96_HOST)))
 			val |= TRB_CHAIN;
 		prev->trbs[TRBS_PER_SEGMENT-1].link.control = cpu_to_le32(val);
 	}
@@ -152,7 +154,7 @@  static void xhci_initialize_ring_info(struct xhci_ring *ring)
  * See section 4.9.1 and figures 15 and 16.
  */
 static struct xhci_ring *xhci_ring_alloc(struct xhci_hcd *xhci,
-		unsigned int num_segs, bool link_trbs, gfp_t flags)
+		unsigned int num_segs, bool link_trbs, bool isoc, gfp_t flags)
 {
 	struct xhci_ring	*ring;
 	struct xhci_segment	*prev;
@@ -178,12 +180,12 @@  static struct xhci_ring *xhci_ring_alloc(struct xhci_hcd *xhci,
 		next = xhci_segment_alloc(xhci, flags);
 		if (!next)
 			goto fail;
-		xhci_link_segments(xhci, prev, next, link_trbs);
+		xhci_link_segments(xhci, prev, next, link_trbs, isoc);
 
 		prev = next;
 		num_segs--;
 	}
-	xhci_link_segments(xhci, prev, ring->first_seg, link_trbs);
+	xhci_link_segments(xhci, prev, ring->first_seg, link_trbs, isoc);
 
 	if (link_trbs) {
 		/* See section 4.9.2.1 and 6.4.4.1 */
@@ -229,14 +231,14 @@  void xhci_free_or_cache_endpoint_ring(struct xhci_hcd *xhci,
  * pointers to the beginning of the ring.
  */
 static void xhci_reinit_cached_ring(struct xhci_hcd *xhci,
-		struct xhci_ring *ring)
+		struct xhci_ring *ring, bool isoc)
 {
 	struct xhci_segment	*seg = ring->first_seg;
 	do {
 		memset(seg->trbs, 0,
 				sizeof(union xhci_trb)*TRBS_PER_SEGMENT);
 		/* All endpoint rings have link TRBs */
-		xhci_link_segments(xhci, seg, seg->next, 1);
+		xhci_link_segments(xhci, seg, seg->next, 1, isoc);
 		seg = seg->next;
 	} while (seg != ring->first_seg);
 	xhci_initialize_ring_info(ring);
@@ -540,7 +542,7 @@  struct xhci_stream_info *xhci_alloc_stream_info(struct xhci_hcd *xhci,
 	 */
 	for (cur_stream = 1; cur_stream < num_streams; cur_stream++) {
 		stream_info->stream_rings[cur_stream] =
-			xhci_ring_alloc(xhci, 1, true, mem_flags);
+			xhci_ring_alloc(xhci, 1, true, false, mem_flags);
 		cur_ring = stream_info->stream_rings[cur_stream];
 		if (!cur_ring)
 			goto cleanup_rings;
@@ -765,7 +767,7 @@  int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id,
 	}
 
 	/* Allocate endpoint 0 ring */
-	dev->eps[0].ring = xhci_ring_alloc(xhci, 1, true, flags);
+	dev->eps[0].ring = xhci_ring_alloc(xhci, 1, true, false, flags);
 	if (!dev->eps[0].ring)
 		goto fail;
 
@@ -1175,10 +1177,10 @@  int xhci_endpoint_init(struct xhci_hcd *xhci,
 	 */
 	if (usb_endpoint_xfer_isoc(&ep->desc))
 		virt_dev->eps[ep_index].new_ring =
-			xhci_ring_alloc(xhci, 8, true, mem_flags);
+			xhci_ring_alloc(xhci, 8, true, true, mem_flags);
 	else
 		virt_dev->eps[ep_index].new_ring =
-			xhci_ring_alloc(xhci, 1, true, mem_flags);
+			xhci_ring_alloc(xhci, 1, true, false, mem_flags);
 	if (!virt_dev->eps[ep_index].new_ring) {
 		/* Attempt to use the ring cache */
 		if (virt_dev->num_rings_cached == 0)
@@ -1187,7 +1189,8 @@  int xhci_endpoint_init(struct xhci_hcd *xhci,
 			virt_dev->ring_cache[virt_dev->num_rings_cached];
 		virt_dev->ring_cache[virt_dev->num_rings_cached] = NULL;
 		virt_dev->num_rings_cached--;
-		xhci_reinit_cached_ring(xhci, virt_dev->eps[ep_index].new_ring);
+		xhci_reinit_cached_ring(xhci, virt_dev->eps[ep_index].new_ring,
+			usb_endpoint_xfer_isoc(&ep->desc) ? true : false);
 	}
 	virt_dev->eps[ep_index].skip = false;
 	ep_ring = virt_dev->eps[ep_index].new_ring;
@@ -2001,7 +2004,7 @@  int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags)
 		goto fail;
 
 	/* Set up the command ring to have one segments for now. */
-	xhci->cmd_ring = xhci_ring_alloc(xhci, 1, true, flags);
+	xhci->cmd_ring = xhci_ring_alloc(xhci, 1, true, false, flags);
 	if (!xhci->cmd_ring)
 		goto fail;
 	xhci_dbg(xhci, "Allocated command ring at %p\n", xhci->cmd_ring);
@@ -2032,7 +2035,8 @@  int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags)
 	 * the event ring segment table (ERST).  Section 4.9.3.
 	 */
 	xhci_dbg(xhci, "// Allocating event ring\n");
-	xhci->event_ring = xhci_ring_alloc(xhci, ERST_NUM_SEGS, false, flags);
+	xhci->event_ring = xhci_ring_alloc(xhci, ERST_NUM_SEGS, false, false,
+						flags);
 	if (!xhci->event_ring)
 		goto fail;
 	if (xhci_check_trb_in_td_math(xhci, flags) < 0)
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index cb16de2..50e7156 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -128,6 +128,9 @@  static int xhci_pci_setup(struct usb_hcd *hcd)
 	if (pdev->vendor == PCI_VENDOR_ID_NEC)
 		xhci->quirks |= XHCI_NEC_HOST;
 
+	if (pdev->vendor == PCI_VENDOR_ID_AMD && xhci->hci_version == 0x96)
+		xhci->quirks |= XHCI_AMD_0x96_HOST;
+
 	/* AMD PLL quirk */
 	if (pdev->vendor == PCI_VENDOR_ID_AMD && usb_amd_find_chipset_info())
 		xhci->quirks |= XHCI_AMD_PLL_FIX;
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index d0871ea..e64bd6f 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -187,7 +187,7 @@  static void inc_deq(struct xhci_hcd *xhci, struct xhci_ring *ring, bool consumer
  *			prepare_transfer()?
  */
 static void inc_enq(struct xhci_hcd *xhci, struct xhci_ring *ring,
-		bool consumer, bool more_trbs_coming)
+		bool consumer, bool more_trbs_coming, bool isoc)
 {
 	u32 chain;
 	union xhci_trb *next;
@@ -214,11 +214,13 @@  static void inc_enq(struct xhci_hcd *xhci, struct xhci_ring *ring,
 				if (!chain && !more_trbs_coming)
 					break;
 
-				/* If we're not dealing with 0.95 hardware,
+				/* If we're not dealing with 0.95 hardware or
+				 * isoc rings on AMD 0.96 host,
 				 * carry over the chain bit of the previous TRB
 				 * (which may mean the chain bit is cleared).
 				 */
-				if (!xhci_link_trb_quirk(xhci)) {
+				if (!(isoc && (xhci->quirks & XHCI_AMD_0x96_HOST))
+						&& !xhci_link_trb_quirk(xhci)) {
 					next->link.control &=
 						cpu_to_le32(~TRB_CHAIN);
 					next->link.control |=
@@ -2398,7 +2400,7 @@  irqreturn_t xhci_msi_irq(int irq, struct usb_hcd *hcd)
  *			prepare_transfer()?
  */
 static void queue_trb(struct xhci_hcd *xhci, struct xhci_ring *ring,
-		bool consumer, bool more_trbs_coming,
+		bool consumer, bool more_trbs_coming, bool isoc,
 		u32 field1, u32 field2, u32 field3, u32 field4)
 {
 	struct xhci_generic_trb *trb;
@@ -2408,7 +2410,7 @@  static void queue_trb(struct xhci_hcd *xhci, struct xhci_ring *ring,
 	trb->field[1] = cpu_to_le32(field2);
 	trb->field[2] = cpu_to_le32(field3);
 	trb->field[3] = cpu_to_le32(field4);
-	inc_enq(xhci, ring, consumer, more_trbs_coming);
+	inc_enq(xhci, ring, consumer, more_trbs_coming, isoc);
 }
 
 /*
@@ -2416,7 +2418,7 @@  static void queue_trb(struct xhci_hcd *xhci, struct xhci_ring *ring,
  * FIXME allocate segments if the ring is full.
  */
 static int prepare_ring(struct xhci_hcd *xhci, struct xhci_ring *ep_ring,
-		u32 ep_state, unsigned int num_trbs, gfp_t mem_flags)
+		u32 ep_state, unsigned int num_trbs, bool isoc, gfp_t mem_flags)
 {
 	/* Make sure the endpoint has been added to xHC schedule */
 	switch (ep_state) {
@@ -2458,10 +2460,11 @@  static int prepare_ring(struct xhci_hcd *xhci, struct xhci_ring *ep_ring,
 		next = ring->enqueue;
 
 		while (last_trb(xhci, ring, ring->enq_seg, next)) {
-			/* If we're not dealing with 0.95 hardware,
-			 * clear the chain bit.
+			/* If we're not dealing with 0.95 hardware or isoc rings
+			 * on AMD 0.96 host, clear the chain bit.
 			 */
-			if (!xhci_link_trb_quirk(xhci))
+			if (!xhci_link_trb_quirk(xhci) && !(isoc &&
+					(xhci->quirks & XHCI_AMD_0x96_HOST)))
 				next->link.control &= cpu_to_le32(~TRB_CHAIN);
 			else
 				next->link.control |= cpu_to_le32(TRB_CHAIN);
@@ -2494,6 +2497,7 @@  static int prepare_transfer(struct xhci_hcd *xhci,
 		unsigned int num_trbs,
 		struct urb *urb,
 		unsigned int td_index,
+		bool isoc,
 		gfp_t mem_flags)
 {
 	int ret;
@@ -2511,7 +2515,7 @@  static int prepare_transfer(struct xhci_hcd *xhci,
 
 	ret = prepare_ring(xhci, ep_ring,
 			   le32_to_cpu(ep_ctx->ep_info) & EP_STATE_MASK,
-			   num_trbs, mem_flags);
+			   num_trbs, isoc, mem_flags);
 	if (ret)
 		return ret;
 
@@ -2734,7 +2738,7 @@  static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
 
 	trb_buff_len = prepare_transfer(xhci, xhci->devs[slot_id],
 			ep_index, urb->stream_id,
-			num_trbs, urb, 0, mem_flags);
+			num_trbs, urb, 0, false, mem_flags);
 	if (trb_buff_len < 0)
 		return trb_buff_len;
 
@@ -2829,7 +2833,7 @@  static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
 			more_trbs_coming = true;
 		else
 			more_trbs_coming = false;
-		queue_trb(xhci, ep_ring, false, more_trbs_coming,
+		queue_trb(xhci, ep_ring, false, more_trbs_coming, false,
 				lower_32_bits(addr),
 				upper_32_bits(addr),
 				length_field,
@@ -2920,7 +2924,7 @@  int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
 
 	ret = prepare_transfer(xhci, xhci->devs[slot_id],
 			ep_index, urb->stream_id,
-			num_trbs, urb, 0, mem_flags);
+			num_trbs, urb, 0, false, mem_flags);
 	if (ret < 0)
 		return ret;
 
@@ -2992,7 +2996,7 @@  int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
 			more_trbs_coming = true;
 		else
 			more_trbs_coming = false;
-		queue_trb(xhci, ep_ring, false, more_trbs_coming,
+		queue_trb(xhci, ep_ring, false, more_trbs_coming, false,
 				lower_32_bits(addr),
 				upper_32_bits(addr),
 				length_field,
@@ -3052,7 +3056,7 @@  int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
 		num_trbs++;
 	ret = prepare_transfer(xhci, xhci->devs[slot_id],
 			ep_index, urb->stream_id,
-			num_trbs, urb, 0, mem_flags);
+			num_trbs, urb, 0, false, mem_flags);
 	if (ret < 0)
 		return ret;
 
@@ -3085,7 +3089,7 @@  int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
 		}
 	}
 
-	queue_trb(xhci, ep_ring, false, true,
+	queue_trb(xhci, ep_ring, false, true, false,
 		  setup->bRequestType | setup->bRequest << 8 | le16_to_cpu(setup->wValue) << 16,
 		  le16_to_cpu(setup->wIndex) | le16_to_cpu(setup->wLength) << 16,
 		  TRB_LEN(8) | TRB_INTR_TARGET(0),
@@ -3105,7 +3109,7 @@  int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
 	if (urb->transfer_buffer_length > 0) {
 		if (setup->bRequestType & USB_DIR_IN)
 			field |= TRB_DIR_IN;
-		queue_trb(xhci, ep_ring, false, true,
+		queue_trb(xhci, ep_ring, false, true, false,
 				lower_32_bits(urb->transfer_dma),
 				upper_32_bits(urb->transfer_dma),
 				length_field,
@@ -3121,7 +3125,7 @@  int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
 		field = 0;
 	else
 		field = TRB_DIR_IN;
-	queue_trb(xhci, ep_ring, false, false,
+	queue_trb(xhci, ep_ring, false, false, false,
 			0,
 			0,
 			TRB_INTR_TARGET(0),
@@ -3270,7 +3274,8 @@  static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
 		trbs_per_td = count_isoc_trbs_needed(xhci, urb, i);
 
 		ret = prepare_transfer(xhci, xhci->devs[slot_id], ep_index,
-				urb->stream_id, trbs_per_td, urb, i, mem_flags);
+				urb->stream_id, trbs_per_td, urb, i, true,
+				mem_flags);
 		if (ret < 0) {
 			if (i == 0)
 				return ret;
@@ -3340,7 +3345,7 @@  static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
 				remainder |
 				TRB_INTR_TARGET(0);
 
-			queue_trb(xhci, ep_ring, false, more_trbs_coming,
+			queue_trb(xhci, ep_ring, false, more_trbs_coming, true,
 				lower_32_bits(addr),
 				upper_32_bits(addr),
 				length_field,
@@ -3422,7 +3427,7 @@  int xhci_queue_isoc_tx_prepare(struct xhci_hcd *xhci, gfp_t mem_flags,
 	 * Do not insert any td of the urb to the ring if the check failed.
 	 */
 	ret = prepare_ring(xhci, ep_ring, le32_to_cpu(ep_ctx->ep_info) & EP_STATE_MASK,
-			   num_trbs, mem_flags);
+			   num_trbs, true, mem_flags);
 	if (ret)
 		return ret;
 
@@ -3481,7 +3486,7 @@  static int queue_command(struct xhci_hcd *xhci, u32 field1, u32 field2,
 		reserved_trbs++;
 
 	ret = prepare_ring(xhci, xhci->cmd_ring, EP_STATE_RUNNING,
-			reserved_trbs, GFP_ATOMIC);
+			reserved_trbs, false, GFP_ATOMIC);
 	if (ret < 0) {
 		xhci_err(xhci, "ERR: No room for command on command ring\n");
 		if (command_must_succeed)
@@ -3489,8 +3494,8 @@  static int queue_command(struct xhci_hcd *xhci, u32 field1, u32 field2,
 					"unfailable commands failed.\n");
 		return ret;
 	}
-	queue_trb(xhci, xhci->cmd_ring, false, false, field1, field2, field3,
-			field4 | xhci->cmd_ring->cycle_state);
+	queue_trb(xhci, xhci->cmd_ring, false, false, false, field1, field2,
+			field3,	field4 | xhci->cmd_ring->cycle_state);
 	return 0;
 }
 
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index d8bbf5c..8a98416 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1311,6 +1311,7 @@  struct xhci_hcd {
 #define XHCI_EP_LIMIT_QUIRK	(1 << 5)
 #define XHCI_BROKEN_MSI		(1 << 6)
 #define XHCI_RESET_ON_RESUME	(1 << 7)
+#define XHCI_AMD_0x96_HOST	(1 << 9)
 	unsigned int		num_active_eps;
 	unsigned int		limit_active_eps;
 	/* There are two roothubs to keep track of bus suspend info for */