diff mbox

xhci: fix missing keys from keyboard

Message ID 1461873789-9269-1-git-send-email-nikunj@linux.vnet.ibm.com
State Superseded
Headers show

Commit Message

Nikunj A Dadhania April 28, 2016, 8:03 p.m. UTC
Current handling of the keyboard polling was very slow and
keys were getting dropped. Done following for fixing this:

* Use multiple buffers per TRB
* Allocate buffers in xhci according to the number of TRBS.

This reduces the delay of key? processing by getting rid of wait in 
the polling routine. 

Reported-by: Dinar Valeev <k0da@opensuse.org>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---

Dinar, can you give this patch a test as well, logic is same, 
some polishing and got rid of magic numbers.

 lib/libusb/usb-xhci.c | 52 +++++++++++++++++++++++++++++++++++++++------------
 lib/libusb/usb-xhci.h |  2 ++
 2 files changed, 42 insertions(+), 12 deletions(-)

Comments

Thomas Huth April 29, 2016, 7:24 a.m. UTC | #1
On 28.04.2016 22:03, Nikunj A Dadhania wrote:
> Current handling of the keyboard polling was very slow and
> keys were getting dropped. Done following for fixing this:
> 
> * Use multiple buffers per TRB
> * Allocate buffers in xhci according to the number of TRBS.
> 
> This reduces the delay of key? processing by getting rid of wait in 
> the polling routine. 
> 
> Reported-by: Dinar Valeev <k0da@opensuse.org>
> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> ---
> 
> Dinar, can you give this patch a test as well, logic is same, 
> some polishing and got rid of magic numbers.
> 
>  lib/libusb/usb-xhci.c | 52 +++++++++++++++++++++++++++++++++++++++------------
>  lib/libusb/usb-xhci.h |  2 ++
>  2 files changed, 42 insertions(+), 12 deletions(-)

I just tried your patch, and this fixes my issue with XHCI keyboard as
well! Thanks!

Tested-by: Thomas Huth <thuth@redhat.com>
Dinar Valeev April 29, 2016, 11:58 a.m. UTC | #2
Tested-by: Dinar Valeev <dvaleev@suse.com>

On Thu, Apr 28, 2016 at 10:03 PM, Nikunj A Dadhania
<nikunj@linux.vnet.ibm.com> wrote:
> Current handling of the keyboard polling was very slow and
> keys were getting dropped. Done following for fixing this:
>
> * Use multiple buffers per TRB
> * Allocate buffers in xhci according to the number of TRBS.
>
> This reduces the delay of key? processing by getting rid of wait in
> the polling routine.
>
> Reported-by: Dinar Valeev <k0da@opensuse.org>
> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> ---
>
> Dinar, can you give this patch a test as well, logic is same,
> some polishing and got rid of magic numbers.
>
>  lib/libusb/usb-xhci.c | 52 +++++++++++++++++++++++++++++++++++++++------------
>  lib/libusb/usb-xhci.h |  2 ++
>  2 files changed, 42 insertions(+), 12 deletions(-)
>
> diff --git a/lib/libusb/usb-xhci.c b/lib/libusb/usb-xhci.c
> index 858cd12..70619c9 100644
> --- a/lib/libusb/usb-xhci.c
> +++ b/lib/libusb/usb-xhci.c
> @@ -238,7 +238,7 @@ static uint64_t xhci_poll_event(struct xhci_hcd *xhcd,
>         flags = le32_to_cpu(event->flags);
>
>         dprintf("Reading from event ptr %p %08x\n", event, flags);
> -       time = SLOF_GetTimer() + USB_TIMEOUT;
> +       time = SLOF_GetTimer() + ((event_type == XHCI_POLL_NO_WAIT)? 0: USB_TIMEOUT);
>
>         while ((flags & TRB_CYCLE_STATE) != xhcd->ering.cycle_state) {
>                 mb();
> @@ -1147,11 +1147,37 @@ static inline void *xhci_get_trb(struct xhci_seg *seg)
>         return (void *)enq;
>  }
>
> +static inline void *xhci_get_trb_deq(struct xhci_seg *seg)
> +{
> +       uint64_t val, deq;
> +       int index;
> +
> +       deq = val = seg->deq;
> +       val = val + XHCI_TRB_SIZE;
> +       index = (deq - (uint64_t)seg->trbs) / XHCI_TRB_SIZE + 1;
> +       dprintf("%s: enq %llx, val %llx %x\n", __func__, enq, val, index);
> +       /* TRBs being a cyclic buffer, here we cycle back to beginning. */
> +       if (index == (seg->size - 1)) {
> +               dprintf("%s: rounding \n", __func__);
> +               seg->deq = (uint64_t)seg->trbs;
> +               mb();
> +       }
> +       else {
> +               seg->deq = val;
> +       }
> +       return (void *)deq;
> +}
> +
>  static uint64_t xhci_get_trb_phys(struct xhci_seg *seg, uint64_t trb)
>  {
>         return seg->trbs_dma + (trb - (uint64_t)seg->trbs);
>  }
>
> +static uint32_t xhci_trb_get_index(struct xhci_seg *seg, struct xhci_transfer_trb *trb)
> +{
> +       return trb - (struct xhci_transfer_trb *)seg->trbs;
> +}
> +
>  static int usb_kb = false;
>  static int xhci_transfer_bulk(struct usb_pipe *pipe, void *td, void *td_phys,
>                         void *data, int datalen)
> @@ -1331,9 +1357,9 @@ static int xhci_get_pipe_intr(struct usb_pipe *pipe,
>                 xhci_init_seg(seg, XHCI_EVENT_TRBS_SIZE, TYPE_BULK);
>         }
>
> -       xpipe->buf = buf;
> -       xpipe->buf_phys = SLOF_dma_map_in(buf, len, false);
> -       xpipe->buflen = len;
> +       xpipe->buflen = pipe->mps * XHCI_INTR_TRBS_SIZE/(sizeof(struct xhci_transfer_trb));
> +       xpipe->buf = SLOF_dma_alloc(xpipe->buflen);
> +       xpipe->buf_phys = SLOF_dma_map_in(xpipe->buf, xpipe->buflen, false);
>
>         ctrl = xhci_get_control_ctx(&xdev->in_ctx);
>         x_epno = xhci_get_epno(pipe);
> @@ -1349,7 +1375,8 @@ static int xhci_get_pipe_intr(struct usb_pipe *pipe,
>         xpipe->seg = seg;
>
>         trb = xhci_get_trb(seg);
> -       fill_normal_trb(trb, (void *)xpipe->buf_phys, pipe->mps);
> +       buf = (char *)(xpipe->buf_phys + xhci_trb_get_index(seg, trb) * pipe->mps);
> +       fill_normal_trb(trb, (void *)buf, pipe->mps);
>         return true;
>  }
>
> @@ -1411,6 +1438,7 @@ static void xhci_put_pipe(struct usb_pipe *pipe)
>         } else if (pipe->type == USB_EP_TYPE_INTR) {
>                 xpipe = xhci_pipe_get_xpipe(pipe);
>                 SLOF_dma_map_out(xpipe->buf_phys, xpipe->buf, xpipe->buflen);
> +               SLOF_dma_free(xpipe->buf, xpipe->buflen);
>                 xpipe->seg = NULL;
>         }
>         if (xhcd->end)
> @@ -1448,26 +1476,26 @@ static int xhci_poll_intr(struct usb_pipe *pipe, uint8_t *data)
>         if (usb_kb == true) {
>                 /* This event was consumed by bulk transfer */
>                 usb_kb = false;
> +               xhci_get_trb_deq(seg);
>                 goto skip_poll;
>         }
> -       buf = xpipe->buf;
> -       memset(buf, 0, 8);
>
> -       mb();
>         /* Ring the doorbell - x_epno */
>         dbr = xhcd->db_regs;
>         write_reg32(&dbr->db[xdev->slot_id], x_epno);
> -       if (!xhci_poll_event(xhcd, 0)) {
> -               printf("poll intr failed\n");
> +       if (!xhci_poll_event(xhcd, XHCI_POLL_NO_WAIT)) {
>                 return 0;
>         }
>         mb();
> +       trb = xhci_get_trb_deq(seg);
> +       buf = xpipe->buf + xhci_trb_get_index(seg, trb) * pipe->mps;
>         memcpy(data, buf, 8);
> +       memset(buf, 0, 8);
>
>  skip_poll:
>         trb = xhci_get_trb(seg);
> -       fill_normal_trb(trb, (void *)xpipe->buf_phys, pipe->mps);
> -       mb();
> +       buf = (uint8_t *)(xpipe->buf_phys + xhci_trb_get_index(seg, trb) * pipe->mps);
> +       fill_normal_trb(trb, (void *)buf, pipe->mps);
>         return ret;
>  }
>
> diff --git a/lib/libusb/usb-xhci.h b/lib/libusb/usb-xhci.h
> index 3fc7e78..8172a37 100644
> --- a/lib/libusb/usb-xhci.h
> +++ b/lib/libusb/usb-xhci.h
> @@ -269,6 +269,8 @@ struct xhci_seg {
>  #define XHCI_INTR_TRBS_SIZE    4096
>  #define XHCI_ERST_NUM_SEGS     1
>
> +#define XHCI_POLL_NO_WAIT      1
> +
>  #define XHCI_MAX_BULK_SIZE    0xF000
>
>  struct xhci_erst_entry {
> --
> 2.5.5
>
Alexey Kardashevskiy May 2, 2016, 3:39 a.m. UTC | #3
On 04/29/2016 06:03 AM, Nikunj A Dadhania wrote:
> Current handling of the keyboard polling was very slow and
> keys were getting dropped. Done following for fixing this:
>
> * Use multiple buffers per TRB
> * Allocate buffers in xhci according to the number of TRBS.
>
> This reduces the delay of key? processing by getting rid of wait in
> the polling routine.
>
> Reported-by: Dinar Valeev <k0da@opensuse.org>
> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> ---
>
> Dinar, can you give this patch a test as well, logic is same,
> some polishing and got rid of magic numbers.
>
>  lib/libusb/usb-xhci.c | 52 +++++++++++++++++++++++++++++++++++++++------------
>  lib/libusb/usb-xhci.h |  2 ++
>  2 files changed, 42 insertions(+), 12 deletions(-)
>
> diff --git a/lib/libusb/usb-xhci.c b/lib/libusb/usb-xhci.c
> index 858cd12..70619c9 100644
> --- a/lib/libusb/usb-xhci.c
> +++ b/lib/libusb/usb-xhci.c
> @@ -238,7 +238,7 @@ static uint64_t xhci_poll_event(struct xhci_hcd *xhcd,
>  	flags = le32_to_cpu(event->flags);
>
>  	dprintf("Reading from event ptr %p %08x\n", event, flags);
> -	time = SLOF_GetTimer() + USB_TIMEOUT;
> +	time = SLOF_GetTimer() + ((event_type == XHCI_POLL_NO_WAIT)? 0: USB_TIMEOUT);
>
>  	while ((flags & TRB_CYCLE_STATE) != xhcd->ering.cycle_state) {
>  		mb();
> @@ -1147,11 +1147,37 @@ static inline void *xhci_get_trb(struct xhci_seg *seg)
>  	return (void *)enq;
>  }
>
> +static inline void *xhci_get_trb_deq(struct xhci_seg *seg)
> +{
> +	uint64_t val, deq;
> +	int index;
> +
> +	deq = val = seg->deq;
> +	val = val + XHCI_TRB_SIZE;

Nit:

deq = seq->deq;
val = deq + XHCI_TRB_SIZE;

and s/val/deq_next/ ?

?


> +	index = (deq - (uint64_t)seg->trbs) / XHCI_TRB_SIZE + 1;
> +	dprintf("%s: enq %llx, val %llx %x\n", __func__, enq, val, index);
> +	/* TRBs being a cyclic buffer, here we cycle back to beginning. */
> +	if (index == (seg->size - 1)) {
> +		dprintf("%s: rounding \n", __func__);
> +		seg->deq = (uint64_t)seg->trbs;
> +		mb();


mb() just here, not in the "else" branch?


> +	}
> +	else {
> +		seg->deq = val;
> +	}
> +	return (void *)deq;
> +}
> +
>  static uint64_t xhci_get_trb_phys(struct xhci_seg *seg, uint64_t trb)
>  {
>  	return seg->trbs_dma + (trb - (uint64_t)seg->trbs);
>  }
>
> +static uint32_t xhci_trb_get_index(struct xhci_seg *seg, struct xhci_transfer_trb *trb)
> +{
> +	return trb - (struct xhci_transfer_trb *)seg->trbs;
> +}
> +
>  static int usb_kb = false;
>  static int xhci_transfer_bulk(struct usb_pipe *pipe, void *td, void *td_phys,
>  			void *data, int datalen)
> @@ -1331,9 +1357,9 @@ static int xhci_get_pipe_intr(struct usb_pipe *pipe,
>  		xhci_init_seg(seg, XHCI_EVENT_TRBS_SIZE, TYPE_BULK);
>  	}
>
> -	xpipe->buf = buf;
> -	xpipe->buf_phys = SLOF_dma_map_in(buf, len, false);
> -	xpipe->buflen = len;
> +	xpipe->buflen = pipe->mps * XHCI_INTR_TRBS_SIZE/(sizeof(struct xhci_transfer_trb));
> +	xpipe->buf = SLOF_dma_alloc(xpipe->buflen);
> +	xpipe->buf_phys = SLOF_dma_map_in(xpipe->buf, xpipe->buflen, false);
>
>  	ctrl = xhci_get_control_ctx(&xdev->in_ctx);
>  	x_epno = xhci_get_epno(pipe);
> @@ -1349,7 +1375,8 @@ static int xhci_get_pipe_intr(struct usb_pipe *pipe,
>  	xpipe->seg = seg;
>
>  	trb = xhci_get_trb(seg);
> -	fill_normal_trb(trb, (void *)xpipe->buf_phys, pipe->mps);
> +	buf = (char *)(xpipe->buf_phys + xhci_trb_get_index(seg, trb) * pipe->mps);
> +	fill_normal_trb(trb, (void *)buf, pipe->mps);
>  	return true;
>  }
>
> @@ -1411,6 +1438,7 @@ static void xhci_put_pipe(struct usb_pipe *pipe)
>  	} else if (pipe->type == USB_EP_TYPE_INTR) {
>  		xpipe = xhci_pipe_get_xpipe(pipe);
>  		SLOF_dma_map_out(xpipe->buf_phys, xpipe->buf, xpipe->buflen);
> +		SLOF_dma_free(xpipe->buf, xpipe->buflen);
>  		xpipe->seg = NULL;
>  	}
>  	if (xhcd->end)
> @@ -1448,26 +1476,26 @@ static int xhci_poll_intr(struct usb_pipe *pipe, uint8_t *data)
>  	if (usb_kb == true) {
>  		/* This event was consumed by bulk transfer */
>  		usb_kb = false;
> +		xhci_get_trb_deq(seg);
>  		goto skip_poll;
>  	}
> -	buf = xpipe->buf;
> -	memset(buf, 0, 8);
>
> -	mb();


the barrier is not needed anymore?


>  	/* Ring the doorbell - x_epno */
>  	dbr = xhcd->db_regs;
>  	write_reg32(&dbr->db[xdev->slot_id], x_epno);
> -	if (!xhci_poll_event(xhcd, 0)) {
> -		printf("poll intr failed\n");
> +	if (!xhci_poll_event(xhcd, XHCI_POLL_NO_WAIT)) {
>  		return 0;
>  	}
>  	mb();
> +	trb = xhci_get_trb_deq(seg);
> +	buf = xpipe->buf + xhci_trb_get_index(seg, trb) * pipe->mps;
>  	memcpy(data, buf, 8);
> +	memset(buf, 0, 8);
>
>  skip_poll:
>  	trb = xhci_get_trb(seg);
> -	fill_normal_trb(trb, (void *)xpipe->buf_phys, pipe->mps);
> -	mb();


and here?

tbh I do not understand what is going on here, just when I see moving mb(), 
I get slightly nervous, that's it :)



> +	buf = (uint8_t *)(xpipe->buf_phys + xhci_trb_get_index(seg, trb) * pipe->mps);
> +	fill_normal_trb(trb, (void *)buf, pipe->mps);
>  	return ret;
>  }
>
> diff --git a/lib/libusb/usb-xhci.h b/lib/libusb/usb-xhci.h
> index 3fc7e78..8172a37 100644
> --- a/lib/libusb/usb-xhci.h
> +++ b/lib/libusb/usb-xhci.h
> @@ -269,6 +269,8 @@ struct xhci_seg {
>  #define XHCI_INTR_TRBS_SIZE    4096
>  #define XHCI_ERST_NUM_SEGS     1
>
> +#define XHCI_POLL_NO_WAIT      1
> +
>  #define XHCI_MAX_BULK_SIZE    0xF000
>
>  struct xhci_erst_entry {
>
Nikunj A Dadhania May 2, 2016, 4:15 a.m. UTC | #4
Alexey Kardashevskiy <aik@ozlabs.ru> writes:

> On 04/29/2016 06:03 AM, Nikunj A Dadhania wrote:
>> Current handling of the keyboard polling was very slow and
>> keys were getting dropped. Done following for fixing this:
>>
>> * Use multiple buffers per TRB
>> * Allocate buffers in xhci according to the number of TRBS.
>>
>> This reduces the delay of key? processing by getting rid of wait in
>> the polling routine.
>>
>> Reported-by: Dinar Valeev <k0da@opensuse.org>
>> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
>> ---
>>
>> Dinar, can you give this patch a test as well, logic is same,
>> some polishing and got rid of magic numbers.
>>
>>  lib/libusb/usb-xhci.c | 52 +++++++++++++++++++++++++++++++++++++++------------
>>  lib/libusb/usb-xhci.h |  2 ++
>>  2 files changed, 42 insertions(+), 12 deletions(-)
>>
>> diff --git a/lib/libusb/usb-xhci.c b/lib/libusb/usb-xhci.c
>> index 858cd12..70619c9 100644
>> --- a/lib/libusb/usb-xhci.c
>> +++ b/lib/libusb/usb-xhci.c
>> @@ -238,7 +238,7 @@ static uint64_t xhci_poll_event(struct xhci_hcd *xhcd,
>>  	flags = le32_to_cpu(event->flags);
>>
>>  	dprintf("Reading from event ptr %p %08x\n", event, flags);
>> -	time = SLOF_GetTimer() + USB_TIMEOUT;
>> +	time = SLOF_GetTimer() + ((event_type == XHCI_POLL_NO_WAIT)? 0: USB_TIMEOUT);
>>
>>  	while ((flags & TRB_CYCLE_STATE) != xhcd->ering.cycle_state) {
>>  		mb();
>> @@ -1147,11 +1147,37 @@ static inline void *xhci_get_trb(struct xhci_seg *seg)
>>  	return (void *)enq;
>>  }
>>
>> +static inline void *xhci_get_trb_deq(struct xhci_seg *seg)
>> +{
>> +	uint64_t val, deq;
>> +	int index;
>> +
>> +	deq = val = seg->deq;
>> +	val = val + XHCI_TRB_SIZE;
>
> Nit:
>
> deq = seq->deq;
> val = deq + XHCI_TRB_SIZE;
>
> and s/val/deq_next/ ?
>
> ?

Sure, reads better.

>
>
>> +	index = (deq - (uint64_t)seg->trbs) / XHCI_TRB_SIZE + 1;
>> +	dprintf("%s: enq %llx, val %llx %x\n", __func__, enq, val, index);
>> +	/* TRBs being a cyclic buffer, here we cycle back to beginning. */
>> +	if (index == (seg->size - 1)) {
>> +		dprintf("%s: rounding \n", __func__);
>> +		seg->deq = (uint64_t)seg->trbs;
>> +		mb();
>
>
> mb() just here, not in the "else" branch?

Actually not required here as well, its only local structure variable change.

>> +	}
>> +	else {
>> +		seg->deq = val;
>> +	}
>> +	return (void *)deq;
>> +}
>> +
>>  static uint64_t xhci_get_trb_phys(struct xhci_seg *seg, uint64_t trb)
>>  {
>>  	return seg->trbs_dma + (trb - (uint64_t)seg->trbs);
>>  }
>>
>> +static uint32_t xhci_trb_get_index(struct xhci_seg *seg, struct xhci_transfer_trb *trb)
>> +{
>> +	return trb - (struct xhci_transfer_trb *)seg->trbs;
>> +}
>> +
>>  static int usb_kb = false;
>>  static int xhci_transfer_bulk(struct usb_pipe *pipe, void *td, void *td_phys,
>>  			void *data, int datalen)
>> @@ -1331,9 +1357,9 @@ static int xhci_get_pipe_intr(struct usb_pipe *pipe,
>>  		xhci_init_seg(seg, XHCI_EVENT_TRBS_SIZE, TYPE_BULK);
>>  	}
>>
>> -	xpipe->buf = buf;
>> -	xpipe->buf_phys = SLOF_dma_map_in(buf, len, false);
>> -	xpipe->buflen = len;
>> +	xpipe->buflen = pipe->mps * XHCI_INTR_TRBS_SIZE/(sizeof(struct xhci_transfer_trb));
>> +	xpipe->buf = SLOF_dma_alloc(xpipe->buflen);
>> +	xpipe->buf_phys = SLOF_dma_map_in(xpipe->buf, xpipe->buflen, false);
>>
>>  	ctrl = xhci_get_control_ctx(&xdev->in_ctx);
>>  	x_epno = xhci_get_epno(pipe);
>> @@ -1349,7 +1375,8 @@ static int xhci_get_pipe_intr(struct usb_pipe *pipe,
>>  	xpipe->seg = seg;
>>
>>  	trb = xhci_get_trb(seg);
>> -	fill_normal_trb(trb, (void *)xpipe->buf_phys, pipe->mps);
>> +	buf = (char *)(xpipe->buf_phys + xhci_trb_get_index(seg, trb) * pipe->mps);
>> +	fill_normal_trb(trb, (void *)buf, pipe->mps);
>>  	return true;
>>  }
>>
>> @@ -1411,6 +1438,7 @@ static void xhci_put_pipe(struct usb_pipe *pipe)
>>  	} else if (pipe->type == USB_EP_TYPE_INTR) {
>>  		xpipe = xhci_pipe_get_xpipe(pipe);
>>  		SLOF_dma_map_out(xpipe->buf_phys, xpipe->buf, xpipe->buflen);
>> +		SLOF_dma_free(xpipe->buf, xpipe->buflen);
>>  		xpipe->seg = NULL;
>>  	}
>>  	if (xhcd->end)
>> @@ -1448,26 +1476,26 @@ static int xhci_poll_intr(struct usb_pipe *pipe, uint8_t *data)
>>  	if (usb_kb == true) {
>>  		/* This event was consumed by bulk transfer */
>>  		usb_kb = false;
>> +		xhci_get_trb_deq(seg);
>>  		goto skip_poll;
>>  	}
>> -	buf = xpipe->buf;
>> -	memset(buf, 0, 8);
>>
>> -	mb();
>
>
> the barrier is not needed anymore?

No, as i have got rid of above memset. Moreover there is an mb() in
write_reg32, and in the beginning of xhci_poll_event();

>>  	/* Ring the doorbell - x_epno */
>>  	dbr = xhcd->db_regs;
>>  	write_reg32(&dbr->db[xdev->slot_id], x_epno);
>> -	if (!xhci_poll_event(xhcd, 0)) {
>> -		printf("poll intr failed\n");
>> +	if (!xhci_poll_event(xhcd, XHCI_POLL_NO_WAIT)) {
>>  		return 0;
>>  	}
>>  	mb();
>> +	trb = xhci_get_trb_deq(seg);
>> +	buf = xpipe->buf + xhci_trb_get_index(seg, trb) * pipe->mps;
>>  	memcpy(data, buf, 8);
>> +	memset(buf, 0, 8);
>>
>>  skip_poll:
>>  	trb = xhci_get_trb(seg);
>> -	fill_normal_trb(trb, (void *)xpipe->buf_phys, pipe->mps);
>> -	mb();
>
>
> and here?

Good catch, i thought that there is a mb() in fill_trb_buff(). That was
the reason i had removed it. Will send a separate patch adding mb() in
fill_trb_buff();

>
> tbh I do not understand what is going on here, just when I see moving mb(), 
> I get slightly nervous, that's it :)

Thanks for pointing it out.

Regards
Nikunj
diff mbox

Patch

diff --git a/lib/libusb/usb-xhci.c b/lib/libusb/usb-xhci.c
index 858cd12..70619c9 100644
--- a/lib/libusb/usb-xhci.c
+++ b/lib/libusb/usb-xhci.c
@@ -238,7 +238,7 @@  static uint64_t xhci_poll_event(struct xhci_hcd *xhcd,
 	flags = le32_to_cpu(event->flags);
 
 	dprintf("Reading from event ptr %p %08x\n", event, flags);
-	time = SLOF_GetTimer() + USB_TIMEOUT;
+	time = SLOF_GetTimer() + ((event_type == XHCI_POLL_NO_WAIT)? 0: USB_TIMEOUT);
 
 	while ((flags & TRB_CYCLE_STATE) != xhcd->ering.cycle_state) {
 		mb();
@@ -1147,11 +1147,37 @@  static inline void *xhci_get_trb(struct xhci_seg *seg)
 	return (void *)enq;
 }
 
+static inline void *xhci_get_trb_deq(struct xhci_seg *seg)
+{
+	uint64_t val, deq;
+	int index;
+
+	deq = val = seg->deq;
+	val = val + XHCI_TRB_SIZE;
+	index = (deq - (uint64_t)seg->trbs) / XHCI_TRB_SIZE + 1;
+	dprintf("%s: enq %llx, val %llx %x\n", __func__, enq, val, index);
+	/* TRBs being a cyclic buffer, here we cycle back to beginning. */
+	if (index == (seg->size - 1)) {
+		dprintf("%s: rounding \n", __func__);
+		seg->deq = (uint64_t)seg->trbs;
+		mb();
+	}
+	else {
+		seg->deq = val;
+	}
+	return (void *)deq;
+}
+
 static uint64_t xhci_get_trb_phys(struct xhci_seg *seg, uint64_t trb)
 {
 	return seg->trbs_dma + (trb - (uint64_t)seg->trbs);
 }
 
+static uint32_t xhci_trb_get_index(struct xhci_seg *seg, struct xhci_transfer_trb *trb)
+{
+	return trb - (struct xhci_transfer_trb *)seg->trbs;
+}
+
 static int usb_kb = false;
 static int xhci_transfer_bulk(struct usb_pipe *pipe, void *td, void *td_phys,
 			void *data, int datalen)
@@ -1331,9 +1357,9 @@  static int xhci_get_pipe_intr(struct usb_pipe *pipe,
 		xhci_init_seg(seg, XHCI_EVENT_TRBS_SIZE, TYPE_BULK);
 	}
 
-	xpipe->buf = buf;
-	xpipe->buf_phys = SLOF_dma_map_in(buf, len, false);
-	xpipe->buflen = len;
+	xpipe->buflen = pipe->mps * XHCI_INTR_TRBS_SIZE/(sizeof(struct xhci_transfer_trb));
+	xpipe->buf = SLOF_dma_alloc(xpipe->buflen);
+	xpipe->buf_phys = SLOF_dma_map_in(xpipe->buf, xpipe->buflen, false);
 
 	ctrl = xhci_get_control_ctx(&xdev->in_ctx);
 	x_epno = xhci_get_epno(pipe);
@@ -1349,7 +1375,8 @@  static int xhci_get_pipe_intr(struct usb_pipe *pipe,
 	xpipe->seg = seg;
 
 	trb = xhci_get_trb(seg);
-	fill_normal_trb(trb, (void *)xpipe->buf_phys, pipe->mps);
+	buf = (char *)(xpipe->buf_phys + xhci_trb_get_index(seg, trb) * pipe->mps);
+	fill_normal_trb(trb, (void *)buf, pipe->mps);
 	return true;
 }
 
@@ -1411,6 +1438,7 @@  static void xhci_put_pipe(struct usb_pipe *pipe)
 	} else if (pipe->type == USB_EP_TYPE_INTR) {
 		xpipe = xhci_pipe_get_xpipe(pipe);
 		SLOF_dma_map_out(xpipe->buf_phys, xpipe->buf, xpipe->buflen);
+		SLOF_dma_free(xpipe->buf, xpipe->buflen);
 		xpipe->seg = NULL;
 	}
 	if (xhcd->end)
@@ -1448,26 +1476,26 @@  static int xhci_poll_intr(struct usb_pipe *pipe, uint8_t *data)
 	if (usb_kb == true) {
 		/* This event was consumed by bulk transfer */
 		usb_kb = false;
+		xhci_get_trb_deq(seg);
 		goto skip_poll;
 	}
-	buf = xpipe->buf;
-	memset(buf, 0, 8);
 
-	mb();
 	/* Ring the doorbell - x_epno */
 	dbr = xhcd->db_regs;
 	write_reg32(&dbr->db[xdev->slot_id], x_epno);
-	if (!xhci_poll_event(xhcd, 0)) {
-		printf("poll intr failed\n");
+	if (!xhci_poll_event(xhcd, XHCI_POLL_NO_WAIT)) {
 		return 0;
 	}
 	mb();
+	trb = xhci_get_trb_deq(seg);
+	buf = xpipe->buf + xhci_trb_get_index(seg, trb) * pipe->mps;
 	memcpy(data, buf, 8);
+	memset(buf, 0, 8);
 
 skip_poll:
 	trb = xhci_get_trb(seg);
-	fill_normal_trb(trb, (void *)xpipe->buf_phys, pipe->mps);
-	mb();
+	buf = (uint8_t *)(xpipe->buf_phys + xhci_trb_get_index(seg, trb) * pipe->mps);
+	fill_normal_trb(trb, (void *)buf, pipe->mps);
 	return ret;
 }
 
diff --git a/lib/libusb/usb-xhci.h b/lib/libusb/usb-xhci.h
index 3fc7e78..8172a37 100644
--- a/lib/libusb/usb-xhci.h
+++ b/lib/libusb/usb-xhci.h
@@ -269,6 +269,8 @@  struct xhci_seg {
 #define XHCI_INTR_TRBS_SIZE    4096
 #define XHCI_ERST_NUM_SEGS     1
 
+#define XHCI_POLL_NO_WAIT      1
+
 #define XHCI_MAX_BULK_SIZE    0xF000
 
 struct xhci_erst_entry {