[net-next,v3,2/9] xen-netback: Change TX path from grant copy to mapping

Message ID: 1389139818-24458-3-git-send-email-zoltan.kiss@citrix.com
State: Changes Requested, archived
Delegated to: David Miller

Commit Message

Zoltan Kiss Jan. 8, 2014, 12:10 a.m. UTC
This patch changes the grant copy on the TX path to grant mapping.

v2:
- delete the branch for handling fragmented packets whose first request
  fits PKT_PROT_LEN
- mark the effect of using ballooned pages in a comment
- place setting of skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY right
  before netif_receive_skb, and mark the importance of it
- grab dealloc_lock before __napi_complete to avoid contention with the
  callback's napi_schedule
- handle fragmented packets where first request < PKT_PROT_LEN
- fix up error path when checksum_setup failed
- check for pending grants before teardown, and start complaining if they
  are still there after 10 seconds

v3:
- delete a surplus check from tx_action
- remove stray line
- squash xenvif_idx_unmap changes into the first patch
- init spinlocks
- call map hypercall directly instead of gnttab_map_refs()
- fix unmapping timeout in xenvif_free()

Signed-off-by: Zoltan Kiss <zoltan.kiss@citrix.com>
---
 drivers/net/xen-netback/interface.c |   57 +++++++-
 drivers/net/xen-netback/netback.c   |  251 ++++++++++++++---------------------
 2 files changed, 153 insertions(+), 155 deletions(-)


Comments

Wei Liu Jan. 9, 2014, 3:30 p.m. UTC | #1
On Wed, Jan 08, 2014 at 12:10:11AM +0000, Zoltan Kiss wrote:
> This patch changes the grant copy on the TX path to grant mapping.
> 
> v2:
> - delete the branch for handling fragmented packets whose first request
>   fits PKT_PROT_LEN
> - mark the effect of using ballooned pages in a comment
> - place setting of skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY right
>   before netif_receive_skb, and mark the importance of it
> - grab dealloc_lock before __napi_complete to avoid contention with the
>   callback's napi_schedule
> - handle fragmented packets where first request < PKT_PROT_LEN
> - fix up error path when checksum_setup failed
> - check for pending grants before teardown, and start complaining if they
>   are still there after 10 seconds
> 
> v3:
> - delete a surplus check from tx_action
> - remove stray line
> - squash xenvif_idx_unmap changes into the first patch
> - init spinlocks
> - call map hypercall directly instead of gnttab_map_refs()

I suppose this is to avoid touching the m2p override as well, just as the
previous patch uses the unmap hypercall directly.

> - fix unmapping timeout in xenvif_free()
> 
> Signed-off-by: Zoltan Kiss <zoltan.kiss@citrix.com>
> ---
>  drivers/net/xen-netback/interface.c |   57 +++++++-
>  drivers/net/xen-netback/netback.c   |  251 ++++++++++++++---------------------
>  2 files changed, 153 insertions(+), 155 deletions(-)
> 
> diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
> index 7170f97..3b2b249 100644
> --- a/drivers/net/xen-netback/interface.c
> +++ b/drivers/net/xen-netback/interface.c
> @@ -122,7 +122,9 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
>  	BUG_ON(skb->dev != dev);
>  
>  	/* Drop the packet if vif is not ready */
> -	if (vif->task == NULL || !xenvif_schedulable(vif))
> +	if (vif->task == NULL ||
> +		vif->dealloc_task == NULL ||
> +		!xenvif_schedulable(vif))

Indentation.
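
For illustration, kernel style would align the continuation lines under
the opening condition, roughly:

	if (vif->task == NULL ||
	    vif->dealloc_task == NULL ||
	    !xenvif_schedulable(vif))
		goto drop;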

>  		goto drop;
>  
>  	/* At best we'll need one slot for the header and one for each
> @@ -345,8 +347,26 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
>  	vif->pending_prod = MAX_PENDING_REQS;
>  	for (i = 0; i < MAX_PENDING_REQS; i++)
>  		vif->pending_ring[i] = i;
> -	for (i = 0; i < MAX_PENDING_REQS; i++)
> -		vif->mmap_pages[i] = NULL;
> +	spin_lock_init(&vif->dealloc_lock);
> +	spin_lock_init(&vif->response_lock);
> +	/* If ballooning is disabled, this will consume real memory, so you
> +	 * better enable it. The long term solution would be to use just a
> +	 * bunch of valid page descriptors, without dependency on ballooning
> +	 */
> +	err = alloc_xenballooned_pages(MAX_PENDING_REQS,
> +		vif->mmap_pages,
> +		false);

Ditto.

> +	if (err) {
> +		netdev_err(dev, "Could not reserve mmap_pages\n");
> +		return NULL;
> +	}
> +	for (i = 0; i < MAX_PENDING_REQS; i++) {
> +		vif->pending_tx_info[i].callback_struct = (struct ubuf_info)
> +			{ .callback = xenvif_zerocopy_callback,
> +			  .ctx = NULL,
> +			  .desc = i };
> +		vif->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
> +	}
>  
>  	/*
>  	 * Initialise a dummy MAC address. We choose the numerically
> @@ -390,6 +410,7 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
>  		goto err;
>  
>  	init_waitqueue_head(&vif->wq);
> +	init_waitqueue_head(&vif->dealloc_wq);
>  
>  	if (tx_evtchn == rx_evtchn) {
>  		/* feature-split-event-channels == 0 */
> @@ -431,6 +452,14 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
>  		goto err_rx_unbind;
>  	}
>  
> +	vif->dealloc_task = kthread_create(xenvif_dealloc_kthread,
> +				   (void *)vif, "%s-dealloc", vif->dev->name);

Ditto.

> +	if (IS_ERR(vif->dealloc_task)) {
> +		pr_warn("Could not allocate kthread for %s\n", vif->dev->name);
> +		err = PTR_ERR(vif->dealloc_task);
> +		goto err_rx_unbind;
> +	}
> +
>  	vif->task = task;
>  
>  	rtnl_lock();
[...]
>  
>  static int xenvif_tx_check_gop(struct xenvif *vif,
>  			       struct sk_buff *skb,
> -			       struct gnttab_copy **gopp)
> +			       struct gnttab_map_grant_ref **gopp)
>  {
> -	struct gnttab_copy *gop = *gopp;
> +	struct gnttab_map_grant_ref *gop = *gopp;
>  	u16 pending_idx = *((u16 *)skb->data);
>  	struct skb_shared_info *shinfo = skb_shinfo(skb);
>  	struct pending_tx_info *tx_info;
> @@ -920,6 +852,18 @@ static int xenvif_tx_check_gop(struct xenvif *vif,
>  	err = gop->status;
>  	if (unlikely(err))
>  		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
> +	else {
> +		if (vif->grant_tx_handle[pending_idx] !=
> +			NETBACK_INVALID_HANDLE) {
> +			netdev_err(vif->dev,
> +				"Stale mapped handle! pending_idx %x handle %x\n",
> +				pending_idx, vif->grant_tx_handle[pending_idx]);
> +			BUG();
> +		}
> +		set_phys_to_machine(idx_to_pfn(vif, pending_idx),
> +			FOREIGN_FRAME(gop->dev_bus_addr >> PAGE_SHIFT));

What happens when you don't have this?

> +		vif->grant_tx_handle[pending_idx] = gop->handle;
> +	}
>  
>  	/* Skip first skb fragment if it is on same page as header fragment. */
>  	start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
> @@ -933,18 +877,26 @@ static int xenvif_tx_check_gop(struct xenvif *vif,
>  		head = tx_info->head;
>  
[...]
>  		}
> @@ -1567,7 +1523,11 @@ static int xenvif_tx_submit(struct xenvif *vif)
>  		else if (txp->flags & XEN_NETTXF_data_validated)
>  			skb->ip_summed = CHECKSUM_UNNECESSARY;
>  
> -		xenvif_fill_frags(vif, skb);
> +		xenvif_fill_frags(vif,
> +			skb,
> +			skb_shinfo(skb)->destructor_arg ?
> +					pending_idx :
> +					INVALID_PENDING_IDX);
>  

Indentation.

>  		if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) {
>  			int target = min_t(int, skb->len, PKT_PROT_LEN);
> @@ -1581,6 +1541,8 @@ static int xenvif_tx_submit(struct xenvif *vif)
>  		if (checksum_setup(vif, skb)) {
>  			netdev_dbg(vif->dev,
>  				   "Can't setup checksum in net_tx_action\n");
> +			if (skb_shinfo(skb)->destructor_arg)
> +				skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;

Do you still care about setting the flag even if this skb is not going
to be delivered? If so, can you state the reason clearly, just like in
the following hunk?

>  			kfree_skb(skb);
>  			continue;
>  		}
> @@ -1606,6 +1568,14 @@ static int xenvif_tx_submit(struct xenvif *vif)
>  
>  		work_done++;
>  
> +		/* Set this flag right before netif_receive_skb, otherwise
> +		 * someone might think this packet already left netback, and
> +		 * do a skb_copy_ubufs while we are still in control of the
> +		 * skb. E.g. the __pskb_pull_tail earlier can do such thing.
> +		 */
> +		if (skb_shinfo(skb)->destructor_arg)
> +			skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
> +
>  		netif_receive_skb(skb);
>  	}
>  
> @@ -1715,7 +1685,7 @@ static inline void xenvif_tx_dealloc_action(struct xenvif *vif)
>  int xenvif_tx_action(struct xenvif *vif, int budget)
>  {
>  	unsigned nr_gops;
> -	int work_done;
> +	int work_done, ret;
>  
>  	if (unlikely(!tx_work_todo(vif)))
>  		return 0;
> @@ -1725,7 +1695,10 @@ int xenvif_tx_action(struct xenvif *vif, int budget)
>  	if (nr_gops == 0)
>  		return 0;
>  
> -	gnttab_batch_copy(vif->tx_copy_ops, nr_gops);
> +	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
> +			vif->tx_map_ops,
> +			nr_gops);

Why do you need to replace gnttab_batch_copy with a direct hypercall? In
the ideal situation gnttab_batch_copy should behave the same as the
direct hypercall, but it also handles GNTST_eagain for you.
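
(For reference, gnttab_batch_copy is roughly the following, paraphrased
from drivers/xen/grant-table.c, so treat the details as a sketch:

void gnttab_batch_copy(struct gnttab_copy *batch, unsigned count)
{
	struct gnttab_copy *op;

	/* Issue the whole batch in a single hypercall. */
	if (HYPERVISOR_grant_table_op(GNTTABOP_copy, batch, count))
		BUG();

	/* Re-issue any entry the hypervisor asked us to retry. */
	for (op = batch; op < batch + count; op++)
		if (op->status == GNTST_eagain)
			gnttab_retry_eagain_gop(GNTTABOP_copy, op,
						&op->status, __func__);
}

A bare HYPERVISOR_grant_table_op() call skips that second loop.)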

Wei.
Zoltan Kiss Jan. 10, 2014, 11:35 a.m. UTC | #2
On 09/01/14 15:30, Wei Liu wrote:
> On Wed, Jan 08, 2014 at 12:10:11AM +0000, Zoltan Kiss wrote:
>> v3:
>> - delete a surplus check from tx_action
>> - remove stray line
>> - squash xenvif_idx_unmap changes into the first patch
>> - init spinlocks
>> - call map hypercall directly instead of gnttab_map_refs()
>
> I suppose this is to avoid touching the m2p override as well, just as
> the previous patch uses the unmap hypercall directly.

Yes.

>> --- a/drivers/net/xen-netback/interface.c
>> +++ b/drivers/net/xen-netback/interface.c
>> @@ -122,7 +122,9 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
>>   	BUG_ON(skb->dev != dev);
>>
>>   	/* Drop the packet if vif is not ready */
>> -	if (vif->task == NULL || !xenvif_schedulable(vif))
>> +	if (vif->task == NULL ||
>> +		vif->dealloc_task == NULL ||
>> +		!xenvif_schedulable(vif))
>
> Indentation.
Fixed, and the later ones as well.

>> @@ -920,6 +852,18 @@ static int xenvif_tx_check_gop(struct xenvif *vif,
>>   	err = gop->status;
>>   	if (unlikely(err))
>>   		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
>> +	else {
>> +		if (vif->grant_tx_handle[pending_idx] !=
>> +			NETBACK_INVALID_HANDLE) {
>> +			netdev_err(vif->dev,
>> +				"Stale mapped handle! pending_idx %x handle %x\n",
>> +				pending_idx, vif->grant_tx_handle[pending_idx]);
>> +			BUG();
>> +		}
>> +		set_phys_to_machine(idx_to_pfn(vif, pending_idx),
>> +			FOREIGN_FRAME(gop->dev_bus_addr >> PAGE_SHIFT));
>
> What happens when you don't have this?
Your frags will be filled with garbage. I don't understand exactly what 
this function does; someone might want to enlighten us? I took its usage 
from the classic kernel.
Also, it might be worthwhile to check the return value and BUG if it's 
false, but I don't know exactly what that return value means.

>
>>   		if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) {
>>   			int target = min_t(int, skb->len, PKT_PROT_LEN);
>> @@ -1581,6 +1541,8 @@ static int xenvif_tx_submit(struct xenvif *vif)
>>   		if (checksum_setup(vif, skb)) {
>>   			netdev_dbg(vif->dev,
>>   				   "Can't setup checksum in net_tx_action\n");
>> +			if (skb_shinfo(skb)->destructor_arg)
>> +				skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
>
> Do you still care about setting the flag even if this skb is not going
> to be delivered? If so, can you state the reason clearly, just like in
> the following hunk?
Of course, otherwise the pages wouldn't be sent back to the guest. I've 
added a comment.
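
Something along these lines, as a sketch of the intent rather than the
final wording:

	if (checksum_setup(vif, skb)) {
		netdev_dbg(vif->dev,
			   "Can't setup checksum in net_tx_action\n");
		/* We have to set this flag even on the error path, so
		 * that the zerocopy callback runs when the skb is freed
		 * and the mapped pages are given back to the guest.
		 */
		if (skb_shinfo(skb)->destructor_arg)
			skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
		kfree_skb(skb);
		continue;
	}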

>> @@ -1715,7 +1685,7 @@ static inline void xenvif_tx_dealloc_action(struct xenvif *vif)
>>   int xenvif_tx_action(struct xenvif *vif, int budget)
>>   {
>>   	unsigned nr_gops;
>> -	int work_done;
>> +	int work_done, ret;
>>
>>   	if (unlikely(!tx_work_todo(vif)))
>>   		return 0;
>> @@ -1725,7 +1695,10 @@ int xenvif_tx_action(struct xenvif *vif, int budget)
>>   	if (nr_gops == 0)
>>   		return 0;
>>
>> -	gnttab_batch_copy(vif->tx_copy_ops, nr_gops);
>> +	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
>> +			vif->tx_map_ops,
>> +			nr_gops);
>
> Why do you need to replace gnttab_batch_copy with a direct hypercall? In
> the ideal situation gnttab_batch_copy should behave the same as the
> direct hypercall, but it also handles GNTST_eagain for you.

I don't need gnttab_batch_copy at all; I'm using the grant mapping 
hypercall here.

Regards,

Zoli
Wei Liu Jan. 10, 2014, 11:45 a.m. UTC | #3
On Fri, Jan 10, 2014 at 11:35:08AM +0000, Zoltan Kiss wrote:
[...]
> 
> >>@@ -920,6 +852,18 @@ static int xenvif_tx_check_gop(struct xenvif *vif,
> >>  	err = gop->status;
> >>  	if (unlikely(err))
> >>  		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
> >>+	else {
> >>+		if (vif->grant_tx_handle[pending_idx] !=
> >>+			NETBACK_INVALID_HANDLE) {
> >>+			netdev_err(vif->dev,
> >>+				"Stale mapped handle! pending_idx %x handle %x\n",
> >>+				pending_idx, vif->grant_tx_handle[pending_idx]);
> >>+			BUG();
> >>+		}
> >>+		set_phys_to_machine(idx_to_pfn(vif, pending_idx),
> >>+			FOREIGN_FRAME(gop->dev_bus_addr >> PAGE_SHIFT));
> >
> >What happens when you don't have this?
> Your frags will be filled with garbage. I don't understand exactly
> what this function does; someone might want to enlighten us? I took
> its usage from the classic kernel.
> Also, it might be worthwhile to check the return value and BUG if
> it's false, but I don't know exactly what that return value means.
> 

This is actually part of gnttab_map_refs. As you're using the hypercall
directly, this becomes very fragile.

So the right thing to do is to fix gnttab_map_refs.
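
(For reference, the API under discussion; the signature is recalled from
kernels of that era, so treat it as approximate:

int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
		    struct gnttab_map_grant_ref *kmap_ops,
		    struct page **pages, unsigned int count);

It issues the map hypercall, retries GNTST_eagain entries, and then does
the m2p/p2m bookkeeping for each successfully mapped page.)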

> >
> >>  		if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) {
> >>  			int target = min_t(int, skb->len, PKT_PROT_LEN);
> >>@@ -1581,6 +1541,8 @@ static int xenvif_tx_submit(struct xenvif *vif)
> >>  		if (checksum_setup(vif, skb)) {
> >>  			netdev_dbg(vif->dev,
> >>  				   "Can't setup checksum in net_tx_action\n");
> >>+			if (skb_shinfo(skb)->destructor_arg)
> >>+				skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
> >
> >Do you still care about setting the flag even if this skb is not going
> >to be delivered? If so, can you state the reason clearly, just like in
> >the following hunk?
> Of course, otherwise the pages wouldn't be sent back to the guest.
> I've added a comment.
> 

OK, thanks! That means whenever an SKB leaves netback we need to add
this flag.

> >>@@ -1715,7 +1685,7 @@ static inline void xenvif_tx_dealloc_action(struct xenvif *vif)
> >>  int xenvif_tx_action(struct xenvif *vif, int budget)
> >>  {
> >>  	unsigned nr_gops;
> >>-	int work_done;
> >>+	int work_done, ret;
> >>
> >>  	if (unlikely(!tx_work_todo(vif)))
> >>  		return 0;
> >>@@ -1725,7 +1695,10 @@ int xenvif_tx_action(struct xenvif *vif, int budget)
> >>  	if (nr_gops == 0)
> >>  		return 0;
> >>
> >>-	gnttab_batch_copy(vif->tx_copy_ops, nr_gops);
> >>+	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
> >>+			vif->tx_map_ops,
> >>+			nr_gops);
> >
> >Why do you need to replace gnttab_batch_copy with a direct hypercall? In
> >the ideal situation gnttab_batch_copy should behave the same as the
> >direct hypercall, but it also handles GNTST_eagain for you.
> 
> I don't need gnttab_batch_copy at all; I'm using the grant mapping
> hypercall here.
> 

Oops, my bad! Ignore that one.

Wei.

> Regards,
> 
> Zoli
Wei Liu Jan. 10, 2014, 1:16 p.m. UTC | #4
On Fri, Jan 10, 2014 at 11:45:34AM +0000, Wei Liu wrote:
> On Fri, Jan 10, 2014 at 11:35:08AM +0000, Zoltan Kiss wrote:
> [...]
> > 
> > >>@@ -920,6 +852,18 @@ static int xenvif_tx_check_gop(struct xenvif *vif,
> > >>  	err = gop->status;
> > >>  	if (unlikely(err))
> > >>  		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
> > >>+	else {
> > >>+		if (vif->grant_tx_handle[pending_idx] !=
> > >>+			NETBACK_INVALID_HANDLE) {
> > >>+			netdev_err(vif->dev,
> > >>+				"Stale mapped handle! pending_idx %x handle %x\n",
> > >>+				pending_idx, vif->grant_tx_handle[pending_idx]);
> > >>+			BUG();
> > >>+		}
> > >>+		set_phys_to_machine(idx_to_pfn(vif, pending_idx),
> > >>+			FOREIGN_FRAME(gop->dev_bus_addr >> PAGE_SHIFT));
> > >
> > >What happens when you don't have this?
> > Your frags will be filled with garbage. I don't understand exactly
> > what this function does; someone might want to enlighten us? I took
> > its usage from the classic kernel.
> > Also, it might be worthwhile to check the return value and BUG if
> > it's false, but I don't know exactly what that return value means.
> > 
> 
> This is actually part of gnttab_map_refs. As you're using the hypercall
> directly, this becomes very fragile.
> 

To make it clear, set_phys_to_machine is done within m2p_add_override.
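
Paraphrased from arch/x86/xen/p2m.c of that era, so treat this as a
sketch rather than the exact upstream code:

	/* Inside m2p_add_override(), called by gnttab_map_refs() for
	 * each mapped page: record that this pfn now holds a foreign
	 * frame, so p2m lookups stay consistent with the mapping.
	 */
	unsigned long pfn = page_to_pfn(page);

	if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
		return -ENOMEM;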

Wei.
Zoltan Kiss Jan. 10, 2014, 3:24 p.m. UTC | #5
On 10/01/14 11:45, Wei Liu wrote:
> On Fri, Jan 10, 2014 at 11:35:08AM +0000, Zoltan Kiss wrote:
> [...]
>>
>>>> @@ -920,6 +852,18 @@ static int xenvif_tx_check_gop(struct xenvif *vif,
>>>>   	err = gop->status;
>>>>   	if (unlikely(err))
>>>>   		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
>>>> +	else {
>>>> +		if (vif->grant_tx_handle[pending_idx] !=
>>>> +			NETBACK_INVALID_HANDLE) {
>>>> +			netdev_err(vif->dev,
>>>> +				"Stale mapped handle! pending_idx %x handle %x\n",
>>>> +				pending_idx, vif->grant_tx_handle[pending_idx]);
>>>> +			BUG();
>>>> +		}
>>>> +		set_phys_to_machine(idx_to_pfn(vif, pending_idx),
>>>> +			FOREIGN_FRAME(gop->dev_bus_addr >> PAGE_SHIFT));
>>>
>>> What happens when you don't have this?
>> Your frags will be filled with garbage. I don't understand exactly
>> what this function does; someone might want to enlighten us? I took
>> its usage from the classic kernel.
>> Also, it might be worthwhile to check the return value and BUG if
>> it's false, but I don't know exactly what that return value means.
>>
>
> This is actually part of gnttab_map_refs. As you're using the hypercall
> directly, this becomes very fragile.
>
> So the right thing to do is to fix gnttab_map_refs.
I agree. As I mentioned in another email in this thread, I think that 
should be the topic of a separate patch series. In the meantime, I will 
use gnttab_batch_map instead of the direct hypercall, as it handles the 
GNTST_eagain scenario, and I will use set_phys_to_machine the same way 
as m2p_override does:

if (unlikely(!set_phys_to_machine(idx_to_pfn(vif, pending_idx),
	FOREIGN_FRAME(gop->dev_bus_addr >> PAGE_SHIFT))))
	BUG();

Zoli
David Vrabel Jan. 10, 2014, 4:02 p.m. UTC | #6
On 10/01/14 15:24, Zoltan Kiss wrote:
> On 10/01/14 11:45, Wei Liu wrote:
>> On Fri, Jan 10, 2014 at 11:35:08AM +0000, Zoltan Kiss wrote:
>> [...]
>>>
>>>>> @@ -920,6 +852,18 @@ static int xenvif_tx_check_gop(struct xenvif
>>>>> *vif,
>>>>>       err = gop->status;
>>>>>       if (unlikely(err))
>>>>>           xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
>>>>> +    else {
>>>>> +        if (vif->grant_tx_handle[pending_idx] !=
>>>>> +            NETBACK_INVALID_HANDLE) {
>>>>> +            netdev_err(vif->dev,
>>>>> +                "Stale mapped handle! pending_idx %x handle %x\n",
>>>>> +                pending_idx, vif->grant_tx_handle[pending_idx]);
>>>>> +            BUG();
>>>>> +        }
>>>>> +        set_phys_to_machine(idx_to_pfn(vif, pending_idx),
>>>>> +            FOREIGN_FRAME(gop->dev_bus_addr >> PAGE_SHIFT));
>>>>
>>>> What happens when you don't have this?
>>> Your frags will be filled with garbage. I don't understand exactly
>>> what this function does; someone might want to enlighten us? I took
>>> its usage from the classic kernel.
>>> Also, it might be worthwhile to check the return value and BUG if
>>> it's false, but I don't know exactly what that return value means.
>>>
>>
>> This is actually part of gnttab_map_refs. As you're using the hypercall
>> directly, this becomes very fragile.
>>
>> So the right thing to do is to fix gnttab_map_refs.
> I agree. As I mentioned in another email in this thread, I think that
> should be the topic of a separate patch series. In the meantime, I will
> use gnttab_batch_map instead of the direct hypercall, as it handles the
> GNTST_eagain scenario, and I will use set_phys_to_machine the same way
> as m2p_override does:

If the grant table code doesn't provide the API calls you need, you can
either:

a) add the new API as a prerequisite patch.
b) use the existing API calls and live with the performance problem,
until you can refactor the API later on.

Adding a netback-specific hack isn't a valid option.

David
Wei Liu Jan. 10, 2014, 4:08 p.m. UTC | #7
On Fri, Jan 10, 2014 at 04:02:20PM +0000, David Vrabel wrote:
> On 10/01/14 15:24, Zoltan Kiss wrote:
> > On 10/01/14 11:45, Wei Liu wrote:
> >> On Fri, Jan 10, 2014 at 11:35:08AM +0000, Zoltan Kiss wrote:
> >> [...]
> >>>
> >>>>> @@ -920,6 +852,18 @@ static int xenvif_tx_check_gop(struct xenvif
> >>>>> *vif,
> >>>>>       err = gop->status;
> >>>>>       if (unlikely(err))
> >>>>>           xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
> >>>>> +    else {
> >>>>> +        if (vif->grant_tx_handle[pending_idx] !=
> >>>>> +            NETBACK_INVALID_HANDLE) {
> >>>>> +            netdev_err(vif->dev,
> >>>>> +                "Stale mapped handle! pending_idx %x handle %x\n",
> >>>>> +                pending_idx, vif->grant_tx_handle[pending_idx]);
> >>>>> +            BUG();
> >>>>> +        }
> >>>>> +        set_phys_to_machine(idx_to_pfn(vif, pending_idx),
> >>>>> +            FOREIGN_FRAME(gop->dev_bus_addr >> PAGE_SHIFT));
> >>>>
> >>>> What happens when you don't have this?
> >>> Your frags will be filled with garbage. I don't understand exactly
> >>> what this function does; someone might want to enlighten us? I took
> >>> its usage from the classic kernel.
> >>> Also, it might be worthwhile to check the return value and BUG if
> >>> it's false, but I don't know exactly what that return value means.
> >>>
> >>
> >> This is actually part of gnttab_map_refs. As you're using the hypercall
> >> directly, this becomes very fragile.
> >>
> >> So the right thing to do is to fix gnttab_map_refs.
> > I agree. As I mentioned in another email in this thread, I think that
> > should be the topic of a separate patch series. In the meantime, I will
> > use gnttab_batch_map instead of the direct hypercall, as it handles the
> > GNTST_eagain scenario, and I will use set_phys_to_machine the same way
> > as m2p_override does:
> 
> If the grant table code doesn't provide the API calls you need, you can
> either:
> 
> a) add the new API as a prerequisite patch.
> b) use the existing API calls and live with the performance problem,
> until you can refactor the API later on.
> 
> Adding a netback-specific hack isn't a valid option.
> 

Agreed.

Wei.

> David
Zoltan Kiss Jan. 12, 2014, 11:19 p.m. UTC | #8
On 10/01/14 16:02, David Vrabel wrote:
> On 10/01/14 15:24, Zoltan Kiss wrote:
> If the grant table code doesn't provide the API calls you need, you can
> either:
>
> a) add the new API as a prerequisite patch.
> b) use the existing API calls and live with the performance problem,
> until you can refactor the API later on.
>
> Adding a netback-specific hack isn't a valid option.
>
> David

OK, I've sent in the patch which does a).

Zoli

Patch

diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 7170f97..3b2b249 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -122,7 +122,9 @@  static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	BUG_ON(skb->dev != dev);
 
 	/* Drop the packet if vif is not ready */
-	if (vif->task == NULL || !xenvif_schedulable(vif))
+	if (vif->task == NULL ||
+		vif->dealloc_task == NULL ||
+		!xenvif_schedulable(vif))
 		goto drop;
 
 	/* At best we'll need one slot for the header and one for each
@@ -345,8 +347,26 @@  struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
 	vif->pending_prod = MAX_PENDING_REQS;
 	for (i = 0; i < MAX_PENDING_REQS; i++)
 		vif->pending_ring[i] = i;
-	for (i = 0; i < MAX_PENDING_REQS; i++)
-		vif->mmap_pages[i] = NULL;
+	spin_lock_init(&vif->dealloc_lock);
+	spin_lock_init(&vif->response_lock);
+	/* If ballooning is disabled, this will consume real memory, so you
+	 * better enable it. The long term solution would be to use just a
+	 * bunch of valid page descriptors, without dependency on ballooning
+	 */
+	err = alloc_xenballooned_pages(MAX_PENDING_REQS,
+		vif->mmap_pages,
+		false);
+	if (err) {
+		netdev_err(dev, "Could not reserve mmap_pages\n");
+		return NULL;
+	}
+	for (i = 0; i < MAX_PENDING_REQS; i++) {
+		vif->pending_tx_info[i].callback_struct = (struct ubuf_info)
+			{ .callback = xenvif_zerocopy_callback,
+			  .ctx = NULL,
+			  .desc = i };
+		vif->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
+	}
 
 	/*
 	 * Initialise a dummy MAC address. We choose the numerically
@@ -390,6 +410,7 @@  int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
 		goto err;
 
 	init_waitqueue_head(&vif->wq);
+	init_waitqueue_head(&vif->dealloc_wq);
 
 	if (tx_evtchn == rx_evtchn) {
 		/* feature-split-event-channels == 0 */
@@ -431,6 +452,14 @@  int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
 		goto err_rx_unbind;
 	}
 
+	vif->dealloc_task = kthread_create(xenvif_dealloc_kthread,
+				   (void *)vif, "%s-dealloc", vif->dev->name);
+	if (IS_ERR(vif->dealloc_task)) {
+		pr_warn("Could not allocate kthread for %s\n", vif->dev->name);
+		err = PTR_ERR(vif->dealloc_task);
+		goto err_rx_unbind;
+	}
+
 	vif->task = task;
 
 	rtnl_lock();
@@ -443,6 +472,7 @@  int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
 	rtnl_unlock();
 
 	wake_up_process(vif->task);
+	wake_up_process(vif->dealloc_task);
 
 	return 0;
 
@@ -480,6 +510,11 @@  void xenvif_disconnect(struct xenvif *vif)
 		vif->task = NULL;
 	}
 
+	if (vif->dealloc_task) {
+		kthread_stop(vif->dealloc_task);
+		vif->dealloc_task = NULL;
+	}
+
 	if (vif->tx_irq) {
 		if (vif->tx_irq == vif->rx_irq)
 			unbind_from_irqhandler(vif->tx_irq, vif);
@@ -495,6 +530,22 @@  void xenvif_disconnect(struct xenvif *vif)
 
 void xenvif_free(struct xenvif *vif)
 {
+	int i, unmap_timeout = 0;
+
+	for (i = 0; i < MAX_PENDING_REQS; ++i) {
+		if (vif->grant_tx_handle[i] != NETBACK_INVALID_HANDLE) {
+			unmap_timeout++;
+			schedule_timeout(msecs_to_jiffies(1000));
+			if (unmap_timeout > 9 &&
+				net_ratelimit())
+				netdev_err(vif->dev,
+					"Page still granted! Index: %x\n", i);
+			i = -1;
+		}
+	}
+
+	free_xenballooned_pages(MAX_PENDING_REQS, vif->mmap_pages);
+
 	netif_napi_del(&vif->napi);
 
 	unregister_netdev(vif->dev);
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 7c241f9..53d7e78 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -644,9 +644,12 @@  static void xenvif_tx_err(struct xenvif *vif,
 			  struct xen_netif_tx_request *txp, RING_IDX end)
 {
 	RING_IDX cons = vif->tx.req_cons;
+	unsigned long flags;
 
 	do {
+		spin_lock_irqsave(&vif->response_lock, flags);
 		make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
+		spin_unlock_irqrestore(&vif->response_lock, flags);
 		if (cons == end)
 			break;
 		txp = RING_GET_REQUEST(&vif->tx, cons++);
@@ -784,10 +787,10 @@  static inline void xenvif_tx_create_gop(struct xenvif *vif, u16 pending_idx,
 
 }
 
-static struct gnttab_copy *xenvif_get_requests(struct xenvif *vif,
+static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif *vif,
 					       struct sk_buff *skb,
 					       struct xen_netif_tx_request *txp,
-					       struct gnttab_copy *gop)
+					       struct gnttab_map_grant_ref *gop)
 {
 	struct skb_shared_info *shinfo = skb_shinfo(skb);
 	skb_frag_t *frags = shinfo->frags;
@@ -808,83 +811,12 @@  static struct gnttab_copy *xenvif_get_requests(struct xenvif *vif,
 	/* Skip first skb fragment if it is on same page as header fragment. */
 	start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
 
-	/* Coalesce tx requests, at this point the packet passed in
-	 * should be <= 64K. Any packets larger than 64K have been
-	 * handled in xenvif_count_requests().
-	 */
-	for (shinfo->nr_frags = slot = start; slot < nr_slots;
-	     shinfo->nr_frags++) {
-		struct pending_tx_info *pending_tx_info =
-			vif->pending_tx_info;
-
-		page = alloc_page(GFP_ATOMIC|__GFP_COLD);
-		if (!page)
-			goto err;
-
-		dst_offset = 0;
-		first = NULL;
-		while (dst_offset < PAGE_SIZE && slot < nr_slots) {
-			gop->flags = GNTCOPY_source_gref;
-
-			gop->source.u.ref = txp->gref;
-			gop->source.domid = vif->domid;
-			gop->source.offset = txp->offset;
-
-			gop->dest.domid = DOMID_SELF;
-
-			gop->dest.offset = dst_offset;
-			gop->dest.u.gmfn = virt_to_mfn(page_address(page));
-
-			if (dst_offset + txp->size > PAGE_SIZE) {
-				/* This page can only merge a portion
-				 * of tx request. Do not increment any
-				 * pointer / counter here. The txp
-				 * will be dealt with in future
-				 * rounds, eventually hitting the
-				 * `else` branch.
-				 */
-				gop->len = PAGE_SIZE - dst_offset;
-				txp->offset += gop->len;
-				txp->size -= gop->len;
-				dst_offset += gop->len; /* quit loop */
-			} else {
-				/* This tx request can be merged in the page */
-				gop->len = txp->size;
-				dst_offset += gop->len;
-
+	for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots;
+	     shinfo->nr_frags++, txp++, gop++) {
 				index = pending_index(vif->pending_cons++);
-
 				pending_idx = vif->pending_ring[index];
-
-				memcpy(&pending_tx_info[pending_idx].req, txp,
-				       sizeof(*txp));
-
-				/* Poison these fields, corresponding
-				 * fields for head tx req will be set
-				 * to correct values after the loop.
-				 */
-				vif->mmap_pages[pending_idx] = (void *)(~0UL);
-				pending_tx_info[pending_idx].head =
-					INVALID_PENDING_RING_IDX;
-
-				if (!first) {
-					first = &pending_tx_info[pending_idx];
-					start_idx = index;
-					head_idx = pending_idx;
-				}
-
-				txp++;
-				slot++;
-			}
-
-			gop++;
-		}
-
-		first->req.offset = 0;
-		first->req.size = dst_offset;
-		first->head = start_idx;
-		vif->mmap_pages[head_idx] = page;
-		frag_set_pending_idx(&frags[shinfo->nr_frags], head_idx);
+		xenvif_tx_create_gop(vif, pending_idx, txp, gop);
+		frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx);
 	}
 
 	BUG_ON(shinfo->nr_frags > MAX_SKB_FRAGS);
@@ -906,9 +838,9 @@  err:
 
 static int xenvif_tx_check_gop(struct xenvif *vif,
 			       struct sk_buff *skb,
-			       struct gnttab_copy **gopp)
+			       struct gnttab_map_grant_ref **gopp)
 {
-	struct gnttab_copy *gop = *gopp;
+	struct gnttab_map_grant_ref *gop = *gopp;
 	u16 pending_idx = *((u16 *)skb->data);
 	struct skb_shared_info *shinfo = skb_shinfo(skb);
 	struct pending_tx_info *tx_info;
@@ -920,6 +852,18 @@  static int xenvif_tx_check_gop(struct xenvif *vif,
 	err = gop->status;
 	if (unlikely(err))
 		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
+	else {
+		if (vif->grant_tx_handle[pending_idx] !=
+			NETBACK_INVALID_HANDLE) {
+			netdev_err(vif->dev,
+				"Stale mapped handle! pending_idx %x handle %x\n",
+				pending_idx, vif->grant_tx_handle[pending_idx]);
+			BUG();
+		}
+		set_phys_to_machine(idx_to_pfn(vif, pending_idx),
+			FOREIGN_FRAME(gop->dev_bus_addr >> PAGE_SHIFT));
+		vif->grant_tx_handle[pending_idx] = gop->handle;
+	}
 
 	/* Skip first skb fragment if it is on same page as header fragment. */
 	start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
@@ -933,18 +877,26 @@  static int xenvif_tx_check_gop(struct xenvif *vif,
 		head = tx_info->head;
 
 		/* Check error status: if okay then remember grant handle. */
-		do {
 			newerr = (++gop)->status;
-			if (newerr)
-				break;
-			peek = vif->pending_ring[pending_index(++head)];
-		} while (!pending_tx_is_head(vif, peek));
 
 		if (likely(!newerr)) {
+			if (vif->grant_tx_handle[pending_idx] !=
+				NETBACK_INVALID_HANDLE) {
+				netdev_err(vif->dev,
+					"Stale mapped handle! pending_idx %x handle %x\n",
+					pending_idx,
+					vif->grant_tx_handle[pending_idx]);
+				xenvif_fatal_tx_err(vif);
+			}
+			set_phys_to_machine(idx_to_pfn(vif, pending_idx),
+				FOREIGN_FRAME(gop->dev_bus_addr >> PAGE_SHIFT));
+			vif->grant_tx_handle[pending_idx] = gop->handle;
 			/* Had a previous error? Invalidate this fragment. */
-			if (unlikely(err))
+			if (unlikely(err)) {
+				xenvif_idx_unmap(vif, pending_idx);
 				xenvif_idx_release(vif, pending_idx,
 						   XEN_NETIF_RSP_OKAY);
+			}
 			continue;
 		}
 
@@ -957,9 +909,11 @@  static int xenvif_tx_check_gop(struct xenvif *vif,
 
 		/* First error: invalidate header and preceding fragments. */
 		pending_idx = *((u16 *)skb->data);
+		xenvif_idx_unmap(vif, pending_idx);
 		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
 		for (j = start; j < i; j++) {
 			pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
+			xenvif_idx_unmap(vif, pending_idx);
 			xenvif_idx_release(vif, pending_idx,
 					   XEN_NETIF_RSP_OKAY);
 		}
@@ -972,7 +926,8 @@  static int xenvif_tx_check_gop(struct xenvif *vif,
 	return err;
 }
 
-static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb)
+static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb,
+		u16 prev_pending_idx)
 {
 	struct skb_shared_info *shinfo = skb_shinfo(skb);
 	int nr_frags = shinfo->nr_frags;
@@ -986,6 +941,17 @@  static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb)
 
 		pending_idx = frag_get_pending_idx(frag);
 
+		/* If this is not the first frag, chain it to the previous*/
+		if (unlikely(prev_pending_idx == INVALID_PENDING_IDX))
+			skb_shinfo(skb)->destructor_arg =
+				&vif->pending_tx_info[pending_idx].callback_struct;
+		else if (likely(pending_idx != prev_pending_idx))
+			vif->pending_tx_info[prev_pending_idx].callback_struct.ctx =
+				&(vif->pending_tx_info[pending_idx].callback_struct);
+
+		vif->pending_tx_info[pending_idx].callback_struct.ctx = NULL;
+		prev_pending_idx = pending_idx;
+
 		txp = &vif->pending_tx_info[pending_idx].req;
 		page = virt_to_page(idx_to_kaddr(vif, pending_idx));
 		__skb_fill_page_desc(skb, i, page, txp->offset, txp->size);
@@ -993,10 +959,15 @@  static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb)
 		skb->data_len += txp->size;
 		skb->truesize += txp->size;
 
-		/* Take an extra reference to offset xenvif_idx_release */
+		/* Take an extra reference to offset network stack's put_page */
 		get_page(vif->mmap_pages[pending_idx]);
-		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
 	}
+	/* FIXME: __skb_fill_page_desc set this to true because page->pfmemalloc
+	 * overlaps with "index", and "mapping" is not set. I think mapping
+	 * should be set. If delivered to local stack, it would drop this
+	 * skb in sk_filter unless the socket has the right to use it.
+	 */
+	skb->pfmemalloc	= false;
 }
 
 static int xenvif_get_extras(struct xenvif *vif,
@@ -1358,7 +1329,7 @@  static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
 
 static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
 {
-	struct gnttab_copy *gop = vif->tx_copy_ops, *request_gop;
+	struct gnttab_map_grant_ref *gop = vif->tx_map_ops, *request_gop;
 	struct sk_buff *skb;
 	int ret;
 
@@ -1466,30 +1437,10 @@  static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
 			}
 		}
 
-		/* XXX could copy straight to head */
-		page = xenvif_alloc_page(vif, pending_idx);
-		if (!page) {
-			kfree_skb(skb);
-			xenvif_tx_err(vif, &txreq, idx);
-			break;
-		}
-
-		gop->source.u.ref = txreq.gref;
-		gop->source.domid = vif->domid;
-		gop->source.offset = txreq.offset;
-
-		gop->dest.u.gmfn = virt_to_mfn(page_address(page));
-		gop->dest.domid = DOMID_SELF;
-		gop->dest.offset = txreq.offset;
-
-		gop->len = txreq.size;
-		gop->flags = GNTCOPY_source_gref;
+		xenvif_tx_create_gop(vif, pending_idx, &txreq, gop);
 
 		gop++;
 
-		memcpy(&vif->pending_tx_info[pending_idx].req,
-		       &txreq, sizeof(txreq));
-		vif->pending_tx_info[pending_idx].head = index;
 		*((u16 *)skb->data) = pending_idx;
 
 		__skb_put(skb, data_len);
@@ -1518,17 +1469,17 @@  static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
 
 		vif->tx.req_cons = idx;
 
-		if ((gop-vif->tx_copy_ops) >= ARRAY_SIZE(vif->tx_copy_ops))
+		if ((gop-vif->tx_map_ops) >= ARRAY_SIZE(vif->tx_map_ops))
 			break;
 	}
 
-	return gop - vif->tx_copy_ops;
+	return gop - vif->tx_map_ops;
 }
 
 
 static int xenvif_tx_submit(struct xenvif *vif)
 {
-	struct gnttab_copy *gop = vif->tx_copy_ops;
+	struct gnttab_map_grant_ref *gop = vif->tx_map_ops;
 	struct sk_buff *skb;
 	int work_done = 0;
 
@@ -1552,12 +1503,17 @@  static int xenvif_tx_submit(struct xenvif *vif)
 		memcpy(skb->data,
 		       (void *)(idx_to_kaddr(vif, pending_idx)|txp->offset),
 		       data_len);
+		vif->pending_tx_info[pending_idx].callback_struct.ctx = NULL;
 		if (data_len < txp->size) {
 			/* Append the packet payload as a fragment. */
 			txp->offset += data_len;
 			txp->size -= data_len;
+			skb_shinfo(skb)->destructor_arg =
+				&vif->pending_tx_info[pending_idx].callback_struct;
 		} else {
 			/* Schedule a response immediately. */
+			skb_shinfo(skb)->destructor_arg = NULL;
+			xenvif_idx_unmap(vif, pending_idx);
 			xenvif_idx_release(vif, pending_idx,
 					   XEN_NETIF_RSP_OKAY);
 		}
@@ -1567,7 +1523,11 @@  static int xenvif_tx_submit(struct xenvif *vif)
 		else if (txp->flags & XEN_NETTXF_data_validated)
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 
-		xenvif_fill_frags(vif, skb);
+		xenvif_fill_frags(vif,
+			skb,
+			skb_shinfo(skb)->destructor_arg ?
+					pending_idx :
+					INVALID_PENDING_IDX);
 
 		if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) {
 			int target = min_t(int, skb->len, PKT_PROT_LEN);
@@ -1581,6 +1541,8 @@  static int xenvif_tx_submit(struct xenvif *vif)
 		if (checksum_setup(vif, skb)) {
 			netdev_dbg(vif->dev,
 				   "Can't setup checksum in net_tx_action\n");
+			if (skb_shinfo(skb)->destructor_arg)
+				skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
 			kfree_skb(skb);
 			continue;
 		}
@@ -1606,6 +1568,14 @@  static int xenvif_tx_submit(struct xenvif *vif)
 
 		work_done++;
 
+		/* Set this flag right before netif_receive_skb, otherwise
+		 * someone might think this packet already left netback, and
+		 * do a skb_copy_ubufs while we are still in control of the
+		 * skb. E.g. the __pskb_pull_tail earlier can do such thing.
+		 */
+		if (skb_shinfo(skb)->destructor_arg)
+			skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+
 		netif_receive_skb(skb);
 	}
 
@@ -1715,7 +1685,7 @@  static inline void xenvif_tx_dealloc_action(struct xenvif *vif)
 int xenvif_tx_action(struct xenvif *vif, int budget)
 {
 	unsigned nr_gops;
-	int work_done;
+	int work_done, ret;
 
 	if (unlikely(!tx_work_todo(vif)))
 		return 0;
@@ -1725,7 +1695,10 @@  int xenvif_tx_action(struct xenvif *vif, int budget)
 	if (nr_gops == 0)
 		return 0;
 
-	gnttab_batch_copy(vif->tx_copy_ops, nr_gops);
+	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+			vif->tx_map_ops,
+			nr_gops);
+	BUG_ON(ret);
 
 	work_done = xenvif_tx_submit(vif);
 
@@ -1736,45 +1709,19 @@  static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx,
 			       u8 status)
 {
 	struct pending_tx_info *pending_tx_info;
-	pending_ring_idx_t head;
+	pending_ring_idx_t index;
 	u16 peek; /* peek into next tx request */
+	unsigned long flags;
 
-	BUG_ON(vif->mmap_pages[pending_idx] == (void *)(~0UL));
-
-	/* Already complete? */
-	if (vif->mmap_pages[pending_idx] == NULL)
-		return;
-
-	pending_tx_info = &vif->pending_tx_info[pending_idx];
-
-	head = pending_tx_info->head;
-
-	BUG_ON(!pending_tx_is_head(vif, head));
-	BUG_ON(vif->pending_ring[pending_index(head)] != pending_idx);
-
-	do {
-		pending_ring_idx_t index;
-		pending_ring_idx_t idx = pending_index(head);
-		u16 info_idx = vif->pending_ring[idx];
-
-		pending_tx_info = &vif->pending_tx_info[info_idx];
+		pending_tx_info = &vif->pending_tx_info[pending_idx];
+		spin_lock_irqsave(&vif->response_lock, flags);
 		make_tx_response(vif, &pending_tx_info->req, status);
-
-		/* Setting any number other than
-		 * INVALID_PENDING_RING_IDX indicates this slot is
-		 * starting a new packet / ending a previous packet.
-		 */
-		pending_tx_info->head = 0;
-
-		index = pending_index(vif->pending_prod++);
-		vif->pending_ring[index] = vif->pending_ring[info_idx];
-
-		peek = vif->pending_ring[pending_index(++head)];
-
-	} while (!pending_tx_is_head(vif, peek));
-
-	put_page(vif->mmap_pages[pending_idx]);
-	vif->mmap_pages[pending_idx] = NULL;
+		index = pending_index(vif->pending_prod);
+		vif->pending_ring[index] = pending_idx;
+		/* TX shouldn't use the index before we give it back here */
+		mb();
+		vif->pending_prod++;
+		spin_unlock_irqrestore(&vif->response_lock, flags);
 }
 
 void xenvif_idx_unmap(struct xenvif *vif, u16 pending_idx)