[net-next,v5,5/6] virtio_net: add XDP_TX support

Message ID: 20161207201245.28121.95418.stgit@john-Precision-Tower-5810
State: Changes Requested, archived
Delegated to: David Miller

Commit Message

John Fastabend Dec. 7, 2016, 8:12 p.m. UTC
This adds support for the XDP_TX action to virtio_net. When an XDP
program is run and returns the XDP_TX action, the virtio_net XDP
implementation transmits the packet on a TX queue that corresponds
to the CPU the XDP packet was processed on.

Before the packet is sent, its header is zeroed. XDP is also expected
to handle checksums itself, so no checksum offload support is
provided.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---
 drivers/net/virtio_net.c |   99 +++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 92 insertions(+), 7 deletions(-)
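
For readers skimming the thread, the per-CPU TX queue selection the
commit message describes reduces to roughly the following sketch (field
names taken from the patch below; illustrative only, not the complete
function):

	/* Pick the XDP TX queue for this CPU: the XDP TX queues occupy
	 * the last xdp_queue_pairs entries of sq[], so offset the CPU id
	 * past the queues used by the normal stack.
	 */
	unsigned int qp = vi->curr_queue_pairs -
			  vi->xdp_queue_pairs +
			  smp_processor_id();

	virtnet_xdp_xmit(vi, rq, &vi->sq[qp], &xdp);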

Comments

Michael S. Tsirkin Dec. 8, 2016, 6:11 a.m. UTC | #1
On Wed, Dec 07, 2016 at 12:12:45PM -0800, John Fastabend wrote:
> This adds support for the XDP_TX action to virtio_net. When an XDP
> program is run and returns the XDP_TX action the virtio_net XDP
> implementation will transmit the packet on a TX queue that aligns
> with the current CPU that the XDP packet was processed on.
> 
> Before sending the packet the header is zeroed.  Also XDP is expected
> to handle checksum correctly so no checksum offload  support is
> provided.
> 
> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
> ---
>  drivers/net/virtio_net.c |   99 +++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 92 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 28b1196..8e5b13c 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -330,12 +330,57 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
>  	return skb;
>  }
>  
> +static void virtnet_xdp_xmit(struct virtnet_info *vi,
> +			     struct receive_queue *rq,
> +			     struct send_queue *sq,
> +			     struct xdp_buff *xdp)
> +{
> +	struct page *page = virt_to_head_page(xdp->data);
> +	struct virtio_net_hdr_mrg_rxbuf *hdr;
> +	unsigned int num_sg, len;
> +	void *xdp_sent;
> +	int err;
> +
> +	/* Free up any pending old buffers before queueing new ones. */
> +	while ((xdp_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) {
> +		struct page *sent_page = virt_to_head_page(xdp_sent);
> +
> +		if (vi->mergeable_rx_bufs)
> +			put_page(sent_page);
> +		else
> +			give_pages(rq, sent_page);
> +	}

Looks like this is the only place where you do virtqueue_get_buf.
No interrupt handler?
This means that if you fill up the queue, nothing will clean it
and things will get stuck.
Can this be the issue you saw?


> +
> +	/* Zero header and leave csum up to XDP layers */
> +	hdr = xdp->data;
> +	memset(hdr, 0, vi->hdr_len);
> +
> +	num_sg = 1;
> +	sg_init_one(sq->sg, xdp->data, xdp->data_end - xdp->data);
> +	err = virtqueue_add_outbuf(sq->vq, sq->sg, num_sg,
> +				   xdp->data, GFP_ATOMIC);
> +	if (unlikely(err)) {
> +		if (vi->mergeable_rx_bufs)
> +			put_page(page);
> +		else
> +			give_pages(rq, page);
> +	} else if (!vi->mergeable_rx_bufs) {
> +		/* If not mergeable bufs must be big packets so cleanup pages */
> +		give_pages(rq, (struct page *)page->private);
> +		page->private = 0;
> +	}
> +
> +	virtqueue_kick(sq->vq);

Is this unconditional kick a work-around for the hang
we could not figure out yet?
I guess this helps because it just slows down the guest.
I don't much like it ...

> +}
> +
>  static u32 do_xdp_prog(struct virtnet_info *vi,
> +		       struct receive_queue *rq,
>  		       struct bpf_prog *xdp_prog,
>  		       struct page *page, int offset, int len)
>  {
>  	int hdr_padded_len;
>  	struct xdp_buff xdp;
> +	unsigned int qp;
>  	u32 act;
>  	u8 *buf;
>  
> @@ -353,9 +398,15 @@ static u32 do_xdp_prog(struct virtnet_info *vi,
>  	switch (act) {
>  	case XDP_PASS:
>  		return XDP_PASS;
> +	case XDP_TX:
> +		qp = vi->curr_queue_pairs -
> +			vi->xdp_queue_pairs +
> +			smp_processor_id();
> +		xdp.data = buf + (vi->mergeable_rx_bufs ? 0 : 4);
> +		virtnet_xdp_xmit(vi, rq, &vi->sq[qp], &xdp);
> +		return XDP_TX;
>  	default:
>  		bpf_warn_invalid_xdp_action(act);
> -	case XDP_TX:
>  	case XDP_ABORTED:
>  	case XDP_DROP:
>  		return XDP_DROP;
> @@ -390,9 +441,17 @@ static struct sk_buff *receive_big(struct net_device *dev,
>  
>  		if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
>  			goto err_xdp;
> -		act = do_xdp_prog(vi, xdp_prog, page, 0, len);
> -		if (act == XDP_DROP)
> +		act = do_xdp_prog(vi, rq, xdp_prog, page, 0, len);
> +		switch (act) {
> +		case XDP_PASS:
> +			break;
> +		case XDP_TX:
> +			rcu_read_unlock();
> +			goto xdp_xmit;
> +		case XDP_DROP:
> +		default:
>  			goto err_xdp;
> +		}
>  	}
>  	rcu_read_unlock();
>  
> @@ -407,6 +466,7 @@ static struct sk_buff *receive_big(struct net_device *dev,
>  err:
>  	dev->stats.rx_dropped++;
>  	give_pages(rq, page);
> +xdp_xmit:
>  	return NULL;
>  }
>  
> @@ -425,6 +485,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>  	struct bpf_prog *xdp_prog;
>  	unsigned int truesize;
>  
> +	head_skb = NULL;
> +
>  	rcu_read_lock();
>  	xdp_prog = rcu_dereference(rq->xdp_prog);
>  	if (xdp_prog) {
> @@ -448,9 +510,17 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>  		if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
>  			goto err_xdp;
>  
> -		act = do_xdp_prog(vi, xdp_prog, page, offset, len);
> -		if (act == XDP_DROP)
> +		act = do_xdp_prog(vi, rq, xdp_prog, page, offset, len);
> +		switch (act) {
> +		case XDP_PASS:
> +			break;
> +		case XDP_TX:
> +			rcu_read_unlock();
> +			goto xdp_xmit;
> +		case XDP_DROP:
> +		default:
>  			goto err_xdp;
> +		}
>  	}
>  	rcu_read_unlock();
>  
> @@ -528,6 +598,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>  err_buf:
>  	dev->stats.rx_dropped++;
>  	dev_kfree_skb(head_skb);
> +xdp_xmit:
>  	return NULL;
>  }
>  
> @@ -1734,6 +1805,16 @@ static void free_receive_page_frags(struct virtnet_info *vi)
>  			put_page(vi->rq[i].alloc_frag.page);
>  }
>  
> +static bool is_xdp_queue(struct virtnet_info *vi, int q)
> +{
> +	if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
> +		return false;
> +	else if (q < vi->curr_queue_pairs)
> +		return true;
> +	else
> +		return false;
> +}
> +
>  static void free_unused_bufs(struct virtnet_info *vi)
>  {
>  	void *buf;
> @@ -1741,8 +1822,12 @@ static void free_unused_bufs(struct virtnet_info *vi)
>  
>  	for (i = 0; i < vi->max_queue_pairs; i++) {
>  		struct virtqueue *vq = vi->sq[i].vq;
> -		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
> -			dev_kfree_skb(buf);
> +		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
> +			if (!is_xdp_queue(vi, i))
> +				dev_kfree_skb(buf);
> +			else
> +				put_page(virt_to_head_page(buf));
> +		}
>  	}
>  
>  	for (i = 0; i < vi->max_queue_pairs; i++) {
John Fastabend Dec. 8, 2016, 6:18 p.m. UTC | #2
On 16-12-07 10:11 PM, Michael S. Tsirkin wrote:
> On Wed, Dec 07, 2016 at 12:12:45PM -0800, John Fastabend wrote:
>> This adds support for the XDP_TX action to virtio_net. When an XDP
>> program is run and returns the XDP_TX action the virtio_net XDP
>> implementation will transmit the packet on a TX queue that aligns
>> with the current CPU that the XDP packet was processed on.
>>
>> Before sending the packet the header is zeroed.  Also XDP is expected
>> to handle checksum correctly so no checksum offload  support is
>> provided.
>>
>> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
>> ---
>>  drivers/net/virtio_net.c |   99 +++++++++++++++++++++++++++++++++++++++++++---
>>  1 file changed, 92 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>> index 28b1196..8e5b13c 100644
>> --- a/drivers/net/virtio_net.c
>> +++ b/drivers/net/virtio_net.c
>> @@ -330,12 +330,57 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
>>  	return skb;
>>  }
>>  
>> +static void virtnet_xdp_xmit(struct virtnet_info *vi,
>> +			     struct receive_queue *rq,
>> +			     struct send_queue *sq,
>> +			     struct xdp_buff *xdp)
>> +{
>> +	struct page *page = virt_to_head_page(xdp->data);
>> +	struct virtio_net_hdr_mrg_rxbuf *hdr;
>> +	unsigned int num_sg, len;
>> +	void *xdp_sent;
>> +	int err;
>> +
>> +	/* Free up any pending old buffers before queueing new ones. */
>> +	while ((xdp_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) {
>> +		struct page *sent_page = virt_to_head_page(xdp_sent);
>> +
>> +		if (vi->mergeable_rx_bufs)
>> +			put_page(sent_page);
>> +		else
>> +			give_pages(rq, sent_page);
>> +	}
> 
> Looks like this is the only place where you do virtqueue_get_buf.
> No interrupt handler?
> This means that if you fill up the queue, nothing will clean it
> and things will get stuck.

Hmm, OK, so the callbacks should be implemented to do this, with a
virtqueue_enable_cb_prepare()/virtqueue_disable_cb() pair used to
enable and disable callbacks when packets are enqueued.

Also, in the normal xmit path via start_xmit(), can the same condition
happen? It looks like free_old_xmit_skbs(), for example, is only called
when a packet is sent; could we end up holding on to skbs in this case?
I don't see free_old_xmit_skbs() being called from any callbacks.
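
As a rough sketch (not part of this patch), the callback-driven cleanup
being discussed could look something like the following, assuming a
hypothetical xdp_xmit_done() registered as the XDP TX virtqueue
callback; re-enabling callbacks via virtqueue_enable_cb_prepare() is
omitted here:

	/* Hypothetical sketch: drain completed XDP TX buffers from a
	 * virtqueue callback rather than only from virtnet_xdp_xmit().
	 * The big-packets case would additionally need give_pages() and
	 * access to the receive queue.
	 */
	static void xdp_xmit_done(struct virtqueue *vq)
	{
		unsigned int len;
		void *xdp_sent;

		/* Suppress further interrupts while draining. */
		virtqueue_disable_cb(vq);

		while ((xdp_sent = virtqueue_get_buf(vq, &len)) != NULL)
			put_page(virt_to_head_page(xdp_sent));
	}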

> Can this be the issue you saw?

Nope, see below; I was mishandling the big_packets page cleanup path in
the error case.

> 
> 
>> +
>> +	/* Zero header and leave csum up to XDP layers */
>> +	hdr = xdp->data;
>> +	memset(hdr, 0, vi->hdr_len);
>> +
>> +	num_sg = 1;
>> +	sg_init_one(sq->sg, xdp->data, xdp->data_end - xdp->data);
>> +	err = virtqueue_add_outbuf(sq->vq, sq->sg, num_sg,
>> +				   xdp->data, GFP_ATOMIC);
>> +	if (unlikely(err)) {
>> +		if (vi->mergeable_rx_bufs)
>> +			put_page(page);
>> +		else
>> +			give_pages(rq, page);
>> +	} else if (!vi->mergeable_rx_bufs) {
>> +		/* If not mergeable bufs must be big packets so cleanup pages */
>> +		give_pages(rq, (struct page *)page->private);
>> +		page->private = 0;
>> +	}
>> +
>> +	virtqueue_kick(sq->vq);
> 
> Is this unconditional kick a work-around for hang
> we could not figure out yet?

I tracked the original issue down to how I handled the big_packet page
cleanups.

> I guess this helps because it just slows down the guest.
> I don't much like it ...

I left it like this, copying the pattern in the balloon and input
drivers. I can change it back to the previous pattern where the kick is
only done if there are no errors. It has been running fine with the old
pattern for an hour or so now.
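
The previous pattern referred to here is essentially kicking only when
the buffer was actually queued, along the lines of this sketch:

	err = virtqueue_add_outbuf(sq->vq, sq->sg, num_sg,
				   xdp->data, GFP_ATOMIC);
	if (unlikely(err)) {
		/* drop/recycle the page as in the patch */
	} else {
		/* only kick when something was actually enqueued */
		virtqueue_kick(sq->vq);
	}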

.John
Michael S. Tsirkin Dec. 8, 2016, 9:08 p.m. UTC | #3
On Thu, Dec 08, 2016 at 10:18:22AM -0800, John Fastabend wrote:
> On 16-12-07 10:11 PM, Michael S. Tsirkin wrote:
> > On Wed, Dec 07, 2016 at 12:12:45PM -0800, John Fastabend wrote:
> >> This adds support for the XDP_TX action to virtio_net. When an XDP
> >> program is run and returns the XDP_TX action the virtio_net XDP
> >> implementation will transmit the packet on a TX queue that aligns
> >> with the current CPU that the XDP packet was processed on.
> >>
> >> Before sending the packet the header is zeroed.  Also XDP is expected
> >> to handle checksum correctly so no checksum offload  support is
> >> provided.
> >>
> >> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
> >> ---
> >>  drivers/net/virtio_net.c |   99 +++++++++++++++++++++++++++++++++++++++++++---
> >>  1 file changed, 92 insertions(+), 7 deletions(-)
> >>
> >> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> >> index 28b1196..8e5b13c 100644
> >> --- a/drivers/net/virtio_net.c
> >> +++ b/drivers/net/virtio_net.c
> >> @@ -330,12 +330,57 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> >>  	return skb;
> >>  }
> >>  
> >> +static void virtnet_xdp_xmit(struct virtnet_info *vi,
> >> +			     struct receive_queue *rq,
> >> +			     struct send_queue *sq,
> >> +			     struct xdp_buff *xdp)
> >> +{
> >> +	struct page *page = virt_to_head_page(xdp->data);
> >> +	struct virtio_net_hdr_mrg_rxbuf *hdr;
> >> +	unsigned int num_sg, len;
> >> +	void *xdp_sent;
> >> +	int err;
> >> +
> >> +	/* Free up any pending old buffers before queueing new ones. */
> >> +	while ((xdp_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) {
> >> +		struct page *sent_page = virt_to_head_page(xdp_sent);
> >> +
> >> +		if (vi->mergeable_rx_bufs)
> >> +			put_page(sent_page);
> >> +		else
> >> +			give_pages(rq, sent_page);
> >> +	}
> > 
> > Looks like this is the only place where you do virtqueue_get_buf.
> > No interrupt handler?
> > This means that if you fill up the queue, nothing will clean it
> > and things will get stuck.
> 
> hmm OK so the callbacks should be implemented to do this and a pair
> of virtqueue_enable_cb_prepare()/virtqueue_disable_cb() used to enable
> and disable callbacks if packets are enqueued.

Oh I didn't realize XDP never stops processing packets,
even if they are never freed.
In that case you do not need callbacks.

> Also in the normal xmit path via start_xmit() will the same condition
> happen? It looks like free_old_xmit_skbs for example is only called if
> a packet is sent could we end up holding on to skbs in this case? I
> don't see free_old_xmit_skbs being called from any callbacks?

Right - all it does is restart the queue. That's why we don't support
BQL right now.
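
For context, virtio_net's TX completion callback at this point looks
roughly like the following; it never frees buffers, it only restarts
the queue (approximate, for illustration):

	static void skb_xmit_done(struct virtqueue *vq)
	{
		struct virtnet_info *vi = vq->vdev->priv;

		/* Suppress further interrupts. */
		virtqueue_disable_cb(vq);

		/* We were probably waiting for more output buffers. */
		netif_wake_subqueue(vi->dev, vq2txq(vq));
	}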

> > Can this be the issue you saw?
> 
> nope see below I was mishandling the big_packets page cleanup path in
> the error case.
> 
> > 
> > 
> >> +
> >> +	/* Zero header and leave csum up to XDP layers */
> >> +	hdr = xdp->data;
> >> +	memset(hdr, 0, vi->hdr_len);
> >> +
> >> +	num_sg = 1;
> >> +	sg_init_one(sq->sg, xdp->data, xdp->data_end - xdp->data);
> >> +	err = virtqueue_add_outbuf(sq->vq, sq->sg, num_sg,
> >> +				   xdp->data, GFP_ATOMIC);
> >> +	if (unlikely(err)) {
> >> +		if (vi->mergeable_rx_bufs)
> >> +			put_page(page);
> >> +		else
> >> +			give_pages(rq, page);
> >> +	} else if (!vi->mergeable_rx_bufs) {
> >> +		/* If not mergeable bufs must be big packets so cleanup pages */
> >> +		give_pages(rq, (struct page *)page->private);
> >> +		page->private = 0;
> >> +	}
> >> +
> >> +	virtqueue_kick(sq->vq);
> > 
> > Is this unconditional kick a work-around for hang
> > we could not figure out yet?
> 
> I tracked the original issue down to how I handled the big_packet page
> cleanups.
> 
> > I guess this helps because it just slows down the guest.
> > I don't much like it ...
> 
> I left it like this copying the pattern in balloon and input drivers. I
> can change it back to the previous pattern where it is only called if
> there is no errors. It has been running fine with the old pattern now
> for an hour or so.
> 
> .John

OK makes sense.
Michael S. Tsirkin Dec. 8, 2016, 9:18 p.m. UTC | #4
On Thu, Dec 08, 2016 at 10:18:22AM -0800, John Fastabend wrote:
> > I guess this helps because it just slows down the guest.
> > I don't much like it ...
> 
> I left it like this copying the pattern in balloon and input drivers. I
> can change it back to the previous pattern where it is only called if
> there is no errors. It has been running fine with the old pattern now
> for an hour or so.
> 
> .John

I'd prefer internal consistency. Could be a patch on top
if this helps. I'm happy it isn't actually buggy.
Let me know whether you want to post v6 or
want me to ack this one.
John Fastabend Dec. 8, 2016, 9:25 p.m. UTC | #5
On 16-12-08 01:18 PM, Michael S. Tsirkin wrote:
> On Thu, Dec 08, 2016 at 10:18:22AM -0800, John Fastabend wrote:
>>> I guess this helps because it just slows down the guest.
>>> I don't much like it ...
>>
>> I left it like this copying the pattern in balloon and input drivers. I
>> can change it back to the previous pattern where it is only called if
>> there is no errors. It has been running fine with the old pattern now
>> for an hour or so.
>>
>> .John
> 
> I'd prefer internal consistency. Could be a patch on top
> if this helps. I'm happy it isn't actually buggy.
> Let me know whether you want to post v6 or
> want me to ack this one.
> 

I think, because DaveM has closed net-next, ACKing this set so it can
go into Dave's tree would be great. Then I can post a small "fix"
afterwards so the normal stack and XDP TX paths are consistent.

Thanks,
John
Michael S. Tsirkin Dec. 8, 2016, 9:45 p.m. UTC | #6
On Thu, Dec 08, 2016 at 01:25:40PM -0800, John Fastabend wrote:
> On 16-12-08 01:18 PM, Michael S. Tsirkin wrote:
> > On Thu, Dec 08, 2016 at 10:18:22AM -0800, John Fastabend wrote:
> >>> I guess this helps because it just slows down the guest.
> >>> I don't much like it ...
> >>
> >> I left it like this copying the pattern in balloon and input drivers. I
> >> can change it back to the previous pattern where it is only called if
> >> there is no errors. It has been running fine with the old pattern now
> >> for an hour or so.
> >>
> >> .John
> > 
> > I'd prefer internal consistency. Could be a patch on top
> > if this helps. I'm happy it isn't actually buggy.
> > Let me know whether you want to post v6 or
> > want me to ack this one.
> > 
> 
> I think because DaveM has closed net-next ACK'ing this set would be
> great to get it in Dave's tree. Then I can post a small "fix" so that it
> is consistent between normal stack and xdp tx path after that.
> 
> Thanks,
> John

I can merge it through my tree too - I don't think there's
any dependency on net-next, is there?
John Fastabend Dec. 8, 2016, 9:51 p.m. UTC | #7
On 16-12-08 01:45 PM, Michael S. Tsirkin wrote:
> On Thu, Dec 08, 2016 at 01:25:40PM -0800, John Fastabend wrote:
>> On 16-12-08 01:18 PM, Michael S. Tsirkin wrote:
>>> On Thu, Dec 08, 2016 at 10:18:22AM -0800, John Fastabend wrote:
>>>>> I guess this helps because it just slows down the guest.
>>>>> I don't much like it ...
>>>>
>>>> I left it like this copying the pattern in balloon and input drivers. I
>>>> can change it back to the previous pattern where it is only called if
>>>> there is no errors. It has been running fine with the old pattern now
>>>> for an hour or so.
>>>>
>>>> .John
>>>
>>> I'd prefer internal consistency. Could be a patch on top
>>> if this helps. I'm happy it isn't actually buggy.
>>> Let me know whether you want to post v6 or
>>> want me to ack this one.
>>>
>>
>> I think because DaveM has closed net-next ACK'ing this set would be
>> great to get it in Dave's tree. Then I can post a small "fix" so that it
>> is consistent between normal stack and xdp tx path after that.
>>
>> Thanks,
>> John
> 
> I can merge it through my tree too - I don't think there's
> any dependency on net-next, is there?
> 

It's all changes to virtio_net except for patch 2, which is against
filter.c/filter.h, where I guess you could possibly get a merge
conflict. But it would be fairly trivial.

I don't have a preference on which tree it goes through; whichever
makes the most sense.

.John

Patch

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 28b1196..8e5b13c 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -330,12 +330,57 @@  static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 	return skb;
 }
 
+static void virtnet_xdp_xmit(struct virtnet_info *vi,
+			     struct receive_queue *rq,
+			     struct send_queue *sq,
+			     struct xdp_buff *xdp)
+{
+	struct page *page = virt_to_head_page(xdp->data);
+	struct virtio_net_hdr_mrg_rxbuf *hdr;
+	unsigned int num_sg, len;
+	void *xdp_sent;
+	int err;
+
+	/* Free up any pending old buffers before queueing new ones. */
+	while ((xdp_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) {
+		struct page *sent_page = virt_to_head_page(xdp_sent);
+
+		if (vi->mergeable_rx_bufs)
+			put_page(sent_page);
+		else
+			give_pages(rq, sent_page);
+	}
+
+	/* Zero header and leave csum up to XDP layers */
+	hdr = xdp->data;
+	memset(hdr, 0, vi->hdr_len);
+
+	num_sg = 1;
+	sg_init_one(sq->sg, xdp->data, xdp->data_end - xdp->data);
+	err = virtqueue_add_outbuf(sq->vq, sq->sg, num_sg,
+				   xdp->data, GFP_ATOMIC);
+	if (unlikely(err)) {
+		if (vi->mergeable_rx_bufs)
+			put_page(page);
+		else
+			give_pages(rq, page);
+	} else if (!vi->mergeable_rx_bufs) {
+		/* If not mergeable bufs must be big packets so cleanup pages */
+		give_pages(rq, (struct page *)page->private);
+		page->private = 0;
+	}
+
+	virtqueue_kick(sq->vq);
+}
+
 static u32 do_xdp_prog(struct virtnet_info *vi,
+		       struct receive_queue *rq,
 		       struct bpf_prog *xdp_prog,
 		       struct page *page, int offset, int len)
 {
 	int hdr_padded_len;
 	struct xdp_buff xdp;
+	unsigned int qp;
 	u32 act;
 	u8 *buf;
 
@@ -353,9 +398,15 @@  static u32 do_xdp_prog(struct virtnet_info *vi,
 	switch (act) {
 	case XDP_PASS:
 		return XDP_PASS;
+	case XDP_TX:
+		qp = vi->curr_queue_pairs -
+			vi->xdp_queue_pairs +
+			smp_processor_id();
+		xdp.data = buf + (vi->mergeable_rx_bufs ? 0 : 4);
+		virtnet_xdp_xmit(vi, rq, &vi->sq[qp], &xdp);
+		return XDP_TX;
 	default:
 		bpf_warn_invalid_xdp_action(act);
-	case XDP_TX:
 	case XDP_ABORTED:
 	case XDP_DROP:
 		return XDP_DROP;
@@ -390,9 +441,17 @@  static struct sk_buff *receive_big(struct net_device *dev,
 
 		if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
 			goto err_xdp;
-		act = do_xdp_prog(vi, xdp_prog, page, 0, len);
-		if (act == XDP_DROP)
+		act = do_xdp_prog(vi, rq, xdp_prog, page, 0, len);
+		switch (act) {
+		case XDP_PASS:
+			break;
+		case XDP_TX:
+			rcu_read_unlock();
+			goto xdp_xmit;
+		case XDP_DROP:
+		default:
 			goto err_xdp;
+		}
 	}
 	rcu_read_unlock();
 
@@ -407,6 +466,7 @@  static struct sk_buff *receive_big(struct net_device *dev,
 err:
 	dev->stats.rx_dropped++;
 	give_pages(rq, page);
+xdp_xmit:
 	return NULL;
 }
 
@@ -425,6 +485,8 @@  static struct sk_buff *receive_mergeable(struct net_device *dev,
 	struct bpf_prog *xdp_prog;
 	unsigned int truesize;
 
+	head_skb = NULL;
+
 	rcu_read_lock();
 	xdp_prog = rcu_dereference(rq->xdp_prog);
 	if (xdp_prog) {
@@ -448,9 +510,17 @@  static struct sk_buff *receive_mergeable(struct net_device *dev,
 		if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
 			goto err_xdp;
 
-		act = do_xdp_prog(vi, xdp_prog, page, offset, len);
-		if (act == XDP_DROP)
+		act = do_xdp_prog(vi, rq, xdp_prog, page, offset, len);
+		switch (act) {
+		case XDP_PASS:
+			break;
+		case XDP_TX:
+			rcu_read_unlock();
+			goto xdp_xmit;
+		case XDP_DROP:
+		default:
 			goto err_xdp;
+		}
 	}
 	rcu_read_unlock();
 
@@ -528,6 +598,7 @@  static struct sk_buff *receive_mergeable(struct net_device *dev,
 err_buf:
 	dev->stats.rx_dropped++;
 	dev_kfree_skb(head_skb);
+xdp_xmit:
 	return NULL;
 }
 
@@ -1734,6 +1805,16 @@  static void free_receive_page_frags(struct virtnet_info *vi)
 			put_page(vi->rq[i].alloc_frag.page);
 }
 
+static bool is_xdp_queue(struct virtnet_info *vi, int q)
+{
+	if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
+		return false;
+	else if (q < vi->curr_queue_pairs)
+		return true;
+	else
+		return false;
+}
+
 static void free_unused_bufs(struct virtnet_info *vi)
 {
 	void *buf;
@@ -1741,8 +1822,12 @@  static void free_unused_bufs(struct virtnet_info *vi)
 
 	for (i = 0; i < vi->max_queue_pairs; i++) {
 		struct virtqueue *vq = vi->sq[i].vq;
-		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
-			dev_kfree_skb(buf);
+		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
+			if (!is_xdp_queue(vi, i))
+				dev_kfree_skb(buf);
+			else
+				put_page(virt_to_head_page(buf));
+		}
 	}
 
 	for (i = 0; i < vi->max_queue_pairs; i++) {