diff mbox series

[bpf-next,v3] virtio_net: add XDP meta data support

Message ID 20190702081646.23230-1-yuya.kusakabe@gmail.com
State Awaiting Upstream
Delegated to: BPF Maintainers
Headers show
Series [bpf-next,v3] virtio_net: add XDP meta data support | expand

Commit Message

Yuya Kusakabe July 2, 2019, 8:16 a.m. UTC
This adds XDP meta data support to both receive_small() and
receive_mergeable().

Fixes: de8f3a83b0a0 ("bpf: add meta pointer for direct access")
Signed-off-by: Yuya Kusakabe <yuya.kusakabe@gmail.com>
---
v3:
 - fix preserve the vnet header in receive_small().
v2:
 - keep copy untouched in page_to_skb().
 - preserve the vnet header in receive_small().
 - fix indentation.
---
 drivers/net/virtio_net.c | 45 +++++++++++++++++++++++++++-------------
 1 file changed, 31 insertions(+), 14 deletions(-)

Comments

Jason Wang July 2, 2019, 8:33 a.m. UTC | #1
On 2019/7/2 下午4:16, Yuya Kusakabe wrote:
> This adds XDP meta data support to both receive_small() and
> receive_mergeable().
>
> Fixes: de8f3a83b0a0 ("bpf: add meta pointer for direct access")
> Signed-off-by: Yuya Kusakabe <yuya.kusakabe@gmail.com>
> ---
> v3:
>   - fix preserve the vnet header in receive_small().
> v2:
>   - keep copy untouched in page_to_skb().
>   - preserve the vnet header in receive_small().
>   - fix indentation.
> ---
>   drivers/net/virtio_net.c | 45 +++++++++++++++++++++++++++-------------
>   1 file changed, 31 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 4f3de0ac8b0b..03a1ae6fe267 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -371,7 +371,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
>   				   struct receive_queue *rq,
>   				   struct page *page, unsigned int offset,
>   				   unsigned int len, unsigned int truesize,
> -				   bool hdr_valid)
> +				   bool hdr_valid, unsigned int metasize)
>   {
>   	struct sk_buff *skb;
>   	struct virtio_net_hdr_mrg_rxbuf *hdr;
> @@ -393,7 +393,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
>   	else
>   		hdr_padded_len = sizeof(struct padded_vnet_hdr);
>   
> -	if (hdr_valid)
> +	if (hdr_valid && !metasize)
>   		memcpy(hdr, p, hdr_len);
>   
>   	len -= hdr_len;
> @@ -405,6 +405,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
>   		copy = skb_tailroom(skb);
>   	skb_put_data(skb, p, copy);
>   
> +	if (metasize) {
> +		__skb_pull(skb, metasize);
> +		skb_metadata_set(skb, metasize);
> +	}
> +
>   	len -= copy;
>   	offset += copy;
>   
> @@ -644,6 +649,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
>   	unsigned int delta = 0;
>   	struct page *xdp_page;
>   	int err;
> +	unsigned int metasize = 0;
>   
>   	len -= vi->hdr_len;
>   	stats->bytes += len;
> @@ -683,10 +689,13 @@ static struct sk_buff *receive_small(struct net_device *dev,
>   
>   		xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len;
>   		xdp.data = xdp.data_hard_start + xdp_headroom;
> -		xdp_set_data_meta_invalid(&xdp);
>   		xdp.data_end = xdp.data + len;
> +		xdp.data_meta = xdp.data;
>   		xdp.rxq = &rq->xdp_rxq;
>   		orig_data = xdp.data;
> +		/* Copy the vnet header to the front of data_hard_start to avoid
> +		 * overwriting by XDP meta data */
> +		memcpy(xdp.data_hard_start - vi->hdr_len, xdp.data - vi->hdr_len, vi->hdr_len);


What happens if we have a large metadata that occupies all headroom here?

Thanks


>   		act = bpf_prog_run_xdp(xdp_prog, &xdp);
>   		stats->xdp_packets++;
>   
> @@ -695,9 +704,11 @@ static struct sk_buff *receive_small(struct net_device *dev,
>   			/* Recalculate length in case bpf program changed it */
>   			delta = orig_data - xdp.data;
>   			len = xdp.data_end - xdp.data;
> +			metasize = xdp.data - xdp.data_meta;
>   			break;
>   		case XDP_TX:
>   			stats->xdp_tx++;
> +			xdp.data_meta = xdp.data;
>   			xdpf = convert_to_xdp_frame(&xdp);
>   			if (unlikely(!xdpf))
>   				goto err_xdp;
> @@ -736,10 +747,12 @@ static struct sk_buff *receive_small(struct net_device *dev,
>   	skb_reserve(skb, headroom - delta);
>   	skb_put(skb, len);
>   	if (!delta) {
> -		buf += header_offset;
> -		memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len);
> +		memcpy(skb_vnet_hdr(skb), buf + VIRTNET_RX_PAD, vi->hdr_len);
>   	} /* keep zeroed vnet hdr since packet was changed by bpf */
>   
> +	if (metasize)
> +		skb_metadata_set(skb, metasize);
> +
>   err:
>   	return skb;
>   
> @@ -760,8 +773,8 @@ static struct sk_buff *receive_big(struct net_device *dev,
>   				   struct virtnet_rq_stats *stats)
>   {
>   	struct page *page = buf;
> -	struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len,
> -					  PAGE_SIZE, true);
> +	struct sk_buff *skb =
> +		page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0);
>   
>   	stats->bytes += len - vi->hdr_len;
>   	if (unlikely(!skb))
> @@ -793,6 +806,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>   	unsigned int truesize;
>   	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
>   	int err;
> +	unsigned int metasize = 0;
>   
>   	head_skb = NULL;
>   	stats->bytes += len - vi->hdr_len;
> @@ -839,8 +853,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>   		data = page_address(xdp_page) + offset;
>   		xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len;
>   		xdp.data = data + vi->hdr_len;
> -		xdp_set_data_meta_invalid(&xdp);
>   		xdp.data_end = xdp.data + (len - vi->hdr_len);
> +		xdp.data_meta = xdp.data;
>   		xdp.rxq = &rq->xdp_rxq;
>   
>   		act = bpf_prog_run_xdp(xdp_prog, &xdp);
> @@ -852,8 +866,9 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>   			 * adjustments. Note other cases do not build an
>   			 * skb and avoid using offset
>   			 */
> -			offset = xdp.data -
> -					page_address(xdp_page) - vi->hdr_len;
> +			metasize = xdp.data - xdp.data_meta;
> +			offset = xdp.data - page_address(xdp_page) -
> +				 vi->hdr_len - metasize;
>   
>   			/* recalculate len if xdp.data or xdp.data_end were
>   			 * adjusted
> @@ -863,14 +878,15 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>   			if (unlikely(xdp_page != page)) {
>   				rcu_read_unlock();
>   				put_page(page);
> -				head_skb = page_to_skb(vi, rq, xdp_page,
> -						       offset, len,
> -						       PAGE_SIZE, false);
> +				head_skb = page_to_skb(vi, rq, xdp_page, offset,
> +						       len, PAGE_SIZE, false,
> +						       metasize);
>   				return head_skb;
>   			}
>   			break;
>   		case XDP_TX:
>   			stats->xdp_tx++;
> +			xdp.data_meta = xdp.data;
>   			xdpf = convert_to_xdp_frame(&xdp);
>   			if (unlikely(!xdpf))
>   				goto err_xdp;
> @@ -921,7 +937,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>   		goto err_skb;
>   	}
>   
> -	head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog);
> +	head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog,
> +			       metasize);
>   	curr_skb = head_skb;
>   
>   	if (unlikely(!curr_skb))
Yuya Kusakabe July 2, 2019, 2:11 p.m. UTC | #2
On 7/2/19 5:33 PM, Jason Wang wrote:
> 
> On 2019/7/2 下午4:16, Yuya Kusakabe wrote:
>> This adds XDP meta data support to both receive_small() and
>> receive_mergeable().
>>
>> Fixes: de8f3a83b0a0 ("bpf: add meta pointer for direct access")
>> Signed-off-by: Yuya Kusakabe <yuya.kusakabe@gmail.com>
>> ---
>> v3:
>>   - fix preserve the vnet header in receive_small().
>> v2:
>>   - keep copy untouched in page_to_skb().
>>   - preserve the vnet header in receive_small().
>>   - fix indentation.
>> ---
>>   drivers/net/virtio_net.c | 45 +++++++++++++++++++++++++++-------------
>>   1 file changed, 31 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>> index 4f3de0ac8b0b..03a1ae6fe267 100644
>> --- a/drivers/net/virtio_net.c
>> +++ b/drivers/net/virtio_net.c
>> @@ -371,7 +371,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
>>                      struct receive_queue *rq,
>>                      struct page *page, unsigned int offset,
>>                      unsigned int len, unsigned int truesize,
>> -                   bool hdr_valid)
>> +                   bool hdr_valid, unsigned int metasize)
>>   {
>>       struct sk_buff *skb;
>>       struct virtio_net_hdr_mrg_rxbuf *hdr;
>> @@ -393,7 +393,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
>>       else
>>           hdr_padded_len = sizeof(struct padded_vnet_hdr);
>>   -    if (hdr_valid)
>> +    if (hdr_valid && !metasize)
>>           memcpy(hdr, p, hdr_len);
>>         len -= hdr_len;
>> @@ -405,6 +405,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
>>           copy = skb_tailroom(skb);
>>       skb_put_data(skb, p, copy);
>>   +    if (metasize) {
>> +        __skb_pull(skb, metasize);
>> +        skb_metadata_set(skb, metasize);
>> +    }
>> +
>>       len -= copy;
>>       offset += copy;
>>   @@ -644,6 +649,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
>>       unsigned int delta = 0;
>>       struct page *xdp_page;
>>       int err;
>> +    unsigned int metasize = 0;
>>         len -= vi->hdr_len;
>>       stats->bytes += len;
>> @@ -683,10 +689,13 @@ static struct sk_buff *receive_small(struct net_device *dev,
>>             xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len;
>>           xdp.data = xdp.data_hard_start + xdp_headroom;
>> -        xdp_set_data_meta_invalid(&xdp);
>>           xdp.data_end = xdp.data + len;
>> +        xdp.data_meta = xdp.data;
>>           xdp.rxq = &rq->xdp_rxq;
>>           orig_data = xdp.data;
>> +        /* Copy the vnet header to the front of data_hard_start to avoid
>> +         * overwriting by XDP meta data */
>> +        memcpy(xdp.data_hard_start - vi->hdr_len, xdp.data - vi->hdr_len, vi->hdr_len);
> 
> 
> What happens if we have a large metadata that occupies all headroom here?
> 
> Thanks

Do you mean a large "XDP" metadata? If a large metadata is a large "XDP" metadata, I think we can not use a metadata that occupies all headroom. The size of metadata limited by bpf_xdp_adjust_meta() as below.
bpf_xdp_adjust_meta() in net/core/filter.c:
	if (unlikely((metalen & (sizeof(__u32) - 1)) ||
		     (metalen > 32)))
		return -EACCES;

Thanks.

> 
> 
>>           act = bpf_prog_run_xdp(xdp_prog, &xdp);
>>           stats->xdp_packets++;
>>   @@ -695,9 +704,11 @@ static struct sk_buff *receive_small(struct net_device *dev,
>>               /* Recalculate length in case bpf program changed it */
>>               delta = orig_data - xdp.data;
>>               len = xdp.data_end - xdp.data;
>> +            metasize = xdp.data - xdp.data_meta;
>>               break;
>>           case XDP_TX:
>>               stats->xdp_tx++;
>> +            xdp.data_meta = xdp.data;
>>               xdpf = convert_to_xdp_frame(&xdp);
>>               if (unlikely(!xdpf))
>>                   goto err_xdp;
>> @@ -736,10 +747,12 @@ static struct sk_buff *receive_small(struct net_device *dev,
>>       skb_reserve(skb, headroom - delta);
>>       skb_put(skb, len);
>>       if (!delta) {
>> -        buf += header_offset;
>> -        memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len);
>> +        memcpy(skb_vnet_hdr(skb), buf + VIRTNET_RX_PAD, vi->hdr_len);
>>       } /* keep zeroed vnet hdr since packet was changed by bpf */
>>   +    if (metasize)
>> +        skb_metadata_set(skb, metasize);
>> +
>>   err:
>>       return skb;
>>   @@ -760,8 +773,8 @@ static struct sk_buff *receive_big(struct net_device *dev,
>>                      struct virtnet_rq_stats *stats)
>>   {
>>       struct page *page = buf;
>> -    struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len,
>> -                      PAGE_SIZE, true);
>> +    struct sk_buff *skb =
>> +        page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0);
>>         stats->bytes += len - vi->hdr_len;
>>       if (unlikely(!skb))
>> @@ -793,6 +806,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>>       unsigned int truesize;
>>       unsigned int headroom = mergeable_ctx_to_headroom(ctx);
>>       int err;
>> +    unsigned int metasize = 0;
>>         head_skb = NULL;
>>       stats->bytes += len - vi->hdr_len;
>> @@ -839,8 +853,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>>           data = page_address(xdp_page) + offset;
>>           xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len;
>>           xdp.data = data + vi->hdr_len;
>> -        xdp_set_data_meta_invalid(&xdp);
>>           xdp.data_end = xdp.data + (len - vi->hdr_len);
>> +        xdp.data_meta = xdp.data;
>>           xdp.rxq = &rq->xdp_rxq;
>>             act = bpf_prog_run_xdp(xdp_prog, &xdp);
>> @@ -852,8 +866,9 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>>                * adjustments. Note other cases do not build an
>>                * skb and avoid using offset
>>                */
>> -            offset = xdp.data -
>> -                    page_address(xdp_page) - vi->hdr_len;
>> +            metasize = xdp.data - xdp.data_meta;
>> +            offset = xdp.data - page_address(xdp_page) -
>> +                 vi->hdr_len - metasize;
>>                 /* recalculate len if xdp.data or xdp.data_end were
>>                * adjusted
>> @@ -863,14 +878,15 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>>               if (unlikely(xdp_page != page)) {
>>                   rcu_read_unlock();
>>                   put_page(page);
>> -                head_skb = page_to_skb(vi, rq, xdp_page,
>> -                               offset, len,
>> -                               PAGE_SIZE, false);
>> +                head_skb = page_to_skb(vi, rq, xdp_page, offset,
>> +                               len, PAGE_SIZE, false,
>> +                               metasize);
>>                   return head_skb;
>>               }
>>               break;
>>           case XDP_TX:
>>               stats->xdp_tx++;
>> +            xdp.data_meta = xdp.data;
>>               xdpf = convert_to_xdp_frame(&xdp);
>>               if (unlikely(!xdpf))
>>                   goto err_xdp;
>> @@ -921,7 +937,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>>           goto err_skb;
>>       }
>>   -    head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog);
>> +    head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog,
>> +                   metasize);
>>       curr_skb = head_skb;
>>         if (unlikely(!curr_skb))
Daniel Borkmann July 8, 2019, 10:38 p.m. UTC | #3
On 07/02/2019 04:11 PM, Yuya Kusakabe wrote:
> On 7/2/19 5:33 PM, Jason Wang wrote:
>> On 2019/7/2 下午4:16, Yuya Kusakabe wrote:
>>> This adds XDP meta data support to both receive_small() and
>>> receive_mergeable().
>>>
>>> Fixes: de8f3a83b0a0 ("bpf: add meta pointer for direct access")
>>> Signed-off-by: Yuya Kusakabe <yuya.kusakabe@gmail.com>
>>> ---
>>> v3:
>>>   - fix preserve the vnet header in receive_small().
>>> v2:
>>>   - keep copy untouched in page_to_skb().
>>>   - preserve the vnet header in receive_small().
>>>   - fix indentation.
>>> ---
>>>   drivers/net/virtio_net.c | 45 +++++++++++++++++++++++++++-------------
>>>   1 file changed, 31 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>>> index 4f3de0ac8b0b..03a1ae6fe267 100644
>>> --- a/drivers/net/virtio_net.c
>>> +++ b/drivers/net/virtio_net.c
>>> @@ -371,7 +371,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
>>>                      struct receive_queue *rq,
>>>                      struct page *page, unsigned int offset,
>>>                      unsigned int len, unsigned int truesize,
>>> -                   bool hdr_valid)
>>> +                   bool hdr_valid, unsigned int metasize)
>>>   {
>>>       struct sk_buff *skb;
>>>       struct virtio_net_hdr_mrg_rxbuf *hdr;
>>> @@ -393,7 +393,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
>>>       else
>>>           hdr_padded_len = sizeof(struct padded_vnet_hdr);
>>>   -    if (hdr_valid)
>>> +    if (hdr_valid && !metasize)
>>>           memcpy(hdr, p, hdr_len);
>>>         len -= hdr_len;
>>> @@ -405,6 +405,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
>>>           copy = skb_tailroom(skb);
>>>       skb_put_data(skb, p, copy);
>>>   +    if (metasize) {
>>> +        __skb_pull(skb, metasize);
>>> +        skb_metadata_set(skb, metasize);
>>> +    }
>>> +
>>>       len -= copy;
>>>       offset += copy;
>>>   @@ -644,6 +649,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
>>>       unsigned int delta = 0;
>>>       struct page *xdp_page;
>>>       int err;
>>> +    unsigned int metasize = 0;
>>>         len -= vi->hdr_len;
>>>       stats->bytes += len;
>>> @@ -683,10 +689,13 @@ static struct sk_buff *receive_small(struct net_device *dev,
>>>             xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len;
>>>           xdp.data = xdp.data_hard_start + xdp_headroom;
>>> -        xdp_set_data_meta_invalid(&xdp);
>>>           xdp.data_end = xdp.data + len;
>>> +        xdp.data_meta = xdp.data;
>>>           xdp.rxq = &rq->xdp_rxq;
>>>           orig_data = xdp.data;
>>> +        /* Copy the vnet header to the front of data_hard_start to avoid
>>> +         * overwriting by XDP meta data */
>>> +        memcpy(xdp.data_hard_start - vi->hdr_len, xdp.data - vi->hdr_len, vi->hdr_len);

I'm not fully sure if I'm following this one correctly, probably just missing
something. Isn't the vnet header based on how we set up xdp.data_hard_start
earlier already in front of it? Wouldn't we copy invalid data from xdp.data -
vi->hdr_len into the vnet header at that point (given there can be up to 256
bytes of headroom between the two)? If it's relative to xdp.data and headroom
is >0, then BPF prog could otherwise mangle this; something doesn't add up to
me here. Could you clarify? Thx

>> What happens if we have a large metadata that occupies all headroom here?
>>
>> Thanks
> 
> Do you mean a large "XDP" metadata? If a large metadata is a large "XDP" metadata, I think we can not use a metadata that occupies all headroom. The size of metadata limited by bpf_xdp_adjust_meta() as below.
> bpf_xdp_adjust_meta() in net/core/filter.c:
> 	if (unlikely((metalen & (sizeof(__u32) - 1)) ||
> 		     (metalen > 32)))
> 		return -EACCES;
> 
> Thanks.
> 
>>
>>
>>>           act = bpf_prog_run_xdp(xdp_prog, &xdp);
>>>           stats->xdp_packets++;
>>>   @@ -695,9 +704,11 @@ static struct sk_buff *receive_small(struct net_device *dev,
>>>               /* Recalculate length in case bpf program changed it */
>>>               delta = orig_data - xdp.data;
>>>               len = xdp.data_end - xdp.data;
>>> +            metasize = xdp.data - xdp.data_meta;
>>>               break;
>>>           case XDP_TX:
>>>               stats->xdp_tx++;
>>> +            xdp.data_meta = xdp.data;
>>>               xdpf = convert_to_xdp_frame(&xdp);
>>>               if (unlikely(!xdpf))
>>>                   goto err_xdp;
>>> @@ -736,10 +747,12 @@ static struct sk_buff *receive_small(struct net_device *dev,
>>>       skb_reserve(skb, headroom - delta);
>>>       skb_put(skb, len);
>>>       if (!delta) {
>>> -        buf += header_offset;
>>> -        memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len);
>>> +        memcpy(skb_vnet_hdr(skb), buf + VIRTNET_RX_PAD, vi->hdr_len);
>>>       } /* keep zeroed vnet hdr since packet was changed by bpf */
>>>   +    if (metasize)
>>> +        skb_metadata_set(skb, metasize);
>>> +
>>>   err:
>>>       return skb;
>>>   @@ -760,8 +773,8 @@ static struct sk_buff *receive_big(struct net_device *dev,
>>>                      struct virtnet_rq_stats *stats)
>>>   {
>>>       struct page *page = buf;
>>> -    struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len,
>>> -                      PAGE_SIZE, true);
>>> +    struct sk_buff *skb =
>>> +        page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0);
>>>         stats->bytes += len - vi->hdr_len;
>>>       if (unlikely(!skb))
>>> @@ -793,6 +806,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>>>       unsigned int truesize;
>>>       unsigned int headroom = mergeable_ctx_to_headroom(ctx);
>>>       int err;
>>> +    unsigned int metasize = 0;
>>>         head_skb = NULL;
>>>       stats->bytes += len - vi->hdr_len;
>>> @@ -839,8 +853,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>>>           data = page_address(xdp_page) + offset;
>>>           xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len;
>>>           xdp.data = data + vi->hdr_len;
>>> -        xdp_set_data_meta_invalid(&xdp);
>>>           xdp.data_end = xdp.data + (len - vi->hdr_len);
>>> +        xdp.data_meta = xdp.data;
>>>           xdp.rxq = &rq->xdp_rxq;
>>>             act = bpf_prog_run_xdp(xdp_prog, &xdp);
>>> @@ -852,8 +866,9 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>>>                * adjustments. Note other cases do not build an
>>>                * skb and avoid using offset
>>>                */
>>> -            offset = xdp.data -
>>> -                    page_address(xdp_page) - vi->hdr_len;
>>> +            metasize = xdp.data - xdp.data_meta;
>>> +            offset = xdp.data - page_address(xdp_page) -
>>> +                 vi->hdr_len - metasize;
>>>                 /* recalculate len if xdp.data or xdp.data_end were
>>>                * adjusted
>>> @@ -863,14 +878,15 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>>>               if (unlikely(xdp_page != page)) {
>>>                   rcu_read_unlock();
>>>                   put_page(page);
>>> -                head_skb = page_to_skb(vi, rq, xdp_page,
>>> -                               offset, len,
>>> -                               PAGE_SIZE, false);
>>> +                head_skb = page_to_skb(vi, rq, xdp_page, offset,
>>> +                               len, PAGE_SIZE, false,
>>> +                               metasize);
>>>                   return head_skb;
>>>               }
>>>               break;
>>>           case XDP_TX:
>>>               stats->xdp_tx++;
>>> +            xdp.data_meta = xdp.data;
>>>               xdpf = convert_to_xdp_frame(&xdp);
>>>               if (unlikely(!xdpf))
>>>                   goto err_xdp;
>>> @@ -921,7 +937,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>>>           goto err_skb;
>>>       }
>>>   -    head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog);
>>> +    head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog,
>>> +                   metasize);
>>>       curr_skb = head_skb;
>>>         if (unlikely(!curr_skb))
Jason Wang July 9, 2019, 3:04 a.m. UTC | #4
On 2019/7/9 上午6:38, Daniel Borkmann wrote:
> On 07/02/2019 04:11 PM, Yuya Kusakabe wrote:
>> On 7/2/19 5:33 PM, Jason Wang wrote:
>>> On 2019/7/2 下午4:16, Yuya Kusakabe wrote:
>>>> This adds XDP meta data support to both receive_small() and
>>>> receive_mergeable().
>>>>
>>>> Fixes: de8f3a83b0a0 ("bpf: add meta pointer for direct access")
>>>> Signed-off-by: Yuya Kusakabe <yuya.kusakabe@gmail.com>
>>>> ---
>>>> v3:
>>>>    - fix preserve the vnet header in receive_small().
>>>> v2:
>>>>    - keep copy untouched in page_to_skb().
>>>>    - preserve the vnet header in receive_small().
>>>>    - fix indentation.
>>>> ---
>>>>    drivers/net/virtio_net.c | 45 +++++++++++++++++++++++++++-------------
>>>>    1 file changed, 31 insertions(+), 14 deletions(-)
>>>>
>>>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>>>> index 4f3de0ac8b0b..03a1ae6fe267 100644
>>>> --- a/drivers/net/virtio_net.c
>>>> +++ b/drivers/net/virtio_net.c
>>>> @@ -371,7 +371,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
>>>>                       struct receive_queue *rq,
>>>>                       struct page *page, unsigned int offset,
>>>>                       unsigned int len, unsigned int truesize,
>>>> -                   bool hdr_valid)
>>>> +                   bool hdr_valid, unsigned int metasize)
>>>>    {
>>>>        struct sk_buff *skb;
>>>>        struct virtio_net_hdr_mrg_rxbuf *hdr;
>>>> @@ -393,7 +393,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
>>>>        else
>>>>            hdr_padded_len = sizeof(struct padded_vnet_hdr);
>>>>    -    if (hdr_valid)
>>>> +    if (hdr_valid && !metasize)
>>>>            memcpy(hdr, p, hdr_len);
>>>>          len -= hdr_len;
>>>> @@ -405,6 +405,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
>>>>            copy = skb_tailroom(skb);
>>>>        skb_put_data(skb, p, copy);
>>>>    +    if (metasize) {
>>>> +        __skb_pull(skb, metasize);
>>>> +        skb_metadata_set(skb, metasize);
>>>> +    }
>>>> +
>>>>        len -= copy;
>>>>        offset += copy;
>>>>    @@ -644,6 +649,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
>>>>        unsigned int delta = 0;
>>>>        struct page *xdp_page;
>>>>        int err;
>>>> +    unsigned int metasize = 0;
>>>>          len -= vi->hdr_len;
>>>>        stats->bytes += len;
>>>> @@ -683,10 +689,13 @@ static struct sk_buff *receive_small(struct net_device *dev,
>>>>              xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len;
>>>>            xdp.data = xdp.data_hard_start + xdp_headroom;
>>>> -        xdp_set_data_meta_invalid(&xdp);
>>>>            xdp.data_end = xdp.data + len;
>>>> +        xdp.data_meta = xdp.data;
>>>>            xdp.rxq = &rq->xdp_rxq;
>>>>            orig_data = xdp.data;
>>>> +        /* Copy the vnet header to the front of data_hard_start to avoid
>>>> +         * overwriting by XDP meta data */
>>>> +        memcpy(xdp.data_hard_start - vi->hdr_len, xdp.data - vi->hdr_len, vi->hdr_len);
> I'm not fully sure if I'm following this one correctly, probably just missing
> something. Isn't the vnet header based on how we set up xdp.data_hard_start
> earlier already in front of it? Wouldn't we copy invalid data from xdp.data -
> vi->hdr_len into the vnet header at that point (given there can be up to 256
> bytes of headroom between the two)? If it's relative to xdp.data and headroom
> is >0, then BPF prog could otherwise mangle this; something doesn't add up to
> me here. Could you clarify? Thx


Vnet headr sits just in front of xdp.data not xdp.data_hard_start. So it 
could be overwrote by metadata, that's why we need a copy here.

Thanks


>
>>> What happens if we have a large metadata that occupies all headroom here?
>>>
>>> Thanks
>> Do you mean a large "XDP" metadata? If a large metadata is a large "XDP" metadata, I think we can not use a metadata that occupies all headroom. The size of metadata limited by bpf_xdp_adjust_meta() as below.
>> bpf_xdp_adjust_meta() in net/core/filter.c:
>> 	if (unlikely((metalen & (sizeof(__u32) - 1)) ||
>> 		     (metalen > 32)))
>> 		return -EACCES;
>>
>> Thanks.
>>
>>>
>>>>            act = bpf_prog_run_xdp(xdp_prog, &xdp);
>>>>            stats->xdp_packets++;
>>>>    @@ -695,9 +704,11 @@ static struct sk_buff *receive_small(struct net_device *dev,
>>>>                /* Recalculate length in case bpf program changed it */
>>>>                delta = orig_data - xdp.data;
>>>>                len = xdp.data_end - xdp.data;
>>>> +            metasize = xdp.data - xdp.data_meta;
>>>>                break;
>>>>            case XDP_TX:
>>>>                stats->xdp_tx++;
>>>> +            xdp.data_meta = xdp.data;
>>>>                xdpf = convert_to_xdp_frame(&xdp);
>>>>                if (unlikely(!xdpf))
>>>>                    goto err_xdp;
>>>> @@ -736,10 +747,12 @@ static struct sk_buff *receive_small(struct net_device *dev,
>>>>        skb_reserve(skb, headroom - delta);
>>>>        skb_put(skb, len);
>>>>        if (!delta) {
>>>> -        buf += header_offset;
>>>> -        memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len);
>>>> +        memcpy(skb_vnet_hdr(skb), buf + VIRTNET_RX_PAD, vi->hdr_len);
>>>>        } /* keep zeroed vnet hdr since packet was changed by bpf */
>>>>    +    if (metasize)
>>>> +        skb_metadata_set(skb, metasize);
>>>> +
>>>>    err:
>>>>        return skb;
>>>>    @@ -760,8 +773,8 @@ static struct sk_buff *receive_big(struct net_device *dev,
>>>>                       struct virtnet_rq_stats *stats)
>>>>    {
>>>>        struct page *page = buf;
>>>> -    struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len,
>>>> -                      PAGE_SIZE, true);
>>>> +    struct sk_buff *skb =
>>>> +        page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0);
>>>>          stats->bytes += len - vi->hdr_len;
>>>>        if (unlikely(!skb))
>>>> @@ -793,6 +806,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>>>>        unsigned int truesize;
>>>>        unsigned int headroom = mergeable_ctx_to_headroom(ctx);
>>>>        int err;
>>>> +    unsigned int metasize = 0;
>>>>          head_skb = NULL;
>>>>        stats->bytes += len - vi->hdr_len;
>>>> @@ -839,8 +853,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>>>>            data = page_address(xdp_page) + offset;
>>>>            xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len;
>>>>            xdp.data = data + vi->hdr_len;
>>>> -        xdp_set_data_meta_invalid(&xdp);
>>>>            xdp.data_end = xdp.data + (len - vi->hdr_len);
>>>> +        xdp.data_meta = xdp.data;
>>>>            xdp.rxq = &rq->xdp_rxq;
>>>>              act = bpf_prog_run_xdp(xdp_prog, &xdp);
>>>> @@ -852,8 +866,9 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>>>>                 * adjustments. Note other cases do not build an
>>>>                 * skb and avoid using offset
>>>>                 */
>>>> -            offset = xdp.data -
>>>> -                    page_address(xdp_page) - vi->hdr_len;
>>>> +            metasize = xdp.data - xdp.data_meta;
>>>> +            offset = xdp.data - page_address(xdp_page) -
>>>> +                 vi->hdr_len - metasize;
>>>>                  /* recalculate len if xdp.data or xdp.data_end were
>>>>                 * adjusted
>>>> @@ -863,14 +878,15 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>>>>                if (unlikely(xdp_page != page)) {
>>>>                    rcu_read_unlock();
>>>>                    put_page(page);
>>>> -                head_skb = page_to_skb(vi, rq, xdp_page,
>>>> -                               offset, len,
>>>> -                               PAGE_SIZE, false);
>>>> +                head_skb = page_to_skb(vi, rq, xdp_page, offset,
>>>> +                               len, PAGE_SIZE, false,
>>>> +                               metasize);
>>>>                    return head_skb;
>>>>                }
>>>>                break;
>>>>            case XDP_TX:
>>>>                stats->xdp_tx++;
>>>> +            xdp.data_meta = xdp.data;
>>>>                xdpf = convert_to_xdp_frame(&xdp);
>>>>                if (unlikely(!xdpf))
>>>>                    goto err_xdp;
>>>> @@ -921,7 +937,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>>>>            goto err_skb;
>>>>        }
>>>>    -    head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog);
>>>> +    head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog,
>>>> +                   metasize);
>>>>        curr_skb = head_skb;
>>>>          if (unlikely(!curr_skb))
Daniel Borkmann July 9, 2019, 8:03 p.m. UTC | #5
On 07/09/2019 05:04 AM, Jason Wang wrote:
> On 2019/7/9 上午6:38, Daniel Borkmann wrote:
>> On 07/02/2019 04:11 PM, Yuya Kusakabe wrote:
>>> On 7/2/19 5:33 PM, Jason Wang wrote:
>>>> On 2019/7/2 下午4:16, Yuya Kusakabe wrote:
>>>>> This adds XDP meta data support to both receive_small() and
>>>>> receive_mergeable().
>>>>>
>>>>> Fixes: de8f3a83b0a0 ("bpf: add meta pointer for direct access")
>>>>> Signed-off-by: Yuya Kusakabe <yuya.kusakabe@gmail.com>
>>>>> ---
>>>>> v3:
>>>>>    - fix preserve the vnet header in receive_small().
>>>>> v2:
>>>>>    - keep copy untouched in page_to_skb().
>>>>>    - preserve the vnet header in receive_small().
>>>>>    - fix indentation.
>>>>> ---
>>>>>    drivers/net/virtio_net.c | 45 +++++++++++++++++++++++++++-------------
>>>>>    1 file changed, 31 insertions(+), 14 deletions(-)
>>>>>
>>>>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>>>>> index 4f3de0ac8b0b..03a1ae6fe267 100644
>>>>> --- a/drivers/net/virtio_net.c
>>>>> +++ b/drivers/net/virtio_net.c
>>>>> @@ -371,7 +371,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
>>>>>                       struct receive_queue *rq,
>>>>>                       struct page *page, unsigned int offset,
>>>>>                       unsigned int len, unsigned int truesize,
>>>>> -                   bool hdr_valid)
>>>>> +                   bool hdr_valid, unsigned int metasize)
>>>>>    {
>>>>>        struct sk_buff *skb;
>>>>>        struct virtio_net_hdr_mrg_rxbuf *hdr;
>>>>> @@ -393,7 +393,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
>>>>>        else
>>>>>            hdr_padded_len = sizeof(struct padded_vnet_hdr);
>>>>>    -    if (hdr_valid)
>>>>> +    if (hdr_valid && !metasize)
>>>>>            memcpy(hdr, p, hdr_len);
>>>>>          len -= hdr_len;
>>>>> @@ -405,6 +405,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
>>>>>            copy = skb_tailroom(skb);
>>>>>        skb_put_data(skb, p, copy);
>>>>>    +    if (metasize) {
>>>>> +        __skb_pull(skb, metasize);
>>>>> +        skb_metadata_set(skb, metasize);
>>>>> +    }
>>>>> +
>>>>>        len -= copy;
>>>>>        offset += copy;
>>>>>    @@ -644,6 +649,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
>>>>>        unsigned int delta = 0;
>>>>>        struct page *xdp_page;
>>>>>        int err;
>>>>> +    unsigned int metasize = 0;
>>>>>          len -= vi->hdr_len;
>>>>>        stats->bytes += len;
>>>>> @@ -683,10 +689,13 @@ static struct sk_buff *receive_small(struct net_device *dev,
>>>>>              xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len;
>>>>>            xdp.data = xdp.data_hard_start + xdp_headroom;
>>>>> -        xdp_set_data_meta_invalid(&xdp);
>>>>>            xdp.data_end = xdp.data + len;
>>>>> +        xdp.data_meta = xdp.data;
>>>>>            xdp.rxq = &rq->xdp_rxq;
>>>>>            orig_data = xdp.data;
>>>>> +        /* Copy the vnet header to the front of data_hard_start to avoid
>>>>> +         * overwriting by XDP meta data */
>>>>> +        memcpy(xdp.data_hard_start - vi->hdr_len, xdp.data - vi->hdr_len, vi->hdr_len);
>> I'm not fully sure if I'm following this one correctly, probably just missing
>> something. Isn't the vnet header based on how we set up xdp.data_hard_start
>> earlier already in front of it? Wouldn't we copy invalid data from xdp.data -
>> vi->hdr_len into the vnet header at that point (given there can be up to 256
>> bytes of headroom between the two)? If it's relative to xdp.data and headroom
>> is >0, then BPF prog could otherwise mangle this; something doesn't add up to
>> me here. Could you clarify? Thx
> 
> Vnet headr sits just in front of xdp.data not xdp.data_hard_start. So it could be overwrote by metadata, that's why we need a copy here.

For the current code, you can adjust the xdp.data with a positive/negative offset
already via bpf_xdp_adjust_head() helper. If vnet headr sits just in front of
xdp.data, couldn't this be overridden today as well then? Anyway, just wondering
how this is handled differently?

Thanks,
Daniel
Jason Wang July 10, 2019, 2:30 a.m. UTC | #6
On 2019/7/10 上午4:03, Daniel Borkmann wrote:
> On 07/09/2019 05:04 AM, Jason Wang wrote:
>> On 2019/7/9 上午6:38, Daniel Borkmann wrote:
>>> On 07/02/2019 04:11 PM, Yuya Kusakabe wrote:
>>>> On 7/2/19 5:33 PM, Jason Wang wrote:
>>>>> On 2019/7/2 下午4:16, Yuya Kusakabe wrote:
>>>>>> This adds XDP meta data support to both receive_small() and
>>>>>> receive_mergeable().
>>>>>>
>>>>>> Fixes: de8f3a83b0a0 ("bpf: add meta pointer for direct access")
>>>>>> Signed-off-by: Yuya Kusakabe <yuya.kusakabe@gmail.com>
>>>>>> ---
>>>>>> v3:
>>>>>>     - fix preserve the vnet header in receive_small().
>>>>>> v2:
>>>>>>     - keep copy untouched in page_to_skb().
>>>>>>     - preserve the vnet header in receive_small().
>>>>>>     - fix indentation.
>>>>>> ---
>>>>>>     drivers/net/virtio_net.c | 45 +++++++++++++++++++++++++++-------------
>>>>>>     1 file changed, 31 insertions(+), 14 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>>>>>> index 4f3de0ac8b0b..03a1ae6fe267 100644
>>>>>> --- a/drivers/net/virtio_net.c
>>>>>> +++ b/drivers/net/virtio_net.c
>>>>>> @@ -371,7 +371,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
>>>>>>                        struct receive_queue *rq,
>>>>>>                        struct page *page, unsigned int offset,
>>>>>>                        unsigned int len, unsigned int truesize,
>>>>>> -                   bool hdr_valid)
>>>>>> +                   bool hdr_valid, unsigned int metasize)
>>>>>>     {
>>>>>>         struct sk_buff *skb;
>>>>>>         struct virtio_net_hdr_mrg_rxbuf *hdr;
>>>>>> @@ -393,7 +393,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
>>>>>>         else
>>>>>>             hdr_padded_len = sizeof(struct padded_vnet_hdr);
>>>>>>     -    if (hdr_valid)
>>>>>> +    if (hdr_valid && !metasize)
>>>>>>             memcpy(hdr, p, hdr_len);
>>>>>>           len -= hdr_len;
>>>>>> @@ -405,6 +405,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
>>>>>>             copy = skb_tailroom(skb);
>>>>>>         skb_put_data(skb, p, copy);
>>>>>>     +    if (metasize) {
>>>>>> +        __skb_pull(skb, metasize);
>>>>>> +        skb_metadata_set(skb, metasize);
>>>>>> +    }
>>>>>> +
>>>>>>         len -= copy;
>>>>>>         offset += copy;
>>>>>>     @@ -644,6 +649,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
>>>>>>         unsigned int delta = 0;
>>>>>>         struct page *xdp_page;
>>>>>>         int err;
>>>>>> +    unsigned int metasize = 0;
>>>>>>           len -= vi->hdr_len;
>>>>>>         stats->bytes += len;
>>>>>> @@ -683,10 +689,13 @@ static struct sk_buff *receive_small(struct net_device *dev,
>>>>>>               xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len;
>>>>>>             xdp.data = xdp.data_hard_start + xdp_headroom;
>>>>>> -        xdp_set_data_meta_invalid(&xdp);
>>>>>>             xdp.data_end = xdp.data + len;
>>>>>> +        xdp.data_meta = xdp.data;
>>>>>>             xdp.rxq = &rq->xdp_rxq;
>>>>>>             orig_data = xdp.data;
>>>>>> +        /* Copy the vnet header to the front of data_hard_start to avoid
>>>>>> +         * overwriting by XDP meta data */
>>>>>> +        memcpy(xdp.data_hard_start - vi->hdr_len, xdp.data - vi->hdr_len, vi->hdr_len);
>>> I'm not fully sure if I'm following this one correctly, probably just missing
>>> something. Isn't the vnet header based on how we set up xdp.data_hard_start
>>> earlier already in front of it? Wouldn't we copy invalid data from xdp.data -
>>> vi->hdr_len into the vnet header at that point (given there can be up to 256
>>> bytes of headroom between the two)? If it's relative to xdp.data and headroom
>>> is >0, then BPF prog could otherwise mangle this; something doesn't add up to
>>> me here. Could you clarify? Thx
>> Vnet headr sits just in front of xdp.data not xdp.data_hard_start. So it could be overwrote by metadata, that's why we need a copy here.
> For the current code, you can adjust the xdp.data with a positive/negative offset
> already via bpf_xdp_adjust_head() helper. If vnet headr sits just in front of
> xdp.data, couldn't this be overridden today as well then? Anyway, just wondering
> how this is handled differently?


We will invalidate the vnet header in this case. But for the case of 
metadata adjustment without header adjustment, we want to seek a way to 
preserve that.

Thanks


>
> Thanks,
> Daniel
Yuya Kusakabe Feb. 3, 2020, 1:52 p.m. UTC | #7
I'm sorry for the late reply.

I saw the status of this patch is "Awaiting Upstream" on
https://patchwork.ozlabs.org/patch/1126046/. What is "Awaiting
Upstream"? Is there anything that I can do?

Thank you,
Yuya

On Wed, Jul 10, 2019 at 11:30 AM Jason Wang <jasowang@redhat.com> wrote:
>
>
> On 2019/7/10 上午4:03, Daniel Borkmann wrote:
> > On 07/09/2019 05:04 AM, Jason Wang wrote:
> >> On 2019/7/9 上午6:38, Daniel Borkmann wrote:
> >>> On 07/02/2019 04:11 PM, Yuya Kusakabe wrote:
> >>>> On 7/2/19 5:33 PM, Jason Wang wrote:
> >>>>> On 2019/7/2 下午4:16, Yuya Kusakabe wrote:
> >>>>>> This adds XDP meta data support to both receive_small() and
> >>>>>> receive_mergeable().
> >>>>>>
> >>>>>> Fixes: de8f3a83b0a0 ("bpf: add meta pointer for direct access")
> >>>>>> Signed-off-by: Yuya Kusakabe <yuya.kusakabe@gmail.com>
> >>>>>> ---
> >>>>>> v3:
> >>>>>>     - fix preserve the vnet header in receive_small().
> >>>>>> v2:
> >>>>>>     - keep copy untouched in page_to_skb().
> >>>>>>     - preserve the vnet header in receive_small().
> >>>>>>     - fix indentation.
> >>>>>> ---
> >>>>>>     drivers/net/virtio_net.c | 45 +++++++++++++++++++++++++++-------------
> >>>>>>     1 file changed, 31 insertions(+), 14 deletions(-)
> >>>>>>
> >>>>>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> >>>>>> index 4f3de0ac8b0b..03a1ae6fe267 100644
> >>>>>> --- a/drivers/net/virtio_net.c
> >>>>>> +++ b/drivers/net/virtio_net.c
> >>>>>> @@ -371,7 +371,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> >>>>>>                        struct receive_queue *rq,
> >>>>>>                        struct page *page, unsigned int offset,
> >>>>>>                        unsigned int len, unsigned int truesize,
> >>>>>> -                   bool hdr_valid)
> >>>>>> +                   bool hdr_valid, unsigned int metasize)
> >>>>>>     {
> >>>>>>         struct sk_buff *skb;
> >>>>>>         struct virtio_net_hdr_mrg_rxbuf *hdr;
> >>>>>> @@ -393,7 +393,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> >>>>>>         else
> >>>>>>             hdr_padded_len = sizeof(struct padded_vnet_hdr);
> >>>>>>     -    if (hdr_valid)
> >>>>>> +    if (hdr_valid && !metasize)
> >>>>>>             memcpy(hdr, p, hdr_len);
> >>>>>>           len -= hdr_len;
> >>>>>> @@ -405,6 +405,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> >>>>>>             copy = skb_tailroom(skb);
> >>>>>>         skb_put_data(skb, p, copy);
> >>>>>>     +    if (metasize) {
> >>>>>> +        __skb_pull(skb, metasize);
> >>>>>> +        skb_metadata_set(skb, metasize);
> >>>>>> +    }
> >>>>>> +
> >>>>>>         len -= copy;
> >>>>>>         offset += copy;
> >>>>>>     @@ -644,6 +649,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
> >>>>>>         unsigned int delta = 0;
> >>>>>>         struct page *xdp_page;
> >>>>>>         int err;
> >>>>>> +    unsigned int metasize = 0;
> >>>>>>           len -= vi->hdr_len;
> >>>>>>         stats->bytes += len;
> >>>>>> @@ -683,10 +689,13 @@ static struct sk_buff *receive_small(struct net_device *dev,
> >>>>>>               xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len;
> >>>>>>             xdp.data = xdp.data_hard_start + xdp_headroom;
> >>>>>> -        xdp_set_data_meta_invalid(&xdp);
> >>>>>>             xdp.data_end = xdp.data + len;
> >>>>>> +        xdp.data_meta = xdp.data;
> >>>>>>             xdp.rxq = &rq->xdp_rxq;
> >>>>>>             orig_data = xdp.data;
> >>>>>> +        /* Copy the vnet header to the front of data_hard_start to avoid
> >>>>>> +         * overwriting by XDP meta data */
> >>>>>> +        memcpy(xdp.data_hard_start - vi->hdr_len, xdp.data - vi->hdr_len, vi->hdr_len);
> >>> I'm not fully sure if I'm following this one correctly, probably just missing
> >>> something. Isn't the vnet header based on how we set up xdp.data_hard_start
> >>> earlier already in front of it? Wouldn't we copy invalid data from xdp.data -
> >>> vi->hdr_len into the vnet header at that point (given there can be up to 256
> >>> bytes of headroom between the two)? If it's relative to xdp.data and headroom
> >>> is >0, then BPF prog could otherwise mangle this; something doesn't add up to
> >>> me here. Could you clarify? Thx
> >> Vnet headr sits just in front of xdp.data not xdp.data_hard_start. So it could be overwrote by metadata, that's why we need a copy here.
> > For the current code, you can adjust the xdp.data with a positive/negative offset
> > already via bpf_xdp_adjust_head() helper. If vnet headr sits just in front of
> > xdp.data, couldn't this be overridden today as well then? Anyway, just wondering
> > how this is handled differently?
>
>
> We will invalidate the vnet header in this case. But for the case of
> metadata adjustment without header adjustment, we want to seek a way to
> preserve that.
>
> Thanks
>
>
> >
> > Thanks,
> > Daniel
Jason Wang Feb. 4, 2020, 3:31 a.m. UTC | #8
On 2020/2/3 下午9:52, Yuya Kusakabe wrote:
> I'm sorry for the late reply.
>
> I saw the status of this patch is "Awaiting Upstream" on
> https://patchwork.ozlabs.org/patch/1126046/. What is "Awaiting
> Upstream"? Is there anything that I can do?


You can post a new version I think.

Thanks


>
> Thank you,
> Yuya
>
> On Wed, Jul 10, 2019 at 11:30 AM Jason Wang <jasowang@redhat.com> wrote:
>>
>> On 2019/7/10 上午4:03, Daniel Borkmann wrote:
>>> On 07/09/2019 05:04 AM, Jason Wang wrote:
>>>> On 2019/7/9 上午6:38, Daniel Borkmann wrote:
>>>>> On 07/02/2019 04:11 PM, Yuya Kusakabe wrote:
>>>>>> On 7/2/19 5:33 PM, Jason Wang wrote:
>>>>>>> On 2019/7/2 下午4:16, Yuya Kusakabe wrote:
>>>>>>>> This adds XDP meta data support to both receive_small() and
>>>>>>>> receive_mergeable().
>>>>>>>>
>>>>>>>> Fixes: de8f3a83b0a0 ("bpf: add meta pointer for direct access")
>>>>>>>> Signed-off-by: Yuya Kusakabe <yuya.kusakabe@gmail.com>
>>>>>>>> ---
>>>>>>>> v3:
>>>>>>>>      - fix preserve the vnet header in receive_small().
>>>>>>>> v2:
>>>>>>>>      - keep copy untouched in page_to_skb().
>>>>>>>>      - preserve the vnet header in receive_small().
>>>>>>>>      - fix indentation.
>>>>>>>> ---
>>>>>>>>      drivers/net/virtio_net.c | 45 +++++++++++++++++++++++++++-------------
>>>>>>>>      1 file changed, 31 insertions(+), 14 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>>>>>>>> index 4f3de0ac8b0b..03a1ae6fe267 100644
>>>>>>>> --- a/drivers/net/virtio_net.c
>>>>>>>> +++ b/drivers/net/virtio_net.c
>>>>>>>> @@ -371,7 +371,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
>>>>>>>>                         struct receive_queue *rq,
>>>>>>>>                         struct page *page, unsigned int offset,
>>>>>>>>                         unsigned int len, unsigned int truesize,
>>>>>>>> -                   bool hdr_valid)
>>>>>>>> +                   bool hdr_valid, unsigned int metasize)
>>>>>>>>      {
>>>>>>>>          struct sk_buff *skb;
>>>>>>>>          struct virtio_net_hdr_mrg_rxbuf *hdr;
>>>>>>>> @@ -393,7 +393,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
>>>>>>>>          else
>>>>>>>>              hdr_padded_len = sizeof(struct padded_vnet_hdr);
>>>>>>>>      -    if (hdr_valid)
>>>>>>>> +    if (hdr_valid && !metasize)
>>>>>>>>              memcpy(hdr, p, hdr_len);
>>>>>>>>            len -= hdr_len;
>>>>>>>> @@ -405,6 +405,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
>>>>>>>>              copy = skb_tailroom(skb);
>>>>>>>>          skb_put_data(skb, p, copy);
>>>>>>>>      +    if (metasize) {
>>>>>>>> +        __skb_pull(skb, metasize);
>>>>>>>> +        skb_metadata_set(skb, metasize);
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>>          len -= copy;
>>>>>>>>          offset += copy;
>>>>>>>>      @@ -644,6 +649,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
>>>>>>>>          unsigned int delta = 0;
>>>>>>>>          struct page *xdp_page;
>>>>>>>>          int err;
>>>>>>>> +    unsigned int metasize = 0;
>>>>>>>>            len -= vi->hdr_len;
>>>>>>>>          stats->bytes += len;
>>>>>>>> @@ -683,10 +689,13 @@ static struct sk_buff *receive_small(struct net_device *dev,
>>>>>>>>                xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len;
>>>>>>>>              xdp.data = xdp.data_hard_start + xdp_headroom;
>>>>>>>> -        xdp_set_data_meta_invalid(&xdp);
>>>>>>>>              xdp.data_end = xdp.data + len;
>>>>>>>> +        xdp.data_meta = xdp.data;
>>>>>>>>              xdp.rxq = &rq->xdp_rxq;
>>>>>>>>              orig_data = xdp.data;
>>>>>>>> +        /* Copy the vnet header to the front of data_hard_start to avoid
>>>>>>>> +         * overwriting by XDP meta data */
>>>>>>>> +        memcpy(xdp.data_hard_start - vi->hdr_len, xdp.data - vi->hdr_len, vi->hdr_len);
>>>>> I'm not fully sure if I'm following this one correctly, probably just missing
>>>>> something. Isn't the vnet header based on how we set up xdp.data_hard_start
>>>>> earlier already in front of it? Wouldn't we copy invalid data from xdp.data -
>>>>> vi->hdr_len into the vnet header at that point (given there can be up to 256
>>>>> bytes of headroom between the two)? If it's relative to xdp.data and headroom
>>>>> is >0, then BPF prog could otherwise mangle this; something doesn't add up to
>>>>> me here. Could you clarify? Thx
>>>> Vnet headr sits just in front of xdp.data not xdp.data_hard_start. So it could be overwrote by metadata, that's why we need a copy here.
>>> For the current code, you can adjust the xdp.data with a positive/negative offset
>>> already via bpf_xdp_adjust_head() helper. If vnet headr sits just in front of
>>> xdp.data, couldn't this be overridden today as well then? Anyway, just wondering
>>> how this is handled differently?
>>
>> We will invalidate the vnet header in this case. But for the case of
>> metadata adjustment without header adjustment, we want to seek a way to
>> preserve that.
>>
>> Thanks
>>
>>
>>> Thanks,
>>> Daniel
>
>
diff mbox series

Patch

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 4f3de0ac8b0b..03a1ae6fe267 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -371,7 +371,7 @@  static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 				   struct receive_queue *rq,
 				   struct page *page, unsigned int offset,
 				   unsigned int len, unsigned int truesize,
-				   bool hdr_valid)
+				   bool hdr_valid, unsigned int metasize)
 {
 	struct sk_buff *skb;
 	struct virtio_net_hdr_mrg_rxbuf *hdr;
@@ -393,7 +393,7 @@  static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 	else
 		hdr_padded_len = sizeof(struct padded_vnet_hdr);
 
-	if (hdr_valid)
+	if (hdr_valid && !metasize)
 		memcpy(hdr, p, hdr_len);
 
 	len -= hdr_len;
@@ -405,6 +405,11 @@  static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 		copy = skb_tailroom(skb);
 	skb_put_data(skb, p, copy);
 
+	if (metasize) {
+		__skb_pull(skb, metasize);
+		skb_metadata_set(skb, metasize);
+	}
+
 	len -= copy;
 	offset += copy;
 
@@ -644,6 +649,7 @@  static struct sk_buff *receive_small(struct net_device *dev,
 	unsigned int delta = 0;
 	struct page *xdp_page;
 	int err;
+	unsigned int metasize = 0;
 
 	len -= vi->hdr_len;
 	stats->bytes += len;
@@ -683,10 +689,13 @@  static struct sk_buff *receive_small(struct net_device *dev,
 
 		xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len;
 		xdp.data = xdp.data_hard_start + xdp_headroom;
-		xdp_set_data_meta_invalid(&xdp);
 		xdp.data_end = xdp.data + len;
+		xdp.data_meta = xdp.data;
 		xdp.rxq = &rq->xdp_rxq;
 		orig_data = xdp.data;
+		/* Copy the vnet header to the front of data_hard_start to avoid
+		 * overwriting by XDP meta data */
+		memcpy(xdp.data_hard_start - vi->hdr_len, xdp.data - vi->hdr_len, vi->hdr_len);
 		act = bpf_prog_run_xdp(xdp_prog, &xdp);
 		stats->xdp_packets++;
 
@@ -695,9 +704,11 @@  static struct sk_buff *receive_small(struct net_device *dev,
 			/* Recalculate length in case bpf program changed it */
 			delta = orig_data - xdp.data;
 			len = xdp.data_end - xdp.data;
+			metasize = xdp.data - xdp.data_meta;
 			break;
 		case XDP_TX:
 			stats->xdp_tx++;
+			xdp.data_meta = xdp.data;
 			xdpf = convert_to_xdp_frame(&xdp);
 			if (unlikely(!xdpf))
 				goto err_xdp;
@@ -736,10 +747,12 @@  static struct sk_buff *receive_small(struct net_device *dev,
 	skb_reserve(skb, headroom - delta);
 	skb_put(skb, len);
 	if (!delta) {
-		buf += header_offset;
-		memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len);
+		memcpy(skb_vnet_hdr(skb), buf + VIRTNET_RX_PAD, vi->hdr_len);
 	} /* keep zeroed vnet hdr since packet was changed by bpf */
 
+	if (metasize)
+		skb_metadata_set(skb, metasize);
+
 err:
 	return skb;
 
@@ -760,8 +773,8 @@  static struct sk_buff *receive_big(struct net_device *dev,
 				   struct virtnet_rq_stats *stats)
 {
 	struct page *page = buf;
-	struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len,
-					  PAGE_SIZE, true);
+	struct sk_buff *skb =
+		page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0);
 
 	stats->bytes += len - vi->hdr_len;
 	if (unlikely(!skb))
@@ -793,6 +806,7 @@  static struct sk_buff *receive_mergeable(struct net_device *dev,
 	unsigned int truesize;
 	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
 	int err;
+	unsigned int metasize = 0;
 
 	head_skb = NULL;
 	stats->bytes += len - vi->hdr_len;
@@ -839,8 +853,8 @@  static struct sk_buff *receive_mergeable(struct net_device *dev,
 		data = page_address(xdp_page) + offset;
 		xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len;
 		xdp.data = data + vi->hdr_len;
-		xdp_set_data_meta_invalid(&xdp);
 		xdp.data_end = xdp.data + (len - vi->hdr_len);
+		xdp.data_meta = xdp.data;
 		xdp.rxq = &rq->xdp_rxq;
 
 		act = bpf_prog_run_xdp(xdp_prog, &xdp);
@@ -852,8 +866,9 @@  static struct sk_buff *receive_mergeable(struct net_device *dev,
 			 * adjustments. Note other cases do not build an
 			 * skb and avoid using offset
 			 */
-			offset = xdp.data -
-					page_address(xdp_page) - vi->hdr_len;
+			metasize = xdp.data - xdp.data_meta;
+			offset = xdp.data - page_address(xdp_page) -
+				 vi->hdr_len - metasize;
 
 			/* recalculate len if xdp.data or xdp.data_end were
 			 * adjusted
@@ -863,14 +878,15 @@  static struct sk_buff *receive_mergeable(struct net_device *dev,
 			if (unlikely(xdp_page != page)) {
 				rcu_read_unlock();
 				put_page(page);
-				head_skb = page_to_skb(vi, rq, xdp_page,
-						       offset, len,
-						       PAGE_SIZE, false);
+				head_skb = page_to_skb(vi, rq, xdp_page, offset,
+						       len, PAGE_SIZE, false,
+						       metasize);
 				return head_skb;
 			}
 			break;
 		case XDP_TX:
 			stats->xdp_tx++;
+			xdp.data_meta = xdp.data;
 			xdpf = convert_to_xdp_frame(&xdp);
 			if (unlikely(!xdpf))
 				goto err_xdp;
@@ -921,7 +937,8 @@  static struct sk_buff *receive_mergeable(struct net_device *dev,
 		goto err_skb;
 	}
 
-	head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog);
+	head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog,
+			       metasize);
 	curr_skb = head_skb;
 
 	if (unlikely(!curr_skb))