Message ID | 20190702081646.23230-1-yuya.kusakabe@gmail.com |
---|---|
State | Awaiting Upstream |
Delegated to: | BPF Maintainers |
Headers | show |
Series | [bpf-next,v3] virtio_net: add XDP meta data support | expand |
On 2019/7/2 下午4:16, Yuya Kusakabe wrote: > This adds XDP meta data support to both receive_small() and > receive_mergeable(). > > Fixes: de8f3a83b0a0 ("bpf: add meta pointer for direct access") > Signed-off-by: Yuya Kusakabe <yuya.kusakabe@gmail.com> > --- > v3: > - fix preserve the vnet header in receive_small(). > v2: > - keep copy untouched in page_to_skb(). > - preserve the vnet header in receive_small(). > - fix indentation. > --- > drivers/net/virtio_net.c | 45 +++++++++++++++++++++++++++------------- > 1 file changed, 31 insertions(+), 14 deletions(-) > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > index 4f3de0ac8b0b..03a1ae6fe267 100644 > --- a/drivers/net/virtio_net.c > +++ b/drivers/net/virtio_net.c > @@ -371,7 +371,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, > struct receive_queue *rq, > struct page *page, unsigned int offset, > unsigned int len, unsigned int truesize, > - bool hdr_valid) > + bool hdr_valid, unsigned int metasize) > { > struct sk_buff *skb; > struct virtio_net_hdr_mrg_rxbuf *hdr; > @@ -393,7 +393,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, > else > hdr_padded_len = sizeof(struct padded_vnet_hdr); > > - if (hdr_valid) > + if (hdr_valid && !metasize) > memcpy(hdr, p, hdr_len); > > len -= hdr_len; > @@ -405,6 +405,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, > copy = skb_tailroom(skb); > skb_put_data(skb, p, copy); > > + if (metasize) { > + __skb_pull(skb, metasize); > + skb_metadata_set(skb, metasize); > + } > + > len -= copy; > offset += copy; > > @@ -644,6 +649,7 @@ static struct sk_buff *receive_small(struct net_device *dev, > unsigned int delta = 0; > struct page *xdp_page; > int err; > + unsigned int metasize = 0; > > len -= vi->hdr_len; > stats->bytes += len; > @@ -683,10 +689,13 @@ static struct sk_buff *receive_small(struct net_device *dev, > > xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len; > xdp.data = xdp.data_hard_start + xdp_headroom; > - xdp_set_data_meta_invalid(&xdp); > xdp.data_end = xdp.data + len; > + xdp.data_meta = xdp.data; > xdp.rxq = &rq->xdp_rxq; > orig_data = xdp.data; > + /* Copy the vnet header to the front of data_hard_start to avoid > + * overwriting by XDP meta data */ > + memcpy(xdp.data_hard_start - vi->hdr_len, xdp.data - vi->hdr_len, vi->hdr_len); What happens if we have a large metadata that occupies all headroom here? Thanks > act = bpf_prog_run_xdp(xdp_prog, &xdp); > stats->xdp_packets++; > > @@ -695,9 +704,11 @@ static struct sk_buff *receive_small(struct net_device *dev, > /* Recalculate length in case bpf program changed it */ > delta = orig_data - xdp.data; > len = xdp.data_end - xdp.data; > + metasize = xdp.data - xdp.data_meta; > break; > case XDP_TX: > stats->xdp_tx++; > + xdp.data_meta = xdp.data; > xdpf = convert_to_xdp_frame(&xdp); > if (unlikely(!xdpf)) > goto err_xdp; > @@ -736,10 +747,12 @@ static struct sk_buff *receive_small(struct net_device *dev, > skb_reserve(skb, headroom - delta); > skb_put(skb, len); > if (!delta) { > - buf += header_offset; > - memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len); > + memcpy(skb_vnet_hdr(skb), buf + VIRTNET_RX_PAD, vi->hdr_len); > } /* keep zeroed vnet hdr since packet was changed by bpf */ > > + if (metasize) > + skb_metadata_set(skb, metasize); > + > err: > return skb; > > @@ -760,8 +773,8 @@ static struct sk_buff *receive_big(struct net_device *dev, > struct virtnet_rq_stats *stats) > { > struct page *page = buf; > - struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, > - PAGE_SIZE, true); > + struct sk_buff *skb = > + page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0); > > stats->bytes += len - vi->hdr_len; > if (unlikely(!skb)) > @@ -793,6 +806,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, > unsigned int truesize; > unsigned int headroom = mergeable_ctx_to_headroom(ctx); > int err; > + unsigned int metasize = 0; > > head_skb = NULL; > stats->bytes += len - vi->hdr_len; > @@ -839,8 +853,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, > data = page_address(xdp_page) + offset; > xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len; > xdp.data = data + vi->hdr_len; > - xdp_set_data_meta_invalid(&xdp); > xdp.data_end = xdp.data + (len - vi->hdr_len); > + xdp.data_meta = xdp.data; > xdp.rxq = &rq->xdp_rxq; > > act = bpf_prog_run_xdp(xdp_prog, &xdp); > @@ -852,8 +866,9 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, > * adjustments. Note other cases do not build an > * skb and avoid using offset > */ > - offset = xdp.data - > - page_address(xdp_page) - vi->hdr_len; > + metasize = xdp.data - xdp.data_meta; > + offset = xdp.data - page_address(xdp_page) - > + vi->hdr_len - metasize; > > /* recalculate len if xdp.data or xdp.data_end were > * adjusted > @@ -863,14 +878,15 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, > if (unlikely(xdp_page != page)) { > rcu_read_unlock(); > put_page(page); > - head_skb = page_to_skb(vi, rq, xdp_page, > - offset, len, > - PAGE_SIZE, false); > + head_skb = page_to_skb(vi, rq, xdp_page, offset, > + len, PAGE_SIZE, false, > + metasize); > return head_skb; > } > break; > case XDP_TX: > stats->xdp_tx++; > + xdp.data_meta = xdp.data; > xdpf = convert_to_xdp_frame(&xdp); > if (unlikely(!xdpf)) > goto err_xdp; > @@ -921,7 +937,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, > goto err_skb; > } > > - head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog); > + head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog, > + metasize); > curr_skb = head_skb; > > if (unlikely(!curr_skb))
On 7/2/19 5:33 PM, Jason Wang wrote: > > On 2019/7/2 下午4:16, Yuya Kusakabe wrote: >> This adds XDP meta data support to both receive_small() and >> receive_mergeable(). >> >> Fixes: de8f3a83b0a0 ("bpf: add meta pointer for direct access") >> Signed-off-by: Yuya Kusakabe <yuya.kusakabe@gmail.com> >> --- >> v3: >> - fix preserve the vnet header in receive_small(). >> v2: >> - keep copy untouched in page_to_skb(). >> - preserve the vnet header in receive_small(). >> - fix indentation. >> --- >> drivers/net/virtio_net.c | 45 +++++++++++++++++++++++++++------------- >> 1 file changed, 31 insertions(+), 14 deletions(-) >> >> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c >> index 4f3de0ac8b0b..03a1ae6fe267 100644 >> --- a/drivers/net/virtio_net.c >> +++ b/drivers/net/virtio_net.c >> @@ -371,7 +371,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, >> struct receive_queue *rq, >> struct page *page, unsigned int offset, >> unsigned int len, unsigned int truesize, >> - bool hdr_valid) >> + bool hdr_valid, unsigned int metasize) >> { >> struct sk_buff *skb; >> struct virtio_net_hdr_mrg_rxbuf *hdr; >> @@ -393,7 +393,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, >> else >> hdr_padded_len = sizeof(struct padded_vnet_hdr); >> - if (hdr_valid) >> + if (hdr_valid && !metasize) >> memcpy(hdr, p, hdr_len); >> len -= hdr_len; >> @@ -405,6 +405,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, >> copy = skb_tailroom(skb); >> skb_put_data(skb, p, copy); >> + if (metasize) { >> + __skb_pull(skb, metasize); >> + skb_metadata_set(skb, metasize); >> + } >> + >> len -= copy; >> offset += copy; >> @@ -644,6 +649,7 @@ static struct sk_buff *receive_small(struct net_device *dev, >> unsigned int delta = 0; >> struct page *xdp_page; >> int err; >> + unsigned int metasize = 0; >> len -= vi->hdr_len; >> stats->bytes += len; >> @@ -683,10 +689,13 @@ static struct sk_buff *receive_small(struct net_device *dev, >> xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len; >> xdp.data = xdp.data_hard_start + xdp_headroom; >> - xdp_set_data_meta_invalid(&xdp); >> xdp.data_end = xdp.data + len; >> + xdp.data_meta = xdp.data; >> xdp.rxq = &rq->xdp_rxq; >> orig_data = xdp.data; >> + /* Copy the vnet header to the front of data_hard_start to avoid >> + * overwriting by XDP meta data */ >> + memcpy(xdp.data_hard_start - vi->hdr_len, xdp.data - vi->hdr_len, vi->hdr_len); > > > What happens if we have a large metadata that occupies all headroom here? > > Thanks Do you mean a large "XDP" metadata? If a large metadata is a large "XDP" metadata, I think we can not use a metadata that occupies all headroom. The size of metadata limited by bpf_xdp_adjust_meta() as below. bpf_xdp_adjust_meta() in net/core/filter.c: if (unlikely((metalen & (sizeof(__u32) - 1)) || (metalen > 32))) return -EACCES; Thanks. > > >> act = bpf_prog_run_xdp(xdp_prog, &xdp); >> stats->xdp_packets++; >> @@ -695,9 +704,11 @@ static struct sk_buff *receive_small(struct net_device *dev, >> /* Recalculate length in case bpf program changed it */ >> delta = orig_data - xdp.data; >> len = xdp.data_end - xdp.data; >> + metasize = xdp.data - xdp.data_meta; >> break; >> case XDP_TX: >> stats->xdp_tx++; >> + xdp.data_meta = xdp.data; >> xdpf = convert_to_xdp_frame(&xdp); >> if (unlikely(!xdpf)) >> goto err_xdp; >> @@ -736,10 +747,12 @@ static struct sk_buff *receive_small(struct net_device *dev, >> skb_reserve(skb, headroom - delta); >> skb_put(skb, len); >> if (!delta) { >> - buf += header_offset; >> - memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len); >> + memcpy(skb_vnet_hdr(skb), buf + VIRTNET_RX_PAD, vi->hdr_len); >> } /* keep zeroed vnet hdr since packet was changed by bpf */ >> + if (metasize) >> + skb_metadata_set(skb, metasize); >> + >> err: >> return skb; >> @@ -760,8 +773,8 @@ static struct sk_buff *receive_big(struct net_device *dev, >> struct virtnet_rq_stats *stats) >> { >> struct page *page = buf; >> - struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, >> - PAGE_SIZE, true); >> + struct sk_buff *skb = >> + page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0); >> stats->bytes += len - vi->hdr_len; >> if (unlikely(!skb)) >> @@ -793,6 +806,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, >> unsigned int truesize; >> unsigned int headroom = mergeable_ctx_to_headroom(ctx); >> int err; >> + unsigned int metasize = 0; >> head_skb = NULL; >> stats->bytes += len - vi->hdr_len; >> @@ -839,8 +853,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, >> data = page_address(xdp_page) + offset; >> xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len; >> xdp.data = data + vi->hdr_len; >> - xdp_set_data_meta_invalid(&xdp); >> xdp.data_end = xdp.data + (len - vi->hdr_len); >> + xdp.data_meta = xdp.data; >> xdp.rxq = &rq->xdp_rxq; >> act = bpf_prog_run_xdp(xdp_prog, &xdp); >> @@ -852,8 +866,9 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, >> * adjustments. Note other cases do not build an >> * skb and avoid using offset >> */ >> - offset = xdp.data - >> - page_address(xdp_page) - vi->hdr_len; >> + metasize = xdp.data - xdp.data_meta; >> + offset = xdp.data - page_address(xdp_page) - >> + vi->hdr_len - metasize; >> /* recalculate len if xdp.data or xdp.data_end were >> * adjusted >> @@ -863,14 +878,15 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, >> if (unlikely(xdp_page != page)) { >> rcu_read_unlock(); >> put_page(page); >> - head_skb = page_to_skb(vi, rq, xdp_page, >> - offset, len, >> - PAGE_SIZE, false); >> + head_skb = page_to_skb(vi, rq, xdp_page, offset, >> + len, PAGE_SIZE, false, >> + metasize); >> return head_skb; >> } >> break; >> case XDP_TX: >> stats->xdp_tx++; >> + xdp.data_meta = xdp.data; >> xdpf = convert_to_xdp_frame(&xdp); >> if (unlikely(!xdpf)) >> goto err_xdp; >> @@ -921,7 +937,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, >> goto err_skb; >> } >> - head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog); >> + head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog, >> + metasize); >> curr_skb = head_skb; >> if (unlikely(!curr_skb))
On 07/02/2019 04:11 PM, Yuya Kusakabe wrote: > On 7/2/19 5:33 PM, Jason Wang wrote: >> On 2019/7/2 下午4:16, Yuya Kusakabe wrote: >>> This adds XDP meta data support to both receive_small() and >>> receive_mergeable(). >>> >>> Fixes: de8f3a83b0a0 ("bpf: add meta pointer for direct access") >>> Signed-off-by: Yuya Kusakabe <yuya.kusakabe@gmail.com> >>> --- >>> v3: >>> - fix preserve the vnet header in receive_small(). >>> v2: >>> - keep copy untouched in page_to_skb(). >>> - preserve the vnet header in receive_small(). >>> - fix indentation. >>> --- >>> drivers/net/virtio_net.c | 45 +++++++++++++++++++++++++++------------- >>> 1 file changed, 31 insertions(+), 14 deletions(-) >>> >>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c >>> index 4f3de0ac8b0b..03a1ae6fe267 100644 >>> --- a/drivers/net/virtio_net.c >>> +++ b/drivers/net/virtio_net.c >>> @@ -371,7 +371,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, >>> struct receive_queue *rq, >>> struct page *page, unsigned int offset, >>> unsigned int len, unsigned int truesize, >>> - bool hdr_valid) >>> + bool hdr_valid, unsigned int metasize) >>> { >>> struct sk_buff *skb; >>> struct virtio_net_hdr_mrg_rxbuf *hdr; >>> @@ -393,7 +393,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, >>> else >>> hdr_padded_len = sizeof(struct padded_vnet_hdr); >>> - if (hdr_valid) >>> + if (hdr_valid && !metasize) >>> memcpy(hdr, p, hdr_len); >>> len -= hdr_len; >>> @@ -405,6 +405,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, >>> copy = skb_tailroom(skb); >>> skb_put_data(skb, p, copy); >>> + if (metasize) { >>> + __skb_pull(skb, metasize); >>> + skb_metadata_set(skb, metasize); >>> + } >>> + >>> len -= copy; >>> offset += copy; >>> @@ -644,6 +649,7 @@ static struct sk_buff *receive_small(struct net_device *dev, >>> unsigned int delta = 0; >>> struct page *xdp_page; >>> int err; >>> + unsigned int metasize = 0; >>> len -= vi->hdr_len; >>> stats->bytes += len; >>> @@ -683,10 +689,13 @@ static struct sk_buff *receive_small(struct net_device *dev, >>> xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len; >>> xdp.data = xdp.data_hard_start + xdp_headroom; >>> - xdp_set_data_meta_invalid(&xdp); >>> xdp.data_end = xdp.data + len; >>> + xdp.data_meta = xdp.data; >>> xdp.rxq = &rq->xdp_rxq; >>> orig_data = xdp.data; >>> + /* Copy the vnet header to the front of data_hard_start to avoid >>> + * overwriting by XDP meta data */ >>> + memcpy(xdp.data_hard_start - vi->hdr_len, xdp.data - vi->hdr_len, vi->hdr_len); I'm not fully sure if I'm following this one correctly, probably just missing something. Isn't the vnet header based on how we set up xdp.data_hard_start earlier already in front of it? Wouldn't we copy invalid data from xdp.data - vi->hdr_len into the vnet header at that point (given there can be up to 256 bytes of headroom between the two)? If it's relative to xdp.data and headroom is >0, then BPF prog could otherwise mangle this; something doesn't add up to me here. Could you clarify? Thx >> What happens if we have a large metadata that occupies all headroom here? >> >> Thanks > > Do you mean a large "XDP" metadata? If a large metadata is a large "XDP" metadata, I think we can not use a metadata that occupies all headroom. The size of metadata limited by bpf_xdp_adjust_meta() as below. > bpf_xdp_adjust_meta() in net/core/filter.c: > if (unlikely((metalen & (sizeof(__u32) - 1)) || > (metalen > 32))) > return -EACCES; > > Thanks. > >> >> >>> act = bpf_prog_run_xdp(xdp_prog, &xdp); >>> stats->xdp_packets++; >>> @@ -695,9 +704,11 @@ static struct sk_buff *receive_small(struct net_device *dev, >>> /* Recalculate length in case bpf program changed it */ >>> delta = orig_data - xdp.data; >>> len = xdp.data_end - xdp.data; >>> + metasize = xdp.data - xdp.data_meta; >>> break; >>> case XDP_TX: >>> stats->xdp_tx++; >>> + xdp.data_meta = xdp.data; >>> xdpf = convert_to_xdp_frame(&xdp); >>> if (unlikely(!xdpf)) >>> goto err_xdp; >>> @@ -736,10 +747,12 @@ static struct sk_buff *receive_small(struct net_device *dev, >>> skb_reserve(skb, headroom - delta); >>> skb_put(skb, len); >>> if (!delta) { >>> - buf += header_offset; >>> - memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len); >>> + memcpy(skb_vnet_hdr(skb), buf + VIRTNET_RX_PAD, vi->hdr_len); >>> } /* keep zeroed vnet hdr since packet was changed by bpf */ >>> + if (metasize) >>> + skb_metadata_set(skb, metasize); >>> + >>> err: >>> return skb; >>> @@ -760,8 +773,8 @@ static struct sk_buff *receive_big(struct net_device *dev, >>> struct virtnet_rq_stats *stats) >>> { >>> struct page *page = buf; >>> - struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, >>> - PAGE_SIZE, true); >>> + struct sk_buff *skb = >>> + page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0); >>> stats->bytes += len - vi->hdr_len; >>> if (unlikely(!skb)) >>> @@ -793,6 +806,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, >>> unsigned int truesize; >>> unsigned int headroom = mergeable_ctx_to_headroom(ctx); >>> int err; >>> + unsigned int metasize = 0; >>> head_skb = NULL; >>> stats->bytes += len - vi->hdr_len; >>> @@ -839,8 +853,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, >>> data = page_address(xdp_page) + offset; >>> xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len; >>> xdp.data = data + vi->hdr_len; >>> - xdp_set_data_meta_invalid(&xdp); >>> xdp.data_end = xdp.data + (len - vi->hdr_len); >>> + xdp.data_meta = xdp.data; >>> xdp.rxq = &rq->xdp_rxq; >>> act = bpf_prog_run_xdp(xdp_prog, &xdp); >>> @@ -852,8 +866,9 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, >>> * adjustments. Note other cases do not build an >>> * skb and avoid using offset >>> */ >>> - offset = xdp.data - >>> - page_address(xdp_page) - vi->hdr_len; >>> + metasize = xdp.data - xdp.data_meta; >>> + offset = xdp.data - page_address(xdp_page) - >>> + vi->hdr_len - metasize; >>> /* recalculate len if xdp.data or xdp.data_end were >>> * adjusted >>> @@ -863,14 +878,15 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, >>> if (unlikely(xdp_page != page)) { >>> rcu_read_unlock(); >>> put_page(page); >>> - head_skb = page_to_skb(vi, rq, xdp_page, >>> - offset, len, >>> - PAGE_SIZE, false); >>> + head_skb = page_to_skb(vi, rq, xdp_page, offset, >>> + len, PAGE_SIZE, false, >>> + metasize); >>> return head_skb; >>> } >>> break; >>> case XDP_TX: >>> stats->xdp_tx++; >>> + xdp.data_meta = xdp.data; >>> xdpf = convert_to_xdp_frame(&xdp); >>> if (unlikely(!xdpf)) >>> goto err_xdp; >>> @@ -921,7 +937,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, >>> goto err_skb; >>> } >>> - head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog); >>> + head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog, >>> + metasize); >>> curr_skb = head_skb; >>> if (unlikely(!curr_skb))
On 2019/7/9 上午6:38, Daniel Borkmann wrote: > On 07/02/2019 04:11 PM, Yuya Kusakabe wrote: >> On 7/2/19 5:33 PM, Jason Wang wrote: >>> On 2019/7/2 下午4:16, Yuya Kusakabe wrote: >>>> This adds XDP meta data support to both receive_small() and >>>> receive_mergeable(). >>>> >>>> Fixes: de8f3a83b0a0 ("bpf: add meta pointer for direct access") >>>> Signed-off-by: Yuya Kusakabe <yuya.kusakabe@gmail.com> >>>> --- >>>> v3: >>>> - fix preserve the vnet header in receive_small(). >>>> v2: >>>> - keep copy untouched in page_to_skb(). >>>> - preserve the vnet header in receive_small(). >>>> - fix indentation. >>>> --- >>>> drivers/net/virtio_net.c | 45 +++++++++++++++++++++++++++------------- >>>> 1 file changed, 31 insertions(+), 14 deletions(-) >>>> >>>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c >>>> index 4f3de0ac8b0b..03a1ae6fe267 100644 >>>> --- a/drivers/net/virtio_net.c >>>> +++ b/drivers/net/virtio_net.c >>>> @@ -371,7 +371,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, >>>> struct receive_queue *rq, >>>> struct page *page, unsigned int offset, >>>> unsigned int len, unsigned int truesize, >>>> - bool hdr_valid) >>>> + bool hdr_valid, unsigned int metasize) >>>> { >>>> struct sk_buff *skb; >>>> struct virtio_net_hdr_mrg_rxbuf *hdr; >>>> @@ -393,7 +393,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, >>>> else >>>> hdr_padded_len = sizeof(struct padded_vnet_hdr); >>>> - if (hdr_valid) >>>> + if (hdr_valid && !metasize) >>>> memcpy(hdr, p, hdr_len); >>>> len -= hdr_len; >>>> @@ -405,6 +405,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, >>>> copy = skb_tailroom(skb); >>>> skb_put_data(skb, p, copy); >>>> + if (metasize) { >>>> + __skb_pull(skb, metasize); >>>> + skb_metadata_set(skb, metasize); >>>> + } >>>> + >>>> len -= copy; >>>> offset += copy; >>>> @@ -644,6 +649,7 @@ static struct sk_buff *receive_small(struct net_device *dev, >>>> unsigned int delta = 0; >>>> struct page *xdp_page; >>>> int err; >>>> + unsigned int metasize = 0; >>>> len -= vi->hdr_len; >>>> stats->bytes += len; >>>> @@ -683,10 +689,13 @@ static struct sk_buff *receive_small(struct net_device *dev, >>>> xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len; >>>> xdp.data = xdp.data_hard_start + xdp_headroom; >>>> - xdp_set_data_meta_invalid(&xdp); >>>> xdp.data_end = xdp.data + len; >>>> + xdp.data_meta = xdp.data; >>>> xdp.rxq = &rq->xdp_rxq; >>>> orig_data = xdp.data; >>>> + /* Copy the vnet header to the front of data_hard_start to avoid >>>> + * overwriting by XDP meta data */ >>>> + memcpy(xdp.data_hard_start - vi->hdr_len, xdp.data - vi->hdr_len, vi->hdr_len); > I'm not fully sure if I'm following this one correctly, probably just missing > something. Isn't the vnet header based on how we set up xdp.data_hard_start > earlier already in front of it? Wouldn't we copy invalid data from xdp.data - > vi->hdr_len into the vnet header at that point (given there can be up to 256 > bytes of headroom between the two)? If it's relative to xdp.data and headroom > is >0, then BPF prog could otherwise mangle this; something doesn't add up to > me here. Could you clarify? Thx Vnet headr sits just in front of xdp.data not xdp.data_hard_start. So it could be overwrote by metadata, that's why we need a copy here. Thanks > >>> What happens if we have a large metadata that occupies all headroom here? >>> >>> Thanks >> Do you mean a large "XDP" metadata? If a large metadata is a large "XDP" metadata, I think we can not use a metadata that occupies all headroom. The size of metadata limited by bpf_xdp_adjust_meta() as below. >> bpf_xdp_adjust_meta() in net/core/filter.c: >> if (unlikely((metalen & (sizeof(__u32) - 1)) || >> (metalen > 32))) >> return -EACCES; >> >> Thanks. >> >>> >>>> act = bpf_prog_run_xdp(xdp_prog, &xdp); >>>> stats->xdp_packets++; >>>> @@ -695,9 +704,11 @@ static struct sk_buff *receive_small(struct net_device *dev, >>>> /* Recalculate length in case bpf program changed it */ >>>> delta = orig_data - xdp.data; >>>> len = xdp.data_end - xdp.data; >>>> + metasize = xdp.data - xdp.data_meta; >>>> break; >>>> case XDP_TX: >>>> stats->xdp_tx++; >>>> + xdp.data_meta = xdp.data; >>>> xdpf = convert_to_xdp_frame(&xdp); >>>> if (unlikely(!xdpf)) >>>> goto err_xdp; >>>> @@ -736,10 +747,12 @@ static struct sk_buff *receive_small(struct net_device *dev, >>>> skb_reserve(skb, headroom - delta); >>>> skb_put(skb, len); >>>> if (!delta) { >>>> - buf += header_offset; >>>> - memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len); >>>> + memcpy(skb_vnet_hdr(skb), buf + VIRTNET_RX_PAD, vi->hdr_len); >>>> } /* keep zeroed vnet hdr since packet was changed by bpf */ >>>> + if (metasize) >>>> + skb_metadata_set(skb, metasize); >>>> + >>>> err: >>>> return skb; >>>> @@ -760,8 +773,8 @@ static struct sk_buff *receive_big(struct net_device *dev, >>>> struct virtnet_rq_stats *stats) >>>> { >>>> struct page *page = buf; >>>> - struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, >>>> - PAGE_SIZE, true); >>>> + struct sk_buff *skb = >>>> + page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0); >>>> stats->bytes += len - vi->hdr_len; >>>> if (unlikely(!skb)) >>>> @@ -793,6 +806,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, >>>> unsigned int truesize; >>>> unsigned int headroom = mergeable_ctx_to_headroom(ctx); >>>> int err; >>>> + unsigned int metasize = 0; >>>> head_skb = NULL; >>>> stats->bytes += len - vi->hdr_len; >>>> @@ -839,8 +853,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, >>>> data = page_address(xdp_page) + offset; >>>> xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len; >>>> xdp.data = data + vi->hdr_len; >>>> - xdp_set_data_meta_invalid(&xdp); >>>> xdp.data_end = xdp.data + (len - vi->hdr_len); >>>> + xdp.data_meta = xdp.data; >>>> xdp.rxq = &rq->xdp_rxq; >>>> act = bpf_prog_run_xdp(xdp_prog, &xdp); >>>> @@ -852,8 +866,9 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, >>>> * adjustments. Note other cases do not build an >>>> * skb and avoid using offset >>>> */ >>>> - offset = xdp.data - >>>> - page_address(xdp_page) - vi->hdr_len; >>>> + metasize = xdp.data - xdp.data_meta; >>>> + offset = xdp.data - page_address(xdp_page) - >>>> + vi->hdr_len - metasize; >>>> /* recalculate len if xdp.data or xdp.data_end were >>>> * adjusted >>>> @@ -863,14 +878,15 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, >>>> if (unlikely(xdp_page != page)) { >>>> rcu_read_unlock(); >>>> put_page(page); >>>> - head_skb = page_to_skb(vi, rq, xdp_page, >>>> - offset, len, >>>> - PAGE_SIZE, false); >>>> + head_skb = page_to_skb(vi, rq, xdp_page, offset, >>>> + len, PAGE_SIZE, false, >>>> + metasize); >>>> return head_skb; >>>> } >>>> break; >>>> case XDP_TX: >>>> stats->xdp_tx++; >>>> + xdp.data_meta = xdp.data; >>>> xdpf = convert_to_xdp_frame(&xdp); >>>> if (unlikely(!xdpf)) >>>> goto err_xdp; >>>> @@ -921,7 +937,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, >>>> goto err_skb; >>>> } >>>> - head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog); >>>> + head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog, >>>> + metasize); >>>> curr_skb = head_skb; >>>> if (unlikely(!curr_skb))
On 07/09/2019 05:04 AM, Jason Wang wrote: > On 2019/7/9 上午6:38, Daniel Borkmann wrote: >> On 07/02/2019 04:11 PM, Yuya Kusakabe wrote: >>> On 7/2/19 5:33 PM, Jason Wang wrote: >>>> On 2019/7/2 下午4:16, Yuya Kusakabe wrote: >>>>> This adds XDP meta data support to both receive_small() and >>>>> receive_mergeable(). >>>>> >>>>> Fixes: de8f3a83b0a0 ("bpf: add meta pointer for direct access") >>>>> Signed-off-by: Yuya Kusakabe <yuya.kusakabe@gmail.com> >>>>> --- >>>>> v3: >>>>> - fix preserve the vnet header in receive_small(). >>>>> v2: >>>>> - keep copy untouched in page_to_skb(). >>>>> - preserve the vnet header in receive_small(). >>>>> - fix indentation. >>>>> --- >>>>> drivers/net/virtio_net.c | 45 +++++++++++++++++++++++++++------------- >>>>> 1 file changed, 31 insertions(+), 14 deletions(-) >>>>> >>>>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c >>>>> index 4f3de0ac8b0b..03a1ae6fe267 100644 >>>>> --- a/drivers/net/virtio_net.c >>>>> +++ b/drivers/net/virtio_net.c >>>>> @@ -371,7 +371,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, >>>>> struct receive_queue *rq, >>>>> struct page *page, unsigned int offset, >>>>> unsigned int len, unsigned int truesize, >>>>> - bool hdr_valid) >>>>> + bool hdr_valid, unsigned int metasize) >>>>> { >>>>> struct sk_buff *skb; >>>>> struct virtio_net_hdr_mrg_rxbuf *hdr; >>>>> @@ -393,7 +393,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, >>>>> else >>>>> hdr_padded_len = sizeof(struct padded_vnet_hdr); >>>>> - if (hdr_valid) >>>>> + if (hdr_valid && !metasize) >>>>> memcpy(hdr, p, hdr_len); >>>>> len -= hdr_len; >>>>> @@ -405,6 +405,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, >>>>> copy = skb_tailroom(skb); >>>>> skb_put_data(skb, p, copy); >>>>> + if (metasize) { >>>>> + __skb_pull(skb, metasize); >>>>> + skb_metadata_set(skb, metasize); >>>>> + } >>>>> + >>>>> len -= copy; >>>>> offset += copy; >>>>> @@ -644,6 +649,7 @@ static struct sk_buff *receive_small(struct net_device *dev, >>>>> unsigned int delta = 0; >>>>> struct page *xdp_page; >>>>> int err; >>>>> + unsigned int metasize = 0; >>>>> len -= vi->hdr_len; >>>>> stats->bytes += len; >>>>> @@ -683,10 +689,13 @@ static struct sk_buff *receive_small(struct net_device *dev, >>>>> xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len; >>>>> xdp.data = xdp.data_hard_start + xdp_headroom; >>>>> - xdp_set_data_meta_invalid(&xdp); >>>>> xdp.data_end = xdp.data + len; >>>>> + xdp.data_meta = xdp.data; >>>>> xdp.rxq = &rq->xdp_rxq; >>>>> orig_data = xdp.data; >>>>> + /* Copy the vnet header to the front of data_hard_start to avoid >>>>> + * overwriting by XDP meta data */ >>>>> + memcpy(xdp.data_hard_start - vi->hdr_len, xdp.data - vi->hdr_len, vi->hdr_len); >> I'm not fully sure if I'm following this one correctly, probably just missing >> something. Isn't the vnet header based on how we set up xdp.data_hard_start >> earlier already in front of it? Wouldn't we copy invalid data from xdp.data - >> vi->hdr_len into the vnet header at that point (given there can be up to 256 >> bytes of headroom between the two)? If it's relative to xdp.data and headroom >> is >0, then BPF prog could otherwise mangle this; something doesn't add up to >> me here. Could you clarify? Thx > > Vnet headr sits just in front of xdp.data not xdp.data_hard_start. So it could be overwrote by metadata, that's why we need a copy here. For the current code, you can adjust the xdp.data with a positive/negative offset already via bpf_xdp_adjust_head() helper. If vnet headr sits just in front of xdp.data, couldn't this be overridden today as well then? Anyway, just wondering how this is handled differently? Thanks, Daniel
On 2019/7/10 上午4:03, Daniel Borkmann wrote: > On 07/09/2019 05:04 AM, Jason Wang wrote: >> On 2019/7/9 上午6:38, Daniel Borkmann wrote: >>> On 07/02/2019 04:11 PM, Yuya Kusakabe wrote: >>>> On 7/2/19 5:33 PM, Jason Wang wrote: >>>>> On 2019/7/2 下午4:16, Yuya Kusakabe wrote: >>>>>> This adds XDP meta data support to both receive_small() and >>>>>> receive_mergeable(). >>>>>> >>>>>> Fixes: de8f3a83b0a0 ("bpf: add meta pointer for direct access") >>>>>> Signed-off-by: Yuya Kusakabe <yuya.kusakabe@gmail.com> >>>>>> --- >>>>>> v3: >>>>>> - fix preserve the vnet header in receive_small(). >>>>>> v2: >>>>>> - keep copy untouched in page_to_skb(). >>>>>> - preserve the vnet header in receive_small(). >>>>>> - fix indentation. >>>>>> --- >>>>>> drivers/net/virtio_net.c | 45 +++++++++++++++++++++++++++------------- >>>>>> 1 file changed, 31 insertions(+), 14 deletions(-) >>>>>> >>>>>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c >>>>>> index 4f3de0ac8b0b..03a1ae6fe267 100644 >>>>>> --- a/drivers/net/virtio_net.c >>>>>> +++ b/drivers/net/virtio_net.c >>>>>> @@ -371,7 +371,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, >>>>>> struct receive_queue *rq, >>>>>> struct page *page, unsigned int offset, >>>>>> unsigned int len, unsigned int truesize, >>>>>> - bool hdr_valid) >>>>>> + bool hdr_valid, unsigned int metasize) >>>>>> { >>>>>> struct sk_buff *skb; >>>>>> struct virtio_net_hdr_mrg_rxbuf *hdr; >>>>>> @@ -393,7 +393,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, >>>>>> else >>>>>> hdr_padded_len = sizeof(struct padded_vnet_hdr); >>>>>> - if (hdr_valid) >>>>>> + if (hdr_valid && !metasize) >>>>>> memcpy(hdr, p, hdr_len); >>>>>> len -= hdr_len; >>>>>> @@ -405,6 +405,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, >>>>>> copy = skb_tailroom(skb); >>>>>> skb_put_data(skb, p, copy); >>>>>> + if (metasize) { >>>>>> + __skb_pull(skb, metasize); >>>>>> + skb_metadata_set(skb, metasize); >>>>>> + } >>>>>> + >>>>>> len -= copy; >>>>>> offset += copy; >>>>>> @@ -644,6 +649,7 @@ static struct sk_buff *receive_small(struct net_device *dev, >>>>>> unsigned int delta = 0; >>>>>> struct page *xdp_page; >>>>>> int err; >>>>>> + unsigned int metasize = 0; >>>>>> len -= vi->hdr_len; >>>>>> stats->bytes += len; >>>>>> @@ -683,10 +689,13 @@ static struct sk_buff *receive_small(struct net_device *dev, >>>>>> xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len; >>>>>> xdp.data = xdp.data_hard_start + xdp_headroom; >>>>>> - xdp_set_data_meta_invalid(&xdp); >>>>>> xdp.data_end = xdp.data + len; >>>>>> + xdp.data_meta = xdp.data; >>>>>> xdp.rxq = &rq->xdp_rxq; >>>>>> orig_data = xdp.data; >>>>>> + /* Copy the vnet header to the front of data_hard_start to avoid >>>>>> + * overwriting by XDP meta data */ >>>>>> + memcpy(xdp.data_hard_start - vi->hdr_len, xdp.data - vi->hdr_len, vi->hdr_len); >>> I'm not fully sure if I'm following this one correctly, probably just missing >>> something. Isn't the vnet header based on how we set up xdp.data_hard_start >>> earlier already in front of it? Wouldn't we copy invalid data from xdp.data - >>> vi->hdr_len into the vnet header at that point (given there can be up to 256 >>> bytes of headroom between the two)? If it's relative to xdp.data and headroom >>> is >0, then BPF prog could otherwise mangle this; something doesn't add up to >>> me here. Could you clarify? Thx >> Vnet headr sits just in front of xdp.data not xdp.data_hard_start. So it could be overwrote by metadata, that's why we need a copy here. > For the current code, you can adjust the xdp.data with a positive/negative offset > already via bpf_xdp_adjust_head() helper. If vnet headr sits just in front of > xdp.data, couldn't this be overridden today as well then? Anyway, just wondering > how this is handled differently? We will invalidate the vnet header in this case. But for the case of metadata adjustment without header adjustment, we want to seek a way to preserve that. Thanks > > Thanks, > Daniel
I'm sorry for the late reply. I saw the status of this patch is "Awaiting Upstream" on https://patchwork.ozlabs.org/patch/1126046/. What is "Awaiting Upstream"? Is there anything that I can do? Thank you, Yuya On Wed, Jul 10, 2019 at 11:30 AM Jason Wang <jasowang@redhat.com> wrote: > > > On 2019/7/10 上午4:03, Daniel Borkmann wrote: > > On 07/09/2019 05:04 AM, Jason Wang wrote: > >> On 2019/7/9 上午6:38, Daniel Borkmann wrote: > >>> On 07/02/2019 04:11 PM, Yuya Kusakabe wrote: > >>>> On 7/2/19 5:33 PM, Jason Wang wrote: > >>>>> On 2019/7/2 下午4:16, Yuya Kusakabe wrote: > >>>>>> This adds XDP meta data support to both receive_small() and > >>>>>> receive_mergeable(). > >>>>>> > >>>>>> Fixes: de8f3a83b0a0 ("bpf: add meta pointer for direct access") > >>>>>> Signed-off-by: Yuya Kusakabe <yuya.kusakabe@gmail.com> > >>>>>> --- > >>>>>> v3: > >>>>>> - fix preserve the vnet header in receive_small(). > >>>>>> v2: > >>>>>> - keep copy untouched in page_to_skb(). > >>>>>> - preserve the vnet header in receive_small(). > >>>>>> - fix indentation. > >>>>>> --- > >>>>>> drivers/net/virtio_net.c | 45 +++++++++++++++++++++++++++------------- > >>>>>> 1 file changed, 31 insertions(+), 14 deletions(-) > >>>>>> > >>>>>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > >>>>>> index 4f3de0ac8b0b..03a1ae6fe267 100644 > >>>>>> --- a/drivers/net/virtio_net.c > >>>>>> +++ b/drivers/net/virtio_net.c > >>>>>> @@ -371,7 +371,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, > >>>>>> struct receive_queue *rq, > >>>>>> struct page *page, unsigned int offset, > >>>>>> unsigned int len, unsigned int truesize, > >>>>>> - bool hdr_valid) > >>>>>> + bool hdr_valid, unsigned int metasize) > >>>>>> { > >>>>>> struct sk_buff *skb; > >>>>>> struct virtio_net_hdr_mrg_rxbuf *hdr; > >>>>>> @@ -393,7 +393,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, > >>>>>> else > >>>>>> hdr_padded_len = sizeof(struct padded_vnet_hdr); > >>>>>> - if (hdr_valid) > >>>>>> + if (hdr_valid && !metasize) > >>>>>> memcpy(hdr, p, hdr_len); > >>>>>> len -= hdr_len; > >>>>>> @@ -405,6 +405,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, > >>>>>> copy = skb_tailroom(skb); > >>>>>> skb_put_data(skb, p, copy); > >>>>>> + if (metasize) { > >>>>>> + __skb_pull(skb, metasize); > >>>>>> + skb_metadata_set(skb, metasize); > >>>>>> + } > >>>>>> + > >>>>>> len -= copy; > >>>>>> offset += copy; > >>>>>> @@ -644,6 +649,7 @@ static struct sk_buff *receive_small(struct net_device *dev, > >>>>>> unsigned int delta = 0; > >>>>>> struct page *xdp_page; > >>>>>> int err; > >>>>>> + unsigned int metasize = 0; > >>>>>> len -= vi->hdr_len; > >>>>>> stats->bytes += len; > >>>>>> @@ -683,10 +689,13 @@ static struct sk_buff *receive_small(struct net_device *dev, > >>>>>> xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len; > >>>>>> xdp.data = xdp.data_hard_start + xdp_headroom; > >>>>>> - xdp_set_data_meta_invalid(&xdp); > >>>>>> xdp.data_end = xdp.data + len; > >>>>>> + xdp.data_meta = xdp.data; > >>>>>> xdp.rxq = &rq->xdp_rxq; > >>>>>> orig_data = xdp.data; > >>>>>> + /* Copy the vnet header to the front of data_hard_start to avoid > >>>>>> + * overwriting by XDP meta data */ > >>>>>> + memcpy(xdp.data_hard_start - vi->hdr_len, xdp.data - vi->hdr_len, vi->hdr_len); > >>> I'm not fully sure if I'm following this one correctly, probably just missing > >>> something. Isn't the vnet header based on how we set up xdp.data_hard_start > >>> earlier already in front of it? Wouldn't we copy invalid data from xdp.data - > >>> vi->hdr_len into the vnet header at that point (given there can be up to 256 > >>> bytes of headroom between the two)? If it's relative to xdp.data and headroom > >>> is >0, then BPF prog could otherwise mangle this; something doesn't add up to > >>> me here. Could you clarify? Thx > >> Vnet headr sits just in front of xdp.data not xdp.data_hard_start. So it could be overwrote by metadata, that's why we need a copy here. > > For the current code, you can adjust the xdp.data with a positive/negative offset > > already via bpf_xdp_adjust_head() helper. If vnet headr sits just in front of > > xdp.data, couldn't this be overridden today as well then? Anyway, just wondering > > how this is handled differently? > > > We will invalidate the vnet header in this case. But for the case of > metadata adjustment without header adjustment, we want to seek a way to > preserve that. > > Thanks > > > > > > Thanks, > > Daniel
On 2020/2/3 下午9:52, Yuya Kusakabe wrote: > I'm sorry for the late reply. > > I saw the status of this patch is "Awaiting Upstream" on > https://patchwork.ozlabs.org/patch/1126046/. What is "Awaiting > Upstream"? Is there anything that I can do? You can post a new version I think. Thanks > > Thank you, > Yuya > > On Wed, Jul 10, 2019 at 11:30 AM Jason Wang <jasowang@redhat.com> wrote: >> >> On 2019/7/10 上午4:03, Daniel Borkmann wrote: >>> On 07/09/2019 05:04 AM, Jason Wang wrote: >>>> On 2019/7/9 上午6:38, Daniel Borkmann wrote: >>>>> On 07/02/2019 04:11 PM, Yuya Kusakabe wrote: >>>>>> On 7/2/19 5:33 PM, Jason Wang wrote: >>>>>>> On 2019/7/2 下午4:16, Yuya Kusakabe wrote: >>>>>>>> This adds XDP meta data support to both receive_small() and >>>>>>>> receive_mergeable(). >>>>>>>> >>>>>>>> Fixes: de8f3a83b0a0 ("bpf: add meta pointer for direct access") >>>>>>>> Signed-off-by: Yuya Kusakabe <yuya.kusakabe@gmail.com> >>>>>>>> --- >>>>>>>> v3: >>>>>>>> - fix preserve the vnet header in receive_small(). >>>>>>>> v2: >>>>>>>> - keep copy untouched in page_to_skb(). >>>>>>>> - preserve the vnet header in receive_small(). >>>>>>>> - fix indentation. >>>>>>>> --- >>>>>>>> drivers/net/virtio_net.c | 45 +++++++++++++++++++++++++++------------- >>>>>>>> 1 file changed, 31 insertions(+), 14 deletions(-) >>>>>>>> >>>>>>>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c >>>>>>>> index 4f3de0ac8b0b..03a1ae6fe267 100644 >>>>>>>> --- a/drivers/net/virtio_net.c >>>>>>>> +++ b/drivers/net/virtio_net.c >>>>>>>> @@ -371,7 +371,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, >>>>>>>> struct receive_queue *rq, >>>>>>>> struct page *page, unsigned int offset, >>>>>>>> unsigned int len, unsigned int truesize, >>>>>>>> - bool hdr_valid) >>>>>>>> + bool hdr_valid, unsigned int metasize) >>>>>>>> { >>>>>>>> struct sk_buff *skb; >>>>>>>> struct virtio_net_hdr_mrg_rxbuf *hdr; >>>>>>>> @@ -393,7 +393,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, >>>>>>>> else >>>>>>>> hdr_padded_len = sizeof(struct padded_vnet_hdr); >>>>>>>> - if (hdr_valid) >>>>>>>> + if (hdr_valid && !metasize) >>>>>>>> memcpy(hdr, p, hdr_len); >>>>>>>> len -= hdr_len; >>>>>>>> @@ -405,6 +405,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, >>>>>>>> copy = skb_tailroom(skb); >>>>>>>> skb_put_data(skb, p, copy); >>>>>>>> + if (metasize) { >>>>>>>> + __skb_pull(skb, metasize); >>>>>>>> + skb_metadata_set(skb, metasize); >>>>>>>> + } >>>>>>>> + >>>>>>>> len -= copy; >>>>>>>> offset += copy; >>>>>>>> @@ -644,6 +649,7 @@ static struct sk_buff *receive_small(struct net_device *dev, >>>>>>>> unsigned int delta = 0; >>>>>>>> struct page *xdp_page; >>>>>>>> int err; >>>>>>>> + unsigned int metasize = 0; >>>>>>>> len -= vi->hdr_len; >>>>>>>> stats->bytes += len; >>>>>>>> @@ -683,10 +689,13 @@ static struct sk_buff *receive_small(struct net_device *dev, >>>>>>>> xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len; >>>>>>>> xdp.data = xdp.data_hard_start + xdp_headroom; >>>>>>>> - xdp_set_data_meta_invalid(&xdp); >>>>>>>> xdp.data_end = xdp.data + len; >>>>>>>> + xdp.data_meta = xdp.data; >>>>>>>> xdp.rxq = &rq->xdp_rxq; >>>>>>>> orig_data = xdp.data; >>>>>>>> + /* Copy the vnet header to the front of data_hard_start to avoid >>>>>>>> + * overwriting by XDP meta data */ >>>>>>>> + memcpy(xdp.data_hard_start - vi->hdr_len, xdp.data - vi->hdr_len, vi->hdr_len); >>>>> I'm not fully sure if I'm following this one correctly, probably just missing >>>>> something. Isn't the vnet header based on how we set up xdp.data_hard_start >>>>> earlier already in front of it? Wouldn't we copy invalid data from xdp.data - >>>>> vi->hdr_len into the vnet header at that point (given there can be up to 256 >>>>> bytes of headroom between the two)? If it's relative to xdp.data and headroom >>>>> is >0, then BPF prog could otherwise mangle this; something doesn't add up to >>>>> me here. Could you clarify? Thx >>>> Vnet headr sits just in front of xdp.data not xdp.data_hard_start. So it could be overwrote by metadata, that's why we need a copy here. >>> For the current code, you can adjust the xdp.data with a positive/negative offset >>> already via bpf_xdp_adjust_head() helper. If vnet headr sits just in front of >>> xdp.data, couldn't this be overridden today as well then? Anyway, just wondering >>> how this is handled differently? >> >> We will invalidate the vnet header in this case. But for the case of >> metadata adjustment without header adjustment, we want to seek a way to >> preserve that. >> >> Thanks >> >> >>> Thanks, >>> Daniel > >
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 4f3de0ac8b0b..03a1ae6fe267 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -371,7 +371,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, struct receive_queue *rq, struct page *page, unsigned int offset, unsigned int len, unsigned int truesize, - bool hdr_valid) + bool hdr_valid, unsigned int metasize) { struct sk_buff *skb; struct virtio_net_hdr_mrg_rxbuf *hdr; @@ -393,7 +393,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, else hdr_padded_len = sizeof(struct padded_vnet_hdr); - if (hdr_valid) + if (hdr_valid && !metasize) memcpy(hdr, p, hdr_len); len -= hdr_len; @@ -405,6 +405,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, copy = skb_tailroom(skb); skb_put_data(skb, p, copy); + if (metasize) { + __skb_pull(skb, metasize); + skb_metadata_set(skb, metasize); + } + len -= copy; offset += copy; @@ -644,6 +649,7 @@ static struct sk_buff *receive_small(struct net_device *dev, unsigned int delta = 0; struct page *xdp_page; int err; + unsigned int metasize = 0; len -= vi->hdr_len; stats->bytes += len; @@ -683,10 +689,13 @@ static struct sk_buff *receive_small(struct net_device *dev, xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len; xdp.data = xdp.data_hard_start + xdp_headroom; - xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + len; + xdp.data_meta = xdp.data; xdp.rxq = &rq->xdp_rxq; orig_data = xdp.data; + /* Copy the vnet header to the front of data_hard_start to avoid + * overwriting by XDP meta data */ + memcpy(xdp.data_hard_start - vi->hdr_len, xdp.data - vi->hdr_len, vi->hdr_len); act = bpf_prog_run_xdp(xdp_prog, &xdp); stats->xdp_packets++; @@ -695,9 +704,11 @@ static struct sk_buff *receive_small(struct net_device *dev, /* Recalculate length in case bpf program changed it */ delta = orig_data - xdp.data; len = xdp.data_end - xdp.data; + metasize = xdp.data - xdp.data_meta; break; case XDP_TX: stats->xdp_tx++; + xdp.data_meta = xdp.data; xdpf = convert_to_xdp_frame(&xdp); if (unlikely(!xdpf)) goto err_xdp; @@ -736,10 +747,12 @@ static struct sk_buff *receive_small(struct net_device *dev, skb_reserve(skb, headroom - delta); skb_put(skb, len); if (!delta) { - buf += header_offset; - memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len); + memcpy(skb_vnet_hdr(skb), buf + VIRTNET_RX_PAD, vi->hdr_len); } /* keep zeroed vnet hdr since packet was changed by bpf */ + if (metasize) + skb_metadata_set(skb, metasize); + err: return skb; @@ -760,8 +773,8 @@ static struct sk_buff *receive_big(struct net_device *dev, struct virtnet_rq_stats *stats) { struct page *page = buf; - struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, - PAGE_SIZE, true); + struct sk_buff *skb = + page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0); stats->bytes += len - vi->hdr_len; if (unlikely(!skb)) @@ -793,6 +806,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, unsigned int truesize; unsigned int headroom = mergeable_ctx_to_headroom(ctx); int err; + unsigned int metasize = 0; head_skb = NULL; stats->bytes += len - vi->hdr_len; @@ -839,8 +853,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, data = page_address(xdp_page) + offset; xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len; xdp.data = data + vi->hdr_len; - xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + (len - vi->hdr_len); + xdp.data_meta = xdp.data; xdp.rxq = &rq->xdp_rxq; act = bpf_prog_run_xdp(xdp_prog, &xdp); @@ -852,8 +866,9 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, * adjustments. Note other cases do not build an * skb and avoid using offset */ - offset = xdp.data - - page_address(xdp_page) - vi->hdr_len; + metasize = xdp.data - xdp.data_meta; + offset = xdp.data - page_address(xdp_page) - + vi->hdr_len - metasize; /* recalculate len if xdp.data or xdp.data_end were * adjusted @@ -863,14 +878,15 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, if (unlikely(xdp_page != page)) { rcu_read_unlock(); put_page(page); - head_skb = page_to_skb(vi, rq, xdp_page, - offset, len, - PAGE_SIZE, false); + head_skb = page_to_skb(vi, rq, xdp_page, offset, + len, PAGE_SIZE, false, + metasize); return head_skb; } break; case XDP_TX: stats->xdp_tx++; + xdp.data_meta = xdp.data; xdpf = convert_to_xdp_frame(&xdp); if (unlikely(!xdpf)) goto err_xdp; @@ -921,7 +937,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, goto err_skb; } - head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog); + head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog, + metasize); curr_skb = head_skb; if (unlikely(!curr_skb))
This adds XDP meta data support to both receive_small() and receive_mergeable(). Fixes: de8f3a83b0a0 ("bpf: add meta pointer for direct access") Signed-off-by: Yuya Kusakabe <yuya.kusakabe@gmail.com> --- v3: - fix preserve the vnet header in receive_small(). v2: - keep copy untouched in page_to_skb(). - preserve the vnet header in receive_small(). - fix indentation. --- drivers/net/virtio_net.c | 45 +++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 14 deletions(-)