Message ID | 20220318153339.31083-1-david.marchand@redhat.com |
---|---|
State | Changes Requested |
Headers | show |
Series | [ovs-dev] dp-packet: Allow DPDK packet resize. | expand |
Context | Check | Description |
---|---|---|
ovsrobot/apply-robot | success | apply and check: success |
ovsrobot/github-robot-_Build_and_Test | success | github build: passed |
ovsrobot/intel-ovs-compilation | success | test: success |
Hi, do you have future plan to support mbuf chaining for dp-packet? as I see you've actually relax some check on dp-packet in this patch. thanks David Marchand <david.marchand@redhat.com> 于2022年3月18日周五 23:34写道: > DPDK based dp-packets points to data buffers that can't be expanded > dynamically. > Their layout is as follows: > - a 128 bytes headroom chosen at DPDK build time (RTE_PKTMBUF_HEADROOM), > - a maximum size chosen at mempool creation, > > In some usecases though (like encapsulating with multiple tunnels), > a 128 bytes headroom is too short. > > Dynamically allocate buffers in DPDK memory and make use of DPDK > external buffers API (previously used for userspace TSO). > > Signed-off-by: David Marchand <david.marchand@redhat.com> > --- > lib/dp-packet.c | 17 ++++++++++++++++- > lib/netdev-dpdk.c | 47 +++++++++++++++++++++++++++++++++++------------ > lib/netdev-dpdk.h | 3 +++ > 3 files changed, 54 insertions(+), 13 deletions(-) > > diff --git a/lib/dp-packet.c b/lib/dp-packet.c > index 35c72542a2..07fa67b1a1 100644 > --- a/lib/dp-packet.c > +++ b/lib/dp-packet.c > @@ -250,8 +250,23 @@ dp_packet_resize(struct dp_packet *b, size_t > new_headroom, size_t new_tailroom) > new_allocated = new_headroom + dp_packet_size(b) + new_tailroom; > > switch (b->source) { > - case DPBUF_DPDK: > + case DPBUF_DPDK: { > +#ifdef DPDK_NETDEV > + uint32_t buf_len; > + > + buf_len = new_allocated; > + new_base = netdev_dpdk_extbuf_allocate(&buf_len); > + if (!new_base) { > + out_of_memory(); > + } > + ovs_assert(buf_len <= UINT16_MAX); > + dp_packet_copy__(b, new_base, new_headroom, new_tailroom); > + netdev_dpdk_extbuf_replace(b, new_base, buf_len); > + break; > +#else > OVS_NOT_REACHED(); > +#endif > + } > > case DPBUF_MALLOC: > if (new_headroom == dp_packet_headroom(b)) { > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index fbc3b42d84..47e16f22c5 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -2646,41 +2646,64 @@ out: > } > } > > +void * > +netdev_dpdk_extbuf_allocate(uint32_t *data_len) > +{ > + *data_len += sizeof(struct rte_mbuf_ext_shared_info) + > sizeof(uintptr_t); > + *data_len = RTE_ALIGN_CEIL(*data_len, sizeof(uintptr_t)); > + return rte_malloc(NULL, *data_len, RTE_CACHE_LINE_SIZE); > +} > + > static void > netdev_dpdk_extbuf_free(void *addr OVS_UNUSED, void *opaque) > { > rte_free(opaque); > } > > +void > +netdev_dpdk_extbuf_replace(struct dp_packet *b, void *buf, uint32_t > data_len) > +{ > + struct rte_mbuf *pkt = (struct rte_mbuf *) b; > + struct rte_mbuf_ext_shared_info *shinfo; > + uint16_t buf_len = data_len; > + > + shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len, > + netdev_dpdk_extbuf_free, > + buf); > + ovs_assert(shinfo != NULL); > + > + if (RTE_MBUF_HAS_EXTBUF(pkt)) { > + rte_pktmbuf_detach_extbuf(pkt); > + } > + rte_pktmbuf_attach_extbuf(pkt, buf, rte_malloc_virt2iova(buf), > buf_len, > + shinfo); > +} > + > static struct rte_mbuf * > dpdk_pktmbuf_attach_extbuf(struct rte_mbuf *pkt, uint32_t data_len) > { > uint32_t total_len = RTE_PKTMBUF_HEADROOM + data_len; > - struct rte_mbuf_ext_shared_info *shinfo = NULL; > + struct rte_mbuf_ext_shared_info *shinfo; > uint16_t buf_len; > void *buf; > > - total_len += sizeof *shinfo + sizeof(uintptr_t); > - total_len = RTE_ALIGN_CEIL(total_len, sizeof(uintptr_t)); > - > + buf = netdev_dpdk_extbuf_allocate(&total_len); > + if (OVS_UNLIKELY(buf == NULL)) { > + VLOG_ERR("Failed to allocate memory using rte_malloc: %u", > total_len); > + return NULL; > + } > if (OVS_UNLIKELY(total_len > UINT16_MAX)) { > + netdev_dpdk_extbuf_free(NULL, buf); > VLOG_ERR("Can't copy packet: too big %u", total_len); > return NULL; > } > > buf_len = total_len; > - buf = rte_malloc(NULL, buf_len, RTE_CACHE_LINE_SIZE); > - if (OVS_UNLIKELY(buf == NULL)) { > - VLOG_ERR("Failed to allocate memory using rte_malloc: %u", > buf_len); > - return NULL; > - } > - > - /* Initialize shinfo. */ > shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len, > netdev_dpdk_extbuf_free, > buf); > if (OVS_UNLIKELY(shinfo == NULL)) { > - rte_free(buf); > + netdev_dpdk_extbuf_free(NULL, buf); > VLOG_ERR("Failed to initialize shared info for mbuf while " > "attempting to attach an external buffer."); > return NULL; > diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h > index 699be3fb41..95594f07fb 100644 > --- a/lib/netdev-dpdk.h > +++ b/lib/netdev-dpdk.h > @@ -31,6 +31,9 @@ struct netdev; > void netdev_dpdk_register(void); > void free_dpdk_buf(struct dp_packet *); > > +void *netdev_dpdk_extbuf_allocate(uint32_t *); > +void netdev_dpdk_extbuf_replace(struct dp_packet *, void *, uint32_t); > + > bool netdev_dpdk_flow_api_supported(struct netdev *); > > int > -- > 2.23.0 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev >
On Fri, Mar 18, 2022 at 11:33 PM David Marchand <david.marchand@redhat.com> wrote: > > DPDK based dp-packets points to data buffers that can't be expanded > dynamically. > Their layout is as follows: > - a 128 bytes headroom chosen at DPDK build time (RTE_PKTMBUF_HEADROOM), > - a maximum size chosen at mempool creation, > > In some usecases though (like encapsulating with multiple tunnels), > a 128 bytes headroom is too short. > > Dynamically allocate buffers in DPDK memory and make use of DPDK > external buffers API (previously used for userspace TSO). > > Signed-off-by: David Marchand <david.marchand@redhat.com> > --- > lib/dp-packet.c | 17 ++++++++++++++++- > lib/netdev-dpdk.c | 47 +++++++++++++++++++++++++++++++++++------------ > lib/netdev-dpdk.h | 3 +++ > 3 files changed, 54 insertions(+), 13 deletions(-) > > diff --git a/lib/dp-packet.c b/lib/dp-packet.c > index 35c72542a2..07fa67b1a1 100644 > --- a/lib/dp-packet.c > +++ b/lib/dp-packet.c > @@ -250,8 +250,23 @@ dp_packet_resize(struct dp_packet *b, size_t new_headroom, size_t new_tailroom) > new_allocated = new_headroom + dp_packet_size(b) + new_tailroom; > > switch (b->source) { > - case DPBUF_DPDK: > + case DPBUF_DPDK: { > +#ifdef DPDK_NETDEV > + uint32_t buf_len; > + > + buf_len = new_allocated; > + new_base = netdev_dpdk_extbuf_allocate(&buf_len); > + if (!new_base) { > + out_of_memory(); > + } > + ovs_assert(buf_len <= UINT16_MAX); > + dp_packet_copy__(b, new_base, new_headroom, new_tailroom); > + netdev_dpdk_extbuf_replace(b, new_base, buf_len); It seems that this is the first example in dp-packet.c to use the APIs in netdev-dpdk.c. Why not move them to dp-packet.c directly? > + break; > +#else > OVS_NOT_REACHED(); > +#endif > + } > > case DPBUF_MALLOC: > if (new_headroom == dp_packet_headroom(b)) { > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index fbc3b42d84..47e16f22c5 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -2646,41 +2646,64 @@ out: > } > } > > +void * > +netdev_dpdk_extbuf_allocate(uint32_t *data_len) > +{ > + *data_len += sizeof(struct rte_mbuf_ext_shared_info) + sizeof(uintptr_t); > + *data_len = RTE_ALIGN_CEIL(*data_len, sizeof(uintptr_t)); > + return rte_malloc(NULL, *data_len, RTE_CACHE_LINE_SIZE); > +} > + > static void > netdev_dpdk_extbuf_free(void *addr OVS_UNUSED, void *opaque) > { > rte_free(opaque); > } > > +void > +netdev_dpdk_extbuf_replace(struct dp_packet *b, void *buf, uint32_t data_len) > +{ > + struct rte_mbuf *pkt = (struct rte_mbuf *) b; > + struct rte_mbuf_ext_shared_info *shinfo; > + uint16_t buf_len = data_len; > + > + shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len, > + netdev_dpdk_extbuf_free, > + buf); > + ovs_assert(shinfo != NULL); > + > + if (RTE_MBUF_HAS_EXTBUF(pkt)) { > + rte_pktmbuf_detach_extbuf(pkt); > + } > + rte_pktmbuf_attach_extbuf(pkt, buf, rte_malloc_virt2iova(buf), buf_len, > + shinfo); > +} > + > static struct rte_mbuf * > dpdk_pktmbuf_attach_extbuf(struct rte_mbuf *pkt, uint32_t data_len) > { > uint32_t total_len = RTE_PKTMBUF_HEADROOM + data_len; > - struct rte_mbuf_ext_shared_info *shinfo = NULL; > + struct rte_mbuf_ext_shared_info *shinfo; > uint16_t buf_len; > void *buf; > > - total_len += sizeof *shinfo + sizeof(uintptr_t); > - total_len = RTE_ALIGN_CEIL(total_len, sizeof(uintptr_t)); > - > + buf = netdev_dpdk_extbuf_allocate(&total_len); > + if (OVS_UNLIKELY(buf == NULL)) { > + VLOG_ERR("Failed to allocate memory using rte_malloc: %u", total_len); > + return NULL; > + } > if (OVS_UNLIKELY(total_len > UINT16_MAX)) { > + netdev_dpdk_extbuf_free(NULL, buf); > VLOG_ERR("Can't copy packet: too big %u", total_len); > return NULL; > } > > buf_len = total_len; > - buf = rte_malloc(NULL, buf_len, RTE_CACHE_LINE_SIZE); > - if (OVS_UNLIKELY(buf == NULL)) { > - VLOG_ERR("Failed to allocate memory using rte_malloc: %u", buf_len); > - return NULL; > - } > - > - /* Initialize shinfo. */ > shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len, > netdev_dpdk_extbuf_free, > buf); > if (OVS_UNLIKELY(shinfo == NULL)) { > - rte_free(buf); > + netdev_dpdk_extbuf_free(NULL, buf); > VLOG_ERR("Failed to initialize shared info for mbuf while " > "attempting to attach an external buffer."); > return NULL; > diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h > index 699be3fb41..95594f07fb 100644 > --- a/lib/netdev-dpdk.h > +++ b/lib/netdev-dpdk.h > @@ -31,6 +31,9 @@ struct netdev; > void netdev_dpdk_register(void); > void free_dpdk_buf(struct dp_packet *); > > +void *netdev_dpdk_extbuf_allocate(uint32_t *); > +void netdev_dpdk_extbuf_replace(struct dp_packet *, void *, uint32_t); > + > bool netdev_dpdk_flow_api_supported(struct netdev *); > > int > -- > 2.23.0 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Hi, Peng, Patch[1] is an example in OVS-DPDK to support muti-segments mbufs. It was not applied to OVS upstream finally. Maybe muti-segments mbuf feature is not accepted by the community although we need this feature when we want to use some virtual PMD drivers such as vhost-user, virtio-user to support TSO or GRO. [1]: https://patchwork.ozlabs.org/project/openvswitch/cover/1539188552-129083-1-git-send-email-tiago.lam@intel.com/ On Mon, Mar 21, 2022 at 1:54 PM Peng He <xnhp0320@gmail.com> wrote: > > Hi, > do you have future plan to support mbuf chaining for dp-packet? > as I see you've actually relax some check on dp-packet in this > patch. > > thanks > > David Marchand <david.marchand@redhat.com> 于2022年3月18日周五 23:34写道: > > > DPDK based dp-packets points to data buffers that can't be expanded > > dynamically. > > Their layout is as follows: > > - a 128 bytes headroom chosen at DPDK build time (RTE_PKTMBUF_HEADROOM), > > - a maximum size chosen at mempool creation, > > > > In some usecases though (like encapsulating with multiple tunnels), > > a 128 bytes headroom is too short. > > > > Dynamically allocate buffers in DPDK memory and make use of DPDK > > external buffers API (previously used for userspace TSO). > > > > Signed-off-by: David Marchand <david.marchand@redhat.com> > > --- > > lib/dp-packet.c | 17 ++++++++++++++++- > > lib/netdev-dpdk.c | 47 +++++++++++++++++++++++++++++++++++------------ > > lib/netdev-dpdk.h | 3 +++ > > 3 files changed, 54 insertions(+), 13 deletions(-) > > > > diff --git a/lib/dp-packet.c b/lib/dp-packet.c > > index 35c72542a2..07fa67b1a1 100644 > > --- a/lib/dp-packet.c > > +++ b/lib/dp-packet.c > > @@ -250,8 +250,23 @@ dp_packet_resize(struct dp_packet *b, size_t > > new_headroom, size_t new_tailroom) > > new_allocated = new_headroom + dp_packet_size(b) + new_tailroom; > > > > switch (b->source) { > > - case DPBUF_DPDK: > > + case DPBUF_DPDK: { > > +#ifdef DPDK_NETDEV > > + uint32_t buf_len; > > + > > + buf_len = new_allocated; > > + new_base = netdev_dpdk_extbuf_allocate(&buf_len); > > + if (!new_base) { > > + out_of_memory(); > > + } > > + ovs_assert(buf_len <= UINT16_MAX); > > + dp_packet_copy__(b, new_base, new_headroom, new_tailroom); > > + netdev_dpdk_extbuf_replace(b, new_base, buf_len); > > + break; > > +#else > > OVS_NOT_REACHED(); > > +#endif > > + } > > > > case DPBUF_MALLOC: > > if (new_headroom == dp_packet_headroom(b)) { > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > > index fbc3b42d84..47e16f22c5 100644 > > --- a/lib/netdev-dpdk.c > > +++ b/lib/netdev-dpdk.c > > @@ -2646,41 +2646,64 @@ out: > > } > > } > > > > +void * > > +netdev_dpdk_extbuf_allocate(uint32_t *data_len) > > +{ > > + *data_len += sizeof(struct rte_mbuf_ext_shared_info) + > > sizeof(uintptr_t); > > + *data_len = RTE_ALIGN_CEIL(*data_len, sizeof(uintptr_t)); > > + return rte_malloc(NULL, *data_len, RTE_CACHE_LINE_SIZE); > > +} > > + > > static void > > netdev_dpdk_extbuf_free(void *addr OVS_UNUSED, void *opaque) > > { > > rte_free(opaque); > > } > > > > +void > > +netdev_dpdk_extbuf_replace(struct dp_packet *b, void *buf, uint32_t > > data_len) > > +{ > > + struct rte_mbuf *pkt = (struct rte_mbuf *) b; > > + struct rte_mbuf_ext_shared_info *shinfo; > > + uint16_t buf_len = data_len; > > + > > + shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len, > > + netdev_dpdk_extbuf_free, > > + buf); > > + ovs_assert(shinfo != NULL); > > + > > + if (RTE_MBUF_HAS_EXTBUF(pkt)) { > > + rte_pktmbuf_detach_extbuf(pkt); > > + } > > + rte_pktmbuf_attach_extbuf(pkt, buf, rte_malloc_virt2iova(buf), > > buf_len, > > + shinfo); > > +} > > + > > static struct rte_mbuf * > > dpdk_pktmbuf_attach_extbuf(struct rte_mbuf *pkt, uint32_t data_len) > > { > > uint32_t total_len = RTE_PKTMBUF_HEADROOM + data_len; > > - struct rte_mbuf_ext_shared_info *shinfo = NULL; > > + struct rte_mbuf_ext_shared_info *shinfo; > > uint16_t buf_len; > > void *buf; > > > > - total_len += sizeof *shinfo + sizeof(uintptr_t); > > - total_len = RTE_ALIGN_CEIL(total_len, sizeof(uintptr_t)); > > - > > + buf = netdev_dpdk_extbuf_allocate(&total_len); > > + if (OVS_UNLIKELY(buf == NULL)) { > > + VLOG_ERR("Failed to allocate memory using rte_malloc: %u", > > total_len); > > + return NULL; > > + } > > if (OVS_UNLIKELY(total_len > UINT16_MAX)) { > > + netdev_dpdk_extbuf_free(NULL, buf); > > VLOG_ERR("Can't copy packet: too big %u", total_len); > > return NULL; > > } > > > > buf_len = total_len; > > - buf = rte_malloc(NULL, buf_len, RTE_CACHE_LINE_SIZE); > > - if (OVS_UNLIKELY(buf == NULL)) { > > - VLOG_ERR("Failed to allocate memory using rte_malloc: %u", > > buf_len); > > - return NULL; > > - } > > - > > - /* Initialize shinfo. */ > > shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len, > > netdev_dpdk_extbuf_free, > > buf); > > if (OVS_UNLIKELY(shinfo == NULL)) { > > - rte_free(buf); > > + netdev_dpdk_extbuf_free(NULL, buf); > > VLOG_ERR("Failed to initialize shared info for mbuf while " > > "attempting to attach an external buffer."); > > return NULL; > > diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h > > index 699be3fb41..95594f07fb 100644 > > --- a/lib/netdev-dpdk.h > > +++ b/lib/netdev-dpdk.h > > @@ -31,6 +31,9 @@ struct netdev; > > void netdev_dpdk_register(void); > > void free_dpdk_buf(struct dp_packet *); > > > > +void *netdev_dpdk_extbuf_allocate(uint32_t *); > > +void netdev_dpdk_extbuf_replace(struct dp_packet *, void *, uint32_t); > > + > > bool netdev_dpdk_flow_api_supported(struct netdev *); > > > > int > > -- > > 2.23.0 > > > > _______________________________________________ > > dev mailing list > > dev@openvswitch.org > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > > > > > -- > hepeng > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
On Mon, Mar 21, 2022 at 6:54 AM Peng He <xnhp0320@gmail.com> wrote: > > Hi, > do you have future plan to support mbuf chaining for dp-packet? > as I see you've actually relax some check on dp-packet in this > patch. To me, mbuf chaining means chaining packets, like having a list of packets. So I guess you mean "multi segments" mbuf. This patch only allows resizing mono segment mbuf. I have no plan to revive the multi segments work. Do you see an issue with proposed code change?
On Tue, Mar 22, 2022 at 3:41 AM Harold Huang <baymaxhuang@gmail.com> wrote: > > On Fri, Mar 18, 2022 at 11:33 PM David Marchand > <david.marchand@redhat.com> wrote: > > > > DPDK based dp-packets points to data buffers that can't be expanded > > dynamically. > > Their layout is as follows: > > - a 128 bytes headroom chosen at DPDK build time (RTE_PKTMBUF_HEADROOM), > > - a maximum size chosen at mempool creation, > > > > In some usecases though (like encapsulating with multiple tunnels), > > a 128 bytes headroom is too short. > > > > Dynamically allocate buffers in DPDK memory and make use of DPDK > > external buffers API (previously used for userspace TSO). > > > > Signed-off-by: David Marchand <david.marchand@redhat.com> > > --- > > lib/dp-packet.c | 17 ++++++++++++++++- > > lib/netdev-dpdk.c | 47 +++++++++++++++++++++++++++++++++++------------ > > lib/netdev-dpdk.h | 3 +++ > > 3 files changed, 54 insertions(+), 13 deletions(-) > > > > diff --git a/lib/dp-packet.c b/lib/dp-packet.c > > index 35c72542a2..07fa67b1a1 100644 > > --- a/lib/dp-packet.c > > +++ b/lib/dp-packet.c > > @@ -250,8 +250,23 @@ dp_packet_resize(struct dp_packet *b, size_t new_headroom, size_t new_tailroom) > > new_allocated = new_headroom + dp_packet_size(b) + new_tailroom; > > > > switch (b->source) { > > - case DPBUF_DPDK: > > + case DPBUF_DPDK: { > > +#ifdef DPDK_NETDEV > > + uint32_t buf_len; > > + > > + buf_len = new_allocated; > > + new_base = netdev_dpdk_extbuf_allocate(&buf_len); > > + if (!new_base) { > > + out_of_memory(); > > + } > > + ovs_assert(buf_len <= UINT16_MAX); > > + dp_packet_copy__(b, new_base, new_headroom, new_tailroom); > > + netdev_dpdk_extbuf_replace(b, new_base, buf_len); > > It seems that this is the first example in dp-packet.c to use the APIs > in netdev-dpdk.c. Why not move them to dp-packet.c directly? This is not the first time this approach is taken. lib/dp-packet.c: free_dpdk_buf((struct dp_packet*) b); lib/dp-packet.h: free_dpdk_buf((struct dp_packet*) b); lib/netdev-dpdk.c:free_dpdk_buf(struct dp_packet *p) lib/netdev-dpdk.h:void free_dpdk_buf(struct dp_packet *); lib/netdev-dpdk.h:free_dpdk_buf(struct dp_packet *buf OVS_UNUSED) I prefer to keep DPDK related bits in netdev-dpdk.c. dp-packet does not have to know about external buffers and having all code about external buffers in a single file is easier to tracker/understand.
On 3/18/22 16:33, David Marchand wrote: > DPDK based dp-packets points to data buffers that can't be expanded > dynamically. > Their layout is as follows: > - a 128 bytes headroom chosen at DPDK build time (RTE_PKTMBUF_HEADROOM), > - a maximum size chosen at mempool creation, > > In some usecases though (like encapsulating with multiple tunnels), > a 128 bytes headroom is too short. > > Dynamically allocate buffers in DPDK memory and make use of DPDK > external buffers API (previously used for userspace TSO). > > Signed-off-by: David Marchand <david.marchand@redhat.com> > --- Hi, David. This is a nice change! I didn't test much, but the code looks correct in general. If you can create a system test for this issue, that would be great. Thanks! Best regards, Ilya Maximets.
Hi Ilya, On Thu, Jun 30, 2022 at 3:06 PM Ilya Maximets <i.maximets@ovn.org> wrote: > > On 3/18/22 16:33, David Marchand wrote: > > DPDK based dp-packets points to data buffers that can't be expanded > > dynamically. > > Their layout is as follows: > > - a 128 bytes headroom chosen at DPDK build time (RTE_PKTMBUF_HEADROOM), > > - a maximum size chosen at mempool creation, > > > > In some usecases though (like encapsulating with multiple tunnels), > > a 128 bytes headroom is too short. > > > > Dynamically allocate buffers in DPDK memory and make use of DPDK > > external buffers API (previously used for userspace TSO). > > > > Signed-off-by: David Marchand <david.marchand@redhat.com> > > --- > > Hi, David. This is a nice change! I didn't test much, but > the code looks correct in general. > > If you can create a system test for this issue, that would be great. The issue is netdev-dpdk / dpdk related, so I intend to add such a test in system-dpdk.at. Do you have any objection/better location? Thanks!
On 8/26/22 14:14, David Marchand wrote: > Hi Ilya, > > On Thu, Jun 30, 2022 at 3:06 PM Ilya Maximets <i.maximets@ovn.org> wrote: >> >> On 3/18/22 16:33, David Marchand wrote: >>> DPDK based dp-packets points to data buffers that can't be expanded >>> dynamically. >>> Their layout is as follows: >>> - a 128 bytes headroom chosen at DPDK build time (RTE_PKTMBUF_HEADROOM), >>> - a maximum size chosen at mempool creation, >>> >>> In some usecases though (like encapsulating with multiple tunnels), >>> a 128 bytes headroom is too short. >>> >>> Dynamically allocate buffers in DPDK memory and make use of DPDK >>> external buffers API (previously used for userspace TSO). >>> >>> Signed-off-by: David Marchand <david.marchand@redhat.com> >>> --- >> >> Hi, David. This is a nice change! I didn't test much, but >> the code looks correct in general. >> >> If you can create a system test for this issue, that would be great. > > The issue is netdev-dpdk / dpdk related, so I intend to add such a > test in system-dpdk.at. > Do you have any objection/better location? Yeah, we do not really have any other place to run dpdk-specific tests at the moment, even if they are not really system tests. We can probably create some actual unit-tests with ovstest though. But I'm not sure how much work that will require. For this particular problem though, we will need a real system test anyway. Best regards, Ilya Maximets.
diff --git a/lib/dp-packet.c b/lib/dp-packet.c index 35c72542a2..07fa67b1a1 100644 --- a/lib/dp-packet.c +++ b/lib/dp-packet.c @@ -250,8 +250,23 @@ dp_packet_resize(struct dp_packet *b, size_t new_headroom, size_t new_tailroom) new_allocated = new_headroom + dp_packet_size(b) + new_tailroom; switch (b->source) { - case DPBUF_DPDK: + case DPBUF_DPDK: { +#ifdef DPDK_NETDEV + uint32_t buf_len; + + buf_len = new_allocated; + new_base = netdev_dpdk_extbuf_allocate(&buf_len); + if (!new_base) { + out_of_memory(); + } + ovs_assert(buf_len <= UINT16_MAX); + dp_packet_copy__(b, new_base, new_headroom, new_tailroom); + netdev_dpdk_extbuf_replace(b, new_base, buf_len); + break; +#else OVS_NOT_REACHED(); +#endif + } case DPBUF_MALLOC: if (new_headroom == dp_packet_headroom(b)) { diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index fbc3b42d84..47e16f22c5 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -2646,41 +2646,64 @@ out: } } +void * +netdev_dpdk_extbuf_allocate(uint32_t *data_len) +{ + *data_len += sizeof(struct rte_mbuf_ext_shared_info) + sizeof(uintptr_t); + *data_len = RTE_ALIGN_CEIL(*data_len, sizeof(uintptr_t)); + return rte_malloc(NULL, *data_len, RTE_CACHE_LINE_SIZE); +} + static void netdev_dpdk_extbuf_free(void *addr OVS_UNUSED, void *opaque) { rte_free(opaque); } +void +netdev_dpdk_extbuf_replace(struct dp_packet *b, void *buf, uint32_t data_len) +{ + struct rte_mbuf *pkt = (struct rte_mbuf *) b; + struct rte_mbuf_ext_shared_info *shinfo; + uint16_t buf_len = data_len; + + shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len, + netdev_dpdk_extbuf_free, + buf); + ovs_assert(shinfo != NULL); + + if (RTE_MBUF_HAS_EXTBUF(pkt)) { + rte_pktmbuf_detach_extbuf(pkt); + } + rte_pktmbuf_attach_extbuf(pkt, buf, rte_malloc_virt2iova(buf), buf_len, + shinfo); +} + static struct rte_mbuf * dpdk_pktmbuf_attach_extbuf(struct rte_mbuf *pkt, uint32_t data_len) { uint32_t total_len = RTE_PKTMBUF_HEADROOM + data_len; - struct rte_mbuf_ext_shared_info *shinfo = NULL; + struct rte_mbuf_ext_shared_info *shinfo; uint16_t buf_len; void *buf; - total_len += sizeof *shinfo + sizeof(uintptr_t); - total_len = RTE_ALIGN_CEIL(total_len, sizeof(uintptr_t)); - + buf = netdev_dpdk_extbuf_allocate(&total_len); + if (OVS_UNLIKELY(buf == NULL)) { + VLOG_ERR("Failed to allocate memory using rte_malloc: %u", total_len); + return NULL; + } if (OVS_UNLIKELY(total_len > UINT16_MAX)) { + netdev_dpdk_extbuf_free(NULL, buf); VLOG_ERR("Can't copy packet: too big %u", total_len); return NULL; } buf_len = total_len; - buf = rte_malloc(NULL, buf_len, RTE_CACHE_LINE_SIZE); - if (OVS_UNLIKELY(buf == NULL)) { - VLOG_ERR("Failed to allocate memory using rte_malloc: %u", buf_len); - return NULL; - } - - /* Initialize shinfo. */ shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len, netdev_dpdk_extbuf_free, buf); if (OVS_UNLIKELY(shinfo == NULL)) { - rte_free(buf); + netdev_dpdk_extbuf_free(NULL, buf); VLOG_ERR("Failed to initialize shared info for mbuf while " "attempting to attach an external buffer."); return NULL; diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h index 699be3fb41..95594f07fb 100644 --- a/lib/netdev-dpdk.h +++ b/lib/netdev-dpdk.h @@ -31,6 +31,9 @@ struct netdev; void netdev_dpdk_register(void); void free_dpdk_buf(struct dp_packet *); +void *netdev_dpdk_extbuf_allocate(uint32_t *); +void netdev_dpdk_extbuf_replace(struct dp_packet *, void *, uint32_t); + bool netdev_dpdk_flow_api_supported(struct netdev *); int
DPDK based dp-packets points to data buffers that can't be expanded dynamically. Their layout is as follows: - a 128 bytes headroom chosen at DPDK build time (RTE_PKTMBUF_HEADROOM), - a maximum size chosen at mempool creation, In some usecases though (like encapsulating with multiple tunnels), a 128 bytes headroom is too short. Dynamically allocate buffers in DPDK memory and make use of DPDK external buffers API (previously used for userspace TSO). Signed-off-by: David Marchand <david.marchand@redhat.com> --- lib/dp-packet.c | 17 ++++++++++++++++- lib/netdev-dpdk.c | 47 +++++++++++++++++++++++++++++++++++------------ lib/netdev-dpdk.h | 3 +++ 3 files changed, 54 insertions(+), 13 deletions(-)