diff mbox series

[ovs-dev] dp-packet: Allow DPDK packet resize.

Message ID 20220318153339.31083-1-david.marchand@redhat.com
State Changes Requested
Headers show
Series [ovs-dev] dp-packet: Allow DPDK packet resize. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/intel-ovs-compilation success test: success

Commit Message

David Marchand March 18, 2022, 3:33 p.m. UTC
DPDK based dp-packets points to data buffers that can't be expanded
dynamically.
Their layout is as follows:
- a 128 bytes headroom chosen at DPDK build time (RTE_PKTMBUF_HEADROOM),
- a maximum size chosen at mempool creation,

In some usecases though (like encapsulating with multiple tunnels),
a 128 bytes headroom is too short.

Dynamically allocate buffers in DPDK memory and make use of DPDK
external buffers API (previously used for userspace TSO).

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
 lib/dp-packet.c   | 17 ++++++++++++++++-
 lib/netdev-dpdk.c | 47 +++++++++++++++++++++++++++++++++++------------
 lib/netdev-dpdk.h |  3 +++
 3 files changed, 54 insertions(+), 13 deletions(-)

Comments

Peng He March 21, 2022, 5:53 a.m. UTC | #1
Hi,
do you have future plan to support mbuf chaining for dp-packet?
as I see you've actually relax some check on dp-packet in this
patch.

thanks

David Marchand <david.marchand@redhat.com> 于2022年3月18日周五 23:34写道:

> DPDK based dp-packets points to data buffers that can't be expanded
> dynamically.
> Their layout is as follows:
> - a 128 bytes headroom chosen at DPDK build time (RTE_PKTMBUF_HEADROOM),
> - a maximum size chosen at mempool creation,
>
> In some usecases though (like encapsulating with multiple tunnels),
> a 128 bytes headroom is too short.
>
> Dynamically allocate buffers in DPDK memory and make use of DPDK
> external buffers API (previously used for userspace TSO).
>
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> ---
>  lib/dp-packet.c   | 17 ++++++++++++++++-
>  lib/netdev-dpdk.c | 47 +++++++++++++++++++++++++++++++++++------------
>  lib/netdev-dpdk.h |  3 +++
>  3 files changed, 54 insertions(+), 13 deletions(-)
>
> diff --git a/lib/dp-packet.c b/lib/dp-packet.c
> index 35c72542a2..07fa67b1a1 100644
> --- a/lib/dp-packet.c
> +++ b/lib/dp-packet.c
> @@ -250,8 +250,23 @@ dp_packet_resize(struct dp_packet *b, size_t
> new_headroom, size_t new_tailroom)
>      new_allocated = new_headroom + dp_packet_size(b) + new_tailroom;
>
>      switch (b->source) {
> -    case DPBUF_DPDK:
> +    case DPBUF_DPDK: {
> +#ifdef DPDK_NETDEV
> +        uint32_t buf_len;
> +
> +        buf_len = new_allocated;
> +        new_base = netdev_dpdk_extbuf_allocate(&buf_len);
> +        if (!new_base) {
> +            out_of_memory();
> +        }
> +        ovs_assert(buf_len <= UINT16_MAX);
> +        dp_packet_copy__(b, new_base, new_headroom, new_tailroom);
> +        netdev_dpdk_extbuf_replace(b, new_base, buf_len);
> +        break;
> +#else
>          OVS_NOT_REACHED();
> +#endif
> +    }
>
>      case DPBUF_MALLOC:
>          if (new_headroom == dp_packet_headroom(b)) {
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index fbc3b42d84..47e16f22c5 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -2646,41 +2646,64 @@ out:
>      }
>  }
>
> +void *
> +netdev_dpdk_extbuf_allocate(uint32_t *data_len)
> +{
> +    *data_len += sizeof(struct rte_mbuf_ext_shared_info) +
> sizeof(uintptr_t);
> +    *data_len = RTE_ALIGN_CEIL(*data_len, sizeof(uintptr_t));
> +    return rte_malloc(NULL, *data_len, RTE_CACHE_LINE_SIZE);
> +}
> +
>  static void
>  netdev_dpdk_extbuf_free(void *addr OVS_UNUSED, void *opaque)
>  {
>      rte_free(opaque);
>  }
>
> +void
> +netdev_dpdk_extbuf_replace(struct dp_packet *b, void *buf, uint32_t
> data_len)
> +{
> +    struct rte_mbuf *pkt = (struct rte_mbuf *) b;
> +    struct rte_mbuf_ext_shared_info *shinfo;
> +    uint16_t buf_len = data_len;
> +
> +    shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len,
> +                                                netdev_dpdk_extbuf_free,
> +                                                buf);
> +    ovs_assert(shinfo != NULL);
> +
> +    if (RTE_MBUF_HAS_EXTBUF(pkt)) {
> +        rte_pktmbuf_detach_extbuf(pkt);
> +    }
> +    rte_pktmbuf_attach_extbuf(pkt, buf, rte_malloc_virt2iova(buf),
> buf_len,
> +                              shinfo);
> +}
> +
>  static struct rte_mbuf *
>  dpdk_pktmbuf_attach_extbuf(struct rte_mbuf *pkt, uint32_t data_len)
>  {
>      uint32_t total_len = RTE_PKTMBUF_HEADROOM + data_len;
> -    struct rte_mbuf_ext_shared_info *shinfo = NULL;
> +    struct rte_mbuf_ext_shared_info *shinfo;
>      uint16_t buf_len;
>      void *buf;
>
> -    total_len += sizeof *shinfo + sizeof(uintptr_t);
> -    total_len = RTE_ALIGN_CEIL(total_len, sizeof(uintptr_t));
> -
> +    buf = netdev_dpdk_extbuf_allocate(&total_len);
> +    if (OVS_UNLIKELY(buf == NULL)) {
> +        VLOG_ERR("Failed to allocate memory using rte_malloc: %u",
> total_len);
> +        return NULL;
> +    }
>      if (OVS_UNLIKELY(total_len > UINT16_MAX)) {
> +        netdev_dpdk_extbuf_free(NULL, buf);
>          VLOG_ERR("Can't copy packet: too big %u", total_len);
>          return NULL;
>      }
>
>      buf_len = total_len;
> -    buf = rte_malloc(NULL, buf_len, RTE_CACHE_LINE_SIZE);
> -    if (OVS_UNLIKELY(buf == NULL)) {
> -        VLOG_ERR("Failed to allocate memory using rte_malloc: %u",
> buf_len);
> -        return NULL;
> -    }
> -
> -    /* Initialize shinfo. */
>      shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len,
>                                                  netdev_dpdk_extbuf_free,
>                                                  buf);
>      if (OVS_UNLIKELY(shinfo == NULL)) {
> -        rte_free(buf);
> +        netdev_dpdk_extbuf_free(NULL, buf);
>          VLOG_ERR("Failed to initialize shared info for mbuf while "
>                   "attempting to attach an external buffer.");
>          return NULL;
> diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h
> index 699be3fb41..95594f07fb 100644
> --- a/lib/netdev-dpdk.h
> +++ b/lib/netdev-dpdk.h
> @@ -31,6 +31,9 @@ struct netdev;
>  void netdev_dpdk_register(void);
>  void free_dpdk_buf(struct dp_packet *);
>
> +void *netdev_dpdk_extbuf_allocate(uint32_t *);
> +void netdev_dpdk_extbuf_replace(struct dp_packet *, void *, uint32_t);
> +
>  bool netdev_dpdk_flow_api_supported(struct netdev *);
>
>  int
> --
> 2.23.0
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
Harold Huang March 22, 2022, 2:41 a.m. UTC | #2
On Fri, Mar 18, 2022 at 11:33 PM David Marchand
<david.marchand@redhat.com> wrote:
>
> DPDK based dp-packets points to data buffers that can't be expanded
> dynamically.
> Their layout is as follows:
> - a 128 bytes headroom chosen at DPDK build time (RTE_PKTMBUF_HEADROOM),
> - a maximum size chosen at mempool creation,
>
> In some usecases though (like encapsulating with multiple tunnels),
> a 128 bytes headroom is too short.
>
> Dynamically allocate buffers in DPDK memory and make use of DPDK
> external buffers API (previously used for userspace TSO).
>
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> ---
>  lib/dp-packet.c   | 17 ++++++++++++++++-
>  lib/netdev-dpdk.c | 47 +++++++++++++++++++++++++++++++++++------------
>  lib/netdev-dpdk.h |  3 +++
>  3 files changed, 54 insertions(+), 13 deletions(-)
>
> diff --git a/lib/dp-packet.c b/lib/dp-packet.c
> index 35c72542a2..07fa67b1a1 100644
> --- a/lib/dp-packet.c
> +++ b/lib/dp-packet.c
> @@ -250,8 +250,23 @@ dp_packet_resize(struct dp_packet *b, size_t new_headroom, size_t new_tailroom)
>      new_allocated = new_headroom + dp_packet_size(b) + new_tailroom;
>
>      switch (b->source) {
> -    case DPBUF_DPDK:
> +    case DPBUF_DPDK: {
> +#ifdef DPDK_NETDEV
> +        uint32_t buf_len;
> +
> +        buf_len = new_allocated;
> +        new_base = netdev_dpdk_extbuf_allocate(&buf_len);
> +        if (!new_base) {
> +            out_of_memory();
> +        }
> +        ovs_assert(buf_len <= UINT16_MAX);
> +        dp_packet_copy__(b, new_base, new_headroom, new_tailroom);
> +        netdev_dpdk_extbuf_replace(b, new_base, buf_len);

It seems that this is the first example in dp-packet.c to use the APIs
in netdev-dpdk.c. Why not move them to dp-packet.c directly?

> +        break;
> +#else
>          OVS_NOT_REACHED();
> +#endif
> +    }
>
>      case DPBUF_MALLOC:
>          if (new_headroom == dp_packet_headroom(b)) {
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index fbc3b42d84..47e16f22c5 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -2646,41 +2646,64 @@ out:
>      }
>  }
>
> +void *
> +netdev_dpdk_extbuf_allocate(uint32_t *data_len)
> +{
> +    *data_len += sizeof(struct rte_mbuf_ext_shared_info) + sizeof(uintptr_t);
> +    *data_len = RTE_ALIGN_CEIL(*data_len, sizeof(uintptr_t));
> +    return rte_malloc(NULL, *data_len, RTE_CACHE_LINE_SIZE);
> +}
> +
>  static void
>  netdev_dpdk_extbuf_free(void *addr OVS_UNUSED, void *opaque)
>  {
>      rte_free(opaque);
>  }
>
> +void
> +netdev_dpdk_extbuf_replace(struct dp_packet *b, void *buf, uint32_t data_len)
> +{
> +    struct rte_mbuf *pkt = (struct rte_mbuf *) b;
> +    struct rte_mbuf_ext_shared_info *shinfo;
> +    uint16_t buf_len = data_len;
> +
> +    shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len,
> +                                                netdev_dpdk_extbuf_free,
> +                                                buf);
> +    ovs_assert(shinfo != NULL);
> +
> +    if (RTE_MBUF_HAS_EXTBUF(pkt)) {
> +        rte_pktmbuf_detach_extbuf(pkt);
> +    }
> +    rte_pktmbuf_attach_extbuf(pkt, buf, rte_malloc_virt2iova(buf), buf_len,
> +                              shinfo);
> +}
> +
>  static struct rte_mbuf *
>  dpdk_pktmbuf_attach_extbuf(struct rte_mbuf *pkt, uint32_t data_len)
>  {
>      uint32_t total_len = RTE_PKTMBUF_HEADROOM + data_len;
> -    struct rte_mbuf_ext_shared_info *shinfo = NULL;
> +    struct rte_mbuf_ext_shared_info *shinfo;
>      uint16_t buf_len;
>      void *buf;
>
> -    total_len += sizeof *shinfo + sizeof(uintptr_t);
> -    total_len = RTE_ALIGN_CEIL(total_len, sizeof(uintptr_t));
> -
> +    buf = netdev_dpdk_extbuf_allocate(&total_len);
> +    if (OVS_UNLIKELY(buf == NULL)) {
> +        VLOG_ERR("Failed to allocate memory using rte_malloc: %u", total_len);
> +        return NULL;
> +    }
>      if (OVS_UNLIKELY(total_len > UINT16_MAX)) {
> +        netdev_dpdk_extbuf_free(NULL, buf);
>          VLOG_ERR("Can't copy packet: too big %u", total_len);
>          return NULL;
>      }
>
>      buf_len = total_len;
> -    buf = rte_malloc(NULL, buf_len, RTE_CACHE_LINE_SIZE);
> -    if (OVS_UNLIKELY(buf == NULL)) {
> -        VLOG_ERR("Failed to allocate memory using rte_malloc: %u", buf_len);
> -        return NULL;
> -    }
> -
> -    /* Initialize shinfo. */
>      shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len,
>                                                  netdev_dpdk_extbuf_free,
>                                                  buf);
>      if (OVS_UNLIKELY(shinfo == NULL)) {
> -        rte_free(buf);
> +        netdev_dpdk_extbuf_free(NULL, buf);
>          VLOG_ERR("Failed to initialize shared info for mbuf while "
>                   "attempting to attach an external buffer.");
>          return NULL;
> diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h
> index 699be3fb41..95594f07fb 100644
> --- a/lib/netdev-dpdk.h
> +++ b/lib/netdev-dpdk.h
> @@ -31,6 +31,9 @@ struct netdev;
>  void netdev_dpdk_register(void);
>  void free_dpdk_buf(struct dp_packet *);
>
> +void *netdev_dpdk_extbuf_allocate(uint32_t *);
> +void netdev_dpdk_extbuf_replace(struct dp_packet *, void *, uint32_t);
> +
>  bool netdev_dpdk_flow_api_supported(struct netdev *);
>
>  int
> --
> 2.23.0
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Harold Huang March 22, 2022, 2:55 a.m. UTC | #3
Hi, Peng,
    Patch[1] is an example in OVS-DPDK to support muti-segments mbufs.
It was not applied to OVS upstream finally. Maybe muti-segments mbuf
feature is not accepted by the community although we need this feature
when we want to use some virtual PMD drivers such as vhost-user,
virtio-user to support TSO or GRO.

[1]: https://patchwork.ozlabs.org/project/openvswitch/cover/1539188552-129083-1-git-send-email-tiago.lam@intel.com/

On Mon, Mar 21, 2022 at 1:54 PM Peng He <xnhp0320@gmail.com> wrote:
>
> Hi,
> do you have future plan to support mbuf chaining for dp-packet?
> as I see you've actually relax some check on dp-packet in this
> patch.
>
> thanks
>
> David Marchand <david.marchand@redhat.com> 于2022年3月18日周五 23:34写道:
>
> > DPDK based dp-packets points to data buffers that can't be expanded
> > dynamically.
> > Their layout is as follows:
> > - a 128 bytes headroom chosen at DPDK build time (RTE_PKTMBUF_HEADROOM),
> > - a maximum size chosen at mempool creation,
> >
> > In some usecases though (like encapsulating with multiple tunnels),
> > a 128 bytes headroom is too short.
> >
> > Dynamically allocate buffers in DPDK memory and make use of DPDK
> > external buffers API (previously used for userspace TSO).
> >
> > Signed-off-by: David Marchand <david.marchand@redhat.com>
> > ---
> >  lib/dp-packet.c   | 17 ++++++++++++++++-
> >  lib/netdev-dpdk.c | 47 +++++++++++++++++++++++++++++++++++------------
> >  lib/netdev-dpdk.h |  3 +++
> >  3 files changed, 54 insertions(+), 13 deletions(-)
> >
> > diff --git a/lib/dp-packet.c b/lib/dp-packet.c
> > index 35c72542a2..07fa67b1a1 100644
> > --- a/lib/dp-packet.c
> > +++ b/lib/dp-packet.c
> > @@ -250,8 +250,23 @@ dp_packet_resize(struct dp_packet *b, size_t
> > new_headroom, size_t new_tailroom)
> >      new_allocated = new_headroom + dp_packet_size(b) + new_tailroom;
> >
> >      switch (b->source) {
> > -    case DPBUF_DPDK:
> > +    case DPBUF_DPDK: {
> > +#ifdef DPDK_NETDEV
> > +        uint32_t buf_len;
> > +
> > +        buf_len = new_allocated;
> > +        new_base = netdev_dpdk_extbuf_allocate(&buf_len);
> > +        if (!new_base) {
> > +            out_of_memory();
> > +        }
> > +        ovs_assert(buf_len <= UINT16_MAX);
> > +        dp_packet_copy__(b, new_base, new_headroom, new_tailroom);
> > +        netdev_dpdk_extbuf_replace(b, new_base, buf_len);
> > +        break;
> > +#else
> >          OVS_NOT_REACHED();
> > +#endif
> > +    }
> >
> >      case DPBUF_MALLOC:
> >          if (new_headroom == dp_packet_headroom(b)) {
> > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> > index fbc3b42d84..47e16f22c5 100644
> > --- a/lib/netdev-dpdk.c
> > +++ b/lib/netdev-dpdk.c
> > @@ -2646,41 +2646,64 @@ out:
> >      }
> >  }
> >
> > +void *
> > +netdev_dpdk_extbuf_allocate(uint32_t *data_len)
> > +{
> > +    *data_len += sizeof(struct rte_mbuf_ext_shared_info) +
> > sizeof(uintptr_t);
> > +    *data_len = RTE_ALIGN_CEIL(*data_len, sizeof(uintptr_t));
> > +    return rte_malloc(NULL, *data_len, RTE_CACHE_LINE_SIZE);
> > +}
> > +
> >  static void
> >  netdev_dpdk_extbuf_free(void *addr OVS_UNUSED, void *opaque)
> >  {
> >      rte_free(opaque);
> >  }
> >
> > +void
> > +netdev_dpdk_extbuf_replace(struct dp_packet *b, void *buf, uint32_t
> > data_len)
> > +{
> > +    struct rte_mbuf *pkt = (struct rte_mbuf *) b;
> > +    struct rte_mbuf_ext_shared_info *shinfo;
> > +    uint16_t buf_len = data_len;
> > +
> > +    shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len,
> > +                                                netdev_dpdk_extbuf_free,
> > +                                                buf);
> > +    ovs_assert(shinfo != NULL);
> > +
> > +    if (RTE_MBUF_HAS_EXTBUF(pkt)) {
> > +        rte_pktmbuf_detach_extbuf(pkt);
> > +    }
> > +    rte_pktmbuf_attach_extbuf(pkt, buf, rte_malloc_virt2iova(buf),
> > buf_len,
> > +                              shinfo);
> > +}
> > +
> >  static struct rte_mbuf *
> >  dpdk_pktmbuf_attach_extbuf(struct rte_mbuf *pkt, uint32_t data_len)
> >  {
> >      uint32_t total_len = RTE_PKTMBUF_HEADROOM + data_len;
> > -    struct rte_mbuf_ext_shared_info *shinfo = NULL;
> > +    struct rte_mbuf_ext_shared_info *shinfo;
> >      uint16_t buf_len;
> >      void *buf;
> >
> > -    total_len += sizeof *shinfo + sizeof(uintptr_t);
> > -    total_len = RTE_ALIGN_CEIL(total_len, sizeof(uintptr_t));
> > -
> > +    buf = netdev_dpdk_extbuf_allocate(&total_len);
> > +    if (OVS_UNLIKELY(buf == NULL)) {
> > +        VLOG_ERR("Failed to allocate memory using rte_malloc: %u",
> > total_len);
> > +        return NULL;
> > +    }
> >      if (OVS_UNLIKELY(total_len > UINT16_MAX)) {
> > +        netdev_dpdk_extbuf_free(NULL, buf);
> >          VLOG_ERR("Can't copy packet: too big %u", total_len);
> >          return NULL;
> >      }
> >
> >      buf_len = total_len;
> > -    buf = rte_malloc(NULL, buf_len, RTE_CACHE_LINE_SIZE);
> > -    if (OVS_UNLIKELY(buf == NULL)) {
> > -        VLOG_ERR("Failed to allocate memory using rte_malloc: %u",
> > buf_len);
> > -        return NULL;
> > -    }
> > -
> > -    /* Initialize shinfo. */
> >      shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len,
> >                                                  netdev_dpdk_extbuf_free,
> >                                                  buf);
> >      if (OVS_UNLIKELY(shinfo == NULL)) {
> > -        rte_free(buf);
> > +        netdev_dpdk_extbuf_free(NULL, buf);
> >          VLOG_ERR("Failed to initialize shared info for mbuf while "
> >                   "attempting to attach an external buffer.");
> >          return NULL;
> > diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h
> > index 699be3fb41..95594f07fb 100644
> > --- a/lib/netdev-dpdk.h
> > +++ b/lib/netdev-dpdk.h
> > @@ -31,6 +31,9 @@ struct netdev;
> >  void netdev_dpdk_register(void);
> >  void free_dpdk_buf(struct dp_packet *);
> >
> > +void *netdev_dpdk_extbuf_allocate(uint32_t *);
> > +void netdev_dpdk_extbuf_replace(struct dp_packet *, void *, uint32_t);
> > +
> >  bool netdev_dpdk_flow_api_supported(struct netdev *);
> >
> >  int
> > --
> > 2.23.0
> >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >
>
>
> --
> hepeng
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
David Marchand March 22, 2022, 4:32 p.m. UTC | #4
On Mon, Mar 21, 2022 at 6:54 AM Peng He <xnhp0320@gmail.com> wrote:
>
> Hi,
> do you have future plan to support mbuf chaining for dp-packet?
> as I see you've actually relax some check on dp-packet in this
> patch.

To me, mbuf chaining means chaining packets, like having a list of packets.
So I guess you mean "multi segments" mbuf.

This patch only allows resizing mono segment mbuf.
I have no plan to revive the multi segments work.


Do you see an issue with proposed code change?
David Marchand March 22, 2022, 4:37 p.m. UTC | #5
On Tue, Mar 22, 2022 at 3:41 AM Harold Huang <baymaxhuang@gmail.com> wrote:
>
> On Fri, Mar 18, 2022 at 11:33 PM David Marchand
> <david.marchand@redhat.com> wrote:
> >
> > DPDK based dp-packets points to data buffers that can't be expanded
> > dynamically.
> > Their layout is as follows:
> > - a 128 bytes headroom chosen at DPDK build time (RTE_PKTMBUF_HEADROOM),
> > - a maximum size chosen at mempool creation,
> >
> > In some usecases though (like encapsulating with multiple tunnels),
> > a 128 bytes headroom is too short.
> >
> > Dynamically allocate buffers in DPDK memory and make use of DPDK
> > external buffers API (previously used for userspace TSO).
> >
> > Signed-off-by: David Marchand <david.marchand@redhat.com>
> > ---
> >  lib/dp-packet.c   | 17 ++++++++++++++++-
> >  lib/netdev-dpdk.c | 47 +++++++++++++++++++++++++++++++++++------------
> >  lib/netdev-dpdk.h |  3 +++
> >  3 files changed, 54 insertions(+), 13 deletions(-)
> >
> > diff --git a/lib/dp-packet.c b/lib/dp-packet.c
> > index 35c72542a2..07fa67b1a1 100644
> > --- a/lib/dp-packet.c
> > +++ b/lib/dp-packet.c
> > @@ -250,8 +250,23 @@ dp_packet_resize(struct dp_packet *b, size_t new_headroom, size_t new_tailroom)
> >      new_allocated = new_headroom + dp_packet_size(b) + new_tailroom;
> >
> >      switch (b->source) {
> > -    case DPBUF_DPDK:
> > +    case DPBUF_DPDK: {
> > +#ifdef DPDK_NETDEV
> > +        uint32_t buf_len;
> > +
> > +        buf_len = new_allocated;
> > +        new_base = netdev_dpdk_extbuf_allocate(&buf_len);
> > +        if (!new_base) {
> > +            out_of_memory();
> > +        }
> > +        ovs_assert(buf_len <= UINT16_MAX);
> > +        dp_packet_copy__(b, new_base, new_headroom, new_tailroom);
> > +        netdev_dpdk_extbuf_replace(b, new_base, buf_len);
>
> It seems that this is the first example in dp-packet.c to use the APIs
> in netdev-dpdk.c. Why not move them to dp-packet.c directly?

This is not the first time this approach is taken.

lib/dp-packet.c:            free_dpdk_buf((struct dp_packet*) b);
lib/dp-packet.h:            free_dpdk_buf((struct dp_packet*) b);
lib/netdev-dpdk.c:free_dpdk_buf(struct dp_packet *p)
lib/netdev-dpdk.h:void free_dpdk_buf(struct dp_packet *);
lib/netdev-dpdk.h:free_dpdk_buf(struct dp_packet *buf OVS_UNUSED)

I prefer to keep DPDK related bits in netdev-dpdk.c.
dp-packet does not have to know about external buffers and having all
code about external buffers in a single file is easier to
tracker/understand.
Ilya Maximets June 30, 2022, 1:05 p.m. UTC | #6
On 3/18/22 16:33, David Marchand wrote:
> DPDK based dp-packets points to data buffers that can't be expanded
> dynamically.
> Their layout is as follows:
> - a 128 bytes headroom chosen at DPDK build time (RTE_PKTMBUF_HEADROOM),
> - a maximum size chosen at mempool creation,
> 
> In some usecases though (like encapsulating with multiple tunnels),
> a 128 bytes headroom is too short.
> 
> Dynamically allocate buffers in DPDK memory and make use of DPDK
> external buffers API (previously used for userspace TSO).
> 
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> ---

Hi, David.  This is a nice change!  I didn't test much, but
the code looks correct in general.

If you can create a system test for this issue, that would be great.
Thanks!

Best regards, Ilya Maximets.
David Marchand Aug. 26, 2022, 12:14 p.m. UTC | #7
Hi Ilya,

On Thu, Jun 30, 2022 at 3:06 PM Ilya Maximets <i.maximets@ovn.org> wrote:
>
> On 3/18/22 16:33, David Marchand wrote:
> > DPDK based dp-packets points to data buffers that can't be expanded
> > dynamically.
> > Their layout is as follows:
> > - a 128 bytes headroom chosen at DPDK build time (RTE_PKTMBUF_HEADROOM),
> > - a maximum size chosen at mempool creation,
> >
> > In some usecases though (like encapsulating with multiple tunnels),
> > a 128 bytes headroom is too short.
> >
> > Dynamically allocate buffers in DPDK memory and make use of DPDK
> > external buffers API (previously used for userspace TSO).
> >
> > Signed-off-by: David Marchand <david.marchand@redhat.com>
> > ---
>
> Hi, David.  This is a nice change!  I didn't test much, but
> the code looks correct in general.
>
> If you can create a system test for this issue, that would be great.

The issue is netdev-dpdk / dpdk related, so I intend to add such a
test in system-dpdk.at.
Do you have any objection/better location?

Thanks!
Ilya Maximets Aug. 26, 2022, 1:05 p.m. UTC | #8
On 8/26/22 14:14, David Marchand wrote:
> Hi Ilya,
> 
> On Thu, Jun 30, 2022 at 3:06 PM Ilya Maximets <i.maximets@ovn.org> wrote:
>>
>> On 3/18/22 16:33, David Marchand wrote:
>>> DPDK based dp-packets points to data buffers that can't be expanded
>>> dynamically.
>>> Their layout is as follows:
>>> - a 128 bytes headroom chosen at DPDK build time (RTE_PKTMBUF_HEADROOM),
>>> - a maximum size chosen at mempool creation,
>>>
>>> In some usecases though (like encapsulating with multiple tunnels),
>>> a 128 bytes headroom is too short.
>>>
>>> Dynamically allocate buffers in DPDK memory and make use of DPDK
>>> external buffers API (previously used for userspace TSO).
>>>
>>> Signed-off-by: David Marchand <david.marchand@redhat.com>
>>> ---
>>
>> Hi, David.  This is a nice change!  I didn't test much, but
>> the code looks correct in general.
>>
>> If you can create a system test for this issue, that would be great.
> 
> The issue is netdev-dpdk / dpdk related, so I intend to add such a
> test in system-dpdk.at.
> Do you have any objection/better location?

Yeah, we do not really have any other place to run dpdk-specific
tests at the moment, even if they are not really system tests.

We can probably create some actual unit-tests with ovstest though.
But I'm not sure how much work that will require.

For this particular problem though, we will need a real system test
anyway.

Best regards, Ilya Maximets.
diff mbox series

Patch

diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index 35c72542a2..07fa67b1a1 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -250,8 +250,23 @@  dp_packet_resize(struct dp_packet *b, size_t new_headroom, size_t new_tailroom)
     new_allocated = new_headroom + dp_packet_size(b) + new_tailroom;
 
     switch (b->source) {
-    case DPBUF_DPDK:
+    case DPBUF_DPDK: {
+#ifdef DPDK_NETDEV
+        uint32_t buf_len;
+
+        buf_len = new_allocated;
+        new_base = netdev_dpdk_extbuf_allocate(&buf_len);
+        if (!new_base) {
+            out_of_memory();
+        }
+        ovs_assert(buf_len <= UINT16_MAX);
+        dp_packet_copy__(b, new_base, new_headroom, new_tailroom);
+        netdev_dpdk_extbuf_replace(b, new_base, buf_len);
+        break;
+#else
         OVS_NOT_REACHED();
+#endif
+    }
 
     case DPBUF_MALLOC:
         if (new_headroom == dp_packet_headroom(b)) {
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index fbc3b42d84..47e16f22c5 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -2646,41 +2646,64 @@  out:
     }
 }
 
+void *
+netdev_dpdk_extbuf_allocate(uint32_t *data_len)
+{
+    *data_len += sizeof(struct rte_mbuf_ext_shared_info) + sizeof(uintptr_t);
+    *data_len = RTE_ALIGN_CEIL(*data_len, sizeof(uintptr_t));
+    return rte_malloc(NULL, *data_len, RTE_CACHE_LINE_SIZE);
+}
+
 static void
 netdev_dpdk_extbuf_free(void *addr OVS_UNUSED, void *opaque)
 {
     rte_free(opaque);
 }
 
+void
+netdev_dpdk_extbuf_replace(struct dp_packet *b, void *buf, uint32_t data_len)
+{
+    struct rte_mbuf *pkt = (struct rte_mbuf *) b;
+    struct rte_mbuf_ext_shared_info *shinfo;
+    uint16_t buf_len = data_len;
+
+    shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len,
+                                                netdev_dpdk_extbuf_free,
+                                                buf);
+    ovs_assert(shinfo != NULL);
+
+    if (RTE_MBUF_HAS_EXTBUF(pkt)) {
+        rte_pktmbuf_detach_extbuf(pkt);
+    }
+    rte_pktmbuf_attach_extbuf(pkt, buf, rte_malloc_virt2iova(buf), buf_len,
+                              shinfo);
+}
+
 static struct rte_mbuf *
 dpdk_pktmbuf_attach_extbuf(struct rte_mbuf *pkt, uint32_t data_len)
 {
     uint32_t total_len = RTE_PKTMBUF_HEADROOM + data_len;
-    struct rte_mbuf_ext_shared_info *shinfo = NULL;
+    struct rte_mbuf_ext_shared_info *shinfo;
     uint16_t buf_len;
     void *buf;
 
-    total_len += sizeof *shinfo + sizeof(uintptr_t);
-    total_len = RTE_ALIGN_CEIL(total_len, sizeof(uintptr_t));
-
+    buf = netdev_dpdk_extbuf_allocate(&total_len);
+    if (OVS_UNLIKELY(buf == NULL)) {
+        VLOG_ERR("Failed to allocate memory using rte_malloc: %u", total_len);
+        return NULL;
+    }
     if (OVS_UNLIKELY(total_len > UINT16_MAX)) {
+        netdev_dpdk_extbuf_free(NULL, buf);
         VLOG_ERR("Can't copy packet: too big %u", total_len);
         return NULL;
     }
 
     buf_len = total_len;
-    buf = rte_malloc(NULL, buf_len, RTE_CACHE_LINE_SIZE);
-    if (OVS_UNLIKELY(buf == NULL)) {
-        VLOG_ERR("Failed to allocate memory using rte_malloc: %u", buf_len);
-        return NULL;
-    }
-
-    /* Initialize shinfo. */
     shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len,
                                                 netdev_dpdk_extbuf_free,
                                                 buf);
     if (OVS_UNLIKELY(shinfo == NULL)) {
-        rte_free(buf);
+        netdev_dpdk_extbuf_free(NULL, buf);
         VLOG_ERR("Failed to initialize shared info for mbuf while "
                  "attempting to attach an external buffer.");
         return NULL;
diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h
index 699be3fb41..95594f07fb 100644
--- a/lib/netdev-dpdk.h
+++ b/lib/netdev-dpdk.h
@@ -31,6 +31,9 @@  struct netdev;
 void netdev_dpdk_register(void);
 void free_dpdk_buf(struct dp_packet *);
 
+void *netdev_dpdk_extbuf_allocate(uint32_t *);
+void netdev_dpdk_extbuf_replace(struct dp_packet *, void *, uint32_t);
+
 bool netdev_dpdk_flow_api_supported(struct netdev *);
 
 int