diff mbox

[ovs-dev,PATCHv3] netdev-provider: Apply batch object to netdev provider.

Message ID 1467233588-65303-1-git-send-email-u9012063@gmail.com
State Changes Requested
Delegated to: Daniele Di Proietto
Headers show

Commit Message

William Tu June 29, 2016, 8:53 p.m. UTC
Commit 1895cc8dbb64 ("dpif-netdev: create batch object") introduces
batch process functions and 'struct dp_packet_batch' to associate with
batch-level metadata.  This patch applies the packet batch object to
the netdev provider interface (dummy, Linux, BSD, and DPDK) so that
batch APIs can be used in providers.  With batch metadata visible in
providers, optimizations can be introduced at per-batch level instead
of per-packet.

Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/141178364
Signed-off-by: William Tu <u9012063@gmail.com>
--
v2->v3:
- fix freebsd build issue
v1->v2:
- more descriptive commit message
---
 lib/netdev-bsd.c      |  9 ++++--
 lib/netdev-dpdk.c     | 81 +++++++++++++++++++++++++--------------------------
 lib/netdev-dummy.c    |  9 ++++--
 lib/netdev-linux.c    |  9 ++++--
 lib/netdev-provider.h | 25 ++++++++--------
 lib/netdev.c          |  6 ++--
 6 files changed, 72 insertions(+), 67 deletions(-)

Comments

Ben Pfaff July 3, 2016, 5:32 p.m. UTC | #1
On Wed, Jun 29, 2016 at 01:53:08PM -0700, William Tu wrote:
> Commit 1895cc8dbb64 ("dpif-netdev: create batch object") introduces
> batch process functions and 'struct dp_packet_batch' to associate with
> batch-level metadata.  This patch applies the packet batch object to
> the netdev provider interface (dummy, Linux, BSD, and DPDK) so that
> batch APIs can be used in providers.  With batch metadata visible in
> providers, optimizations can be introduced at per-batch level instead
> of per-packet.
> 
> Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/141178364
> Signed-off-by: William Tu <u9012063@gmail.com>
> --
> v2->v3:
> - fix freebsd build issue
> v1->v2:
> - more descriptive commit message

This seems aimed mostly at DPDK, so I'll leave it to someone who knows
DPDK better to properly review it.
Daniele Di Proietto July 15, 2016, 12:34 a.m. UTC | #2
Hi William,

Thanks for the patch, it makes sense to me!

In general I think it would be better to avoid introducing extra local
variables like 'pkts' and 'c': I think it would be more readable and the
compiler might not always be able to optimize them (I checked the assebly
output in a couple of functions).

Few comments inline, otherwise this looks good to me

2016-06-29 13:53 GMT-07:00 William Tu <u9012063@gmail.com>:

> Commit 1895cc8dbb64 ("dpif-netdev: create batch object") introduces
> batch process functions and 'struct dp_packet_batch' to associate with
> batch-level metadata.  This patch applies the packet batch object to
> the netdev provider interface (dummy, Linux, BSD, and DPDK) so that
> batch APIs can be used in providers.  With batch metadata visible in
> providers, optimizations can be introduced at per-batch level instead
> of per-packet.
>
> Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/141178364
> Signed-off-by: William Tu <u9012063@gmail.com>
> --
> v2->v3:
> - fix freebsd build issue
> v1->v2:
> - more descriptive commit message
> ---
>  lib/netdev-bsd.c      |  9 ++++--
>  lib/netdev-dpdk.c     | 81
> +++++++++++++++++++++++++--------------------------
>  lib/netdev-dummy.c    |  9 ++++--
>  lib/netdev-linux.c    |  9 ++++--
>  lib/netdev-provider.h | 25 ++++++++--------
>  lib/netdev.c          |  6 ++--
>  6 files changed, 72 insertions(+), 67 deletions(-)
>
> diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c
> index 2e92d97..6ab5048 100644
> --- a/lib/netdev-bsd.c
> +++ b/lib/netdev-bsd.c
> @@ -618,12 +618,13 @@ netdev_rxq_bsd_recv_tap(struct netdev_rxq_bsd *rxq,
> struct dp_packet *buffer)
>  }
>
>  static int
> -netdev_bsd_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet **packets,
> -                    int *c)
> +netdev_bsd_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch
> *batch)
>  {
>      struct netdev_rxq_bsd *rxq = netdev_rxq_bsd_cast(rxq_);
>      struct netdev *netdev = rxq->up.netdev;
>      struct dp_packet *packet;
> +    struct dp_packet **packets = batch->packets;
> +    int *c = &batch->count;
>

extra variable


>      ssize_t retval;
>      int mtu;
>
> @@ -681,10 +682,12 @@ netdev_bsd_rxq_drain(struct netdev_rxq *rxq_)
>   */
>  static int
>  netdev_bsd_send(struct netdev *netdev_, int qid OVS_UNUSED,
> -                struct dp_packet **pkts, int cnt, bool may_steal)
> +                struct dp_packet_batch *batch, bool may_steal)
>  {
>      struct netdev_bsd *dev = netdev_bsd_cast(netdev_);
>      const char *name = netdev_get_name(netdev_);
> +    int cnt = batch->count;
> +    struct dp_packet **pkts = batch->packets;
>

extra variable


>      int error;
>      int i;
>
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 02e2c58..80ce233 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -1253,12 +1253,14 @@ netdev_dpdk_vhost_update_rx_counters(struct
> netdev_stats *stats,
>   */
>  static int
>  netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
> -                           struct dp_packet **packets, int *c)
> +                           struct dp_packet_batch *batch)
>  {
>      struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);
>      struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
>      int qid = rxq->queue_id;
>      struct ingress_policer *policer =
> netdev_dpdk_get_ingress_policer(dev);
> +    struct dp_packet **packets = batch->packets;
> +    int *c = &batch->count;
>

extra variable


>      uint16_t nb_rx = 0;
>      uint16_t dropped = 0;
>
> @@ -1294,12 +1296,13 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
>  }
>
>  static int
> -netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet **packets,
> -                     int *c)
> +netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch
> *batch)
>  {
>      struct netdev_rxq_dpdk *rx = netdev_rxq_dpdk_cast(rxq);
>      struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);
>      struct ingress_policer *policer =
> netdev_dpdk_get_ingress_policer(dev);
> +    struct dp_packet **packets = batch->packets;
> +    int *c = &batch->count;
>

extra variable


>      int nb_rx;
>      int dropped = 0;
>
> @@ -1373,11 +1376,13 @@ netdev_dpdk_vhost_update_tx_counters(struct
> netdev_stats *stats,
>
>  static void
>  __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
> -                         struct dp_packet **pkts, int cnt,
> +                         struct dp_packet_batch *batch,
>                           bool may_steal)
>  {
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>      struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
> +    struct dp_packet **pkts = batch->packets;
> +    int cnt = batch->count;
>

extra variable


>      struct rte_mbuf **cur_pkts = (struct rte_mbuf **) pkts;
>      unsigned int total_pkts = cnt;
>      unsigned int qos_pkts = cnt;
> @@ -1425,11 +1430,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int
> qid,
>
>  out:
>      if (may_steal) {
> -        int i;
> -
> -        for (i = 0; i < total_pkts; i++) {
> -            dp_packet_delete(pkts[i]);
> -        }
> +        dp_packet_delete_batch(batch, may_steal);
>      }
>

As discussed offline the if becomes unnecessary.  Maybe it's more clear if
we remove it?


>  }
>
> @@ -1464,18 +1465,19 @@ dpdk_queue_pkts(struct netdev_dpdk *dev, int qid,
>
>  /* Tx function. Transmit packets indefinitely */
>  static void
> -dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts,
> -                int cnt)
> -    OVS_NO_THREAD_SAFETY_ANALYSIS
> +dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch
> *batch)
> +OVS_NO_THREAD_SAFETY_ANALYSIS
>  {
>  #if !defined(__CHECKER__) && !defined(_WIN32)
> -    const size_t PKT_ARRAY_SIZE = cnt;
> +    const size_t PKT_ARRAY_SIZE = batch->count;
>  #else
>      /* Sparse or MSVC doesn't like variable length array. */
>      enum { PKT_ARRAY_SIZE = NETDEV_MAX_BURST };
>  #endif
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>      struct rte_mbuf *mbufs[PKT_ARRAY_SIZE];
> +    struct dp_packet **pkts = batch->packets;
> +    int cnt = batch->count;
>

extra variables


>      int dropped = 0;
>      int newcnt = 0;
>      int i;
> @@ -1519,7 +1521,12 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid,
> struct dp_packet **pkts,
>      }
>
>      if (dev->type == DPDK_DEV_VHOST) {
> -        __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **)
> mbufs, newcnt, true);
> +        struct dp_packet_batch mbatch;
> +
> +        dp_packet_batch_init(&mbatch);
> +        mbatch.count = newcnt;
> +        memcpy(mbatch.packets, mbufs, newcnt * sizeof(struct rte_mbuf *));
> +        __netdev_dpdk_vhost_send(netdev, qid, &mbatch, true);
>

Even though the performance here doesn't really matter tha much (we're on
dpdk_do_tx_copy,
meaning that the packet is coming from a non DPDK source or that we have
multiple outputs),
I'd still like to avoid the memcpy().

Maybe we can keep the __netdev_dpdk_vhost_send signature as it is (keep the
packet and count
parameter separate) to avoid it?


>      } else {
>          unsigned int qos_pkts = newcnt;
>
> @@ -1543,37 +1550,30 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid,
> struct dp_packet **pkts,
>  }
>
>  static int
> -netdev_dpdk_vhost_send(struct netdev *netdev, int qid, struct dp_packet
> **pkts,
> -                 int cnt, bool may_steal)
> +netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
> +                       struct dp_packet_batch *batch,
> +                       bool may_steal)
>  {
> -    if (OVS_UNLIKELY(pkts[0]->source != DPBUF_DPDK)) {
> -        int i;
>
> -        dpdk_do_tx_copy(netdev, qid, pkts, cnt);
> +    if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
> +        dpdk_do_tx_copy(netdev, qid, batch);
>          if (may_steal) {
> -            for (i = 0; i < cnt; i++) {
> -                dp_packet_delete(pkts[i]);
> -            }
> +            dp_packet_delete_batch(batch, may_steal);
>          }
>      } else {
> -        int i;
> -
> -        for (i = 0; i < cnt; i++) {
> -            int cutlen = dp_packet_get_cutlen(pkts[i]);
> -
> -            dp_packet_set_size(pkts[i], dp_packet_size(pkts[i]) - cutlen);
> -            dp_packet_reset_cutlen(pkts[i]);
> -        }
> -        __netdev_dpdk_vhost_send(netdev, qid, pkts, cnt, may_steal);
> +        dp_packet_batch_apply_cutlen(batch);
> +        __netdev_dpdk_vhost_send(netdev, qid, batch, may_steal);
>      }
>      return 0;
>  }
>
>  static inline void
>  netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
> -                   struct dp_packet **pkts, int cnt, bool may_steal)
> +                   struct dp_packet_batch *batch, bool may_steal)
>  {
>      int i;
> +    struct dp_packet **pkts = batch->packets;
> +    int cnt = batch->count;
>

extra variables


>
>      if (OVS_UNLIKELY(dev->txq_needs_locking)) {
>          qid = qid % dev->real_n_txq;
> @@ -1584,12 +1584,10 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int
> qid,
>                       pkts[0]->source != DPBUF_DPDK)) {
>          struct netdev *netdev = &dev->up;
>
> -        dpdk_do_tx_copy(netdev, qid, pkts, cnt);
> +        dpdk_do_tx_copy(netdev, qid, batch);
>
>          if (may_steal) {
> -            for (i = 0; i < cnt; i++) {
> -                dp_packet_delete(pkts[i]);
> -            }
> +            dp_packet_delete_batch(batch, may_steal);
>          }
>      } else {
>          int next_tx_idx = 0;
> @@ -1649,11 +1647,11 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int
> qid,
>
>  static int
>  netdev_dpdk_eth_send(struct netdev *netdev, int qid,
> -                     struct dp_packet **pkts, int cnt, bool may_steal)
> +                     struct dp_packet_batch *batch, bool may_steal)
>  {
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>
> -    netdev_dpdk_send__(dev, qid, pkts, cnt, may_steal);
> +    netdev_dpdk_send__(dev, qid, batch, may_steal);
>      return 0;
>  }
>
> @@ -2648,20 +2646,21 @@ dpdk_ring_open(const char dev_name[], unsigned int
> *eth_port_id) OVS_REQUIRES(dp
>
>  static int
>  netdev_dpdk_ring_send(struct netdev *netdev, int qid,
> -                      struct dp_packet **pkts, int cnt, bool may_steal)
> +                      struct dp_packet_batch *batch, bool may_steal)
>  {
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> +    struct dp_packet **pkts = batch->packets;
>

extra variable


>      unsigned i;
>
>      /* When using 'dpdkr' and sending to a DPDK ring, we want to ensure
> that the
>       * rss hash field is clear. This is because the same mbuf may be
> modified by
>       * the consumer of the ring and return into the datapath without
> recalculating
>       * the RSS hash. */
> -    for (i = 0; i < cnt; i++) {
> +    for (i = 0; i < batch->count; i++) {
>          dp_packet_rss_invalidate(pkts[i]);
>      }
>
> -    netdev_dpdk_send__(dev, qid, pkts, cnt, may_steal);
> +    netdev_dpdk_send__(dev, qid, batch, may_steal);
>      return 0;
>  }
>
> diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
> index 24c107e..6b1d3a3 100644
> --- a/lib/netdev-dummy.c
> +++ b/lib/netdev-dummy.c
> @@ -964,12 +964,13 @@ netdev_dummy_rxq_dealloc(struct netdev_rxq *rxq_)
>  }
>
>  static int
> -netdev_dummy_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet **arr,
> -                      int *c)
> +netdev_dummy_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch
> *batch)
>  {
>      struct netdev_rxq_dummy *rx = netdev_rxq_dummy_cast(rxq_);
>      struct netdev_dummy *netdev = netdev_dummy_cast(rx->up.netdev);
>      struct dp_packet *packet;
> +    struct dp_packet **arr = batch->packets;
> +    int *c = &batch->count;
>

extra variables


>
>      ovs_mutex_lock(&netdev->mutex);
>      if (!ovs_list_is_empty(&rx->recv_queue)) {
> @@ -1047,10 +1048,12 @@ netdev_dummy_rxq_drain(struct netdev_rxq *rxq_)
>
>  static int
>  netdev_dummy_send(struct netdev *netdev, int qid OVS_UNUSED,
> -                  struct dp_packet **pkts, int cnt, bool may_steal)
> +                  struct dp_packet_batch *batch, bool may_steal)
>  {
>      struct netdev_dummy *dev = netdev_dummy_cast(netdev);
>      int error = 0;
> +    int cnt = batch->count;
> +    struct dp_packet **pkts = batch->packets;
>

extra variables


>      int i;
>
>      for (i = 0; i < cnt; i++) {
> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> index ef11d12..594d437 100644
> --- a/lib/netdev-linux.c
> +++ b/lib/netdev-linux.c
> @@ -1091,12 +1091,13 @@ netdev_linux_rxq_recv_tap(int fd, struct dp_packet
> *buffer)
>  }
>
>  static int
> -netdev_linux_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet **packets,
> -                      int *c)
> +netdev_linux_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch
> *batch)
>  {
>      struct netdev_rxq_linux *rx = netdev_rxq_linux_cast(rxq_);
>      struct netdev *netdev = rx->up.netdev;
>      struct dp_packet *buffer;
> +    struct dp_packet **packets = batch->packets;
> +    int *c = &batch->count;
>

extra variables


>      ssize_t retval;
>      int mtu;
>
> @@ -1161,10 +1162,12 @@ netdev_linux_rxq_drain(struct netdev_rxq *rxq_)
>   * expected to do additional queuing of packets. */
>  static int
>  netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED,
> -                  struct dp_packet **pkts, int cnt, bool may_steal)
> +                  struct dp_packet_batch *batch, bool may_steal)
>  {
>      int i;
>      int error = 0;
> +    int cnt = batch->count;
> +    struct dp_packet **pkts = batch->packets;
>

extra variables


>
>      /* 'i' is incremented only if there's no error */
>      for (i = 0; i < cnt;) {
> diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
> index 5da377f..8c7c8ea 100644
> --- a/lib/netdev-provider.h
> +++ b/lib/netdev-provider.h
> @@ -340,8 +340,8 @@ struct netdev_class {
>       * network device from being usefully used by the netdev-based
> "userspace
>       * datapath".  It will also prevent the OVS implementation of bonding
> from
>       * working properly over 'netdev'.) */
> -    int (*send)(struct netdev *netdev, int qid, struct dp_packet
> **buffers,
> -                int cnt, bool may_steal);
> +    int (*send)(struct netdev *netdev, int qid, struct dp_packet_batch
> *batch,
> +                bool may_steal);
>
>      /* Registers with the poll loop to wake up from the next call to
>       * poll_block() when the packet transmission queue for 'netdev' has
> @@ -727,25 +727,24 @@ struct netdev_class {
>      void (*rxq_destruct)(struct netdev_rxq *);
>      void (*rxq_dealloc)(struct netdev_rxq *);
>
> -    /* Attempts to receive a batch of packets from 'rx'.  The caller
> supplies
> -     * 'pkts' as the pointer to the beginning of an array of MAX_RX_BATCH
> -     * pointers to dp_packet.  If successful, the implementation stores
> -     * pointers to up to MAX_RX_BATCH dp_packets into the array,
> transferring
> -     * ownership of the packets to the caller, stores the number of
> received
> -     * packets into '*cnt', and returns 0.
> +    /* Attempts to receive a batch of packets from 'batch'.  In batch, the
>

"packets from 'rx'."


> +     * caller supplies 'packets' as the pointer to the beginning of an
> array
> +     * of MAX_RX_BATCH pointers to dp_packet.  If successful, the
> +     * implementation stores pointers to up to MAX_RX_BATCH dp_packets
> into
> +     * the array, transferring ownership of the packets to the caller,
> stores
> +     * the number of received packets into 'count', and returns 0.
>       *
>       * The implementation does not necessarily initialize any non-data
> members
> -     * of 'pkts'.  That is, the caller must initialize layer pointers and
> -     * metadata itself, if desired, e.g. with pkt_metadata_init() and
> -     * miniflow_extract().
> +     * of 'packets' in 'batch'.  That is, the caller must initialize layer
> +     * pointers and metadata itself, if desired, e.g. with
> pkt_metadata_init()
> +     * and miniflow_extract().
>       *
>       * Implementations should allocate buffers with DP_NETDEV_HEADROOM
> bytes of
>       * headroom.
>       *
>       * Returns EAGAIN immediately if no packet is ready to be received or
>       * another positive errno value if an error was encountered. */
> -    int (*rxq_recv)(struct netdev_rxq *rx, struct dp_packet **pkts,
> -                    int *cnt);
> +    int (*rxq_recv)(struct netdev_rxq *rx, struct dp_packet_batch *batch);
>
>      /* Registers with the poll loop to wake up from the next call to
>       * poll_block() when a packet is ready to be received with
> diff --git a/lib/netdev.c b/lib/netdev.c
> index 6651173..405bf41 100644
> --- a/lib/netdev.c
> +++ b/lib/netdev.c
> @@ -625,7 +625,7 @@ netdev_rxq_recv(struct netdev_rxq *rx, struct
> dp_packet_batch *batch)
>  {
>      int retval;
>
> -    retval = rx->netdev->netdev_class->rxq_recv(rx, batch->packets,
> &batch->count);
> +    retval = rx->netdev->netdev_class->rxq_recv(rx,  batch);
>      if (!retval) {
>          COVERAGE_INC(netdev_received);
>      } else {
> @@ -711,9 +711,7 @@ netdev_send(struct netdev *netdev, int qid, struct
> dp_packet_batch *batch,
>          return EOPNOTSUPP;
>      }
>
> -    int error = netdev->netdev_class->send(netdev, qid,
> -                                           batch->packets, batch->count,
> -                                           may_steal);
> +    int error = netdev->netdev_class->send(netdev, qid, batch, may_steal);
>      if (!error) {
>          COVERAGE_INC(netdev_sent);
>          if (!may_steal) {
> --
> 2.5.0
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
>
William Tu July 19, 2016, 12:06 a.m. UTC | #3
Thanks for reviewing the patch. I've updated patchv4 at
https://patchwork.ozlabs.org/patch/649854/

At netdev_dpdk_send__(), we do need to introduce extra variables,
other than that, I've removed the rest of extra variables.

Regards,
William

On Thu, Jul 14, 2016 at 5:34 PM, Daniele Di Proietto
<diproiettod@ovn.org> wrote:
> Hi William,
>
> Thanks for the patch, it makes sense to me!
>
> In general I think it would be better to avoid introducing extra local
> variables like 'pkts' and 'c': I think it would be more readable and the
> compiler might not always be able to optimize them (I checked the assebly
> output in a couple of functions).
>
> Few comments inline, otherwise this looks good to me
>
> 2016-06-29 13:53 GMT-07:00 William Tu <u9012063@gmail.com>:
>>
>> Commit 1895cc8dbb64 ("dpif-netdev: create batch object") introduces
>> batch process functions and 'struct dp_packet_batch' to associate with
>> batch-level metadata.  This patch applies the packet batch object to
>> the netdev provider interface (dummy, Linux, BSD, and DPDK) so that
>> batch APIs can be used in providers.  With batch metadata visible in
>> providers, optimizations can be introduced at per-batch level instead
>> of per-packet.
>>
>> Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/141178364
>> Signed-off-by: William Tu <u9012063@gmail.com>
>> --
>> v2->v3:
>> - fix freebsd build issue
>> v1->v2:
>> - more descriptive commit message
>> ---
>>  lib/netdev-bsd.c      |  9 ++++--
>>  lib/netdev-dpdk.c     | 81
>> +++++++++++++++++++++++++--------------------------
>>  lib/netdev-dummy.c    |  9 ++++--
>>  lib/netdev-linux.c    |  9 ++++--
>>  lib/netdev-provider.h | 25 ++++++++--------
>>  lib/netdev.c          |  6 ++--
>>  6 files changed, 72 insertions(+), 67 deletions(-)
>>
>> diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c
>> index 2e92d97..6ab5048 100644
>> --- a/lib/netdev-bsd.c
>> +++ b/lib/netdev-bsd.c
>> @@ -618,12 +618,13 @@ netdev_rxq_bsd_recv_tap(struct netdev_rxq_bsd *rxq,
>> struct dp_packet *buffer)
>>  }
>>
>>  static int
>> -netdev_bsd_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet **packets,
>> -                    int *c)
>> +netdev_bsd_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch
>> *batch)
>>  {
>>      struct netdev_rxq_bsd *rxq = netdev_rxq_bsd_cast(rxq_);
>>      struct netdev *netdev = rxq->up.netdev;
>>      struct dp_packet *packet;
>> +    struct dp_packet **packets = batch->packets;
>> +    int *c = &batch->count;
>
>
> extra variable
>
>>
>>      ssize_t retval;
>>      int mtu;
>>
>> @@ -681,10 +682,12 @@ netdev_bsd_rxq_drain(struct netdev_rxq *rxq_)
>>   */
>>  static int
>>  netdev_bsd_send(struct netdev *netdev_, int qid OVS_UNUSED,
>> -                struct dp_packet **pkts, int cnt, bool may_steal)
>> +                struct dp_packet_batch *batch, bool may_steal)
>>  {
>>      struct netdev_bsd *dev = netdev_bsd_cast(netdev_);
>>      const char *name = netdev_get_name(netdev_);
>> +    int cnt = batch->count;
>> +    struct dp_packet **pkts = batch->packets;
>
>
> extra variable
>
>>
>>      int error;
>>      int i;
>>
>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>> index 02e2c58..80ce233 100644
>> --- a/lib/netdev-dpdk.c
>> +++ b/lib/netdev-dpdk.c
>> @@ -1253,12 +1253,14 @@ netdev_dpdk_vhost_update_rx_counters(struct
>> netdev_stats *stats,
>>   */
>>  static int
>>  netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
>> -                           struct dp_packet **packets, int *c)
>> +                           struct dp_packet_batch *batch)
>>  {
>>      struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);
>>      struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
>>      int qid = rxq->queue_id;
>>      struct ingress_policer *policer =
>> netdev_dpdk_get_ingress_policer(dev);
>> +    struct dp_packet **packets = batch->packets;
>> +    int *c = &batch->count;
>
>
> extra variable
>
>>
>>      uint16_t nb_rx = 0;
>>      uint16_t dropped = 0;
>>
>> @@ -1294,12 +1296,13 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
>>  }
>>
>>  static int
>> -netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet **packets,
>> -                     int *c)
>> +netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch
>> *batch)
>>  {
>>      struct netdev_rxq_dpdk *rx = netdev_rxq_dpdk_cast(rxq);
>>      struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);
>>      struct ingress_policer *policer =
>> netdev_dpdk_get_ingress_policer(dev);
>> +    struct dp_packet **packets = batch->packets;
>> +    int *c = &batch->count;
>
>
> extra variable
>
>>
>>      int nb_rx;
>>      int dropped = 0;
>>
>> @@ -1373,11 +1376,13 @@ netdev_dpdk_vhost_update_tx_counters(struct
>> netdev_stats *stats,
>>
>>  static void
>>  __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
>> -                         struct dp_packet **pkts, int cnt,
>> +                         struct dp_packet_batch *batch,
>>                           bool may_steal)
>>  {
>>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>>      struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
>> +    struct dp_packet **pkts = batch->packets;
>> +    int cnt = batch->count;
>
>
> extra variable
>
>>
>>      struct rte_mbuf **cur_pkts = (struct rte_mbuf **) pkts;
>>      unsigned int total_pkts = cnt;
>>      unsigned int qos_pkts = cnt;
>> @@ -1425,11 +1430,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int
>> qid,
>>
>>  out:
>>      if (may_steal) {
>> -        int i;
>> -
>> -        for (i = 0; i < total_pkts; i++) {
>> -            dp_packet_delete(pkts[i]);
>> -        }
>> +        dp_packet_delete_batch(batch, may_steal);
>>      }
>
>
> As discussed offline the if becomes unnecessary.  Maybe it's more clear if
> we remove it?
>
>>
>>  }
>>
>> @@ -1464,18 +1465,19 @@ dpdk_queue_pkts(struct netdev_dpdk *dev, int qid,
>>
>>  /* Tx function. Transmit packets indefinitely */
>>  static void
>> -dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts,
>> -                int cnt)
>> -    OVS_NO_THREAD_SAFETY_ANALYSIS
>> +dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch
>> *batch)
>> +OVS_NO_THREAD_SAFETY_ANALYSIS
>>  {
>>  #if !defined(__CHECKER__) && !defined(_WIN32)
>> -    const size_t PKT_ARRAY_SIZE = cnt;
>> +    const size_t PKT_ARRAY_SIZE = batch->count;
>>  #else
>>      /* Sparse or MSVC doesn't like variable length array. */
>>      enum { PKT_ARRAY_SIZE = NETDEV_MAX_BURST };
>>  #endif
>>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>>      struct rte_mbuf *mbufs[PKT_ARRAY_SIZE];
>> +    struct dp_packet **pkts = batch->packets;
>> +    int cnt = batch->count;
>
>
> extra variables
>
>>
>>      int dropped = 0;
>>      int newcnt = 0;
>>      int i;
>> @@ -1519,7 +1521,12 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid,
>> struct dp_packet **pkts,
>>      }
>>
>>      if (dev->type == DPDK_DEV_VHOST) {
>> -        __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **)
>> mbufs, newcnt, true);
>> +        struct dp_packet_batch mbatch;
>> +
>> +        dp_packet_batch_init(&mbatch);
>> +        mbatch.count = newcnt;
>> +        memcpy(mbatch.packets, mbufs, newcnt * sizeof(struct rte_mbuf
>> *));
>> +        __netdev_dpdk_vhost_send(netdev, qid, &mbatch, true);
>
>
> Even though the performance here doesn't really matter tha much (we're on
> dpdk_do_tx_copy,
> meaning that the packet is coming from a non DPDK source or that we have
> multiple outputs),
> I'd still like to avoid the memcpy().
>
> Maybe we can keep the __netdev_dpdk_vhost_send signature as it is (keep the
> packet and count
> parameter separate) to avoid it?
>
>>
>>      } else {
>>          unsigned int qos_pkts = newcnt;
>>
>> @@ -1543,37 +1550,30 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid,
>> struct dp_packet **pkts,
>>  }
>>
>>  static int
>> -netdev_dpdk_vhost_send(struct netdev *netdev, int qid, struct dp_packet
>> **pkts,
>> -                 int cnt, bool may_steal)
>> +netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
>> +                       struct dp_packet_batch *batch,
>> +                       bool may_steal)
>>  {
>> -    if (OVS_UNLIKELY(pkts[0]->source != DPBUF_DPDK)) {
>> -        int i;
>>
>> -        dpdk_do_tx_copy(netdev, qid, pkts, cnt);
>> +    if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
>> +        dpdk_do_tx_copy(netdev, qid, batch);
>>          if (may_steal) {
>> -            for (i = 0; i < cnt; i++) {
>> -                dp_packet_delete(pkts[i]);
>> -            }
>> +            dp_packet_delete_batch(batch, may_steal);
>>          }
>>      } else {
>> -        int i;
>> -
>> -        for (i = 0; i < cnt; i++) {
>> -            int cutlen = dp_packet_get_cutlen(pkts[i]);
>> -
>> -            dp_packet_set_size(pkts[i], dp_packet_size(pkts[i]) -
>> cutlen);
>> -            dp_packet_reset_cutlen(pkts[i]);
>> -        }
>> -        __netdev_dpdk_vhost_send(netdev, qid, pkts, cnt, may_steal);
>> +        dp_packet_batch_apply_cutlen(batch);
>> +        __netdev_dpdk_vhost_send(netdev, qid, batch, may_steal);
>>      }
>>      return 0;
>>  }
>>
>>  static inline void
>>  netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
>> -                   struct dp_packet **pkts, int cnt, bool may_steal)
>> +                   struct dp_packet_batch *batch, bool may_steal)
>>  {
>>      int i;
>> +    struct dp_packet **pkts = batch->packets;
>> +    int cnt = batch->count;
>
>
> extra variables
>
>>
>>
>>      if (OVS_UNLIKELY(dev->txq_needs_locking)) {
>>          qid = qid % dev->real_n_txq;
>> @@ -1584,12 +1584,10 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int
>> qid,
>>                       pkts[0]->source != DPBUF_DPDK)) {
>>          struct netdev *netdev = &dev->up;
>>
>> -        dpdk_do_tx_copy(netdev, qid, pkts, cnt);
>> +        dpdk_do_tx_copy(netdev, qid, batch);
>>
>>          if (may_steal) {
>> -            for (i = 0; i < cnt; i++) {
>> -                dp_packet_delete(pkts[i]);
>> -            }
>> +            dp_packet_delete_batch(batch, may_steal);
>>          }
>>      } else {
>>          int next_tx_idx = 0;
>> @@ -1649,11 +1647,11 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int
>> qid,
>>
>>  static int
>>  netdev_dpdk_eth_send(struct netdev *netdev, int qid,
>> -                     struct dp_packet **pkts, int cnt, bool may_steal)
>> +                     struct dp_packet_batch *batch, bool may_steal)
>>  {
>>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>>
>> -    netdev_dpdk_send__(dev, qid, pkts, cnt, may_steal);
>> +    netdev_dpdk_send__(dev, qid, batch, may_steal);
>>      return 0;
>>  }
>>
>> @@ -2648,20 +2646,21 @@ dpdk_ring_open(const char dev_name[], unsigned int
>> *eth_port_id) OVS_REQUIRES(dp
>>
>>  static int
>>  netdev_dpdk_ring_send(struct netdev *netdev, int qid,
>> -                      struct dp_packet **pkts, int cnt, bool may_steal)
>> +                      struct dp_packet_batch *batch, bool may_steal)
>>  {
>>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>> +    struct dp_packet **pkts = batch->packets;
>
>
> extra variable
>
>>
>>      unsigned i;
>>
>>      /* When using 'dpdkr' and sending to a DPDK ring, we want to ensure
>> that the
>>       * rss hash field is clear. This is because the same mbuf may be
>> modified by
>>       * the consumer of the ring and return into the datapath without
>> recalculating
>>       * the RSS hash. */
>> -    for (i = 0; i < cnt; i++) {
>> +    for (i = 0; i < batch->count; i++) {
>>          dp_packet_rss_invalidate(pkts[i]);
>>      }
>>
>> -    netdev_dpdk_send__(dev, qid, pkts, cnt, may_steal);
>> +    netdev_dpdk_send__(dev, qid, batch, may_steal);
>>      return 0;
>>  }
>>
>> diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
>> index 24c107e..6b1d3a3 100644
>> --- a/lib/netdev-dummy.c
>> +++ b/lib/netdev-dummy.c
>> @@ -964,12 +964,13 @@ netdev_dummy_rxq_dealloc(struct netdev_rxq *rxq_)
>>  }
>>
>>  static int
>> -netdev_dummy_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet **arr,
>> -                      int *c)
>> +netdev_dummy_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch
>> *batch)
>>  {
>>      struct netdev_rxq_dummy *rx = netdev_rxq_dummy_cast(rxq_);
>>      struct netdev_dummy *netdev = netdev_dummy_cast(rx->up.netdev);
>>      struct dp_packet *packet;
>> +    struct dp_packet **arr = batch->packets;
>> +    int *c = &batch->count;
>
>
> extra variables
>
>>
>>
>>      ovs_mutex_lock(&netdev->mutex);
>>      if (!ovs_list_is_empty(&rx->recv_queue)) {
>> @@ -1047,10 +1048,12 @@ netdev_dummy_rxq_drain(struct netdev_rxq *rxq_)
>>
>>  static int
>>  netdev_dummy_send(struct netdev *netdev, int qid OVS_UNUSED,
>> -                  struct dp_packet **pkts, int cnt, bool may_steal)
>> +                  struct dp_packet_batch *batch, bool may_steal)
>>  {
>>      struct netdev_dummy *dev = netdev_dummy_cast(netdev);
>>      int error = 0;
>> +    int cnt = batch->count;
>> +    struct dp_packet **pkts = batch->packets;
>
>
> extra variables
>
>>
>>      int i;
>>
>>      for (i = 0; i < cnt; i++) {
>> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
>> index ef11d12..594d437 100644
>> --- a/lib/netdev-linux.c
>> +++ b/lib/netdev-linux.c
>> @@ -1091,12 +1091,13 @@ netdev_linux_rxq_recv_tap(int fd, struct dp_packet
>> *buffer)
>>  }
>>
>>  static int
>> -netdev_linux_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet
>> **packets,
>> -                      int *c)
>> +netdev_linux_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch
>> *batch)
>>  {
>>      struct netdev_rxq_linux *rx = netdev_rxq_linux_cast(rxq_);
>>      struct netdev *netdev = rx->up.netdev;
>>      struct dp_packet *buffer;
>> +    struct dp_packet **packets = batch->packets;
>> +    int *c = &batch->count;
>
>
> extra variables
>
>>
>>      ssize_t retval;
>>      int mtu;
>>
>> @@ -1161,10 +1162,12 @@ netdev_linux_rxq_drain(struct netdev_rxq *rxq_)
>>   * expected to do additional queuing of packets. */
>>  static int
>>  netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED,
>> -                  struct dp_packet **pkts, int cnt, bool may_steal)
>> +                  struct dp_packet_batch *batch, bool may_steal)
>>  {
>>      int i;
>>      int error = 0;
>> +    int cnt = batch->count;
>> +    struct dp_packet **pkts = batch->packets;
>
>
> extra variables
>
>>
>>
>>      /* 'i' is incremented only if there's no error */
>>      for (i = 0; i < cnt;) {
>> diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
>> index 5da377f..8c7c8ea 100644
>> --- a/lib/netdev-provider.h
>> +++ b/lib/netdev-provider.h
>> @@ -340,8 +340,8 @@ struct netdev_class {
>>       * network device from being usefully used by the netdev-based
>> "userspace
>>       * datapath".  It will also prevent the OVS implementation of bonding
>> from
>>       * working properly over 'netdev'.) */
>> -    int (*send)(struct netdev *netdev, int qid, struct dp_packet
>> **buffers,
>> -                int cnt, bool may_steal);
>> +    int (*send)(struct netdev *netdev, int qid, struct dp_packet_batch
>> *batch,
>> +                bool may_steal);
>>
>>      /* Registers with the poll loop to wake up from the next call to
>>       * poll_block() when the packet transmission queue for 'netdev' has
>> @@ -727,25 +727,24 @@ struct netdev_class {
>>      void (*rxq_destruct)(struct netdev_rxq *);
>>      void (*rxq_dealloc)(struct netdev_rxq *);
>>
>> -    /* Attempts to receive a batch of packets from 'rx'.  The caller
>> supplies
>> -     * 'pkts' as the pointer to the beginning of an array of MAX_RX_BATCH
>> -     * pointers to dp_packet.  If successful, the implementation stores
>> -     * pointers to up to MAX_RX_BATCH dp_packets into the array,
>> transferring
>> -     * ownership of the packets to the caller, stores the number of
>> received
>> -     * packets into '*cnt', and returns 0.
>> +    /* Attempts to receive a batch of packets from 'batch'.  In batch,
>> the
>
>
> "packets from 'rx'."
>
>>
>> +     * caller supplies 'packets' as the pointer to the beginning of an
>> array
>> +     * of MAX_RX_BATCH pointers to dp_packet.  If successful, the
>> +     * implementation stores pointers to up to MAX_RX_BATCH dp_packets
>> into
>> +     * the array, transferring ownership of the packets to the caller,
>> stores
>> +     * the number of received packets into 'count', and returns 0.
>>       *
>>       * The implementation does not necessarily initialize any non-data
>> members
>> -     * of 'pkts'.  That is, the caller must initialize layer pointers and
>> -     * metadata itself, if desired, e.g. with pkt_metadata_init() and
>> -     * miniflow_extract().
>> +     * of 'packets' in 'batch'.  That is, the caller must initialize
>> layer
>> +     * pointers and metadata itself, if desired, e.g. with
>> pkt_metadata_init()
>> +     * and miniflow_extract().
>>       *
>>       * Implementations should allocate buffers with DP_NETDEV_HEADROOM
>> bytes of
>>       * headroom.
>>       *
>>       * Returns EAGAIN immediately if no packet is ready to be received or
>>       * another positive errno value if an error was encountered. */
>> -    int (*rxq_recv)(struct netdev_rxq *rx, struct dp_packet **pkts,
>> -                    int *cnt);
>> +    int (*rxq_recv)(struct netdev_rxq *rx, struct dp_packet_batch
>> *batch);
>>
>>      /* Registers with the poll loop to wake up from the next call to
>>       * poll_block() when a packet is ready to be received with
>> diff --git a/lib/netdev.c b/lib/netdev.c
>> index 6651173..405bf41 100644
>> --- a/lib/netdev.c
>> +++ b/lib/netdev.c
>> @@ -625,7 +625,7 @@ netdev_rxq_recv(struct netdev_rxq *rx, struct
>> dp_packet_batch *batch)
>>  {
>>      int retval;
>>
>> -    retval = rx->netdev->netdev_class->rxq_recv(rx, batch->packets,
>> &batch->count);
>> +    retval = rx->netdev->netdev_class->rxq_recv(rx,  batch);
>>      if (!retval) {
>>          COVERAGE_INC(netdev_received);
>>      } else {
>> @@ -711,9 +711,7 @@ netdev_send(struct netdev *netdev, int qid, struct
>> dp_packet_batch *batch,
>>          return EOPNOTSUPP;
>>      }
>>
>> -    int error = netdev->netdev_class->send(netdev, qid,
>> -                                           batch->packets, batch->count,
>> -                                           may_steal);
>> +    int error = netdev->netdev_class->send(netdev, qid, batch,
>> may_steal);
>>      if (!error) {
>>          COVERAGE_INC(netdev_sent);
>>          if (!may_steal) {
>> --
>> 2.5.0
>>
>> _______________________________________________
>> dev mailing list
>> dev@openvswitch.org
>> http://openvswitch.org/mailman/listinfo/dev
>
>
diff mbox

Patch

diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c
index 2e92d97..6ab5048 100644
--- a/lib/netdev-bsd.c
+++ b/lib/netdev-bsd.c
@@ -618,12 +618,13 @@  netdev_rxq_bsd_recv_tap(struct netdev_rxq_bsd *rxq, struct dp_packet *buffer)
 }
 
 static int
-netdev_bsd_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet **packets,
-                    int *c)
+netdev_bsd_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch)
 {
     struct netdev_rxq_bsd *rxq = netdev_rxq_bsd_cast(rxq_);
     struct netdev *netdev = rxq->up.netdev;
     struct dp_packet *packet;
+    struct dp_packet **packets = batch->packets;
+    int *c = &batch->count;
     ssize_t retval;
     int mtu;
 
@@ -681,10 +682,12 @@  netdev_bsd_rxq_drain(struct netdev_rxq *rxq_)
  */
 static int
 netdev_bsd_send(struct netdev *netdev_, int qid OVS_UNUSED,
-                struct dp_packet **pkts, int cnt, bool may_steal)
+                struct dp_packet_batch *batch, bool may_steal)
 {
     struct netdev_bsd *dev = netdev_bsd_cast(netdev_);
     const char *name = netdev_get_name(netdev_);
+    int cnt = batch->count;
+    struct dp_packet **pkts = batch->packets;
     int error;
     int i;
 
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 02e2c58..80ce233 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1253,12 +1253,14 @@  netdev_dpdk_vhost_update_rx_counters(struct netdev_stats *stats,
  */
 static int
 netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
-                           struct dp_packet **packets, int *c)
+                           struct dp_packet_batch *batch)
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);
     struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
     int qid = rxq->queue_id;
     struct ingress_policer *policer = netdev_dpdk_get_ingress_policer(dev);
+    struct dp_packet **packets = batch->packets;
+    int *c = &batch->count;
     uint16_t nb_rx = 0;
     uint16_t dropped = 0;
 
@@ -1294,12 +1296,13 @@  netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
 }
 
 static int
-netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet **packets,
-                     int *c)
+netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch *batch)
 {
     struct netdev_rxq_dpdk *rx = netdev_rxq_dpdk_cast(rxq);
     struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);
     struct ingress_policer *policer = netdev_dpdk_get_ingress_policer(dev);
+    struct dp_packet **packets = batch->packets;
+    int *c = &batch->count;
     int nb_rx;
     int dropped = 0;
 
@@ -1373,11 +1376,13 @@  netdev_dpdk_vhost_update_tx_counters(struct netdev_stats *stats,
 
 static void
 __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
-                         struct dp_packet **pkts, int cnt,
+                         struct dp_packet_batch *batch,
                          bool may_steal)
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
     struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
+    struct dp_packet **pkts = batch->packets;
+    int cnt = batch->count;
     struct rte_mbuf **cur_pkts = (struct rte_mbuf **) pkts;
     unsigned int total_pkts = cnt;
     unsigned int qos_pkts = cnt;
@@ -1425,11 +1430,7 @@  __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
 
 out:
     if (may_steal) {
-        int i;
-
-        for (i = 0; i < total_pkts; i++) {
-            dp_packet_delete(pkts[i]);
-        }
+        dp_packet_delete_batch(batch, may_steal);
     }
 }
 
@@ -1464,18 +1465,19 @@  dpdk_queue_pkts(struct netdev_dpdk *dev, int qid,
 
 /* Tx function. Transmit packets indefinitely */
 static void
-dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts,
-                int cnt)
-    OVS_NO_THREAD_SAFETY_ANALYSIS
+dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
+OVS_NO_THREAD_SAFETY_ANALYSIS
 {
 #if !defined(__CHECKER__) && !defined(_WIN32)
-    const size_t PKT_ARRAY_SIZE = cnt;
+    const size_t PKT_ARRAY_SIZE = batch->count;
 #else
     /* Sparse or MSVC doesn't like variable length array. */
     enum { PKT_ARRAY_SIZE = NETDEV_MAX_BURST };
 #endif
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
     struct rte_mbuf *mbufs[PKT_ARRAY_SIZE];
+    struct dp_packet **pkts = batch->packets;
+    int cnt = batch->count;
     int dropped = 0;
     int newcnt = 0;
     int i;
@@ -1519,7 +1521,12 @@  dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts,
     }
 
     if (dev->type == DPDK_DEV_VHOST) {
-        __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) mbufs, newcnt, true);
+        struct dp_packet_batch mbatch;
+
+        dp_packet_batch_init(&mbatch);
+        mbatch.count = newcnt;
+        memcpy(mbatch.packets, mbufs, newcnt * sizeof(struct rte_mbuf *));
+        __netdev_dpdk_vhost_send(netdev, qid, &mbatch, true);
     } else {
         unsigned int qos_pkts = newcnt;
 
@@ -1543,37 +1550,30 @@  dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts,
 }
 
 static int
-netdev_dpdk_vhost_send(struct netdev *netdev, int qid, struct dp_packet **pkts,
-                 int cnt, bool may_steal)
+netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
+                       struct dp_packet_batch *batch,
+                       bool may_steal)
 {
-    if (OVS_UNLIKELY(pkts[0]->source != DPBUF_DPDK)) {
-        int i;
 
-        dpdk_do_tx_copy(netdev, qid, pkts, cnt);
+    if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
+        dpdk_do_tx_copy(netdev, qid, batch);
         if (may_steal) {
-            for (i = 0; i < cnt; i++) {
-                dp_packet_delete(pkts[i]);
-            }
+            dp_packet_delete_batch(batch, may_steal);
         }
     } else {
-        int i;
-
-        for (i = 0; i < cnt; i++) {
-            int cutlen = dp_packet_get_cutlen(pkts[i]);
-
-            dp_packet_set_size(pkts[i], dp_packet_size(pkts[i]) - cutlen);
-            dp_packet_reset_cutlen(pkts[i]);
-        }
-        __netdev_dpdk_vhost_send(netdev, qid, pkts, cnt, may_steal);
+        dp_packet_batch_apply_cutlen(batch);
+        __netdev_dpdk_vhost_send(netdev, qid, batch, may_steal);
     }
     return 0;
 }
 
 static inline void
 netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
-                   struct dp_packet **pkts, int cnt, bool may_steal)
+                   struct dp_packet_batch *batch, bool may_steal)
 {
     int i;
+    struct dp_packet **pkts = batch->packets;
+    int cnt = batch->count;
 
     if (OVS_UNLIKELY(dev->txq_needs_locking)) {
         qid = qid % dev->real_n_txq;
@@ -1584,12 +1584,10 @@  netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
                      pkts[0]->source != DPBUF_DPDK)) {
         struct netdev *netdev = &dev->up;
 
-        dpdk_do_tx_copy(netdev, qid, pkts, cnt);
+        dpdk_do_tx_copy(netdev, qid, batch);
 
         if (may_steal) {
-            for (i = 0; i < cnt; i++) {
-                dp_packet_delete(pkts[i]);
-            }
+            dp_packet_delete_batch(batch, may_steal);
         }
     } else {
         int next_tx_idx = 0;
@@ -1649,11 +1647,11 @@  netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
 
 static int
 netdev_dpdk_eth_send(struct netdev *netdev, int qid,
-                     struct dp_packet **pkts, int cnt, bool may_steal)
+                     struct dp_packet_batch *batch, bool may_steal)
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
 
-    netdev_dpdk_send__(dev, qid, pkts, cnt, may_steal);
+    netdev_dpdk_send__(dev, qid, batch, may_steal);
     return 0;
 }
 
@@ -2648,20 +2646,21 @@  dpdk_ring_open(const char dev_name[], unsigned int *eth_port_id) OVS_REQUIRES(dp
 
 static int
 netdev_dpdk_ring_send(struct netdev *netdev, int qid,
-                      struct dp_packet **pkts, int cnt, bool may_steal)
+                      struct dp_packet_batch *batch, bool may_steal)
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+    struct dp_packet **pkts = batch->packets;
     unsigned i;
 
     /* When using 'dpdkr' and sending to a DPDK ring, we want to ensure that the
      * rss hash field is clear. This is because the same mbuf may be modified by
      * the consumer of the ring and return into the datapath without recalculating
      * the RSS hash. */
-    for (i = 0; i < cnt; i++) {
+    for (i = 0; i < batch->count; i++) {
         dp_packet_rss_invalidate(pkts[i]);
     }
 
-    netdev_dpdk_send__(dev, qid, pkts, cnt, may_steal);
+    netdev_dpdk_send__(dev, qid, batch, may_steal);
     return 0;
 }
 
diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
index 24c107e..6b1d3a3 100644
--- a/lib/netdev-dummy.c
+++ b/lib/netdev-dummy.c
@@ -964,12 +964,13 @@  netdev_dummy_rxq_dealloc(struct netdev_rxq *rxq_)
 }
 
 static int
-netdev_dummy_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet **arr,
-                      int *c)
+netdev_dummy_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch)
 {
     struct netdev_rxq_dummy *rx = netdev_rxq_dummy_cast(rxq_);
     struct netdev_dummy *netdev = netdev_dummy_cast(rx->up.netdev);
     struct dp_packet *packet;
+    struct dp_packet **arr = batch->packets;
+    int *c = &batch->count;
 
     ovs_mutex_lock(&netdev->mutex);
     if (!ovs_list_is_empty(&rx->recv_queue)) {
@@ -1047,10 +1048,12 @@  netdev_dummy_rxq_drain(struct netdev_rxq *rxq_)
 
 static int
 netdev_dummy_send(struct netdev *netdev, int qid OVS_UNUSED,
-                  struct dp_packet **pkts, int cnt, bool may_steal)
+                  struct dp_packet_batch *batch, bool may_steal)
 {
     struct netdev_dummy *dev = netdev_dummy_cast(netdev);
     int error = 0;
+    int cnt = batch->count;
+    struct dp_packet **pkts = batch->packets;
     int i;
 
     for (i = 0; i < cnt; i++) {
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index ef11d12..594d437 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -1091,12 +1091,13 @@  netdev_linux_rxq_recv_tap(int fd, struct dp_packet *buffer)
 }
 
 static int
-netdev_linux_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet **packets,
-                      int *c)
+netdev_linux_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch)
 {
     struct netdev_rxq_linux *rx = netdev_rxq_linux_cast(rxq_);
     struct netdev *netdev = rx->up.netdev;
     struct dp_packet *buffer;
+    struct dp_packet **packets = batch->packets;
+    int *c = &batch->count;
     ssize_t retval;
     int mtu;
 
@@ -1161,10 +1162,12 @@  netdev_linux_rxq_drain(struct netdev_rxq *rxq_)
  * expected to do additional queuing of packets. */
 static int
 netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED,
-                  struct dp_packet **pkts, int cnt, bool may_steal)
+                  struct dp_packet_batch *batch, bool may_steal)
 {
     int i;
     int error = 0;
+    int cnt = batch->count;
+    struct dp_packet **pkts = batch->packets;
 
     /* 'i' is incremented only if there's no error */
     for (i = 0; i < cnt;) {
diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
index 5da377f..8c7c8ea 100644
--- a/lib/netdev-provider.h
+++ b/lib/netdev-provider.h
@@ -340,8 +340,8 @@  struct netdev_class {
      * network device from being usefully used by the netdev-based "userspace
      * datapath".  It will also prevent the OVS implementation of bonding from
      * working properly over 'netdev'.) */
-    int (*send)(struct netdev *netdev, int qid, struct dp_packet **buffers,
-                int cnt, bool may_steal);
+    int (*send)(struct netdev *netdev, int qid, struct dp_packet_batch *batch,
+                bool may_steal);
 
     /* Registers with the poll loop to wake up from the next call to
      * poll_block() when the packet transmission queue for 'netdev' has
@@ -727,25 +727,24 @@  struct netdev_class {
     void (*rxq_destruct)(struct netdev_rxq *);
     void (*rxq_dealloc)(struct netdev_rxq *);
 
-    /* Attempts to receive a batch of packets from 'rx'.  The caller supplies
-     * 'pkts' as the pointer to the beginning of an array of MAX_RX_BATCH
-     * pointers to dp_packet.  If successful, the implementation stores
-     * pointers to up to MAX_RX_BATCH dp_packets into the array, transferring
-     * ownership of the packets to the caller, stores the number of received
-     * packets into '*cnt', and returns 0.
+    /* Attempts to receive a batch of packets from 'batch'.  In batch, the
+     * caller supplies 'packets' as the pointer to the beginning of an array
+     * of MAX_RX_BATCH pointers to dp_packet.  If successful, the
+     * implementation stores pointers to up to MAX_RX_BATCH dp_packets into
+     * the array, transferring ownership of the packets to the caller, stores
+     * the number of received packets into 'count', and returns 0.
      *
      * The implementation does not necessarily initialize any non-data members
-     * of 'pkts'.  That is, the caller must initialize layer pointers and
-     * metadata itself, if desired, e.g. with pkt_metadata_init() and
-     * miniflow_extract().
+     * of 'packets' in 'batch'.  That is, the caller must initialize layer
+     * pointers and metadata itself, if desired, e.g. with pkt_metadata_init()
+     * and miniflow_extract().
      *
      * Implementations should allocate buffers with DP_NETDEV_HEADROOM bytes of
      * headroom.
      *
      * Returns EAGAIN immediately if no packet is ready to be received or
      * another positive errno value if an error was encountered. */
-    int (*rxq_recv)(struct netdev_rxq *rx, struct dp_packet **pkts,
-                    int *cnt);
+    int (*rxq_recv)(struct netdev_rxq *rx, struct dp_packet_batch *batch);
 
     /* Registers with the poll loop to wake up from the next call to
      * poll_block() when a packet is ready to be received with
diff --git a/lib/netdev.c b/lib/netdev.c
index 6651173..405bf41 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -625,7 +625,7 @@  netdev_rxq_recv(struct netdev_rxq *rx, struct dp_packet_batch *batch)
 {
     int retval;
 
-    retval = rx->netdev->netdev_class->rxq_recv(rx, batch->packets, &batch->count);
+    retval = rx->netdev->netdev_class->rxq_recv(rx,  batch);
     if (!retval) {
         COVERAGE_INC(netdev_received);
     } else {
@@ -711,9 +711,7 @@  netdev_send(struct netdev *netdev, int qid, struct dp_packet_batch *batch,
         return EOPNOTSUPP;
     }
 
-    int error = netdev->netdev_class->send(netdev, qid,
-                                           batch->packets, batch->count,
-                                           may_steal);
+    int error = netdev->netdev_class->send(netdev, qid, batch, may_steal);
     if (!error) {
         COVERAGE_INC(netdev_sent);
         if (!may_steal) {