
[ovs-dev,PATCHv2] netdev-provider: Apply batch object to netdev provider.

Message ID 1466865867-8235-1-git-send-email-u9012063@gmail.com
State Superseded

Commit Message

William Tu June 25, 2016, 2:44 p.m. UTC
Commit 1895cc8dbb64 ("dpif-netdev: create batch object") introduces
batch process functions and 'struct dp_packet_batch' to associate with
batch-level metadata.  This patch applies the packet batch object to
the netdev provider interface (dummy, Linux, BSD, and DPDK) so that
batch APIs can be used in providers.  With batch metadata visible in
providers, optimizations can be introduced at per-batch level instead
of per-packet.

Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/140135888
Signed-off-by: William Tu <u9012063@gmail.com>
--
v1->v2: make commit message more descriptive.
---
 lib/netdev-bsd.c      |  9 ++++--
 lib/netdev-dpdk.c     | 81 +++++++++++++++++++++++++--------------------------
 lib/netdev-dummy.c    |  9 ++++--
 lib/netdev-linux.c    |  9 ++++--
 lib/netdev-provider.h | 25 ++++++++--------
 lib/netdev.c          |  6 ++--
 6 files changed, 72 insertions(+), 67 deletions(-)

Comments

Darrell Ball June 27, 2016, 3:02 a.m. UTC | #1
On Sat, Jun 25, 2016 at 7:44 AM, William Tu <u9012063@gmail.com> wrote:

> Commit 1895cc8dbb64 ("dpif-netdev: create batch object") introduces
> batch process functions and 'struct dp_packet_batch' to associate with
> batch-level metadata.  This patch applies the packet batch object to
> the netdev provider interface (dummy, Linux, BSD, and DPDK) so that
> batch APIs can be used in providers.  With batch metadata visible in
> providers, optimizations can be introduced at per-batch level instead
> of per-packet.
>
> Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/140135888


The test shows as failed ?

The code change is replacing an array of ptrs to packets and count with a
packet batch and replacing some code blocks with equivalent existing inline
functions based on batches.
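
For context, here is a rough sketch of the batch object and the inline helpers
the patch switches to, reconstructed from the 'struct dp_packet_batch' quoted
later in this thread and from the per-packet loops the patch removes; the exact
OVS definitions may differ slightly (e.g. the cutlen helper likely also guards
on 'trunc'):

enum { NETDEV_MAX_BURST = 32 };

struct dp_packet_batch {
    int count;
    bool trunc;                 /* True if the batch needs truncation. */
    struct dp_packet *packets[NETDEV_MAX_BURST];
};

static inline void
dp_packet_batch_init(struct dp_packet_batch *batch)
{
    batch->count = 0;
    batch->trunc = false;
}

/* Equivalent of the removed "for (i = 0; i < cnt; i++) dp_packet_delete(...)"
 * loops: frees every packet when the caller is allowed to steal them. */
static inline void
dp_packet_delete_batch(struct dp_packet_batch *batch, bool may_steal)
{
    if (may_steal) {
        int i;

        for (i = 0; i < batch->count; i++) {
            dp_packet_delete(batch->packets[i]);
        }
        batch->count = 0;
    }
}

/* Equivalent of the removed cutlen loop: trims each packet by its pending
 * cutlen and clears it. */
static inline void
dp_packet_batch_apply_cutlen(struct dp_packet_batch *batch)
{
    int i;

    for (i = 0; i < batch->count; i++) {
        struct dp_packet *p = batch->packets[i];

        dp_packet_set_size(p, dp_packet_size(p) - dp_packet_get_cutlen(p));
        dp_packet_reset_cutlen(p);
    }
}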

I have some comments inline.




> Signed-off-by: William Tu <u9012063@gmail.com>
> --
> v1->v2: make commit message more descriptive.
> ---
>  lib/netdev-bsd.c      |  9 ++++--
>  lib/netdev-dpdk.c     | 81
> +++++++++++++++++++++++++--------------------------
>  lib/netdev-dummy.c    |  9 ++++--
>  lib/netdev-linux.c    |  9 ++++--
>  lib/netdev-provider.h | 25 ++++++++--------
>  lib/netdev.c          |  6 ++--
>  6 files changed, 72 insertions(+), 67 deletions(-)
>
> diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c
> index 2e92d97..1b55b1a 100644
> --- a/lib/netdev-bsd.c
> +++ b/lib/netdev-bsd.c
> @@ -618,12 +618,13 @@ netdev_rxq_bsd_recv_tap(struct netdev_rxq_bsd *rxq,
> struct dp_packet *buffer)
>  }
>
>  static int
> -netdev_bsd_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet **packets,
> -                    int *c)
> +netdev_bsd_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch
> *batch)
>  {
>      struct netdev_rxq_bsd *rxq = netdev_rxq_bsd_cast(rxq_);
>      struct netdev *netdev = rxq->up.netdev;
>      struct dp_packet *packet;
> +    struct dp_packet **packets = batch->packets;
> +    int *c = &batch->count;
>      ssize_t retval;
>      int mtu;
>
> @@ -681,10 +682,12 @@ netdev_bsd_rxq_drain(struct netdev_rxq *rxq_)
>   */
>  static int
>  netdev_bsd_send(struct netdev *netdev_, int qid OVS_UNUSED,
> -                struct dp_packet **pkts, int cnt, bool may_steal)
> +                struct dp_packet_batch *batch, bool may_steal)
>  {
>      struct netdev_bsd *dev = netdev_bsd_cast(netdev_);
>      const char *name = netdev_get_name(netdev_);
> +    int cnt = batch->count;
> +    struct dp_packet **okts = batch->packets;
>


"okts" is not used in the function; "pkts" is used in the function.
The function will no longer work.
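
Presumably the intended line is:

    struct dp_packet **pkts = batch->packets;

matching the 'pkts' uses later in the function.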



>      int error;
>      int i;
>
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index ed14a21..3b1e7fa 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -1251,12 +1251,14 @@ netdev_dpdk_vhost_update_rx_counters(struct
> netdev_stats *stats,
>   */
>  static int
>  netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
> -                           struct dp_packet **packets, int *c)
> +                           struct dp_packet_batch *batch)
>  {
>      struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);
>      struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
>      int qid = rxq->queue_id;
>      struct ingress_policer *policer =
> netdev_dpdk_get_ingress_policer(dev);
> +    struct dp_packet **packets = batch->packets;
> +    int *c = &batch->count;
>

I see this pattern throughout where extra variables are used in all
functions.
There are 2 local variables for packets and count and also the batch
parameter,
which itself already has reference to packets and count from the start of
the call chain
in netdev_*().

Pre-change, there are two parameters for packets and count derived from the
batch
parameter at the start of the call chain in netdev_*().

These functions are intended to be fast.
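
For reference, the two shapes of the provider hook being compared here, both
adapted from the rxq_recv change in this patch:

/* Pre-change: netdev_rxq_recv() unpacks the batch before calling down. */
int (*rxq_recv)(struct netdev_rxq *rx, struct dp_packet **pkts, int *cnt);

/* Post-change: the batch is passed through, and each provider re-derives
 * the same two values locally, e.g.:
 *
 *     struct dp_packet **packets = batch->packets;
 *     int *c = &batch->count;
 */
int (*rxq_recv)(struct netdev_rxq *rx, struct dp_packet_batch *batch);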



>      uint16_t nb_rx = 0;
>      uint16_t dropped = 0;
>
> @@ -1292,12 +1294,13 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
>  }
>
>  static int
> -netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet **packets,
> -                     int *c)
> +netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch
> *batch)
>  {
>      struct netdev_rxq_dpdk *rx = netdev_rxq_dpdk_cast(rxq);
>      struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);
>      struct ingress_policer *policer =
> netdev_dpdk_get_ingress_policer(dev);
> +    struct dp_packet **packets = batch->packets;
> +    int *c = &batch->count;
>      int nb_rx;
>      int dropped = 0;
>
> @@ -1371,11 +1374,13 @@ netdev_dpdk_vhost_update_tx_counters(struct
> netdev_stats *stats,
>
>  static void
>  __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
> -                         struct dp_packet **pkts, int cnt,
> +                         struct dp_packet_batch *batch,
>                           bool may_steal)
>  {
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>      struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
> +    struct dp_packet **pkts = batch->packets;
> +    int cnt = batch->count;
>      struct rte_mbuf **cur_pkts = (struct rte_mbuf **) pkts;
>      unsigned int total_pkts = cnt;
>      unsigned int qos_pkts = cnt;
> @@ -1423,11 +1428,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int
> qid,
>
>  out:
>      if (may_steal) {
> -        int i;
> -
> -        for (i = 0; i < total_pkts; i++) {
> -            dp_packet_delete(pkts[i]);
> -        }
> +        dp_packet_delete_batch(batch, may_steal);
>      }
>  }
>
> @@ -1462,18 +1463,19 @@ dpdk_queue_pkts(struct netdev_dpdk *dev, int qid,
>
>  /* Tx function. Transmit packets indefinitely */
>  static void
> -dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts,
> -                int cnt)
> -    OVS_NO_THREAD_SAFETY_ANALYSIS
> +dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch
> *batch)
> +OVS_NO_THREAD_SAFETY_ANALYSIS
>  {
>  #if !defined(__CHECKER__) && !defined(_WIN32)
> -    const size_t PKT_ARRAY_SIZE = cnt;
> +    const size_t PKT_ARRAY_SIZE = batch->count;
>  #else
>      /* Sparse or MSVC doesn't like variable length array. */
>      enum { PKT_ARRAY_SIZE = NETDEV_MAX_BURST };
>  #endif
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>      struct rte_mbuf *mbufs[PKT_ARRAY_SIZE];
> +    struct dp_packet **pkts = batch->packets;
> +    int cnt = batch->count;
>      int dropped = 0;
>      int newcnt = 0;
>      int i;
> @@ -1517,7 +1519,12 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid,
> struct dp_packet **pkts,
>      }
>
>      if (dev->type == DPDK_DEV_VHOST) {
> -        __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **)
> mbufs, newcnt, true);
> +        struct dp_packet_batch mbatch;
> +
> +        dp_packet_batch_init(&mbatch);
> +        mbatch.count = newcnt;
> +        memcpy(mbatch.packets, mbufs, newcnt * sizeof(struct rte_mbuf
> *));

+        __netdev_dpdk_vhost_send(netdev, qid, &mbatch, true);
>

This looks more expensive to me in terms of processing performance in this
caller especially and also the callee.
This code is intended to be fast.



>      } else {
>          unsigned int qos_pkts = newcnt;
>
> @@ -1541,37 +1548,30 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid,
> struct dp_packet **pkts,
>  }
>
>  static int
> -netdev_dpdk_vhost_send(struct netdev *netdev, int qid, struct dp_packet
> **pkts,
> -                 int cnt, bool may_steal)
> +netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
> +                       struct dp_packet_batch *batch,
> +                       bool may_steal)
>  {
> -    if (OVS_UNLIKELY(pkts[0]->source != DPBUF_DPDK)) {
> -        int i;
>
> -        dpdk_do_tx_copy(netdev, qid, pkts, cnt);
> +    if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
> +        dpdk_do_tx_copy(netdev, qid, batch);
>          if (may_steal) {
> -            for (i = 0; i < cnt; i++) {
> -                dp_packet_delete(pkts[i]);
> -            }
> +            dp_packet_delete_batch(batch, may_steal);
>          }
>      } else {
> -        int i;
> -
> -        for (i = 0; i < cnt; i++) {
> -            int cutlen = dp_packet_get_cutlen(pkts[i]);
> -
> -            dp_packet_set_size(pkts[i], dp_packet_size(pkts[i]) - cutlen);
> -            dp_packet_reset_cutlen(pkts[i]);
> -        }
> -        __netdev_dpdk_vhost_send(netdev, qid, pkts, cnt, may_steal);
> +        dp_packet_batch_apply_cutlen(batch);
> +        __netdev_dpdk_vhost_send(netdev, qid, batch, may_steal);
>      }
>      return 0;
>  }
>
>  static inline void
>  netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
> -                   struct dp_packet **pkts, int cnt, bool may_steal)
> +                   struct dp_packet_batch *batch, bool may_steal)
>  {
>      int i;
> +    struct dp_packet **pkts = batch->packets;
> +    int cnt = batch->count;
>
>      if (OVS_UNLIKELY(dev->txq_needs_locking)) {
>          qid = qid % dev->real_n_txq;
> @@ -1582,12 +1582,10 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int
> qid,
>                       pkts[0]->source != DPBUF_DPDK)) {
>          struct netdev *netdev = &dev->up;
>
> -        dpdk_do_tx_copy(netdev, qid, pkts, cnt);
> +        dpdk_do_tx_copy(netdev, qid, batch);
>
>          if (may_steal) {
> -            for (i = 0; i < cnt; i++) {
> -                dp_packet_delete(pkts[i]);
> -            }
> +            dp_packet_delete_batch(batch, may_steal);
>          }
>      } else {
>          int next_tx_idx = 0;
> @@ -1647,11 +1645,11 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int
> qid,
>
>  static int
>  netdev_dpdk_eth_send(struct netdev *netdev, int qid,
> -                     struct dp_packet **pkts, int cnt, bool may_steal)
> +                     struct dp_packet_batch *batch, bool may_steal)
>  {
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>
> -    netdev_dpdk_send__(dev, qid, pkts, cnt, may_steal);
> +    netdev_dpdk_send__(dev, qid, batch, may_steal);
>      return 0;
>  }
>
> @@ -2646,20 +2644,21 @@ dpdk_ring_open(const char dev_name[], unsigned int
> *eth_port_id) OVS_REQUIRES(dp
>
>  static int
>  netdev_dpdk_ring_send(struct netdev *netdev, int qid,
> -                      struct dp_packet **pkts, int cnt, bool may_steal)
> +                      struct dp_packet_batch *batch, bool may_steal)
>  {
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> +    struct dp_packet **pkts = batch->packets;
>      unsigned i;
>
>      /* When using 'dpdkr' and sending to a DPDK ring, we want to ensure
> that the
>       * rss hash field is clear. This is because the same mbuf may be
> modified by
>       * the consumer of the ring and return into the datapath without
> recalculating
>       * the RSS hash. */
> -    for (i = 0; i < cnt; i++) {
> +    for (i = 0; i < batch->count; i++) {
>          dp_packet_rss_invalidate(pkts[i]);
>      }
>
> -    netdev_dpdk_send__(dev, qid, pkts, cnt, may_steal);
> +    netdev_dpdk_send__(dev, qid, batch, may_steal);
>      return 0;
>  }
>
> diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
> index 127b6ae..63fc0b3 100644
> --- a/lib/netdev-dummy.c
> +++ b/lib/netdev-dummy.c
> @@ -951,12 +951,13 @@ netdev_dummy_rxq_dealloc(struct netdev_rxq *rxq_)
>  }
>
>  static int
> -netdev_dummy_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet **arr,
> -                      int *c)
> +netdev_dummy_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch
> *batch)
>  {
>      struct netdev_rxq_dummy *rx = netdev_rxq_dummy_cast(rxq_);
>      struct netdev_dummy *netdev = netdev_dummy_cast(rx->up.netdev);
>      struct dp_packet *packet;
> +    struct dp_packet **arr = batch->packets;
> +    int *c = &batch->count;
>
>      ovs_mutex_lock(&netdev->mutex);
>      if (!ovs_list_is_empty(&rx->recv_queue)) {
> @@ -1034,10 +1035,12 @@ netdev_dummy_rxq_drain(struct netdev_rxq *rxq_)
>
>  static int
>  netdev_dummy_send(struct netdev *netdev, int qid OVS_UNUSED,
> -                  struct dp_packet **pkts, int cnt, bool may_steal)
> +                  struct dp_packet_batch *batch, bool may_steal)
>  {
>      struct netdev_dummy *dev = netdev_dummy_cast(netdev);
>      int error = 0;
> +    int cnt = batch->count;
> +    struct dp_packet **pkts = batch->packets;
>      int i;
>
>      for (i = 0; i < cnt; i++) {
> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> index ef11d12..594d437 100644
> --- a/lib/netdev-linux.c
> +++ b/lib/netdev-linux.c
> @@ -1091,12 +1091,13 @@ netdev_linux_rxq_recv_tap(int fd, struct dp_packet
> *buffer)
>  }
>
>  static int
> -netdev_linux_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet **packets,
> -                      int *c)
> +netdev_linux_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch
> *batch)
>  {
>      struct netdev_rxq_linux *rx = netdev_rxq_linux_cast(rxq_);
>      struct netdev *netdev = rx->up.netdev;
>      struct dp_packet *buffer;
> +    struct dp_packet **packets = batch->packets;
> +    int *c = &batch->count;
>      ssize_t retval;
>      int mtu;
>
> @@ -1161,10 +1162,12 @@ netdev_linux_rxq_drain(struct netdev_rxq *rxq_)
>   * expected to do additional queuing of packets. */
>  static int
>  netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED,
> -                  struct dp_packet **pkts, int cnt, bool may_steal)
> +                  struct dp_packet_batch *batch, bool may_steal)
>  {
>      int i;
>      int error = 0;
> +    int cnt = batch->count;
> +    struct dp_packet **pkts = batch->packets;
>
>      /* 'i' is incremented only if there's no error */
>      for (i = 0; i < cnt;) {
> diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
> index 5da377f..8c7c8ea 100644
> --- a/lib/netdev-provider.h
> +++ b/lib/netdev-provider.h
> @@ -340,8 +340,8 @@ struct netdev_class {
>       * network device from being usefully used by the netdev-based
> "userspace
>       * datapath".  It will also prevent the OVS implementation of bonding
> from
>       * working properly over 'netdev'.) */
> -    int (*send)(struct netdev *netdev, int qid, struct dp_packet
> **buffers,
> -                int cnt, bool may_steal);
> +    int (*send)(struct netdev *netdev, int qid, struct dp_packet_batch
> *batch,
> +                bool may_steal);
>
>      /* Registers with the poll loop to wake up from the next call to
>       * poll_block() when the packet transmission queue for 'netdev' has
> @@ -727,25 +727,24 @@ struct netdev_class {
>      void (*rxq_destruct)(struct netdev_rxq *);
>      void (*rxq_dealloc)(struct netdev_rxq *);
>
> -    /* Attempts to receive a batch of packets from 'rx'.  The caller
> supplies
> -     * 'pkts' as the pointer to the beginning of an array of MAX_RX_BATCH
> -     * pointers to dp_packet.  If successful, the implementation stores
> -     * pointers to up to MAX_RX_BATCH dp_packets into the array,
> transferring
> -     * ownership of the packets to the caller, stores the number of
> received
> -     * packets into '*cnt', and returns 0.
> +    /* Attempts to receive a batch of packets from 'batch'.  In batch, the
> +     * caller supplies 'packets' as the pointer to the beginning of an
> array
> +     * of MAX_RX_BATCH pointers to dp_packet.  If successful, the
> +     * implementation stores pointers to up to MAX_RX_BATCH dp_packets
> into
> +     * the array, transferring ownership of the packets to the caller,
> stores
> +     * the number of received packets into 'count', and returns 0.
>       *
>       * The implementation does not necessarily initialize any non-data
> members
> -     * of 'pkts'.  That is, the caller must initialize layer pointers and
> -     * metadata itself, if desired, e.g. with pkt_metadata_init() and
> -     * miniflow_extract().
> +     * of 'packets' in 'batch'.  That is, the caller must initialize layer
> +     * pointers and metadata itself, if desired, e.g. with
> pkt_metadata_init()
> +     * and miniflow_extract().
>       *
>       * Implementations should allocate buffers with DP_NETDEV_HEADROOM
> bytes of
>       * headroom.
>       *
>       * Returns EAGAIN immediately if no packet is ready to be received or
>       * another positive errno value if an error was encountered. */
> -    int (*rxq_recv)(struct netdev_rxq *rx, struct dp_packet **pkts,
> -                    int *cnt);
> +    int (*rxq_recv)(struct netdev_rxq *rx, struct dp_packet_batch *batch);
>
>      /* Registers with the poll loop to wake up from the next call to
>       * poll_block() when a packet is ready to be received with
> diff --git a/lib/netdev.c b/lib/netdev.c
> index 6651173..405bf41 100644
> --- a/lib/netdev.c
> +++ b/lib/netdev.c
> @@ -625,7 +625,7 @@ netdev_rxq_recv(struct netdev_rxq *rx, struct
> dp_packet_batch *batch)
>  {
>      int retval;
>
> -    retval = rx->netdev->netdev_class->rxq_recv(rx, batch->packets,
> &batch->count);
> +    retval = rx->netdev->netdev_class->rxq_recv(rx,  batch);
>      if (!retval) {
>          COVERAGE_INC(netdev_received);
>      } else {
> @@ -711,9 +711,7 @@ netdev_send(struct netdev *netdev, int qid, struct
> dp_packet_batch *batch,
>          return EOPNOTSUPP;
>      }
>
> -    int error = netdev->netdev_class->send(netdev, qid,
> -                                           batch->packets, batch->count,
> -                                           may_steal);
> +    int error = netdev->netdev_class->send(netdev, qid, batch, may_steal);
>      if (!error) {
>          COVERAGE_INC(netdev_sent);
>          if (!may_steal) {
> --
> 2.5.0
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
>
William Tu June 27, 2016, 4:54 a.m. UTC | #2
Hi Darrell,

Thanks for your feedback!

On Sun, Jun 26, 2016 at 8:02 PM, Darrell Ball <dlu998@gmail.com> wrote:
>
>
> On Sat, Jun 25, 2016 at 7:44 AM, William Tu <u9012063@gmail.com> wrote:
>>
>> Commit 1895cc8dbb64 ("dpif-netdev: create batch object") introduces
>> batch process functions and 'struct dp_packet_batch' to associate with
>> batch-level metadata.  This patch applies the packet batch object to
>> the netdev provider interface (dummy, Linux, BSD, and DPDK) so that
>> batch APIs can be used in providers.  With batch metadata visible in
>> providers, optimizations can be introduced at per-batch level instead
>> of per-packet.
>>
>> Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/140135888
>
>
> The test shows as failed ?

it's due to one of the OVN testcases, which sometimes fails.

>
> The code change is replacing an array of ptrs to packets and count with a
> packet batch and replacing some code blocks with equivalent existing inline
> functions based on batches.
>
> I have some comments inline.
>
>> @@ -681,10 +682,12 @@ netdev_bsd_rxq_drain(struct netdev_rxq *rxq_)
>>   */
>>  static int
>>  netdev_bsd_send(struct netdev *netdev_, int qid OVS_UNUSED,
>> -                struct dp_packet **pkts, int cnt, bool may_steal)
>> +                struct dp_packet_batch *batch, bool may_steal)
>>  {
>>      struct netdev_bsd *dev = netdev_bsd_cast(netdev_);
>>      const char *name = netdev_get_name(netdev_);
>> +    int cnt = batch->count;
>> +    struct dp_packet **okts = batch->packets;
>
>
>
> "okts" is not used in the function; "pkts" is used in the function.
> The function will no longer work.

my mistake, will fix it in next version. thanks!

>
>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>> index ed14a21..3b1e7fa 100644
>> --- a/lib/netdev-dpdk.c
>> +++ b/lib/netdev-dpdk.c
>> @@ -1251,12 +1251,14 @@ netdev_dpdk_vhost_update_rx_counters(struct
>> netdev_stats *stats,
>>   */
>>  static int
>>  netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
>> -                           struct dp_packet **packets, int *c)
>> +                           struct dp_packet_batch *batch)
>>  {
>>      struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);
>>      struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
>>      int qid = rxq->queue_id;
>>      struct ingress_policer *policer =
>> netdev_dpdk_get_ingress_policer(dev);
>> +    struct dp_packet **packets = batch->packets;
>> +    int *c = &batch->count;
>
>
> I see this pattern throughout where extra variables are used in all
> functions.
> There are 2 local variables for packets and count and also the batch
> parameter,
> which itself already has reference to packets and count from the start of
> the call chain
> in netdev_*().
>
> Pre-change, there are two parameters for packets and count derived from the
> batch
> parameter at the start of the call chain in netdev_*().
>
> These functions are intended to be fast.

I don't think adding these 2 extra local variables will have any overhead;
I believe it's the same because the compiler will optimize them away.
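
To illustrate the point (a hypothetical minimal example, not code from the
patch; process() just stands in for the per-packet work), the two forms below
typically compile to the same memory accesses at -O2, since 'packets' and 'c'
are simply fixed offsets from 'batch':

/* Style used in the patch: derive locals from the batch once. */
struct dp_packet **packets = batch->packets;
int *c = &batch->count;
int i;

for (i = 0; i < *c; i++) {
    process(packets[i]);
}

/* Direct member access: with optimization enabled, compilers generally
 * generate the same loads for both forms. */
for (i = 0; i < batch->count; i++) {
    process(batch->packets[i]);
}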

>> @@ -1517,7 +1519,12 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid,
>> struct dp_packet **pkts,
>>      }
>>
>>      if (dev->type == DPDK_DEV_VHOST) {
>> -        __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **)
>> mbufs, newcnt, true);
>> +        struct dp_packet_batch mbatch;
>> +
>> +        dp_packet_batch_init(&mbatch);
>> +        mbatch.count = newcnt;
>> +        memcpy(mbatch.packets, mbufs, newcnt * sizeof(struct rte_mbuf
>> *));
>>
>> +        __netdev_dpdk_vhost_send(netdev, qid, &mbatch, true);
>
>
> This looks more expensive to me in terms of processing performance in this
> caller especially and also the callee.
> This code is intended to be fast.

This isn't copying the packet contents but only the array of 8-byte
pointers. It definitely has some overhead but should be minor.

Regards,
William
Darrell Ball June 27, 2016, 4:44 p.m. UTC | #3
On Sun, Jun 26, 2016 at 9:54 PM, William Tu <u9012063@gmail.com> wrote:

> Hi Darrell,
>
> Thanks for your feedback!
>
> On Sun, Jun 26, 2016 at 8:02 PM, Darrell Ball <dlu998@gmail.com> wrote:
> >
> >
> > On Sat, Jun 25, 2016 at 7:44 AM, William Tu <u9012063@gmail.com> wrote:
> >>
> >> Commit 1895cc8dbb64 ("dpif-netdev: create batch object") introduces
> >> batch process functions and 'struct dp_packet_batch' to associate with
> >> batch-level metadata.  This patch applies the packet batch object to
> >> the netdev provider interface (dummy, Linux, BSD, and DPDK) so that
> >> batch APIs can be used in providers.  With batch metadata visible in
> >> providers, optimizations can be introduced at per-batch level instead
> >> of per-packet.
> >>
> >> Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/140135888
> >
> >
> > The test shows as failed ?
>
> it's due to one of the OVN testcases, which sometimes fails.
>
> >
> > The code change is replacing an array of ptrs to packets and count with a
> > packet batch and replacing some code blocks with equivalent existing
> inline
> > functions based on batches.
> >
> > I have some comments inline.
> >
> >> @@ -681,10 +682,12 @@ netdev_bsd_rxq_drain(struct netdev_rxq *rxq_)
> >>   */
> >>  static int
> >>  netdev_bsd_send(struct netdev *netdev_, int qid OVS_UNUSED,
> >> -                struct dp_packet **pkts, int cnt, bool may_steal)
> >> +                struct dp_packet_batch *batch, bool may_steal)
> >>  {
> >>      struct netdev_bsd *dev = netdev_bsd_cast(netdev_);
> >>      const char *name = netdev_get_name(netdev_);
> >> +    int cnt = batch->count;
> >> +    struct dp_packet **okts = batch->packets;
> >
> >
> >
> > "okts" is not used in the function; "pkts" is used in the function.
> > The function will no longer work.
>
> my mistake, will fix it in next version. thanks!
>

Please check that it builds and test it.



>
> >
> >> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> >> index ed14a21..3b1e7fa 100644
> >> --- a/lib/netdev-dpdk.c
> >> +++ b/lib/netdev-dpdk.c
> >> @@ -1251,12 +1251,14 @@ netdev_dpdk_vhost_update_rx_counters(struct
> >> netdev_stats *stats,
> >>   */
> >>  static int
> >>  netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
> >> -                           struct dp_packet **packets, int *c)
> >> +                           struct dp_packet_batch *batch)
> >>  {
> >>      struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);
> >>      struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
> >>      int qid = rxq->queue_id;
> >>      struct ingress_policer *policer =
> >> netdev_dpdk_get_ingress_policer(dev);
> >> +    struct dp_packet **packets = batch->packets;
> >> +    int *c = &batch->count;
> >
> >
> > I see this pattern throughout where extra variables are used in all
> > functions.
> > There are 2 local variables for packets and count and also the batch
> > parameter,
> > which itself already has reference to packets and count from the start of
> > the call chain
> > in netdev_*().
> >
> > Pre-change, there are two parameters for packets and count derived from
> the
> > batch
> > parameter at the start of the call chain in netdev_*().
> >
> > These functions are intended to be fast.
>
> I don't think adding these 2 extra local variables will have any overhead;
> I believe it's the same because the compiler will optimize them away.
>

Please don't make that general assumption



>
> >> @@ -1517,7 +1519,12 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid,
> >> struct dp_packet **pkts,
> >>      }
> >>
> >>      if (dev->type == DPDK_DEV_VHOST) {
> >> -        __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **)
> >> mbufs, newcnt, true);
> >> +        struct dp_packet_batch mbatch;
> >> +
> >> +        dp_packet_batch_init(&mbatch);
> >> +        mbatch.count = newcnt;
> >> +        memcpy(mbatch.packets, mbufs, newcnt * sizeof(struct rte_mbuf
> >> *));
> >>
> >> +        __netdev_dpdk_vhost_send(netdev, qid, &mbatch, true);
> >
> >
> > This looks more expensive to me in terms of processing performance in
> this
> > caller especially and also the callee.
> > This code is intended to be fast.
>
> This isn't copying the packet contents but only the array of 8-byte
> pointers.


8 bytes vs 4 bytes vs ... depends on your system arch.


> It definitely has some overhead but should be minor.
>


To avoid any further confusion, below is the data structure, where the
packet pointer array is sized at 32 today.

So it is a 256-byte (for 8-byte addresses) allocation and memcpy for
each batch.

enum { NETDEV_MAX_BURST = 32 };

struct dp_packet_batch {
    int count;
    bool trunc; /* true if the batch needs truncate. */
    >>>>  struct dp_packet *packets[NETDEV_MAX_BURST]; <<<<
};

The point is this is extra overhead for code that tries to be fast.
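
Concretely, the per-batch cost being pointed out is this copy from the patch
above (the arithmetic: up to NETDEV_MAX_BURST = 32 pointers of 8 bytes each,
i.e. up to 256 bytes per transmitted batch):

/* From dpdk_do_tx_copy() in this patch: */
dp_packet_batch_init(&mbatch);
mbatch.count = newcnt;
memcpy(mbatch.packets, mbufs, newcnt * sizeof(struct rte_mbuf *));

/* A hypothetical way to avoid it would be to have the conversion loop fill
 * mbatch.packets directly instead of the local 'mbufs' array, but that is
 * not part of the posted patch. */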



>
> Regards,
> William
>

Patch

diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c
index 2e92d97..1b55b1a 100644
--- a/lib/netdev-bsd.c
+++ b/lib/netdev-bsd.c
@@ -618,12 +618,13 @@  netdev_rxq_bsd_recv_tap(struct netdev_rxq_bsd *rxq, struct dp_packet *buffer)
 }
 
 static int
-netdev_bsd_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet **packets,
-                    int *c)
+netdev_bsd_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch)
 {
     struct netdev_rxq_bsd *rxq = netdev_rxq_bsd_cast(rxq_);
     struct netdev *netdev = rxq->up.netdev;
     struct dp_packet *packet;
+    struct dp_packet **packets = batch->packets;
+    int *c = &batch->count;
     ssize_t retval;
     int mtu;
 
@@ -681,10 +682,12 @@  netdev_bsd_rxq_drain(struct netdev_rxq *rxq_)
  */
 static int
 netdev_bsd_send(struct netdev *netdev_, int qid OVS_UNUSED,
-                struct dp_packet **pkts, int cnt, bool may_steal)
+                struct dp_packet_batch *batch, bool may_steal)
 {
     struct netdev_bsd *dev = netdev_bsd_cast(netdev_);
     const char *name = netdev_get_name(netdev_);
+    int cnt = batch->count;
+    struct dp_packet **okts = batch->packets;
     int error;
     int i;
 
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index ed14a21..3b1e7fa 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1251,12 +1251,14 @@  netdev_dpdk_vhost_update_rx_counters(struct netdev_stats *stats,
  */
 static int
 netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
-                           struct dp_packet **packets, int *c)
+                           struct dp_packet_batch *batch)
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);
     struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
     int qid = rxq->queue_id;
     struct ingress_policer *policer = netdev_dpdk_get_ingress_policer(dev);
+    struct dp_packet **packets = batch->packets;
+    int *c = &batch->count;
     uint16_t nb_rx = 0;
     uint16_t dropped = 0;
 
@@ -1292,12 +1294,13 @@  netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
 }
 
 static int
-netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet **packets,
-                     int *c)
+netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch *batch)
 {
     struct netdev_rxq_dpdk *rx = netdev_rxq_dpdk_cast(rxq);
     struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);
     struct ingress_policer *policer = netdev_dpdk_get_ingress_policer(dev);
+    struct dp_packet **packets = batch->packets;
+    int *c = &batch->count;
     int nb_rx;
     int dropped = 0;
 
@@ -1371,11 +1374,13 @@  netdev_dpdk_vhost_update_tx_counters(struct netdev_stats *stats,
 
 static void
 __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
-                         struct dp_packet **pkts, int cnt,
+                         struct dp_packet_batch *batch,
                          bool may_steal)
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
     struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
+    struct dp_packet **pkts = batch->packets;
+    int cnt = batch->count;
     struct rte_mbuf **cur_pkts = (struct rte_mbuf **) pkts;
     unsigned int total_pkts = cnt;
     unsigned int qos_pkts = cnt;
@@ -1423,11 +1428,7 @@  __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
 
 out:
     if (may_steal) {
-        int i;
-
-        for (i = 0; i < total_pkts; i++) {
-            dp_packet_delete(pkts[i]);
-        }
+        dp_packet_delete_batch(batch, may_steal);
     }
 }
 
@@ -1462,18 +1463,19 @@  dpdk_queue_pkts(struct netdev_dpdk *dev, int qid,
 
 /* Tx function. Transmit packets indefinitely */
 static void
-dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts,
-                int cnt)
-    OVS_NO_THREAD_SAFETY_ANALYSIS
+dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
+OVS_NO_THREAD_SAFETY_ANALYSIS
 {
 #if !defined(__CHECKER__) && !defined(_WIN32)
-    const size_t PKT_ARRAY_SIZE = cnt;
+    const size_t PKT_ARRAY_SIZE = batch->count;
 #else
     /* Sparse or MSVC doesn't like variable length array. */
     enum { PKT_ARRAY_SIZE = NETDEV_MAX_BURST };
 #endif
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
     struct rte_mbuf *mbufs[PKT_ARRAY_SIZE];
+    struct dp_packet **pkts = batch->packets;
+    int cnt = batch->count;
     int dropped = 0;
     int newcnt = 0;
     int i;
@@ -1517,7 +1519,12 @@  dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts,
     }
 
     if (dev->type == DPDK_DEV_VHOST) {
-        __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) mbufs, newcnt, true);
+        struct dp_packet_batch mbatch;
+
+        dp_packet_batch_init(&mbatch);
+        mbatch.count = newcnt;
+        memcpy(mbatch.packets, mbufs, newcnt * sizeof(struct rte_mbuf *));
+        __netdev_dpdk_vhost_send(netdev, qid, &mbatch, true);
     } else {
         unsigned int qos_pkts = newcnt;
 
@@ -1541,37 +1548,30 @@  dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts,
 }
 
 static int
-netdev_dpdk_vhost_send(struct netdev *netdev, int qid, struct dp_packet **pkts,
-                 int cnt, bool may_steal)
+netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
+                       struct dp_packet_batch *batch,
+                       bool may_steal)
 {
-    if (OVS_UNLIKELY(pkts[0]->source != DPBUF_DPDK)) {
-        int i;
 
-        dpdk_do_tx_copy(netdev, qid, pkts, cnt);
+    if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
+        dpdk_do_tx_copy(netdev, qid, batch);
         if (may_steal) {
-            for (i = 0; i < cnt; i++) {
-                dp_packet_delete(pkts[i]);
-            }
+            dp_packet_delete_batch(batch, may_steal);
         }
     } else {
-        int i;
-
-        for (i = 0; i < cnt; i++) {
-            int cutlen = dp_packet_get_cutlen(pkts[i]);
-
-            dp_packet_set_size(pkts[i], dp_packet_size(pkts[i]) - cutlen);
-            dp_packet_reset_cutlen(pkts[i]);
-        }
-        __netdev_dpdk_vhost_send(netdev, qid, pkts, cnt, may_steal);
+        dp_packet_batch_apply_cutlen(batch);
+        __netdev_dpdk_vhost_send(netdev, qid, batch, may_steal);
     }
     return 0;
 }
 
 static inline void
 netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
-                   struct dp_packet **pkts, int cnt, bool may_steal)
+                   struct dp_packet_batch *batch, bool may_steal)
 {
     int i;
+    struct dp_packet **pkts = batch->packets;
+    int cnt = batch->count;
 
     if (OVS_UNLIKELY(dev->txq_needs_locking)) {
         qid = qid % dev->real_n_txq;
@@ -1582,12 +1582,10 @@  netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
                      pkts[0]->source != DPBUF_DPDK)) {
         struct netdev *netdev = &dev->up;
 
-        dpdk_do_tx_copy(netdev, qid, pkts, cnt);
+        dpdk_do_tx_copy(netdev, qid, batch);
 
         if (may_steal) {
-            for (i = 0; i < cnt; i++) {
-                dp_packet_delete(pkts[i]);
-            }
+            dp_packet_delete_batch(batch, may_steal);
         }
     } else {
         int next_tx_idx = 0;
@@ -1647,11 +1645,11 @@  netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
 
 static int
 netdev_dpdk_eth_send(struct netdev *netdev, int qid,
-                     struct dp_packet **pkts, int cnt, bool may_steal)
+                     struct dp_packet_batch *batch, bool may_steal)
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
 
-    netdev_dpdk_send__(dev, qid, pkts, cnt, may_steal);
+    netdev_dpdk_send__(dev, qid, batch, may_steal);
     return 0;
 }
 
@@ -2646,20 +2644,21 @@  dpdk_ring_open(const char dev_name[], unsigned int *eth_port_id) OVS_REQUIRES(dp
 
 static int
 netdev_dpdk_ring_send(struct netdev *netdev, int qid,
-                      struct dp_packet **pkts, int cnt, bool may_steal)
+                      struct dp_packet_batch *batch, bool may_steal)
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+    struct dp_packet **pkts = batch->packets;
     unsigned i;
 
     /* When using 'dpdkr' and sending to a DPDK ring, we want to ensure that the
      * rss hash field is clear. This is because the same mbuf may be modified by
      * the consumer of the ring and return into the datapath without recalculating
      * the RSS hash. */
-    for (i = 0; i < cnt; i++) {
+    for (i = 0; i < batch->count; i++) {
         dp_packet_rss_invalidate(pkts[i]);
     }
 
-    netdev_dpdk_send__(dev, qid, pkts, cnt, may_steal);
+    netdev_dpdk_send__(dev, qid, batch, may_steal);
     return 0;
 }
 
diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
index 127b6ae..63fc0b3 100644
--- a/lib/netdev-dummy.c
+++ b/lib/netdev-dummy.c
@@ -951,12 +951,13 @@  netdev_dummy_rxq_dealloc(struct netdev_rxq *rxq_)
 }
 
 static int
-netdev_dummy_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet **arr,
-                      int *c)
+netdev_dummy_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch)
 {
     struct netdev_rxq_dummy *rx = netdev_rxq_dummy_cast(rxq_);
     struct netdev_dummy *netdev = netdev_dummy_cast(rx->up.netdev);
     struct dp_packet *packet;
+    struct dp_packet **arr = batch->packets;
+    int *c = &batch->count;
 
     ovs_mutex_lock(&netdev->mutex);
     if (!ovs_list_is_empty(&rx->recv_queue)) {
@@ -1034,10 +1035,12 @@  netdev_dummy_rxq_drain(struct netdev_rxq *rxq_)
 
 static int
 netdev_dummy_send(struct netdev *netdev, int qid OVS_UNUSED,
-                  struct dp_packet **pkts, int cnt, bool may_steal)
+                  struct dp_packet_batch *batch, bool may_steal)
 {
     struct netdev_dummy *dev = netdev_dummy_cast(netdev);
     int error = 0;
+    int cnt = batch->count;
+    struct dp_packet **pkts = batch->packets;
     int i;
 
     for (i = 0; i < cnt; i++) {
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index ef11d12..594d437 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -1091,12 +1091,13 @@  netdev_linux_rxq_recv_tap(int fd, struct dp_packet *buffer)
 }
 
 static int
-netdev_linux_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet **packets,
-                      int *c)
+netdev_linux_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch)
 {
     struct netdev_rxq_linux *rx = netdev_rxq_linux_cast(rxq_);
     struct netdev *netdev = rx->up.netdev;
     struct dp_packet *buffer;
+    struct dp_packet **packets = batch->packets;
+    int *c = &batch->count;
     ssize_t retval;
     int mtu;
 
@@ -1161,10 +1162,12 @@  netdev_linux_rxq_drain(struct netdev_rxq *rxq_)
  * expected to do additional queuing of packets. */
 static int
 netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED,
-                  struct dp_packet **pkts, int cnt, bool may_steal)
+                  struct dp_packet_batch *batch, bool may_steal)
 {
     int i;
     int error = 0;
+    int cnt = batch->count;
+    struct dp_packet **pkts = batch->packets;
 
     /* 'i' is incremented only if there's no error */
     for (i = 0; i < cnt;) {
diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
index 5da377f..8c7c8ea 100644
--- a/lib/netdev-provider.h
+++ b/lib/netdev-provider.h
@@ -340,8 +340,8 @@  struct netdev_class {
      * network device from being usefully used by the netdev-based "userspace
      * datapath".  It will also prevent the OVS implementation of bonding from
      * working properly over 'netdev'.) */
-    int (*send)(struct netdev *netdev, int qid, struct dp_packet **buffers,
-                int cnt, bool may_steal);
+    int (*send)(struct netdev *netdev, int qid, struct dp_packet_batch *batch,
+                bool may_steal);
 
     /* Registers with the poll loop to wake up from the next call to
      * poll_block() when the packet transmission queue for 'netdev' has
@@ -727,25 +727,24 @@  struct netdev_class {
     void (*rxq_destruct)(struct netdev_rxq *);
     void (*rxq_dealloc)(struct netdev_rxq *);
 
-    /* Attempts to receive a batch of packets from 'rx'.  The caller supplies
-     * 'pkts' as the pointer to the beginning of an array of MAX_RX_BATCH
-     * pointers to dp_packet.  If successful, the implementation stores
-     * pointers to up to MAX_RX_BATCH dp_packets into the array, transferring
-     * ownership of the packets to the caller, stores the number of received
-     * packets into '*cnt', and returns 0.
+    /* Attempts to receive a batch of packets from 'batch'.  In batch, the
+     * caller supplies 'packets' as the pointer to the beginning of an array
+     * of MAX_RX_BATCH pointers to dp_packet.  If successful, the
+     * implementation stores pointers to up to MAX_RX_BATCH dp_packets into
+     * the array, transferring ownership of the packets to the caller, stores
+     * the number of received packets into 'count', and returns 0.
      *
      * The implementation does not necessarily initialize any non-data members
-     * of 'pkts'.  That is, the caller must initialize layer pointers and
-     * metadata itself, if desired, e.g. with pkt_metadata_init() and
-     * miniflow_extract().
+     * of 'packets' in 'batch'.  That is, the caller must initialize layer
+     * pointers and metadata itself, if desired, e.g. with pkt_metadata_init()
+     * and miniflow_extract().
      *
      * Implementations should allocate buffers with DP_NETDEV_HEADROOM bytes of
      * headroom.
      *
      * Returns EAGAIN immediately if no packet is ready to be received or
      * another positive errno value if an error was encountered. */
-    int (*rxq_recv)(struct netdev_rxq *rx, struct dp_packet **pkts,
-                    int *cnt);
+    int (*rxq_recv)(struct netdev_rxq *rx, struct dp_packet_batch *batch);
 
     /* Registers with the poll loop to wake up from the next call to
      * poll_block() when a packet is ready to be received with
diff --git a/lib/netdev.c b/lib/netdev.c
index 6651173..405bf41 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -625,7 +625,7 @@  netdev_rxq_recv(struct netdev_rxq *rx, struct dp_packet_batch *batch)
 {
     int retval;
 
-    retval = rx->netdev->netdev_class->rxq_recv(rx, batch->packets, &batch->count);
+    retval = rx->netdev->netdev_class->rxq_recv(rx,  batch);
     if (!retval) {
         COVERAGE_INC(netdev_received);
     } else {
@@ -711,9 +711,7 @@  netdev_send(struct netdev *netdev, int qid, struct dp_packet_batch *batch,
         return EOPNOTSUPP;
     }
 
-    int error = netdev->netdev_class->send(netdev, qid,
-                                           batch->packets, batch->count,
-                                           may_steal);
+    int error = netdev->netdev_class->send(netdev, qid, batch, may_steal);
     if (!error) {
         COVERAGE_INC(netdev_sent);
         if (!may_steal) {