[ovs-dev,v4,3/7] netdev: Remove unused may_steal.

Message ID 1507215962-17692-4-git-send-email-i.maximets@samsung.com
State Superseded
Headers show
Series
  • Output packet batching.
Related show

Commit Message

Ilya Maximets Oct. 5, 2017, 3:05 p.m.
Not needed anymore because 'may_steal' is already handled on the
dpif-netdev layer and is always true.

Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
---
 lib/dpif-netdev.c     |  2 +-
 lib/netdev-bsd.c      |  4 ++--
 lib/netdev-dpdk.c     | 65 +++++++++++++++++++++------------------------------
 lib/netdev-dummy.c    |  4 ++--
 lib/netdev-linux.c    |  4 ++--
 lib/netdev-provider.h |  7 +++---
 lib/netdev.c          | 12 ++++------
 lib/netdev.h          |  2 +-
 8 files changed, 42 insertions(+), 58 deletions(-)

Comments

Gao Zhenyu Oct. 8, 2017, 9:32 a.m. | #1
Hi Ilya,

  Thanks for working on it. Your patch tries to eliminate may_steal in
the dpdk QoS code, because may_steal is handled on the dpif-netdev layer
and is always true.
  But in the function dpdk_do_tx_copy, may_steal was set to false, because
the packet buffers were not allocated on the dpdk side and so cannot be
released by netdev_dpdk_policer_run(). Otherwise it may hit a coredump.
Did you test that scenario?

Thanks
Zhenyu Gao



2017-10-05 23:05 GMT+08:00 Ilya Maximets <i.maximets@samsung.com>:

> Not needed anymore because 'may_steal' already handled on
> dpif-netdev layer and always true;
>
> Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
> ---
>  lib/dpif-netdev.c     |  2 +-
>  lib/netdev-bsd.c      |  4 ++--
>  lib/netdev-dpdk.c     | 65 +++++++++++++++++++++---------
> ---------------------
>  lib/netdev-dummy.c    |  4 ++--
>  lib/netdev-linux.c    |  4 ++--
>  lib/netdev-provider.h |  7 +++---
>  lib/netdev.c          | 12 ++++------
>  lib/netdev.h          |  2 +-
>  8 files changed, 42 insertions(+), 58 deletions(-)
>
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index c6f5c23..166b73a 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -3251,7 +3251,7 @@ dp_netdev_pmd_flush_output_on_port(struct
> dp_netdev_pmd_thread *pmd,
>          tx_qid = pmd->static_tx_qid;
>      }
>
> -    netdev_send(p->port->netdev, tx_qid, &p->output_pkts, true,
> dynamic_txqs);
> +    netdev_send(p->port->netdev, tx_qid, &p->output_pkts, dynamic_txqs);
>      dp_packet_batch_init(&p->output_pkts);
>  }
>
> diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c
> index 8a4cdb3..4f243b5 100644
> --- a/lib/netdev-bsd.c
> +++ b/lib/netdev-bsd.c
> @@ -680,7 +680,7 @@ netdev_bsd_rxq_drain(struct netdev_rxq *rxq_)
>   */
>  static int
>  netdev_bsd_send(struct netdev *netdev_, int qid OVS_UNUSED,
> -                struct dp_packet_batch *batch, bool may_steal,
> +                struct dp_packet_batch *batch,
>                  bool concurrent_txq OVS_UNUSED)
>  {
>      struct netdev_bsd *dev = netdev_bsd_cast(netdev_);
> @@ -728,7 +728,7 @@ netdev_bsd_send(struct netdev *netdev_, int qid
> OVS_UNUSED,
>      }
>
>      ovs_mutex_unlock(&dev->mutex);
> -    dp_packet_delete_batch(batch, may_steal);
> +    dp_packet_delete_batch(batch, true);
>
>      return error;
>  }
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index c60f46f..011c6f7 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -279,7 +279,7 @@ struct dpdk_qos_ops {
>       * For all QoS implementations it should always be non-null.
>       */
>      int (*qos_run)(struct qos_conf *qos_conf, struct rte_mbuf **pkts,
> -                   int pkt_cnt, bool may_steal);
> +                   int pkt_cnt);
>  };
>
>  /* dpdk_qos_ops for each type of user space QoS implementation */
> @@ -1522,8 +1522,7 @@ netdev_dpdk_policer_pkt_handle(struct
> rte_meter_srtcm *meter,
>
>  static int
>  netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
> -                        struct rte_mbuf **pkts, int pkt_cnt,
> -                        bool may_steal)
> +                        struct rte_mbuf **pkts, int pkt_cnt)
>  {
>      int i = 0;
>      int cnt = 0;
> @@ -1539,9 +1538,7 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm
> *meter,
>              }
>              cnt++;
>          } else {
> -            if (may_steal) {
> -                rte_pktmbuf_free(pkt);
> -            }
> +            rte_pktmbuf_free(pkt);
>          }
>      }
>
> @@ -1550,13 +1547,12 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm
> *meter,
>
>  static int
>  ingress_policer_run(struct ingress_policer *policer, struct rte_mbuf
> **pkts,
> -                    int pkt_cnt, bool may_steal)
> +                    int pkt_cnt)
>  {
>      int cnt = 0;
>
>      rte_spinlock_lock(&policer->policer_lock);
> -    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts,
> -                                  pkt_cnt, may_steal);
> +    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts, pkt_cnt);
>      rte_spinlock_unlock(&policer->policer_lock);
>
>      return cnt;
> @@ -1660,7 +1656,7 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
>          dropped = nb_rx;
>          nb_rx = ingress_policer_run(policer,
>                                      (struct rte_mbuf **) batch->packets,
> -                                    nb_rx, true);
> +                                    nb_rx);
>          dropped -= nb_rx;
>      }
>
> @@ -1699,7 +1695,7 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct
> dp_packet_batch *batch)
>          dropped = nb_rx;
>          nb_rx = ingress_policer_run(policer,
>                                      (struct rte_mbuf **) batch->packets,
> -                                    nb_rx, true);
> +                                    nb_rx);
>          dropped -= nb_rx;
>      }
>
> @@ -1717,14 +1713,13 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq,
> struct dp_packet_batch *batch)
>  }
>
>  static inline int
> -netdev_dpdk_qos_run(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
> -                    int cnt, bool may_steal)
> +netdev_dpdk_qos_run(struct netdev_dpdk *dev, struct rte_mbuf **pkts, int
> cnt)
>  {
>      struct qos_conf *qos_conf = ovsrcu_get(struct qos_conf *,
> &dev->qos_conf);
>
>      if (qos_conf) {
>          rte_spinlock_lock(&qos_conf->lock);
> -        cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt, may_steal);
> +        cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt);
>          rte_spinlock_unlock(&qos_conf->lock);
>      }
>
> @@ -1797,8 +1792,8 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int
> qid,
>      rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
>
>      cnt = netdev_dpdk_filter_packet_len(dev, cur_pkts, cnt);
> -    /* Check has QoS has been configured for the netdev */
> -    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt, true);
> +    /* Check if QoS has been configured for the netdev. */
> +    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt);
>      dropped = total_pkts - cnt;
>
>      do {
> @@ -1847,18 +1842,17 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid,
> struct dp_packet_batch *batch)
>      struct rte_mbuf *pkts[PKT_ARRAY_SIZE];
>      uint32_t cnt = batch_cnt;
>      uint32_t dropped = 0;
> +    uint32_t txcnt = 0;
>
>      if (dev->type != DPDK_DEV_VHOST) {
>          /* Check if QoS has been configured for this netdev. */
>          cnt = netdev_dpdk_qos_run(dev, (struct rte_mbuf **)
> batch->packets,
> -                                  batch_cnt, false);
> +                                  batch_cnt);
>          dropped += batch_cnt - cnt;
>      }
>
>      dp_packet_batch_apply_cutlen(batch);
>
> -    uint32_t txcnt = 0;
> -
>      for (uint32_t i = 0; i < cnt; i++) {
>          struct dp_packet *packet = batch->packets[i];
>          uint32_t size = dp_packet_size(packet);
> @@ -1904,12 +1898,12 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid,
> struct dp_packet_batch *batch)
>  static int
>  netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
>                         struct dp_packet_batch *batch,
> -                       bool may_steal, bool concurrent_txq OVS_UNUSED)
> +                       bool concurrent_txq OVS_UNUSED)
>  {
>
> -    if (OVS_UNLIKELY(!may_steal || batch->packets[0]->source !=
> DPBUF_DPDK)) {
> +    if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
>          dpdk_do_tx_copy(netdev, qid, batch);
> -        dp_packet_delete_batch(batch, may_steal);
> +        dp_packet_delete_batch(batch, true);
>      } else {
>          dp_packet_batch_apply_cutlen(batch);
>          __netdev_dpdk_vhost_send(netdev, qid, batch->packets,
> batch->count);
> @@ -1919,11 +1913,11 @@ netdev_dpdk_vhost_send(struct netdev *netdev, int
> qid,
>
>  static inline void
>  netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
> -                   struct dp_packet_batch *batch, bool may_steal,
> +                   struct dp_packet_batch *batch,
>                     bool concurrent_txq)
>  {
>      if (OVS_UNLIKELY(!(dev->flags & NETDEV_UP))) {
> -        dp_packet_delete_batch(batch, may_steal);
> +        dp_packet_delete_batch(batch, true);
>          return;
>      }
>
> @@ -1932,12 +1926,11 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int
> qid,
>          rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
>      }
>
> -    if (OVS_UNLIKELY(!may_steal ||
> -                     batch->packets[0]->source != DPBUF_DPDK)) {
> +    if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
>          struct netdev *netdev = &dev->up;
>
>          dpdk_do_tx_copy(netdev, qid, batch);
> -        dp_packet_delete_batch(batch, may_steal);
> +        dp_packet_delete_batch(batch, true);
>      } else {
>          int tx_cnt, dropped;
>          int batch_cnt = dp_packet_batch_size(batch);
> @@ -1946,7 +1939,7 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
>          dp_packet_batch_apply_cutlen(batch);
>
>          tx_cnt = netdev_dpdk_filter_packet_len(dev, pkts, batch_cnt);
> -        tx_cnt = netdev_dpdk_qos_run(dev, pkts, tx_cnt, true);
> +        tx_cnt = netdev_dpdk_qos_run(dev, pkts, tx_cnt);
>          dropped = batch_cnt - tx_cnt;
>
>          dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, tx_cnt);
> @@ -1965,12 +1958,11 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int
> qid,
>
>  static int
>  netdev_dpdk_eth_send(struct netdev *netdev, int qid,
> -                     struct dp_packet_batch *batch, bool may_steal,
> -                     bool concurrent_txq)
> +                     struct dp_packet_batch *batch, bool concurrent_txq)
>  {
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>
> -    netdev_dpdk_send__(dev, qid, batch, may_steal, concurrent_txq);
> +    netdev_dpdk_send__(dev, qid, batch, concurrent_txq);
>      return 0;
>  }
>
> @@ -2937,8 +2929,7 @@ dpdk_ring_open(const char dev_name[], dpdk_port_t
> *eth_port_id)
>
>  static int
>  netdev_dpdk_ring_send(struct netdev *netdev, int qid,
> -                      struct dp_packet_batch *batch, bool may_steal,
> -                      bool concurrent_txq)
> +                      struct dp_packet_batch *batch, bool concurrent_txq)
>  {
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>      struct dp_packet *packet;
> @@ -2951,7 +2942,7 @@ netdev_dpdk_ring_send(struct netdev *netdev, int qid,
>          dp_packet_mbuf_rss_flag_reset(packet);
>      }
>
> -    netdev_dpdk_send__(dev, qid, batch, may_steal, concurrent_txq);
> +    netdev_dpdk_send__(dev, qid, batch, concurrent_txq);
>      return 0;
>  }
>
> @@ -3163,15 +3154,13 @@ egress_policer_qos_is_equal(const struct qos_conf
> *conf,
>  }
>
>  static int
> -egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int
> pkt_cnt,
> -                   bool may_steal)
> +egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int
> pkt_cnt)
>  {
>      int cnt = 0;
>      struct egress_policer *policer =
>          CONTAINER_OF(conf, struct egress_policer, qos_conf);
>
> -    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
> -                                  pkt_cnt, may_steal);
> +    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts, pkt_cnt);
>
>      return cnt;
>  }
> diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
> index f731af1..57ef13f 100644
> --- a/lib/netdev-dummy.c
> +++ b/lib/netdev-dummy.c
> @@ -1062,7 +1062,7 @@ netdev_dummy_rxq_drain(struct netdev_rxq *rxq_)
>
>  static int
>  netdev_dummy_send(struct netdev *netdev, int qid OVS_UNUSED,
> -                  struct dp_packet_batch *batch, bool may_steal,
> +                  struct dp_packet_batch *batch,
>                    bool concurrent_txq OVS_UNUSED)
>  {
>      struct netdev_dummy *dev = netdev_dummy_cast(netdev);
> @@ -1132,7 +1132,7 @@ netdev_dummy_send(struct netdev *netdev, int qid
> OVS_UNUSED,
>          ovs_mutex_unlock(&dev->mutex);
>      }
>
> -    dp_packet_delete_batch(batch, may_steal);
> +    dp_packet_delete_batch(batch, true);
>
>      return error;
>  }
> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> index 2ff3e2b..aaf4899 100644
> --- a/lib/netdev-linux.c
> +++ b/lib/netdev-linux.c
> @@ -1269,7 +1269,7 @@ netdev_linux_tap_batch_send(struct netdev *netdev_,
>   * expected to do additional queuing of packets. */
>  static int
>  netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED,
> -                  struct dp_packet_batch *batch, bool may_steal,
> +                  struct dp_packet_batch *batch,
>                    bool concurrent_txq OVS_UNUSED)
>  {
>      int error = 0;
> @@ -1305,7 +1305,7 @@ netdev_linux_send(struct netdev *netdev_, int qid
> OVS_UNUSED,
>      }
>
>  free_batch:
> -    dp_packet_delete_batch(batch, may_steal);
> +    dp_packet_delete_batch(batch, true);
>      return error;
>  }
>
> diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
> index b3c57d5..e6ec79a 100644
> --- a/lib/netdev-provider.h
> +++ b/lib/netdev-provider.h
> @@ -347,9 +347,8 @@ struct netdev_class {
>       * If the function returns a non-zero value, some of the packets
> might have
>       * been sent anyway.
>       *
> -     * If 'may_steal' is false, the caller retains ownership of all the
> -     * packets.  If 'may_steal' is true, the caller transfers ownership
> of all
> -     * the packets to the network device, regardless of success.
> +     * The caller transfers ownership of all the packets to the network
> +     * device, regardless of success.
>       *
>       * If 'concurrent_txq' is true, the caller may perform concurrent
> calls
>       * to netdev_send() with the same 'qid'. The netdev provider is
> responsible
> @@ -369,7 +368,7 @@ struct netdev_class {
>       * datapath".  It will also prevent the OVS implementation of bonding
> from
>       * working properly over 'netdev'.) */
>      int (*send)(struct netdev *netdev, int qid, struct dp_packet_batch
> *batch,
> -                bool may_steal, bool concurrent_txq);
> +                bool concurrent_txq);
>
>      /* Registers with the poll loop to wake up from the next call to
>       * poll_block() when the packet transmission queue for 'netdev' has
> diff --git a/lib/netdev.c b/lib/netdev.c
> index b4e570b..fa0c9fc 100644
> --- a/lib/netdev.c
> +++ b/lib/netdev.c
> @@ -771,9 +771,8 @@ netdev_get_pt_mode(const struct netdev *netdev)
>   * If the function returns a non-zero value, some of the packets might
> have
>   * been sent anyway.
>   *
> - * If 'may_steal' is false, the caller retains ownership of all the
> packets.
> - * If 'may_steal' is true, the caller transfers ownership of all the
> packets
> - * to the network device, regardless of success.
> + * The caller transfers ownership of all the packets to the network
> device,
> + * regardless of success.
>   *
>   * If 'concurrent_txq' is true, the caller may perform concurrent calls
>   * to netdev_send() with the same 'qid'. The netdev provider is
> responsible
> @@ -790,15 +789,12 @@ netdev_get_pt_mode(const struct netdev *netdev)
>   * cases this function will always return EOPNOTSUPP. */
>  int
>  netdev_send(struct netdev *netdev, int qid, struct dp_packet_batch *batch,
> -            bool may_steal, bool concurrent_txq)
> +            bool concurrent_txq)
>  {
> -    int error = netdev->netdev_class->send(netdev, qid, batch, may_steal,
> +    int error = netdev->netdev_class->send(netdev, qid, batch,
>                                             concurrent_txq);
>      if (!error) {
>          COVERAGE_INC(netdev_sent);
> -        if (!may_steal) {
> -            dp_packet_batch_reset_cutlen(batch);
> -        }
>      }
>      return error;
>  }
> diff --git a/lib/netdev.h b/lib/netdev.h
> index f8482f7..bdcacf5 100644
> --- a/lib/netdev.h
> +++ b/lib/netdev.h
> @@ -181,7 +181,7 @@ int netdev_rxq_drain(struct netdev_rxq *);
>
>  /* Packet transmission. */
>  int netdev_send(struct netdev *, int qid, struct dp_packet_batch *,
> -                bool may_steal, bool concurrent_txq);
> +                bool concurrent_txq);
>  void netdev_send_wait(struct netdev *, int qid);
>
>  /* Flow offloading. */
> --
> 2.7.4
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
Ilya Maximets Oct. 9, 2017, 6:16 a.m. | #2
On 08.10.2017 12:32, Gao Zhenyu wrote:
> Hi llya,
> 
>   Thanks for working it. Your patch tried to eliminate the may_steal in dpdk qos, because may_steal handled on dpif-netdev layer and always true.
>   But in function dpdk_do_tx_copy, it set the may_steal to false, because the packet buffer were not allocated by dpdk side so cannot released by netdev_dpdk_policer_run(). Otherwise it may hit coredump. Did you test that scenario?
> 
> Thanks
> Zhenyu Gao

Good catch. Thanks.

Following incremental can be used to fix the issue:
----------------------------------------------------------------------------
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 300a0ae..3352ae2 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1538,7 +1538,9 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
             }
             cnt++;
         } else {
-            rte_pktmbuf_free(pkt);
+            /* In case of calling from 'dpdk_do_tx_copy' 'pkt' could be not
+             * a DPDK allocated mbuf. */
+            dp_packet_delete((struct dp_packet *) pkt);
         }
     }
 
----------------------------------------------------------------------------


Best regards, Ilya Maximets.
Gao Zhenyu Oct. 9, 2017, 6:42 a.m. | #3
But the netdev_dpdk_send__ function may release the whole batch of packets.

static inline void
netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
                   struct dp_packet_batch *batch, bool may_steal,
                   bool concurrent_txq)
{
.......
    if (OVS_UNLIKELY(!may_steal ||
                     batch->packets[0]->source != DPBUF_DPDK)) {
        struct netdev *netdev = &dev->up;

        dpdk_do_tx_copy(netdev, qid, batch);  <-------your patch releases
some packets in a batch
        dp_packet_delete_batch(batch, may_steal); <-----------it  releases
all packets in this batch, may hit issue I think
    }

2017-10-09 14:16 GMT+08:00 Ilya Maximets <i.maximets@samsung.com>:

> On 08.10.2017 12:32, Gao Zhenyu wrote:
> > Hi llya,
> >
> >   Thanks for working it. Your patch tried to eliminate the may_steal in
> dpdk qos, because may_steal handled on dpif-netdev layer and always true.
> >   But in function dpdk_do_tx_copy, it set the may_steal to false,
> because the packet buffer were not allocated by dpdk side so cannot
> released by netdev_dpdk_policer_run(). Otherwise it may hit coredump. Did
> you test that scenario?
> >
> > Thanks
> > Zhenyu Gao
>
> Good catch. Thanks.
>
> Following incremental can be used to fix the issue:
> ------------------------------------------------------------
> ----------------
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 300a0ae..3352ae2 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -1538,7 +1538,9 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm
> *meter,
>              }
>              cnt++;
>          } else {
> -            rte_pktmbuf_free(pkt);
> +            /* In case of calling from 'dpdk_do_tx_copy' 'pkt' could be
> not
> +             * a DPDK allocated mbuf. */
> +            dp_packet_delete((struct dp_packet *) pkt);
>          }
>      }
>
> ------------------------------------------------------------
> ----------------
>
>
> Best regards, Ilya Maximets.
>
Ilya Maximets Oct. 9, 2017, 6:49 a.m. | #4
On 09.10.2017 09:16, Ilya Maximets wrote:
> On 08.10.2017 12:32, Gao Zhenyu wrote:
>> Hi llya,
>>
>>   Thanks for working it. Your patch tried to eliminate the may_steal in dpdk qos, because may_steal handled on dpif-netdev layer and always true.
>>   But in function dpdk_do_tx_copy, it set the may_steal to false, because the packet buffer were not allocated by dpdk side so cannot released by netdev_dpdk_policer_run(). Otherwise it may hit coredump. Did you test that scenario?
>>
>> Thanks
>> Zhenyu Gao
> 
> Good catch. Thanks.
> 
> Following incremental can be used to fix the issue:
> ----------------------------------------------------------------------------
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 300a0ae..3352ae2 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -1538,7 +1538,9 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
>              }
>              cnt++;
>          } else {
> -            rte_pktmbuf_free(pkt);
> +            /* In case of calling from 'dpdk_do_tx_copy' 'pkt' could be not
> +             * a DPDK allocated mbuf. */
> +            dp_packet_delete((struct dp_packet *) pkt);
>          }
>      }
>  
> ----------------------------------------------------------------------------
> 
> 
> Best regards, Ilya Maximets.
> 


Sorry, one more change is required to avoid double free:

----------------------------------------------------------------------------
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 3352ae2..bb8bbf3 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1851,6 +1851,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
         cnt = netdev_dpdk_qos_run(dev, (struct rte_mbuf **) batch->packets,
                                   batch_cnt);
         dropped += batch_cnt - cnt;
+        batch->count = cnt;
     }
 
     for (uint32_t i = 0; i < cnt; i++) {
----------------------------------------------------------------------------


OTOH, 'may_steal' in QoS is a different entity. This patch is intended to
remove 'may_steal' from the netdev API, but this one is a variable internal
to netdev-dpdk. So, we can keep it in this patch set and use the patch from v3.

Beside that, I think, some cleanup is needed here anyway.

Best regards, Ilya Maximets.
Ilya Maximets Oct. 9, 2017, 6:50 a.m. | #5
On 09.10.2017 09:42, Gao Zhenyu wrote:
> But the netdev_dpdk_send__ function may release whole batch packets.

Yes, sure. Look at the reply to my previous message.

> 
> static inline void
> netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
>                    struct dp_packet_batch *batch, bool may_steal,
>                    bool concurrent_txq)
> {
> .......
>     if (OVS_UNLIKELY(!may_steal ||
>                      batch->packets[0]->source != DPBUF_DPDK)) {
>         struct netdev *netdev = &dev->up;
> 
>         dpdk_do_tx_copy(netdev, qid, batch);  <-------your patch releases some packets in a batch
>         dp_packet_delete_batch(batch, may_steal); <-----------it  releases all packets in this batch, may hit issue I think
>     }
> 
> 2017-10-09 14:16 GMT+08:00 Ilya Maximets <i.maximets@samsung.com <mailto:i.maximets@samsung.com>>:
> 
>     On 08.10.2017 12:32, Gao Zhenyu wrote:
>     > Hi llya,
>     >
>     >   Thanks for working it. Your patch tried to eliminate the may_steal in dpdk qos, because may_steal handled on dpif-netdev layer and always true.
>     >   But in function dpdk_do_tx_copy, it set the may_steal to false, because the packet buffer were not allocated by dpdk side so cannot released by netdev_dpdk_policer_run(). Otherwise it may hit coredump. Did you test that scenario?
>     >
>     > Thanks
>     > Zhenyu Gao
> 
>     Good catch. Thanks.
> 
>     Following incremental can be used to fix the issue:
>     ----------------------------------------------------------------------------
>     diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>     index 300a0ae..3352ae2 100644
>     --- a/lib/netdev-dpdk.c
>     +++ b/lib/netdev-dpdk.c
>     @@ -1538,7 +1538,9 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
>                  }
>                  cnt++;
>              } else {
>     -            rte_pktmbuf_free(pkt);
>     +            /* In case of calling from 'dpdk_do_tx_copy' 'pkt' could be not
>     +             * a DPDK allocated mbuf. */
>     +            dp_packet_delete((struct dp_packet *) pkt);
>              }
>          }
> 
>     ----------------------------------------------------------------------------
> 
> 
>     Best regards, Ilya Maximets.
> 
>
Eelco Chaudron Oct. 11, 2017, 9:07 a.m. | #6
On 05/10/17 17:05, Ilya Maximets wrote:
> Not needed anymore because 'may_steal' already handled on
> dpif-netdev layer and always true;
>
> Signed-off-by: Ilya Maximets <i.maximets@samsung.com>
This change looks good to me.

Acked-by: Eelco Chaudron <echaudro@redhat.com>

---8<---

Patch

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index c6f5c23..166b73a 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -3251,7 +3251,7 @@  dp_netdev_pmd_flush_output_on_port(struct dp_netdev_pmd_thread *pmd,
         tx_qid = pmd->static_tx_qid;
     }
 
-    netdev_send(p->port->netdev, tx_qid, &p->output_pkts, true, dynamic_txqs);
+    netdev_send(p->port->netdev, tx_qid, &p->output_pkts, dynamic_txqs);
     dp_packet_batch_init(&p->output_pkts);
 }
 
diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c
index 8a4cdb3..4f243b5 100644
--- a/lib/netdev-bsd.c
+++ b/lib/netdev-bsd.c
@@ -680,7 +680,7 @@  netdev_bsd_rxq_drain(struct netdev_rxq *rxq_)
  */
 static int
 netdev_bsd_send(struct netdev *netdev_, int qid OVS_UNUSED,
-                struct dp_packet_batch *batch, bool may_steal,
+                struct dp_packet_batch *batch,
                 bool concurrent_txq OVS_UNUSED)
 {
     struct netdev_bsd *dev = netdev_bsd_cast(netdev_);
@@ -728,7 +728,7 @@  netdev_bsd_send(struct netdev *netdev_, int qid OVS_UNUSED,
     }
 
     ovs_mutex_unlock(&dev->mutex);
-    dp_packet_delete_batch(batch, may_steal);
+    dp_packet_delete_batch(batch, true);
 
     return error;
 }
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index c60f46f..011c6f7 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -279,7 +279,7 @@  struct dpdk_qos_ops {
      * For all QoS implementations it should always be non-null.
      */
     int (*qos_run)(struct qos_conf *qos_conf, struct rte_mbuf **pkts,
-                   int pkt_cnt, bool may_steal);
+                   int pkt_cnt);
 };
 
 /* dpdk_qos_ops for each type of user space QoS implementation */
@@ -1522,8 +1522,7 @@  netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm *meter,
 
 static int
 netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
-                        struct rte_mbuf **pkts, int pkt_cnt,
-                        bool may_steal)
+                        struct rte_mbuf **pkts, int pkt_cnt)
 {
     int i = 0;
     int cnt = 0;
@@ -1539,9 +1538,7 @@  netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
             }
             cnt++;
         } else {
-            if (may_steal) {
-                rte_pktmbuf_free(pkt);
-            }
+            rte_pktmbuf_free(pkt);
         }
     }
 
@@ -1550,13 +1547,12 @@  netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
 
 static int
 ingress_policer_run(struct ingress_policer *policer, struct rte_mbuf **pkts,
-                    int pkt_cnt, bool may_steal)
+                    int pkt_cnt)
 {
     int cnt = 0;
 
     rte_spinlock_lock(&policer->policer_lock);
-    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts,
-                                  pkt_cnt, may_steal);
+    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts, pkt_cnt);
     rte_spinlock_unlock(&policer->policer_lock);
 
     return cnt;
@@ -1660,7 +1656,7 @@  netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
         dropped = nb_rx;
         nb_rx = ingress_policer_run(policer,
                                     (struct rte_mbuf **) batch->packets,
-                                    nb_rx, true);
+                                    nb_rx);
         dropped -= nb_rx;
     }
 
@@ -1699,7 +1695,7 @@  netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch *batch)
         dropped = nb_rx;
         nb_rx = ingress_policer_run(policer,
                                     (struct rte_mbuf **) batch->packets,
-                                    nb_rx, true);
+                                    nb_rx);
         dropped -= nb_rx;
     }
 
@@ -1717,14 +1713,13 @@  netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch *batch)
 }
 
 static inline int
-netdev_dpdk_qos_run(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
-                    int cnt, bool may_steal)
+netdev_dpdk_qos_run(struct netdev_dpdk *dev, struct rte_mbuf **pkts, int cnt)
 {
     struct qos_conf *qos_conf = ovsrcu_get(struct qos_conf *, &dev->qos_conf);
 
     if (qos_conf) {
         rte_spinlock_lock(&qos_conf->lock);
-        cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt, may_steal);
+        cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt);
         rte_spinlock_unlock(&qos_conf->lock);
     }
 
@@ -1797,8 +1792,8 @@  __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
     rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
 
     cnt = netdev_dpdk_filter_packet_len(dev, cur_pkts, cnt);
-    /* Check has QoS has been configured for the netdev */
-    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt, true);
+    /* Check if QoS has been configured for the netdev. */
+    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt);
     dropped = total_pkts - cnt;
 
     do {
@@ -1847,18 +1842,17 @@  dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
     struct rte_mbuf *pkts[PKT_ARRAY_SIZE];
     uint32_t cnt = batch_cnt;
     uint32_t dropped = 0;
+    uint32_t txcnt = 0;
 
     if (dev->type != DPDK_DEV_VHOST) {
         /* Check if QoS has been configured for this netdev. */
         cnt = netdev_dpdk_qos_run(dev, (struct rte_mbuf **) batch->packets,
-                                  batch_cnt, false);
+                                  batch_cnt);
         dropped += batch_cnt - cnt;
     }
 
     dp_packet_batch_apply_cutlen(batch);
 
-    uint32_t txcnt = 0;
-
     for (uint32_t i = 0; i < cnt; i++) {
         struct dp_packet *packet = batch->packets[i];
         uint32_t size = dp_packet_size(packet);
@@ -1904,12 +1898,12 @@  dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
 static int
 netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
                        struct dp_packet_batch *batch,
-                       bool may_steal, bool concurrent_txq OVS_UNUSED)
+                       bool concurrent_txq OVS_UNUSED)
 {
 
-    if (OVS_UNLIKELY(!may_steal || batch->packets[0]->source != DPBUF_DPDK)) {
+    if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
         dpdk_do_tx_copy(netdev, qid, batch);
-        dp_packet_delete_batch(batch, may_steal);
+        dp_packet_delete_batch(batch, true);
     } else {
         dp_packet_batch_apply_cutlen(batch);
         __netdev_dpdk_vhost_send(netdev, qid, batch->packets, batch->count);
@@ -1919,11 +1913,11 @@  netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
 
 static inline void
 netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
-                   struct dp_packet_batch *batch, bool may_steal,
+                   struct dp_packet_batch *batch,
                    bool concurrent_txq)
 {
     if (OVS_UNLIKELY(!(dev->flags & NETDEV_UP))) {
-        dp_packet_delete_batch(batch, may_steal);
+        dp_packet_delete_batch(batch, true);
         return;
     }
 
@@ -1932,12 +1926,11 @@  netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
         rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
     }
 
-    if (OVS_UNLIKELY(!may_steal ||
-                     batch->packets[0]->source != DPBUF_DPDK)) {
+    if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
         struct netdev *netdev = &dev->up;
 
         dpdk_do_tx_copy(netdev, qid, batch);
-        dp_packet_delete_batch(batch, may_steal);
+        dp_packet_delete_batch(batch, true);
     } else {
         int tx_cnt, dropped;
         int batch_cnt = dp_packet_batch_size(batch);
@@ -1946,7 +1939,7 @@  netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
         dp_packet_batch_apply_cutlen(batch);
 
         tx_cnt = netdev_dpdk_filter_packet_len(dev, pkts, batch_cnt);
-        tx_cnt = netdev_dpdk_qos_run(dev, pkts, tx_cnt, true);
+        tx_cnt = netdev_dpdk_qos_run(dev, pkts, tx_cnt);
         dropped = batch_cnt - tx_cnt;
 
         dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, tx_cnt);
@@ -1965,12 +1958,11 @@  netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
 
 static int
 netdev_dpdk_eth_send(struct netdev *netdev, int qid,
-                     struct dp_packet_batch *batch, bool may_steal,
-                     bool concurrent_txq)
+                     struct dp_packet_batch *batch, bool concurrent_txq)
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
 
-    netdev_dpdk_send__(dev, qid, batch, may_steal, concurrent_txq);
+    netdev_dpdk_send__(dev, qid, batch, concurrent_txq);
     return 0;
 }
 
@@ -2937,8 +2929,7 @@  dpdk_ring_open(const char dev_name[], dpdk_port_t *eth_port_id)
 
 static int
 netdev_dpdk_ring_send(struct netdev *netdev, int qid,
-                      struct dp_packet_batch *batch, bool may_steal,
-                      bool concurrent_txq)
+                      struct dp_packet_batch *batch, bool concurrent_txq)
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
     struct dp_packet *packet;
@@ -2951,7 +2942,7 @@  netdev_dpdk_ring_send(struct netdev *netdev, int qid,
         dp_packet_mbuf_rss_flag_reset(packet);
     }
 
-    netdev_dpdk_send__(dev, qid, batch, may_steal, concurrent_txq);
+    netdev_dpdk_send__(dev, qid, batch, concurrent_txq);
     return 0;
 }
 
@@ -3163,15 +3154,13 @@  egress_policer_qos_is_equal(const struct qos_conf *conf,
 }
 
 static int
-egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int pkt_cnt,
-                   bool may_steal)
+egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int pkt_cnt)
 {
     int cnt = 0;
     struct egress_policer *policer =
         CONTAINER_OF(conf, struct egress_policer, qos_conf);
 
-    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
-                                  pkt_cnt, may_steal);
+    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts, pkt_cnt);
 
     return cnt;
 }
diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
index f731af1..57ef13f 100644
--- a/lib/netdev-dummy.c
+++ b/lib/netdev-dummy.c
@@ -1062,7 +1062,7 @@  netdev_dummy_rxq_drain(struct netdev_rxq *rxq_)
 
 static int
 netdev_dummy_send(struct netdev *netdev, int qid OVS_UNUSED,
-                  struct dp_packet_batch *batch, bool may_steal,
+                  struct dp_packet_batch *batch,
                   bool concurrent_txq OVS_UNUSED)
 {
     struct netdev_dummy *dev = netdev_dummy_cast(netdev);
@@ -1132,7 +1132,7 @@  netdev_dummy_send(struct netdev *netdev, int qid OVS_UNUSED,
         ovs_mutex_unlock(&dev->mutex);
     }
 
-    dp_packet_delete_batch(batch, may_steal);
+    dp_packet_delete_batch(batch, true);
 
     return error;
 }
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 2ff3e2b..aaf4899 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -1269,7 +1269,7 @@  netdev_linux_tap_batch_send(struct netdev *netdev_,
  * expected to do additional queuing of packets. */
 static int
 netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED,
-                  struct dp_packet_batch *batch, bool may_steal,
+                  struct dp_packet_batch *batch,
                   bool concurrent_txq OVS_UNUSED)
 {
     int error = 0;
@@ -1305,7 +1305,7 @@  netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED,
     }
 
 free_batch:
-    dp_packet_delete_batch(batch, may_steal);
+    dp_packet_delete_batch(batch, true);
     return error;
 }
 
diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
index b3c57d5..e6ec79a 100644
--- a/lib/netdev-provider.h
+++ b/lib/netdev-provider.h
@@ -347,9 +347,8 @@  struct netdev_class {
      * If the function returns a non-zero value, some of the packets might have
      * been sent anyway.
      *
-     * If 'may_steal' is false, the caller retains ownership of all the
-     * packets.  If 'may_steal' is true, the caller transfers ownership of all
-     * the packets to the network device, regardless of success.
+     * The caller transfers ownership of all the packets to the network
+     * device, regardless of success.
      *
      * If 'concurrent_txq' is true, the caller may perform concurrent calls
      * to netdev_send() with the same 'qid'. The netdev provider is responsible
@@ -369,7 +368,7 @@  struct netdev_class {
      * datapath".  It will also prevent the OVS implementation of bonding from
      * working properly over 'netdev'.) */
     int (*send)(struct netdev *netdev, int qid, struct dp_packet_batch *batch,
-                bool may_steal, bool concurrent_txq);
+                bool concurrent_txq);
 
     /* Registers with the poll loop to wake up from the next call to
      * poll_block() when the packet transmission queue for 'netdev' has
diff --git a/lib/netdev.c b/lib/netdev.c
index b4e570b..fa0c9fc 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -771,9 +771,8 @@  netdev_get_pt_mode(const struct netdev *netdev)
  * If the function returns a non-zero value, some of the packets might have
  * been sent anyway.
  *
- * If 'may_steal' is false, the caller retains ownership of all the packets.
- * If 'may_steal' is true, the caller transfers ownership of all the packets
- * to the network device, regardless of success.
+ * The caller transfers ownership of all the packets to the network device,
+ * regardless of success.
  *
  * If 'concurrent_txq' is true, the caller may perform concurrent calls
  * to netdev_send() with the same 'qid'. The netdev provider is responsible
@@ -790,15 +789,12 @@  netdev_get_pt_mode(const struct netdev *netdev)
  * cases this function will always return EOPNOTSUPP. */
 int
 netdev_send(struct netdev *netdev, int qid, struct dp_packet_batch *batch,
-            bool may_steal, bool concurrent_txq)
+            bool concurrent_txq)
 {
-    int error = netdev->netdev_class->send(netdev, qid, batch, may_steal,
+    int error = netdev->netdev_class->send(netdev, qid, batch,
                                            concurrent_txq);
     if (!error) {
         COVERAGE_INC(netdev_sent);
-        if (!may_steal) {
-            dp_packet_batch_reset_cutlen(batch);
-        }
     }
     return error;
 }
diff --git a/lib/netdev.h b/lib/netdev.h
index f8482f7..bdcacf5 100644
--- a/lib/netdev.h
+++ b/lib/netdev.h
@@ -181,7 +181,7 @@  int netdev_rxq_drain(struct netdev_rxq *);
 
 /* Packet transmission. */
 int netdev_send(struct netdev *, int qid, struct dp_packet_batch *,
-                bool may_steal, bool concurrent_txq);
+                bool concurrent_txq);
 void netdev_send_wait(struct netdev *, int qid);
 
 /* Flow offloading. */