
[ovs-dev,RFC] netdev-dpdk: Narrow down txq critical section.

Message ID 20191202162431.16575-1-david.marchand@redhat.com
State RFC
Series [ovs-dev,RFC] netdev-dpdk: Narrow down txq critical section.

Commit Message

David Marchand Dec. 2, 2019, 4:24 p.m. UTC
tx_lock protects the NIC/vhost queue from concurrent access.
Move it closer to the parts it protects, and let packet duplication (when the
source is not DPDK) and the egress policer run outside of it.

Signed-off-by: David Marchand <david.marchand@redhat.com>
---
I caught this by code review, but I imagine this could make the
contention on the tx lock even worse if broadcasting packets.

---
 lib/netdev-dpdk.c | 43 +++++++++++++++++++++++--------------------
 1 file changed, 23 insertions(+), 20 deletions(-)
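
Not part of the patch: a minimal, self-contained C sketch of the locking pattern this change produces, using a POSIX spinlock and a dummy hw_tx_burst() helper as hypothetical stand-ins for DPDK's rte_spinlock and the real transmit call. It only illustrates that the critical section shrinks to the transmit retry loop itself, with packet copies and QoS left outside; it is not the netdev-dpdk code.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_spinlock_t tx_lock;

/* Stand-in for the driver transmit of one burst; pretends everything is sent. */
static int hw_tx_burst(const int *pkts, int cnt)
{
    (void) pkts;
    return cnt;
}

/* After the patch: only this retry loop runs under the per-queue lock. */
static int eth_tx_burst(int *pkts, int cnt, bool concurrent_txq)
{
    int nb_tx = 0;

    if (concurrent_txq) {
        pthread_spin_lock(&tx_lock);
    }
    while (nb_tx != cnt) {
        nb_tx += hw_tx_burst(pkts + nb_tx, cnt - nb_tx);
    }
    if (concurrent_txq) {
        pthread_spin_unlock(&tx_lock);
    }
    return cnt - nb_tx;              /* Packets that were not transmitted. */
}

int main(void)
{
    int pkts[4] = {0, 1, 2, 3};

    pthread_spin_init(&tx_lock, PTHREAD_PROCESS_PRIVATE);
    /* Packet duplication and egress policing would run here, lock-free. */
    printf("unsent: %d\n", eth_tx_burst(pkts, 4, true));
    pthread_spin_destroy(&tx_lock);
    return 0;
}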

Comments

Ilya Maximets Dec. 3, 2019, 11:17 a.m. UTC | #1
On 02.12.2019 17:24, David Marchand wrote:
> tx_lock protects the NIC/vhost queue from concurrent access.
> Move it closer to the parts it protects, and let packet duplication (when the
> source is not DPDK) and the egress policer run outside of it.
> 
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> ---
> I caught this by code review, but I imagine this could make the
> contention on the tx lock even worse if broadcasting packets.

Idea is OK, I think.  I didn't look at the code, but, IIRC, there was
already a similar patch on the mailing list.  Hard to find.

> 
> ---
>  lib/netdev-dpdk.c | 43 +++++++++++++++++++++++--------------------
>  1 file changed, 23 insertions(+), 20 deletions(-)
> 
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 4c9f122b0..9dd43f2a9 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -2053,10 +2053,15 @@ netdev_dpdk_rxq_dealloc(struct netdev_rxq *rxq)
>   * Returns the number of packets that weren't transmitted. */
>  static inline int
>  netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
> -                         struct rte_mbuf **pkts, int cnt)
> +                         struct rte_mbuf **pkts, int cnt, bool concurrent_txq)
>  {
>      uint32_t nb_tx = 0;
>  
> +    if (OVS_UNLIKELY(concurrent_txq)) {
> +        qid = qid % dev->up.n_txq;
> +        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
> +    }
> +
>      while (nb_tx != cnt) {
>          uint32_t ret;
>  
> @@ -2068,6 +2073,10 @@ netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
>          nb_tx += ret;
>      }
>  
> +    if (OVS_UNLIKELY(concurrent_txq)) {
> +        rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
> +    }
> +
>      if (OVS_UNLIKELY(nb_tx != cnt)) {
>          /* Free buffers, which we couldn't transmit, one at a time (each
>           * packet could come from a different mempool) */
> @@ -2412,11 +2421,6 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
>          goto out;
>      }
>  
> -    if (OVS_UNLIKELY(!rte_spinlock_trylock(&dev->tx_q[qid].tx_lock))) {
> -        COVERAGE_INC(vhost_tx_contention);
> -        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
> -    }
> -
>      cnt = netdev_dpdk_filter_packet_len(dev, cur_pkts, cnt);
>      sw_stats_add.tx_mtu_exceeded_drops = total_packets - cnt;
>  
> @@ -2427,6 +2431,11 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
>  
>      n_packets_to_free = cnt;
>  
> +    if (OVS_UNLIKELY(!rte_spinlock_trylock(&dev->tx_q[qid].tx_lock))) {
> +        COVERAGE_INC(vhost_tx_contention);
> +        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
> +    }
> +
>      do {
>          int vhost_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ;
>          unsigned int tx_pkts;
> @@ -2468,7 +2477,8 @@ out:
>  
>  /* Tx function. Transmit packets indefinitely */
>  static void
> -dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
> +dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch,
> +                bool concurrent_txq)
>      OVS_NO_THREAD_SAFETY_ANALYSIS
>  {
>      const size_t batch_cnt = dp_packet_batch_size(batch);
> @@ -2527,7 +2537,8 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
>              __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) pkts,
>                                       txcnt);
>          } else {
> -            tx_failure = netdev_dpdk_eth_tx_burst(dev, qid, pkts, txcnt);
> +            tx_failure = netdev_dpdk_eth_tx_burst(dev, qid, pkts, txcnt,
> +                                                  concurrent_txq);
>          }
>      }
>  
> @@ -2549,7 +2560,7 @@ netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
>  {
>  
>      if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
> -        dpdk_do_tx_copy(netdev, qid, batch);
> +        dpdk_do_tx_copy(netdev, qid, batch, true);
>          dp_packet_delete_batch(batch, true);
>      } else {
>          __netdev_dpdk_vhost_send(netdev, qid, batch->packets,
> @@ -2568,15 +2579,10 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
>          return;
>      }
>  
> -    if (OVS_UNLIKELY(concurrent_txq)) {
> -        qid = qid % dev->up.n_txq;
> -        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
> -    }
> -
>      if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
>          struct netdev *netdev = &dev->up;
>  
> -        dpdk_do_tx_copy(netdev, qid, batch);
> +        dpdk_do_tx_copy(netdev, qid, batch, concurrent_txq);
>          dp_packet_delete_batch(batch, true);
>      } else {
>          struct netdev_dpdk_sw_stats *sw_stats = dev->sw_stats;
> @@ -2591,7 +2597,8 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
>          tx_cnt = netdev_dpdk_qos_run(dev, pkts, tx_cnt, true);
>          qos_drops -= tx_cnt;
>  
> -        tx_failure = netdev_dpdk_eth_tx_burst(dev, qid, pkts, tx_cnt);
> +        tx_failure = netdev_dpdk_eth_tx_burst(dev, qid, pkts, tx_cnt,
> +                                              concurrent_txq);
>  
>          dropped = tx_failure + mtu_drops + qos_drops;
>          if (OVS_UNLIKELY(dropped)) {
> @@ -2603,10 +2610,6 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
>              rte_spinlock_unlock(&dev->stats_lock);
>          }
>      }
> -
> -    if (OVS_UNLIKELY(concurrent_txq)) {
> -        rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
> -    }
>  }
>  
>  static int
>
David Marchand Dec. 3, 2019, 12:40 p.m. UTC | #2
On Tue, Dec 3, 2019 at 12:17 PM Ilya Maximets <i.maximets@ovn.org> wrote:
>
> On 02.12.2019 17:24, David Marchand wrote:
> > tx_lock protects the NIC/vhost queue from concurrent access.
> > Move it closer to the parts it protects, and let packet duplication (when
> > the source is not DPDK) and the egress policer run outside of it.
> >
> > Signed-off-by: David Marchand <david.marchand@redhat.com>
> > ---
> > I caught this by code review, but I imagine this could make the
> > contention on the tx lock even worse if broadcasting packets.
>
> Idea is OK, I think.  I didn't look at the code, but, IIRC, there was
> already a similar patch on the mailing list.  Hard to find.

Ah indeed, I found this:
https://mail.openvswitch.org/pipermail/ovs-dev/2019-January/355743.html
But I did not find it in patchwork, not sure why, copied its author.

My RFC patch goes one step further, since it also moves the
allocations out of the lock.


--
David Marchand
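
Not part of the thread or the patch: a small, self-contained sketch of the trylock-then-lock pattern that the patch relocates in __netdev_dpdk_vhost_send(), with a plain POSIX spinlock and a C11 atomic counter as hypothetical stand-ins for rte_spinlock and COVERAGE_INC(vhost_tx_contention). The point is that, after the change, filtering, QoS and any copies happen before this lock is taken, so the counter only measures contention on the actual enqueue.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static pthread_spinlock_t tx_lock;
static atomic_uint tx_contention;    /* Stand-in for the coverage counter. */

/* Take the per-queue lock, counting how often the fast path loses the race. */
static void txq_lock(void)
{
    if (pthread_spin_trylock(&tx_lock) != 0) {
        atomic_fetch_add(&tx_contention, 1);
        pthread_spin_lock(&tx_lock);
    }
}

static void txq_unlock(void)
{
    pthread_spin_unlock(&tx_lock);
}

int main(void)
{
    pthread_spin_init(&tx_lock, PTHREAD_PROCESS_PRIVATE);

    /* Length filtering, QoS and packet copies would run here, unlocked. */
    txq_lock();
    /* Only the enqueue to the vhost ring is serialized. */
    txq_unlock();

    printf("contended %u times\n", atomic_load(&tx_contention));
    pthread_spin_destroy(&tx_lock);
    return 0;
}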
Li RongQing Dec. 4, 2019, 2:03 a.m. UTC | #3
> -----Original Message-----
> From: David Marchand [mailto:david.marchand@redhat.com]
> Sent: December 3, 2019 20:40
> To: Ilya Maximets <i.maximets@ovn.org>
> Cc: ovs dev <dev@openvswitch.org>; Ian Stokes <ian.stokes@intel.com>;
> Li,Rongqing <lirongqing@baidu.com>
> Subject: Re: [RFC PATCH] netdev-dpdk: Narrow down txq critical section.
> 
> On Tue, Dec 3, 2019 at 12:17 PM Ilya Maximets <i.maximets@ovn.org> wrote:
> >
> > On 02.12.2019 17:24, David Marchand wrote:
> > > tx_lock protects the NIC/vhost queue from concurrent access.
> > > Move it closer to the parts it protects, and let packet duplication
> > > (when the source is not DPDK) and the egress policer run outside of it.
> > >
> > > Signed-off-by: David Marchand <david.marchand@redhat.com>
> > > ---
> > > I caught this by code review, but I imagine this could make the
> > > contention on the tx lock even worse if broadcasting packets.
> >
> > Idea is OK, I think.  I didn't look at the code, but, IIRC, there was
> > already a similar patch on the mailing list.  Hard to find.
> 
> Ah indeed, I found this:
> https://mail.openvswitch.org/pipermail/ovs-dev/2019-January/355743.html
> But I did not find it in patchwork, not sure why, copied its author.
> 
> My RFC patch goes one step further, since it also moves the allocations out of
> the lock.
> 

You can continue with your RFC; I have no time to rework this patch.

And I support your RFC: the benefit may not show up in a benchmark, but it should always give a small improvement.

-LI
> 
> --
> David Marchand

Patch

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 4c9f122b0..9dd43f2a9 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -2053,10 +2053,15 @@  netdev_dpdk_rxq_dealloc(struct netdev_rxq *rxq)
  * Returns the number of packets that weren't transmitted. */
 static inline int
 netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
-                         struct rte_mbuf **pkts, int cnt)
+                         struct rte_mbuf **pkts, int cnt, bool concurrent_txq)
 {
     uint32_t nb_tx = 0;
 
+    if (OVS_UNLIKELY(concurrent_txq)) {
+        qid = qid % dev->up.n_txq;
+        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
+    }
+
     while (nb_tx != cnt) {
         uint32_t ret;
 
@@ -2068,6 +2073,10 @@  netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
         nb_tx += ret;
     }
 
+    if (OVS_UNLIKELY(concurrent_txq)) {
+        rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
+    }
+
     if (OVS_UNLIKELY(nb_tx != cnt)) {
         /* Free buffers, which we couldn't transmit, one at a time (each
          * packet could come from a different mempool) */
@@ -2412,11 +2421,6 @@  __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
         goto out;
     }
 
-    if (OVS_UNLIKELY(!rte_spinlock_trylock(&dev->tx_q[qid].tx_lock))) {
-        COVERAGE_INC(vhost_tx_contention);
-        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
-    }
-
     cnt = netdev_dpdk_filter_packet_len(dev, cur_pkts, cnt);
     sw_stats_add.tx_mtu_exceeded_drops = total_packets - cnt;
 
@@ -2427,6 +2431,11 @@  __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
 
     n_packets_to_free = cnt;
 
+    if (OVS_UNLIKELY(!rte_spinlock_trylock(&dev->tx_q[qid].tx_lock))) {
+        COVERAGE_INC(vhost_tx_contention);
+        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
+    }
+
     do {
         int vhost_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ;
         unsigned int tx_pkts;
@@ -2468,7 +2477,8 @@  out:
 
 /* Tx function. Transmit packets indefinitely */
 static void
-dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
+dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch,
+                bool concurrent_txq)
     OVS_NO_THREAD_SAFETY_ANALYSIS
 {
     const size_t batch_cnt = dp_packet_batch_size(batch);
@@ -2527,7 +2537,8 @@  dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
             __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) pkts,
                                      txcnt);
         } else {
-            tx_failure = netdev_dpdk_eth_tx_burst(dev, qid, pkts, txcnt);
+            tx_failure = netdev_dpdk_eth_tx_burst(dev, qid, pkts, txcnt,
+                                                  concurrent_txq);
         }
     }
 
@@ -2549,7 +2560,7 @@  netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
 {
 
     if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
-        dpdk_do_tx_copy(netdev, qid, batch);
+        dpdk_do_tx_copy(netdev, qid, batch, true);
         dp_packet_delete_batch(batch, true);
     } else {
         __netdev_dpdk_vhost_send(netdev, qid, batch->packets,
@@ -2568,15 +2579,10 @@  netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
         return;
     }
 
-    if (OVS_UNLIKELY(concurrent_txq)) {
-        qid = qid % dev->up.n_txq;
-        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
-    }
-
     if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
         struct netdev *netdev = &dev->up;
 
-        dpdk_do_tx_copy(netdev, qid, batch);
+        dpdk_do_tx_copy(netdev, qid, batch, concurrent_txq);
         dp_packet_delete_batch(batch, true);
     } else {
         struct netdev_dpdk_sw_stats *sw_stats = dev->sw_stats;
@@ -2591,7 +2597,8 @@  netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
         tx_cnt = netdev_dpdk_qos_run(dev, pkts, tx_cnt, true);
         qos_drops -= tx_cnt;
 
-        tx_failure = netdev_dpdk_eth_tx_burst(dev, qid, pkts, tx_cnt);
+        tx_failure = netdev_dpdk_eth_tx_burst(dev, qid, pkts, tx_cnt,
+                                              concurrent_txq);
 
         dropped = tx_failure + mtu_drops + qos_drops;
         if (OVS_UNLIKELY(dropped)) {
@@ -2603,10 +2610,6 @@  netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
             rte_spinlock_unlock(&dev->stats_lock);
         }
     }
-
-    if (OVS_UNLIKELY(concurrent_txq)) {
-        rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
-    }
 }
 
 static int