[ovs-dev,3/5] dpif-netdev: Do not sleep when swapping queues.
diff mbox series

Message ID 1558621432-13363-4-git-send-email-david.marchand@redhat.com
State Changes Requested
Headers show
Series
  • Quicker pmd threads reloads
Related show

Commit Message

David Marchand May 23, 2019, 2:23 p.m. UTC
When swapping queues from a pmd thread to another (q0 polled by pmd0/q1
polled by pmd1 -> q1 polled by pmd0/q0 polled by pmd1), the current
"Step 5" puts both pmds to sleep waiting for the control thread to wake
them up later.

Prefer to make them spin in such a case to avoid sleeping an
undeterministic amount of time.

Signed-off-by: David Marchand <david.marchand@redhat.com>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
---
 lib/dpif-netdev.c | 47 ++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 42 insertions(+), 5 deletions(-)

Comments

Stokes, Ian June 19, 2019, 1:39 p.m. UTC | #1
On 5/23/2019 3:23 PM, David Marchand wrote:
> When swapping queues from a pmd thread to another (q0 polled by pmd0/q1
> polled by pmd1 -> q1 polled by pmd0/q0 polled by pmd1), the current
> "Step 5" puts both pmds to sleep waiting for the control thread to wake
> them up later.
> 
> Prefer to make them spin in such a case to avoid sleeping an
> undeterministic amount of time.
> 
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> Acked-by: Eelco Chaudron <echaudro@redhat.com>
> ---
>   lib/dpif-netdev.c | 47 ++++++++++++++++++++++++++++++++++++++++++-----
>   1 file changed, 42 insertions(+), 5 deletions(-)
> 
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 23cf6a6..243c1ce 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -683,6 +683,7 @@ struct dp_netdev_pmd_thread {
>       struct seq *reload_seq;
>       uint64_t last_reload_seq;
>       atomic_bool reload;             /* Do we need to reload ports? */
> +    atomic_bool wait_for_reload;    /* Can we busy wait for the next reload? */
>       atomic_bool exit;               /* For terminating the pmd thread. */
>       pthread_t thread;
>       unsigned core_id;               /* CPU core id of this pmd thread. */
> @@ -4896,6 +4897,33 @@ reconfigure_datapath(struct dp_netdev *dp)
>           HMAP_FOR_EACH_SAFE (poll, poll_next, node, &pmd->poll_list) {
>               if (poll->rxq->pmd != pmd) {
>                   dp_netdev_del_rxq_from_pmd(pmd, poll);

I'm a little confused by the block below.

> +
> +                /* This pmd might sleep after this step reload if it has no
> +                 * rxq remaining. Can we tell it to busy wait for new rxq at
> +                 * Step 6 ? */

So whats the typical cases we target here, I would think this would 
occur if PMDs have been isolated and there are no non isolated queues 
available for the rxq to be assigned to?

> +                if (hmap_count(&pmd->poll_list) == 0) {
> +                    HMAP_FOR_EACH (port, node, &dp->ports) {
> +                        int qid;
> +
> +                        if (!netdev_is_pmd(port->netdev)) {
> +                            continue;
> +                        }
> +
> +                        for (qid = 0; qid < port->n_rxq; qid++) {
> +                            struct dp_netdev_rxq *q = &port->rxqs[qid];
> +
> +                            if (q->pmd == pmd) {
> +                                atomic_store_relaxed(&q->pmd->wait_for_reload,
> +                                                     true);

I was a little confused here, are we marking wait_for_reload true here 
so that reload in step 6 will handle any new assignment? Does this not 
put it in the busy-wait state already here in step 5? It's just from 
reading the comment it looked like busy wait status was expected in step 
6 rather than here.

Ian
> +                                break;
> +                            }
> +                        }
> +
> +                        if (qid != port->n_rxq) {
> +                            break;
> +                        }
> +                    }
> +                }
>               }
>           }
>           ovs_mutex_unlock(&pmd->port_mutex);
> @@ -5413,7 +5441,9 @@ pmd_thread_main(void *f_)
>       struct pmd_perf_stats *s = &pmd->perf_stats;
>       unsigned int lc = 0;
>       struct polled_queue *poll_list;
> +    bool wait_for_reload = false;
>       bool exiting;
> +    bool reload;
>       int poll_cnt;
>       int i;
>       int process_packets = 0;
> @@ -5441,9 +5471,16 @@ reload:
>       }
>   
>       if (!poll_cnt) {
> -        while (seq_read(pmd->reload_seq) == pmd->last_reload_seq) {
> -            seq_wait(pmd->reload_seq, pmd->last_reload_seq);
> -            poll_block();
> +        /* Don't sleep, control thread will ask for a reload shortly. */
> +        if (wait_for_reload) {
> +            do {
> +                atomic_read_relaxed(&pmd->reload, &reload);
> +            } while (!reload);
> +        } else {
> +            while (seq_read(pmd->reload_seq) == pmd->last_reload_seq) {
> +                seq_wait(pmd->reload_seq, pmd->last_reload_seq);
> +                poll_block();
> +            }
>           }
>           lc = UINT_MAX;
>       }
> @@ -5482,8 +5519,6 @@ reload:
>           }
>   
>           if (lc++ > 1024) {
> -            bool reload;
> -
>               lc = 0;
>   
>               coverage_try_clear();
> @@ -5503,6 +5538,7 @@ reload:
>       ovs_mutex_unlock(&pmd->perf_stats.stats_mutex);
>   
>       poll_cnt = pmd_load_queues_and_ports(pmd, &poll_list);
> +    atomic_read_relaxed(&pmd->wait_for_reload, &wait_for_reload);
>       atomic_read_relaxed(&pmd->exit, &exiting);
>       /* Signal here to make sure the pmd finishes
>        * reloading the updated configuration. */
> @@ -5839,6 +5875,7 @@ dp_netdev_pmd_reload_done(struct dp_netdev_pmd_thread *pmd)
>   {
>       uint32_t old;
>   
> +    atomic_store_relaxed(&pmd->wait_for_reload, false);
>       atomic_store_relaxed(&pmd->reload, false);
>       pmd->last_reload_seq = seq_read(pmd->reload_seq);
>       atomic_sub_explicit(&pmd->dp->reloading_pmds, 1, &old,
>
David Marchand June 20, 2019, 2:31 p.m. UTC | #2
On Wed, Jun 19, 2019 at 3:40 PM Ian Stokes <ian.stokes@intel.com> wrote:

> On 5/23/2019 3:23 PM, David Marchand wrote:
> > When swapping queues from a pmd thread to another (q0 polled by pmd0/q1
> > polled by pmd1 -> q1 polled by pmd0/q0 polled by pmd1), the current
> > "Step 5" puts both pmds to sleep waiting for the control thread to wake
> > them up later.
> >
> > Prefer to make them spin in such a case to avoid sleeping an
> > undeterministic amount of time.
> >
> > Signed-off-by: David Marchand <david.marchand@redhat.com>
> > Acked-by: Eelco Chaudron <echaudro@redhat.com>
> > ---
> >   lib/dpif-netdev.c | 47 ++++++++++++++++++++++++++++++++++++++++++-----
> >   1 file changed, 42 insertions(+), 5 deletions(-)
> >
> > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> > index 23cf6a6..243c1ce 100644
> > --- a/lib/dpif-netdev.c
> > +++ b/lib/dpif-netdev.c
> > @@ -683,6 +683,7 @@ struct dp_netdev_pmd_thread {
> >       struct seq *reload_seq;
> >       uint64_t last_reload_seq;
> >       atomic_bool reload;             /* Do we need to reload ports? */
> > +    atomic_bool wait_for_reload;    /* Can we busy wait for the next
> reload? */
> >       atomic_bool exit;               /* For terminating the pmd thread.
> */
> >       pthread_t thread;
> >       unsigned core_id;               /* CPU core id of this pmd thread.
> */
> > @@ -4896,6 +4897,33 @@ reconfigure_datapath(struct dp_netdev *dp)
> >           HMAP_FOR_EACH_SAFE (poll, poll_next, node, &pmd->poll_list) {
> >               if (poll->rxq->pmd != pmd) {
> >                   dp_netdev_del_rxq_from_pmd(pmd, poll);
>
> I'm a little confused by the block below.
>
> > +
> > +                /* This pmd might sleep after this step reload if it
> has no
> > +                 * rxq remaining. Can we tell it to busy wait for new
> rxq at
> > +                 * Step 6 ? */
>
> So whats the typical cases we target here, I would think this would
> occur if PMDs have been isolated and there are no non isolated queues
> available for the rxq to be assigned to?
>

It can happen in any situation when the rebalance code decides to reassign
rxqs in a way that, for a given pmd, the old polling list intersection with
the new polling list is null.
Because of this null intersection, between step 5 and step 6 reload
notifications, the pmd sleeps until it is woken on the poll_block().



> > +                if (hmap_count(&pmd->poll_list) == 0) {
> > +                    HMAP_FOR_EACH (port, node, &dp->ports) {
> > +                        int qid;
> > +
> > +                        if (!netdev_is_pmd(port->netdev)) {
> > +                            continue;
> > +                        }
> > +
> > +                        for (qid = 0; qid < port->n_rxq; qid++) {
> > +                            struct dp_netdev_rxq *q = &port->rxqs[qid];
> > +
> > +                            if (q->pmd == pmd) {
> > +
> atomic_store_relaxed(&q->pmd->wait_for_reload,
> > +                                                     true);
>
> I was a little confused here, are we marking wait_for_reload true here
> so that reload in step 6 will handle any new assignment? Does this not
> put it in the busy-wait state already here in step 5? It's just from
> reading the comment it looked like busy wait status was expected in step
> 6 rather than here.
>

Nop, we are in step 5, so yes, the comment should say "until" instead of
"at" ?
Stokes, Ian June 21, 2019, 7:22 p.m. UTC | #3
On 6/20/2019 3:31 PM, David Marchand wrote:
> 
> 
> 
> On Wed, Jun 19, 2019 at 3:40 PM Ian Stokes <ian.stokes@intel.com 
> <mailto:ian.stokes@intel.com>> wrote:
> 
>     On 5/23/2019 3:23 PM, David Marchand wrote:
>      > When swapping queues from a pmd thread to another (q0 polled by
>     pmd0/q1
>      > polled by pmd1 -> q1 polled by pmd0/q0 polled by pmd1), the current
>      > "Step 5" puts both pmds to sleep waiting for the control thread
>     to wake
>      > them up later.
>      >
>      > Prefer to make them spin in such a case to avoid sleeping an
>      > undeterministic amount of time.
>      >
>      > Signed-off-by: David Marchand <david.marchand@redhat.com
>     <mailto:david.marchand@redhat.com>>
>      > Acked-by: Eelco Chaudron <echaudro@redhat.com
>     <mailto:echaudro@redhat.com>>
>      > ---
>      >   lib/dpif-netdev.c | 47
>     ++++++++++++++++++++++++++++++++++++++++++-----
>      >   1 file changed, 42 insertions(+), 5 deletions(-)
>      >
>      > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>      > index 23cf6a6..243c1ce 100644
>      > --- a/lib/dpif-netdev.c
>      > +++ b/lib/dpif-netdev.c
>      > @@ -683,6 +683,7 @@ struct dp_netdev_pmd_thread {
>      >       struct seq *reload_seq;
>      >       uint64_t last_reload_seq;
>      >       atomic_bool reload;             /* Do we need to reload
>     ports? */
>      > +    atomic_bool wait_for_reload;    /* Can we busy wait for the
>     next reload? */
>      >       atomic_bool exit;               /* For terminating the pmd
>     thread. */
>      >       pthread_t thread;
>      >       unsigned core_id;               /* CPU core id of this pmd
>     thread. */
>      > @@ -4896,6 +4897,33 @@ reconfigure_datapath(struct dp_netdev *dp)
>      >           HMAP_FOR_EACH_SAFE (poll, poll_next, node,
>     &pmd->poll_list) {
>      >               if (poll->rxq->pmd != pmd) {
>      >                   dp_netdev_del_rxq_from_pmd(pmd, poll);
> 
>     I'm a little confused by the block below.
> 
>      > +
>      > +                /* This pmd might sleep after this step reload
>     if it has no
>      > +                 * rxq remaining. Can we tell it to busy wait
>     for new rxq at
>      > +                 * Step 6 ? */
> 
>     So whats the typical cases we target here, I would think this would
>     occur if PMDs have been isolated and there are no non isolated queues
>     available for the rxq to be assigned to?
> 
> 
> It can happen in any situation when the rebalance code decides to 
> reassign rxqs in a way that, for a given pmd, the old polling list 
> intersection with the new polling list is null.
> Because of this null intersection, between step 5 and step 6 reload 
> notifications, the pmd sleeps until it is woken on the poll_block().
> 

Ah, ok, now that makes a little more sense. From what i've seen, the 
rest of the series look ok, I just want to run a few more tests with 
this scenario in mind.

> 
> 
>      > +                if (hmap_count(&pmd->poll_list) == 0) {
>      > +                    HMAP_FOR_EACH (port, node, &dp->ports) {
>      > +                        int qid;
>      > +
>      > +                        if (!netdev_is_pmd(port->netdev)) {
>      > +                            continue;
>      > +                        }
>      > +
>      > +                        for (qid = 0; qid < port->n_rxq; qid++) {
>      > +                            struct dp_netdev_rxq *q =
>     &port->rxqs[qid];
>      > +
>      > +                            if (q->pmd == pmd) {
>      > +                               
>     atomic_store_relaxed(&q->pmd->wait_for_reload,
>      > +                                                     true);
> 
>     I was a little confused here, are we marking wait_for_reload true here
>     so that reload in step 6 will handle any new assignment? Does this not
>     put it in the busy-wait state already here in step 5? It's just from
>     reading the comment it looked like busy wait status was expected in
>     step
>     6 rather than here.
> 
> 
> Nop, we are in step 5, so yes, the comment should say "until" instead of 
> "at" ?

Perfect.

Thanks
Ian
> 
> 
> -- 
> David Marchand
Ilya Maximets June 25, 2019, 1:46 p.m. UTC | #4
On 23.05.2019 17:23, David Marchand wrote:
> When swapping queues from a pmd thread to another (q0 polled by pmd0/q1
> polled by pmd1 -> q1 polled by pmd0/q0 polled by pmd1), the current
> "Step 5" puts both pmds to sleep waiting for the control thread to wake
> them up later.
> 
> Prefer to make them spin in such a case to avoid sleeping an
> undeterministic amount of time.
> 
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> Acked-by: Eelco Chaudron <echaudro@redhat.com>
> ---
>  lib/dpif-netdev.c | 47 ++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 42 insertions(+), 5 deletions(-)
> 
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 23cf6a6..243c1ce 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -683,6 +683,7 @@ struct dp_netdev_pmd_thread {
>      struct seq *reload_seq;
>      uint64_t last_reload_seq;
>      atomic_bool reload;             /* Do we need to reload ports? */
> +    atomic_bool wait_for_reload;    /* Can we busy wait for the next reload? */
>      atomic_bool exit;               /* For terminating the pmd thread. */
>      pthread_t thread;
>      unsigned core_id;               /* CPU core id of this pmd thread. */
> @@ -4896,6 +4897,33 @@ reconfigure_datapath(struct dp_netdev *dp)
>          HMAP_FOR_EACH_SAFE (poll, poll_next, node, &pmd->poll_list) {
>              if (poll->rxq->pmd != pmd) {
>                  dp_netdev_del_rxq_from_pmd(pmd, poll);
> +
> +                /* This pmd might sleep after this step reload if it has no
> +                 * rxq remaining. Can we tell it to busy wait for new rxq at
> +                 * Step 6 ? */
> +                if (hmap_count(&pmd->poll_list) == 0) {
> +                    HMAP_FOR_EACH (port, node, &dp->ports) {
> +                        int qid;
> +
> +                        if (!netdev_is_pmd(port->netdev)) {
> +                            continue;
> +                        }
> +
> +                        for (qid = 0; qid < port->n_rxq; qid++) {
> +                            struct dp_netdev_rxq *q = &port->rxqs[qid];
> +
> +                            if (q->pmd == pmd) {
> +                                atomic_store_relaxed(&q->pmd->wait_for_reload,
> +                                                     true);
> +                                break;

8 levels of indentation make me feel frustrated.

What do you think about following incremental:

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 34ac09322..adc095579 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -4810,6 +4834,7 @@ reconfigure_datapath(struct dp_netdev *dp)
 {
     struct dp_netdev_pmd_thread *pmd;
     struct dp_netdev_port *port;
+    struct hmapx busy_threads = HMAPX_INITIALIZER(&busy_threads);
     int wanted_txqs;
 
     dp->last_reconfigure_seq = seq_read(dp->reconfigure_seq);
@@ -4895,6 +4920,20 @@ reconfigure_datapath(struct dp_netdev *dp)
     rxq_scheduling(dp, false);
 
     /* Step 5: Remove queues not compliant with new scheduling. */
+
+    /* Count all the threads that will have at least one queue to poll. */
+    HMAP_FOR_EACH (port, node, &dp->ports) {
+        for (int qid = 0; qid < port->n_rxq; qid++) {
+            struct dp_netdev_rxq *q = &port->rxqs[qid];
+
+            if (q->pmd) {
+                hmapx_add(&busy_threads, q->pmd);
+            }
+        }
+    }
+
+    /* Remove queues not compliant with new scheduling.  Asking busy threads
+     * to busy-wait for a new queue assignment. */
     CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
         struct rxq_poll *poll, *poll_next;
 
@@ -4903,37 +4942,20 @@ reconfigure_datapath(struct dp_netdev *dp)
             if (poll->rxq->pmd != pmd) {
                 dp_netdev_del_rxq_from_pmd(pmd, poll);
 
-                /* This pmd might sleep after this step reload if it has no
-                 * rxq remaining. Can we tell it to busy wait for new rxq at
-                 * Step 6 ? */
-                if (hmap_count(&pmd->poll_list) == 0) {
-                    HMAP_FOR_EACH (port, node, &dp->ports) {
-                        int qid;
-
-                        if (!netdev_is_pmd(port->netdev)) {
-                            continue;
-                        }
-
-                        for (qid = 0; qid < port->n_rxq; qid++) {
-                            struct dp_netdev_rxq *q = &port->rxqs[qid];
-
-                            if (q->pmd == pmd) {
-                                atomic_store_relaxed(&q->pmd->wait_for_reload,
-                                                     true);
-                                break;
-                            }
-                        }
-
-                        if (qid != port->n_rxq) {
-                            break;
-                        }
-                    }
+                /* This thread might sleep after this step if it has no rxq
+                 * remaining.  Tell it to busy wait for a new assignment
+                 * if it has at least one scheduled queue. */
+                if (hmap_count(&pmd->poll_list) == 0
+                    && hmapx_contains(&busy_threads, pmd)) {
+                    atomic_store_relaxed(&pmd->wait_for_reload, true);
                 }
             }
         }
         ovs_mutex_unlock(&pmd->port_mutex);
     }
 
+    hmapx_destroy(&busy_threads);
+
     /* Reload affected pmd threads.  We must wait for the pmd threads to remove
      * the old queues before readding them, otherwise a queue can be polled by
      * two threads at the same time. */
---

?

> +                            }
> +                        }
> +
> +                        if (qid != port->n_rxq) {
> +                            break;
> +                        }
> +                    }
> +                }
>              }
>          }
>          ovs_mutex_unlock(&pmd->port_mutex);
> @@ -5413,7 +5441,9 @@ pmd_thread_main(void *f_)
>      struct pmd_perf_stats *s = &pmd->perf_stats;
>      unsigned int lc = 0;
>      struct polled_queue *poll_list;
> +    bool wait_for_reload = false;
>      bool exiting;
> +    bool reload;
>      int poll_cnt;
>      int i;
>      int process_packets = 0;
> @@ -5441,9 +5471,16 @@ reload:
>      }
>  
>      if (!poll_cnt) {
> -        while (seq_read(pmd->reload_seq) == pmd->last_reload_seq) {
> -            seq_wait(pmd->reload_seq, pmd->last_reload_seq);
> -            poll_block();
> +        /* Don't sleep, control thread will ask for a reload shortly. */

This comment should be moved inside the 'if' because it belongs only to
one branch.

> +        if (wait_for_reload) {
> +            do {
> +                atomic_read_relaxed(&pmd->reload, &reload);
> +            } while (!reload);
> +        } else {
> +            while (seq_read(pmd->reload_seq) == pmd->last_reload_seq) {
> +                seq_wait(pmd->reload_seq, pmd->last_reload_seq);
> +                poll_block();
> +            }
>          }
>          lc = UINT_MAX;
>      }
> @@ -5482,8 +5519,6 @@ reload:
>          }
>  
>          if (lc++ > 1024) {
> -            bool reload;
> -
>              lc = 0;
>  
>              coverage_try_clear();
> @@ -5503,6 +5538,7 @@ reload:
>      ovs_mutex_unlock(&pmd->perf_stats.stats_mutex);
>  
>      poll_cnt = pmd_load_queues_and_ports(pmd, &poll_list);
> +    atomic_read_relaxed(&pmd->wait_for_reload, &wait_for_reload);
>      atomic_read_relaxed(&pmd->exit, &exiting);
>      /* Signal here to make sure the pmd finishes
>       * reloading the updated configuration. */
> @@ -5839,6 +5875,7 @@ dp_netdev_pmd_reload_done(struct dp_netdev_pmd_thread *pmd)
>  {
>      uint32_t old;
>  
> +    atomic_store_relaxed(&pmd->wait_for_reload, false);
>      atomic_store_relaxed(&pmd->reload, false);
>      pmd->last_reload_seq = seq_read(pmd->reload_seq);
>      atomic_sub_explicit(&pmd->dp->reloading_pmds, 1, &old,
>
David Marchand June 25, 2019, 2:43 p.m. UTC | #5
On Tue, Jun 25, 2019 at 3:47 PM Ilya Maximets <i.maximets@samsung.com>
wrote:

> On 23.05.2019 17:23, David Marchand wrote:
> > When swapping queues from a pmd thread to another (q0 polled by pmd0/q1
> > polled by pmd1 -> q1 polled by pmd0/q0 polled by pmd1), the current
> > "Step 5" puts both pmds to sleep waiting for the control thread to wake
> > them up later.
> >
> > Prefer to make them spin in such a case to avoid sleeping an
> > undeterministic amount of time.
> >
> > Signed-off-by: David Marchand <david.marchand@redhat.com>
> > Acked-by: Eelco Chaudron <echaudro@redhat.com>
> > ---
> >  lib/dpif-netdev.c | 47 ++++++++++++++++++++++++++++++++++++++++++-----
> >  1 file changed, 42 insertions(+), 5 deletions(-)
> >
> > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> > index 23cf6a6..243c1ce 100644
> > --- a/lib/dpif-netdev.c
> > +++ b/lib/dpif-netdev.c
> > @@ -683,6 +683,7 @@ struct dp_netdev_pmd_thread {
> >      struct seq *reload_seq;
> >      uint64_t last_reload_seq;
> >      atomic_bool reload;             /* Do we need to reload ports? */
> > +    atomic_bool wait_for_reload;    /* Can we busy wait for the next
> reload? */
> >      atomic_bool exit;               /* For terminating the pmd thread.
> */
> >      pthread_t thread;
> >      unsigned core_id;               /* CPU core id of this pmd thread.
> */
> > @@ -4896,6 +4897,33 @@ reconfigure_datapath(struct dp_netdev *dp)
> >          HMAP_FOR_EACH_SAFE (poll, poll_next, node, &pmd->poll_list) {
> >              if (poll->rxq->pmd != pmd) {
> >                  dp_netdev_del_rxq_from_pmd(pmd, poll);
> > +
> > +                /* This pmd might sleep after this step reload if it
> has no
> > +                 * rxq remaining. Can we tell it to busy wait for new
> rxq at
> > +                 * Step 6 ? */
> > +                if (hmap_count(&pmd->poll_list) == 0) {
> > +                    HMAP_FOR_EACH (port, node, &dp->ports) {
> > +                        int qid;
> > +
> > +                        if (!netdev_is_pmd(port->netdev)) {
> > +                            continue;
> > +                        }
> > +
> > +                        for (qid = 0; qid < port->n_rxq; qid++) {
> > +                            struct dp_netdev_rxq *q = &port->rxqs[qid];
> > +
> > +                            if (q->pmd == pmd) {
> > +
> atomic_store_relaxed(&q->pmd->wait_for_reload,
> > +                                                     true);
> > +                                break;
>
> 8 levels of indentation make me feel frustrated.
>
> What do you think about following incremental:
>
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 34ac09322..adc095579 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -4810,6 +4834,7 @@ reconfigure_datapath(struct dp_netdev *dp)
>  {
>      struct dp_netdev_pmd_thread *pmd;
>      struct dp_netdev_port *port;
> +    struct hmapx busy_threads = HMAPX_INITIALIZER(&busy_threads);
>      int wanted_txqs;
>
>      dp->last_reconfigure_seq = seq_read(dp->reconfigure_seq);
> @@ -4895,6 +4920,20 @@ reconfigure_datapath(struct dp_netdev *dp)
>      rxq_scheduling(dp, false);
>
>      /* Step 5: Remove queues not compliant with new scheduling. */
> +
> +    /* Count all the threads that will have at least one queue to poll. */
> +    HMAP_FOR_EACH (port, node, &dp->ports) {
> +        for (int qid = 0; qid < port->n_rxq; qid++) {
> +            struct dp_netdev_rxq *q = &port->rxqs[qid];
> +
> +            if (q->pmd) {
> +                hmapx_add(&busy_threads, q->pmd);
> +            }
> +        }
> +    }
> +
> +    /* Remove queues not compliant with new scheduling.  Asking busy
> threads
> +     * to busy-wait for a new queue assignment. */
>

Not a fan of this comment, looks duplicated with the one before that
announces what Step 5 does.
Plus, we have another one later about the busy wait.

I would take your suggestion but without this comment here.



     CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
>          struct rxq_poll *poll, *poll_next;
>
> @@ -4903,37 +4942,20 @@ reconfigure_datapath(struct dp_netdev *dp)
>              if (poll->rxq->pmd != pmd) {
>                  dp_netdev_del_rxq_from_pmd(pmd, poll);
>
> -                /* This pmd might sleep after this step reload if it has
> no
> -                 * rxq remaining. Can we tell it to busy wait for new rxq
> at
> -                 * Step 6 ? */
> -                if (hmap_count(&pmd->poll_list) == 0) {
> -                    HMAP_FOR_EACH (port, node, &dp->ports) {
> -                        int qid;
> -
> -                        if (!netdev_is_pmd(port->netdev)) {
> -                            continue;
> -                        }
> -
> -                        for (qid = 0; qid < port->n_rxq; qid++) {
> -                            struct dp_netdev_rxq *q = &port->rxqs[qid];
> -
> -                            if (q->pmd == pmd) {
> -
> atomic_store_relaxed(&q->pmd->wait_for_reload,
> -                                                     true);
> -                                break;
> -                            }
> -                        }
> -
> -                        if (qid != port->n_rxq) {
> -                            break;
> -                        }
> -                    }
> +                /* This thread might sleep after this step if it has no
> rxq
> +                 * remaining.  Tell it to busy wait for a new assignment
> +                 * if it has at least one scheduled queue. */
> +                if (hmap_count(&pmd->poll_list) == 0
> +                    && hmapx_contains(&busy_threads, pmd)) {
> +                    atomic_store_relaxed(&pmd->wait_for_reload, true);
>                  }
>              }
>          }
>          ovs_mutex_unlock(&pmd->port_mutex);
>      }
>
> +    hmapx_destroy(&busy_threads);
> +
>      /* Reload affected pmd threads.  We must wait for the pmd threads to
> remove
>       * the old queues before readding them, otherwise a queue can be
> polled by
>       * two threads at the same time. */
> ---
>
> ?
>
> > +                            }
> > +                        }
> > +
> > +                        if (qid != port->n_rxq) {
> > +                            break;
> > +                        }
> > +                    }
> > +                }
> >              }
> >          }
> >          ovs_mutex_unlock(&pmd->port_mutex);
> > @@ -5413,7 +5441,9 @@ pmd_thread_main(void *f_)
> >      struct pmd_perf_stats *s = &pmd->perf_stats;
> >      unsigned int lc = 0;
> >      struct polled_queue *poll_list;
> > +    bool wait_for_reload = false;
> >      bool exiting;
> > +    bool reload;
> >      int poll_cnt;
> >      int i;
> >      int process_packets = 0;
> > @@ -5441,9 +5471,16 @@ reload:
> >      }
> >
> >      if (!poll_cnt) {
> > -        while (seq_read(pmd->reload_seq) == pmd->last_reload_seq) {
> > -            seq_wait(pmd->reload_seq, pmd->last_reload_seq);
> > -            poll_block();
> > +        /* Don't sleep, control thread will ask for a reload shortly. */
>
> This comment should be moved inside the 'if' because it belongs only to
> one branch.
>

Yep.

Patch
diff mbox series

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 23cf6a6..243c1ce 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -683,6 +683,7 @@  struct dp_netdev_pmd_thread {
     struct seq *reload_seq;
     uint64_t last_reload_seq;
     atomic_bool reload;             /* Do we need to reload ports? */
+    atomic_bool wait_for_reload;    /* Can we busy wait for the next reload? */
     atomic_bool exit;               /* For terminating the pmd thread. */
     pthread_t thread;
     unsigned core_id;               /* CPU core id of this pmd thread. */
@@ -4896,6 +4897,33 @@  reconfigure_datapath(struct dp_netdev *dp)
         HMAP_FOR_EACH_SAFE (poll, poll_next, node, &pmd->poll_list) {
             if (poll->rxq->pmd != pmd) {
                 dp_netdev_del_rxq_from_pmd(pmd, poll);
+
+                /* This pmd might sleep after this step reload if it has no
+                 * rxq remaining. Can we tell it to busy wait for new rxq at
+                 * Step 6 ? */
+                if (hmap_count(&pmd->poll_list) == 0) {
+                    HMAP_FOR_EACH (port, node, &dp->ports) {
+                        int qid;
+
+                        if (!netdev_is_pmd(port->netdev)) {
+                            continue;
+                        }
+
+                        for (qid = 0; qid < port->n_rxq; qid++) {
+                            struct dp_netdev_rxq *q = &port->rxqs[qid];
+
+                            if (q->pmd == pmd) {
+                                atomic_store_relaxed(&q->pmd->wait_for_reload,
+                                                     true);
+                                break;
+                            }
+                        }
+
+                        if (qid != port->n_rxq) {
+                            break;
+                        }
+                    }
+                }
             }
         }
         ovs_mutex_unlock(&pmd->port_mutex);
@@ -5413,7 +5441,9 @@  pmd_thread_main(void *f_)
     struct pmd_perf_stats *s = &pmd->perf_stats;
     unsigned int lc = 0;
     struct polled_queue *poll_list;
+    bool wait_for_reload = false;
     bool exiting;
+    bool reload;
     int poll_cnt;
     int i;
     int process_packets = 0;
@@ -5441,9 +5471,16 @@  reload:
     }
 
     if (!poll_cnt) {
-        while (seq_read(pmd->reload_seq) == pmd->last_reload_seq) {
-            seq_wait(pmd->reload_seq, pmd->last_reload_seq);
-            poll_block();
+        /* Don't sleep, control thread will ask for a reload shortly. */
+        if (wait_for_reload) {
+            do {
+                atomic_read_relaxed(&pmd->reload, &reload);
+            } while (!reload);
+        } else {
+            while (seq_read(pmd->reload_seq) == pmd->last_reload_seq) {
+                seq_wait(pmd->reload_seq, pmd->last_reload_seq);
+                poll_block();
+            }
         }
         lc = UINT_MAX;
     }
@@ -5482,8 +5519,6 @@  reload:
         }
 
         if (lc++ > 1024) {
-            bool reload;
-
             lc = 0;
 
             coverage_try_clear();
@@ -5503,6 +5538,7 @@  reload:
     ovs_mutex_unlock(&pmd->perf_stats.stats_mutex);
 
     poll_cnt = pmd_load_queues_and_ports(pmd, &poll_list);
+    atomic_read_relaxed(&pmd->wait_for_reload, &wait_for_reload);
     atomic_read_relaxed(&pmd->exit, &exiting);
     /* Signal here to make sure the pmd finishes
      * reloading the updated configuration. */
@@ -5839,6 +5875,7 @@  dp_netdev_pmd_reload_done(struct dp_netdev_pmd_thread *pmd)
 {
     uint32_t old;
 
+    atomic_store_relaxed(&pmd->wait_for_reload, false);
     atomic_store_relaxed(&pmd->reload, false);
     pmd->last_reload_seq = seq_read(pmd->reload_seq);
     atomic_sub_explicit(&pmd->dp->reloading_pmds, 1, &old,