
[ovs-dev,v6] dpif-netdev: Assign ports to pmds on non-local numa node.

Message ID 1494431949-13326-1-git-send-email-billy.o.mahony@intel.com
State Superseded
Delegated to: Darrell Ball

Commit Message

Billy O'Mahony May 10, 2017, 3:59 p.m. UTC
From: billyom <billy.o.mahony@intel.com>

Previously if there is no available (non-isolated) pmd on the numa node
for a port then the port is not polled at all. This can result in a
non-operational system until such time as nics are physically
repositioned. It is preferable to operate with a pmd on the 'wrong' numa
node albeit with lower performance. Local pmds are still chosen when
available.

Signed-off-by: Billy O'Mahony <billy.o.mahony@intel.com>
---
v6: Change 'port' to 'queue' in a warning msg
v5: Fix warning msg; Update same in docs
v4: Fix a checkpatch error
v3: Fix warning messages not appearing when using multiqueue
v2: Add details of warning messages into docs

 Documentation/intro/install/dpdk.rst | 10 +++++++++
 lib/dpif-netdev.c                    | 43 +++++++++++++++++++++++++++++++-----
 2 files changed, 48 insertions(+), 5 deletions(-)
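
A minimal standalone sketch of the behaviour described above, using simplified
stand-in types and a hypothetical two-node layout (not the actual OVS
structures, and not the exact round-robin bookkeeping the patch adds):

    /* Sketch only: prefer a pmd local to the port's NUMA node; if none is
     * available, fall back round-robin to pmds on other nodes instead of
     * leaving the queue unpolled. */
    #include <stdio.h>

    #define N_PMDS 3
    #define N_RXQS 4

    struct pmd {          /* stand-in for struct dp_netdev_pmd_thread */
        int core_id;
        int numa_id;
    };

    struct rxq {          /* stand-in for a port rx queue */
        const char *port;
        int qid;
        int port_numa_id; /* NUMA node the NIC is attached to */
    };

    int main(void)
    {
        /* In this example pmd-cpu-mask only created pmds on NUMA node 0. */
        struct pmd pmds[N_PMDS] = { {2, 0}, {4, 0}, {6, 0} };
        /* dpdk0 is local to node 0, dpdk1 to node 1 (made-up names). */
        struct rxq rxqs[N_RXQS] = {
            {"dpdk0", 0, 0}, {"dpdk0", 1, 0}, {"dpdk1", 0, 1}, {"dpdk1", 1, 1},
        };
        unsigned int rr = 0;   /* round-robin cursor for the fallback path */

        for (int i = 0; i < N_RXQS; i++) {
            struct pmd *chosen = NULL;

            /* Local pmds are still chosen when available. */
            for (int j = 0; j < N_PMDS; j++) {
                if (pmds[j].numa_id == rxqs[i].port_numa_id) {
                    chosen = &pmds[j];
                    break;
                }
            }
            if (!chosen) {
                /* New behaviour: assign to a non-local pmd and warn. */
                chosen = &pmds[rr++ % N_PMDS];
                printf("WARN: no pmd on numa node %d; queue %d on port %s "
                       "assigned to core %d (numa node %d)\n",
                       rxqs[i].port_numa_id, rxqs[i].qid, rxqs[i].port,
                       chosen->core_id, chosen->numa_id);
            } else {
                printf("queue %d on port %s -> local core %d (numa node %d)\n",
                       rxqs[i].qid, rxqs[i].port, chosen->core_id,
                       chosen->numa_id);
            }
        }
        return 0;
    }

With pmds only on node 0, the two dpdk1 queues take the fallback path and are
polled from node 0 with the warning above; previously they would simply not
have been polled.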

Comments

Stokes, Ian May 11, 2017, 11:09 a.m. UTC | #1
> Previously if there is no available (non-isolated) pmd on the numa node
> for a port then the port is not polled at all. This can result in a
> non-operational system until such time as nics are physically
> repositioned. It is preferable to operate with a pmd on the 'wrong' numa
> node albeit with lower performance. Local pmds are still chosen when
> available.
>
> Signed-off-by: Billy O'Mahony <billy.o.mahony@intel.com>
> ---
> v6: Change 'port' to 'queue' in a warning msg
> v5: Fix warning msg; Update same in docs
> v4: Fix a checkpatch error
> v3: Fix warning messages not appearing when using multiqueue
> v2: Add details of warning messages into docs
>
>  Documentation/intro/install/dpdk.rst | 10 +++++++++
>  lib/dpif-netdev.c                    | 43 +++++++++++++++++++++++++++++++-----
>  2 files changed, 48 insertions(+), 5 deletions(-)
>
> diff --git a/Documentation/intro/install/dpdk.rst b/Documentation/intro/install/dpdk.rst
> index d1c0e65..7a66bff 100644
> --- a/Documentation/intro/install/dpdk.rst
> +++ b/Documentation/intro/install/dpdk.rst
> @@ -460,6 +460,16 @@ affinitized accordingly.
>      pmd thread on a NUMA node is only created if there is at least one DPDK
>      interface from that NUMA node added to OVS.
>
> +  .. note::
> +   On NUMA systems PCI devices are also local to a NUMA node.  Rx queues for
> +   PCI device will assigned to a pmd on it's local NUMA node if pmd-cpu-mask
> +   has created a pmd thread on that NUMA node.  If not the queue will be
> +   assigned to a pmd on a remote NUMA node.  This will result in reduced
> +   maximum throughput on that device.  In the case such a queue assignment
> +   is made a warning message will be logged: "There's no available (non-
> +   isolated) pmd thread on numa node N. Queue Q on port P will be assigned to
> +   the pmd on core C (numa node N'). Expect reduced performance."
> +
>  - QEMU vCPU thread Affinity
>
>    A VM performing simple packet forwarding or running complex packet pipelines
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index b3a0806..34f1963 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -3149,10 +3149,13 @@ rr_numa_list_lookup(struct rr_numa_list *rr, int numa_id)
>  }
>
>  static void
> -rr_numa_list_populate(struct dp_netdev *dp, struct rr_numa_list *rr)
> +rr_numa_list_populate(struct dp_netdev *dp, struct rr_numa_list *rr,
> +                      int *all_numa_ids, unsigned all_numa_ids_sz,
> +                      int *num_ids_written)
>  {
>      struct dp_netdev_pmd_thread *pmd;
>      struct rr_numa *numa;
> +    unsigned idx = 0;
>
>      hmap_init(&rr->numas);
>
> @@ -3170,7 +3173,11 @@ rr_numa_list_populate(struct dp_netdev *dp, struct rr_numa_list *rr)
>          numa->n_pmds++;
>          numa->pmds = xrealloc(numa->pmds, numa->n_pmds * sizeof *numa->pmds);
>          numa->pmds[numa->n_pmds - 1] = pmd;
> +
> +        all_numa_ids[idx % all_numa_ids_sz] = pmd->numa_id;
> +        idx++;
>      }
> +    *num_ids_written = idx;
>  }
>
>  static struct dp_netdev_pmd_thread *
> @@ -3202,8 +3209,15 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex)
>  {
>      struct dp_netdev_port *port;
>      struct rr_numa_list rr;
> +    int all_numa_ids [64];
> +    int all_numa_ids_sz = sizeof all_numa_ids / sizeof all_numa_ids[0];
> +    unsigned all_numa_ids_idx = 0;
> +    int all_numa_ids_max_idx = 0;
> +    int num_numa_ids = 0;
>
> -    rr_numa_list_populate(dp, &rr);
> +    rr_numa_list_populate(dp, &rr, all_numa_ids, all_numa_ids_sz,
> +                          &num_numa_ids);
> +    all_numa_ids_max_idx = MIN(num_numa_ids - 1, all_numa_ids_sz - 1);
>
>      HMAP_FOR_EACH (port, node, &dp->ports) {
>          struct rr_numa *numa;
> @@ -3234,10 +3248,29 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex)
>                  }
>              } else if (!pinned && q->core_id == OVS_CORE_UNSPEC) {
>                  if (!numa) {
> -                    VLOG_WARN("There's no available (non isolated) pmd thread "
> +                    if (all_numa_ids_max_idx < 0) {
> +                        VLOG_ERR("There is no available (non-isolated) pmd "
> +                                 "thread for port \'%s\' queue %d. This queue "
> +                                 "will not be polled. Is pmd-cpu-mask set to "
> +                                 "zero? Or are all PMDs isolated to other "
> +                                 "queues?", netdev_get_name(port->netdev),
> +                                 qid);
> +                        continue;
> +                    }
> +                    int alt_numa_id = all_numa_ids[all_numa_ids_idx];
> +                    struct rr_numa *alt_numa;
> +                    alt_numa = rr_numa_list_lookup(&rr, alt_numa_id);
> +                    q->pmd = rr_numa_get_pmd(alt_numa);
> +                    VLOG_WARN("There's no available (non-isolated) pmd thread "
>                                "on numa node %d. Queue %d on port \'%s\' will "
> -                              "not be polled.",
> -                              numa_id, qid, netdev_get_name(port->netdev));
> +                              "be assigned to the pmd on core %d "
> +                              "(numa node %d). Expect reduced performance.",
> +                              numa_id, qid, netdev_get_name(port->netdev),
> +                              q->pmd->core_id, q->pmd->numa_id);
> +                    all_numa_ids_idx++;
> +                    if (all_numa_ids_idx > all_numa_ids_max_idx) {
> +                        all_numa_ids_idx = 0;
> +                    }
>                  } else {
>                      q->pmd = rr_numa_get_pmd(numa);
>                  }
> --
> 2.7.4
>

Thanks Billy, LGTM and tested ok.

Acked-by: Ian Stokes <ian.stokes@intel.com>

> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Billy O'Mahony June 13, 2017, 9:01 a.m. UTC | #2
Hi All,

Does anyone else have any comments on this patch?

I'm adding Ilya and Jan to the CC as I believe you both had comments on this previously. Apologies if I've left anyone else who commented off the CC!

Regards,
/Billy

Darrell Ball June 26, 2017, 7:03 a.m. UTC | #3
I think this is helpful in some cases where lower performance is an acceptable
tradeoff with more frugal and/or more flexible usage of cpu resources.

I did not test it since Ian has already done that, but I reviewed
the code change and other related code.

One comment inline regarding the added documentation.

Otherwise, Acked-by: Darrell Ball <dlu998@gmail.com>



On 5/10/17, 8:59 AM, "ovs-dev-bounces@openvswitch.org on behalf of Billy O'Mahony" <ovs-dev-bounces@openvswitch.org on behalf of billy.o.mahony@intel.com> wrote:

    From: billyom <billy.o.mahony@intel.com>
    
    Previously if there is no available (non-isolated) pmd on the numa node
    for a port then the port is not polled at all. This can result in a
    non-operational system until such time as nics are physically
    repositioned. It is preferable to operate with a pmd on the 'wrong' numa
    node albeit with lower performance. Local pmds are still chosen when
    available.
    
    Signed-off-by: Billy O'Mahony <billy.o.mahony@intel.com>
    ---
    v6: Change 'port' to 'queue' in a warning msg
    v5: Fix warning msg; Update same in docs
    v4: Fix a checkpatch error
    v3: Fix warning messages not appearing when using multiqueue
    v2: Add details of warning messages into docs
    
     Documentation/intro/install/dpdk.rst | 10 +++++++++
     lib/dpif-netdev.c                    | 43 +++++++++++++++++++++++++++++++-----
     2 files changed, 48 insertions(+), 5 deletions(-)
    
    diff --git a/Documentation/intro/install/dpdk.rst b/Documentation/intro/install/dpdk.rst
    index d1c0e65..7a66bff 100644
    --- a/Documentation/intro/install/dpdk.rst
    +++ b/Documentation/intro/install/dpdk.rst
    @@ -460,6 +460,16 @@ affinitized accordingly.
         pmd thread on a NUMA node is only created if there is at least one DPDK
         interface from that NUMA node added to OVS.
     
    +  .. note::
    +   On NUMA systems PCI devices are also local to a NUMA node.  Rx queues for
    +   PCI device will assigned to a pmd on it's local NUMA node if pmd-cpu-mask
    +   has created a pmd thread on that NUMA node.  If not the queue will be
    +   assigned to a pmd on a remote NUMA node.



I think the below text is a bit more accurate
+  .. note::
+   On NUMA systems PCI devices are also local to a NUMA node.  Rx queues for
+   a PCI device will be assigned to a pmd on it's local NUMA node.  A pmd is
+   created if at least one dpdk interface is added to OVS on that NUMA node or
+   if the pmd-cpu-mask has created a pmd thread on that NUMA node.  

    + This will result in reduced
    +   maximum throughput on that device.  In the case such a queue assignment
    +   is made a warning message will be logged: "There's no available (non-
    +   isolated) pmd thread on numa node N. Queue Q on port P will be assigned to
    +   the pmd on core C (numa node N'). Expect reduced performance."
    +
     - QEMU vCPU thread Affinity
     
       A VM performing simple packet forwarding or running complex packet pipelines
    diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
    index b3a0806..34f1963 100644
    --- a/lib/dpif-netdev.c
    +++ b/lib/dpif-netdev.c
    @@ -3149,10 +3149,13 @@ rr_numa_list_lookup(struct rr_numa_list *rr, int numa_id)
     }
     
     static void
    -rr_numa_list_populate(struct dp_netdev *dp, struct rr_numa_list *rr)
    +rr_numa_list_populate(struct dp_netdev *dp, struct rr_numa_list *rr,
    +                      int *all_numa_ids, unsigned all_numa_ids_sz,
    +                      int *num_ids_written)
     {
         struct dp_netdev_pmd_thread *pmd;
         struct rr_numa *numa;
    +    unsigned idx = 0;
     
         hmap_init(&rr->numas);
     
    @@ -3170,7 +3173,11 @@ rr_numa_list_populate(struct dp_netdev *dp, struct rr_numa_list *rr)
             numa->n_pmds++;
             numa->pmds = xrealloc(numa->pmds, numa->n_pmds * sizeof *numa->pmds);
             numa->pmds[numa->n_pmds - 1] = pmd;
    +
    +        all_numa_ids[idx % all_numa_ids_sz] = pmd->numa_id;
    +        idx++;
         }
    +    *num_ids_written = idx;
     }
     
     static struct dp_netdev_pmd_thread *
    @@ -3202,8 +3209,15 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex)
     {
         struct dp_netdev_port *port;
         struct rr_numa_list rr;
    +    int all_numa_ids [64];
    +    int all_numa_ids_sz = sizeof all_numa_ids / sizeof all_numa_ids[0];
    +    unsigned all_numa_ids_idx = 0;
    +    int all_numa_ids_max_idx = 0;
    +    int num_numa_ids = 0;
     
    -    rr_numa_list_populate(dp, &rr);
    +    rr_numa_list_populate(dp, &rr, all_numa_ids, all_numa_ids_sz,
    +                          &num_numa_ids);
    +    all_numa_ids_max_idx = MIN(num_numa_ids - 1, all_numa_ids_sz - 1);
     
         HMAP_FOR_EACH (port, node, &dp->ports) {
             struct rr_numa *numa;
    @@ -3234,10 +3248,29 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex)
                     }
                 } else if (!pinned && q->core_id == OVS_CORE_UNSPEC) {
                     if (!numa) {
    -                    VLOG_WARN("There's no available (non isolated) pmd thread "
    +                    if (all_numa_ids_max_idx < 0) {
    +                        VLOG_ERR("There is no available (non-isolated) pmd "
    +                                 "thread for port \'%s\' queue %d. This queue "
    +                                 "will not be polled. Is pmd-cpu-mask set to "
    +                                 "zero? Or are all PMDs isolated to other "
    +                                 "queues?", netdev_get_name(port->netdev),
    +                                 qid);
    +                        continue;
    +                    }
    +                    int alt_numa_id = all_numa_ids[all_numa_ids_idx];
    +                    struct rr_numa *alt_numa;
    +                    alt_numa = rr_numa_list_lookup(&rr, alt_numa_id);
    +                    q->pmd = rr_numa_get_pmd(alt_numa);
    +                    VLOG_WARN("There's no available (non-isolated) pmd thread "
                                   "on numa node %d. Queue %d on port \'%s\' will "
    -                              "not be polled.",
    -                              numa_id, qid, netdev_get_name(port->netdev));
    +                              "be assigned to the pmd on core %d "
    +                              "(numa node %d). Expect reduced performance.",
    +                              numa_id, qid, netdev_get_name(port->netdev),
    +                              q->pmd->core_id, q->pmd->numa_id);
    +                    all_numa_ids_idx++;
    +                    if (all_numa_ids_idx > all_numa_ids_max_idx) {
    +                        all_numa_ids_idx = 0;
    +                    }
                     } else {
                         q->pmd = rr_numa_get_pmd(numa);
                     }
    -- 
    2.7.4
    
    _______________________________________________
    dev mailing list
    dev@openvswitch.org
    https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Billy O'Mahony June 26, 2017, 1:52 p.m. UTC | #4
Hi Darrell,

Thanks for reviewing.

> -----Original Message-----
> From: Darrell Ball [mailto:dball@vmware.com]
> Sent: Monday, June 26, 2017 8:04 AM
> To: O Mahony, Billy <billy.o.mahony@intel.com>; dev@openvswitch.org
> Subject: Re: [ovs-dev] [PATCH v6] dpif-netdev: Assign ports to pmds on non-
> local numa node.
> 
> I think this is helpful in some cases where lower performance is an
> acceptable tradeoff with more frugal and/or more flexible usage of cpu
> resources.
> 
> I did not test it since Ian has already done that, but I reviewed the code
> change and other related code.
> 
> One comment inline regarding the added documentation.
> 
> Otherwise, Acked-by: Darrell Ball <dlu998@gmail.com>
>
> I think the below text is a bit more accurate
> +  .. note::
> +   On NUMA systems PCI devices are also local to a NUMA node.  Rx queues for
> +   a PCI device will be assigned to a pmd on it's local NUMA node.

[[BO'M]]
Re suggested sentence: "Rx queues for a PCI device will be assigned to a pmd on it's local NUMA node."
However, with this patch that is no longer always the case. Assignment to a pmd on its local NUMA node will be preferred, but if that is not possible then assignment will be done to a PMD on a non-local NUMA node.

Is the suggested change in order to emphasize that a PMD is created if specified in pmd-cpu-mask but not actually scheduled by the OS unless at least one rxq is assigned to it? That case is covered in the preceding paragraph (which is not modified by this patch): "A pmd thread on a NUMA node is only created if there is at least one DPDK interface from that NUMA node added to OVS."  Which, now that I read it, should be modified to read "A PMD thread only becomes runnable if there is at least one DPDK interface assigned to it."

 A pmd is
> +   created if at least one dpdk interface is added to OVS on that NUMA node or
> +   if the pmd-cpu-mask has created a pmd thread on that NUMA node.

 [[BO'M]]
Is this part of the suggested change in order to emphasize that a PMD is created if specified in pmd-cpu-mask but not actually scheduled by the OS unless at least one rxq is assigned to it (introduced in 2788a1b)? That detail is covered somewhat in the preceding paragraph (which is not modified by this patch): "A pmd thread on a NUMA node is only created if there is at least one DPDK interface from that NUMA node added to OVS."

Which, now that I read it, should be modified to actually reflect that. What do you think?

Darrell Ball June 26, 2017, 3:53 p.m. UTC | #5
On 6/26/17, 6:52 AM, "O Mahony, Billy" <billy.o.mahony@intel.com> wrote:

    Hi Darrell,
    
    
    
    Thanks for reviewing.
    
    
    
    > I think the below text is a bit more accurate
    > +  .. note::
    > +   On NUMA systems PCI devices are also local to a NUMA node.  Rx queues for
    > +   a PCI device will be assigned to a pmd on it's local NUMA node.

    [[BO'M]]
    Re suggested sentence: "Rx queues for a PCI device will be assigned to a pmd on it's local NUMA node."
    However, with this patch that is no longer always the case. Assignment to a pmd on its local NUMA node will be preferred, but if that is not possible then assignment will be done to a PMD on a non-local NUMA node.

    Is the suggested change in order to emphasize that a PMD is created if specified in pmd-cpu-mask but not actually scheduled by the OS unless at least one rxq is assigned to it? That case is covered in the preceding paragraph (which is not modified by this patch): "A pmd thread on a NUMA node is only created if there is at least one DPDK interface from that NUMA node added to OVS."  Which, now that I read it, should be modified to read "A PMD thread only becomes runnable if there is at least one DPDK interface assigned to it."
    

[Darrell]
I unintentionally deleted the below line of text from the beginning of the following paragraph – “my bad”.
“If not the queue will be assigned to a pmd on a remote NUMA node. “



The full text was intended to be

+  .. note::
+   On NUMA systems PCI devices are also local to a NUMA node.  Rx queues for
+   a PCI device will be assigned to a pmd on it's local NUMA node.  A pmd is
+   created if at least one dpdk interface is added to OVS on that NUMA node or
+   if the pmd-cpu-mask has created a pmd thread on that NUMA node.  If not the queue will be
+   assigned to a pmd on a remote NUMA node. This will result in reduced
+   maximum throughput on that device.  In the case such a queue assignment
+   is made a warning message will be logged: "There's no available (non-
+   isolated) pmd thread on numa node N. Queue Q on port P will be assigned to
+   the pmd on core C (numa node N'). Expect reduced performance."
+

which is not totally accurate.

Also, as you pointed out, I added a redundant line from the above paragraph.
I expected one .. note:: and you added a second.

This is getting confusing. How about dividing the thread and queue discussions
more cleanly and adding more context?

  .. note::
    A pmd thread on a NUMA node is only created if there is at least one DPDK
    interface from that NUMA node added to OVS. A pmd thread is created by
    default on a core of a NUMA node or when a specified pmd-cpu-mask has
    indicated so.

  .. note::
   On NUMA systems PCI devices are also local to a NUMA node.  Unbound Rx
   queues for a PCI device will assigned to a pmd on it's local NUMA node if a
   non-isolated PMD exists on that NUMA node.  If not, the queue will be
   assigned to a non-isolated pmd on a remote NUMA node.  This will result in
   reduced maximum throughput on that device.  In the case such, a queue
   assignment is made a warning message will be logged: "There's no available
   (non-isolated) pmd thread on numa node N. Queue Q on port P will
   be assigned to the pmd on core C (numa node N'). Expect reduced performance."
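
Purely as an illustration of the message text being discussed (made-up values,
not output from a real system), the placeholders N, Q, P, C and N' correspond
to the arguments the patch passes to VLOG_WARN:

    #include <stdio.h>

    /* Sketch only: formats the documented warning with example values
     * (numa node N = 1, queue Q = 0, port P = "dpdk1", core C = 2,
     * numa node N' = 0). */
    int main(void)
    {
        int numa_id = 1, qid = 0, core_id = 2, pmd_numa_id = 0;
        const char *port = "dpdk1";

        printf("There's no available (non-isolated) pmd thread on numa node "
               "%d. Queue %d on port '%s' will be assigned to the pmd on "
               "core %d (numa node %d). Expect reduced performance.\n",
               numa_id, qid, port, core_id, pmd_numa_id);
        return 0;
    }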


    
     A pmd is
    > +   created if at least one dpdk interface is added to OVS on that NUMA node or
    > +   if the pmd-cpu-mask has created a pmd thread on that NUMA node.

     [[BO'M]]
    Is this part of the suggested change in order to emphasize that a PMD is created if specified in pmd-cpu-mask but not actually scheduled by the OS unless at least one rxq is assigned to it (introduced in 2788a1b)? That detail is covered somewhat in the preceding paragraph (which is not modified by this patch): "A pmd thread on a NUMA node is only created if there is at least one DPDK interface from that NUMA node added to OVS."

    Which, now that I read it, should be modified to actually reflect that. What do you think?


[Darrell] see above comment

    
    
    
Billy O'Mahony June 26, 2017, 3:57 p.m. UTC | #6
Sounds good. I'll incorporate those changes. 

Darrell Ball June 26, 2017, 4:08 p.m. UTC | #7
For the last sentence:

I think
“In the case such, a queue assignment is made”
was meant to be
“In case such a queue assignment is made”


On 6/26/17, 8:57 AM, "O Mahony, Billy" <billy.o.mahony@intel.com> wrote:

    Sounds good. I'll incorporate those changes.

    > -----Original Message-----
    > From: Darrell Ball [mailto:dball@vmware.com]
    > Sent: Monday, June 26, 2017 4:53 PM
    > To: O Mahony, Billy <billy.o.mahony@intel.com>; dev@openvswitch.org
    > Subject: Re: [ovs-dev] [PATCH v6] dpif-netdev: Assign ports to pmds on non-
    > local numa node.
    >
    > On 6/26/17, 6:52 AM, "O Mahony, Billy" <billy.o.mahony@intel.com> wrote:
    >
    >     Hi Darrell,
    >
    >     Thanks for reviewing.
    >
    >     > -----Original Message-----
    >     > From: Darrell Ball [mailto:dball@vmware.com]
    >     > Sent: Monday, June 26, 2017 8:04 AM
    >     > To: O Mahony, Billy <billy.o.mahony@intel.com>; dev@openvswitch.org
    >     > Subject: Re: [ovs-dev] [PATCH v6] dpif-netdev: Assign ports to pmds on non-
    >     > local numa node.
    >     >
    >     > I think this is helpful in some cases where lower performance is an
    >     > acceptable tradeoff with more frugal and/or more flexible usage of cpu
    >     > resources.
    >     >
    >     > I did not test it since Ian has already done that, but I reviewed the code
    >     > change and other related code.
    >     >
    >     > One comment inline regarding the added documentation.
    >     >
    >     > Otherwise, Acked-by: Darrell Ball <dlu998@gmail.com>
    >     >
    >     > On 5/10/17, 8:59 AM, "ovs-dev-bounces@openvswitch.org on behalf of Billy
    >     > O'Mahony" <ovs-dev-bounces@openvswitch.org on behalf of
    >     > billy.o.mahony@intel.com> wrote:
    >     >
    >     >     From: billyom <billy.o.mahony@intel.com>
    >     >
    >     >     Previously if there is no available (non-isolated) pmd on the numa node
    >     >     for a port then the port is not polled at all. This can result in a
    >     >     non-operational system until such time as nics are physically
    >     >     repositioned. It is preferable to operate with a pmd on the 'wrong' numa
    >     >     node albeit with lower performance. Local pmds are still chosen when
    >     >     available.
    >     >
    >     >     Signed-off-by: Billy O'Mahony <billy.o.mahony@intel.com>
    >     >     ---
    >     >     v6: Change 'port' to 'queue' in a warning msg
    >     >     v5: Fix warning msg; Update same in docs
    >     >     v4: Fix a checkpatch error
    >     >     v3: Fix warning messages not appearing when using multiqueue
    >     >     v2: Add details of warning messages into docs
    >     >
    >     >      Documentation/intro/install/dpdk.rst | 10 +++++++++
    >     >      lib/dpif-netdev.c                    | 43 +++++++++++++++++++++++++++++++-----
    >     >      2 files changed, 48 insertions(+), 5 deletions(-)
    >     >
    >     >     diff --git a/Documentation/intro/install/dpdk.rst b/Documentation/intro/install/dpdk.rst
    >     >     index d1c0e65..7a66bff 100644
    >     >     --- a/Documentation/intro/install/dpdk.rst
    >     >     +++ b/Documentation/intro/install/dpdk.rst
    >     >     @@ -460,6 +460,16 @@ affinitized accordingly.
    >     >          pmd thread on a NUMA node is only created if there is at least one DPDK
    >     >          interface from that NUMA node added to OVS.
    >     >
    >     >     +  .. note::
    >     >     +   On NUMA systems PCI devices are also local to a NUMA node.  Rx queues for
    >     >     +   PCI device will assigned to a pmd on it's local NUMA node if pmd-cpu-mask
    >     >     +   has created a pmd thread on that NUMA node.  If not the queue will be
    >     >     +   assigned to a pmd on a remote NUMA node.
    >     >
    >     > I think the below text is a bit more accurate
    >     > +  .. note::
    >     > +   On NUMA systems PCI devices are also local to a NUMA node.  Rx queues for
    >     > +   a PCI device will be assigned to a pmd on it's local NUMA node.
    >     >
    >     [[BO'M]]
    >     Re suggested sentence: "Rx queues for a PCI device will be assigned to a
    >     pmd on it's local NUMA node."
    >
    >     However, with this patch that is no longer always the case. Assignment to a
    >     pmd on it's local NUMA node will be preferred but it that is not possible then
    >     assignment will be done to a PMD on a non-local NUMA node.
    >
    >     Is the suggested change in order to emphasize that a PMD is created if
    >     specified in pmd-cpu-mask but not actually scheduled by the OS unless at
    >     least one rxq is assigned to it? That case is covered in the preceeding
    >     paragraph (which is not modified by this patch) "A pmd thread on a NUMA
    >     node is only created if there is at least one DPDK interface from that NUMA
    >     node added to OVS."  Which, now I read it should be modified to read "A
    >     PMD thread only becomes runnable if there is at least one DPDK interface
    >     assigned to it."
    >
    > [Darrell]
    > I unintentionally deleted the below line of text from the beginning of the
    > following paragraph – “my bad”.
    > “If not the queue will be assigned to a pmd on a remote NUMA node. “
    >
    > The full text was intended to be
    >
    > +  .. note::
    > +   On NUMA systems PCI devices are also local to a NUMA node.  Rx queues for
    > +   a PCI device will be assigned to a pmd on it's local NUMA node.  A pmd is
    > +   created if at least one dpdk interface is added to OVS on that NUMA node or
    > +   if the pmd-cpu-mask has created a pmd thread on that NUMA node.  If not the queue will be
    > +   assigned to a pmd on a remote NUMA node. This will result in reduced
    > +   maximum throughput on that device.  In the case such a queue assignment
    > +   is made a warning message will be logged: "There's no available (non-
    > +   isolated) pmd thread on numa node N. Queue Q on port P will be assigned to
    > +   the pmd on core C (numa node N'). Expect reduced performance."
    > +
    >
    > which is not totally accurate.
    >
    > Also, as you pointed out, I also added a redundant line from the above
    > paragraph I expected one .. note::
    > and you added a second.
    >
    > This is getting confusing.
    > How about dividing the thread and queue discussions more cleanly and
    > adding more context.
    >
    >   .. note::
    >     A pmd thread on a NUMA node is only created if there is at least one DPDK
    >     interface from that NUMA node added to OVS. A pmd thread is created by
    >     default on a core of a NUMA node or when a specified pmd-cpu-mask has
    >     indicated so.
    >
    >   .. note::
    >    On NUMA systems PCI devices are also local to a NUMA node.  Unbound Rx
    >    queues for a PCI device will assigned to a pmd on it's local NUMA node if a
    >    non-isolated PMD exists on that NUMA node.  If not, the queue will be
    >    assigned to a non-isolated pmd on a remote NUMA node.  This will result in
    >    reduced maximum throughput on that device.  In the case such, a queue
    >    assignment is made a warning message will be logged: "There's no available
    >    (non-isolated) pmd thread on numa node N. Queue Q on port P will
    >    be assigned to the pmd on core C (numa node N'). Expect reduced performance."
    >
    >      A pmd is
    >     > +   created if at least one dpdk interface is added to OVS on that NUMA node or
    >     > +   if the pmd-cpu-mask has created a pmd thread on that NUMA node.
    >     >
    >      [[BO'M]]
    >     Is this part of the suggested change in order to emphasize that a PMD is
    >     created if specified in pmd-cpu-mask but not actually scheduled by the OS
    >     unless at least one rxq is assigned to it? (introduced 2788a1b). That detail is
    >     covered somewhat in the preceding paragraph (which is not modified by this
    >     patch) "A pmd thread on a NUMA node is only created if there is at least one
    >     DPDK interface from that NUMA node added to OVS."
    >
    >     Which, now I read it should be modified to actually reflect that. What do
    >     you think?
    >
    > [Darrell] see above comment

Billy O'Mahony June 26, 2017, 4:40 p.m. UTC | #8
No problem, Darrell.

My network is acting up at the moment so I'll resubmit the documentation section tomorrow.

/Billy

diff mbox

Patch

diff --git a/Documentation/intro/install/dpdk.rst b/Documentation/intro/install/dpdk.rst
index d1c0e65..7a66bff 100644
--- a/Documentation/intro/install/dpdk.rst
+++ b/Documentation/intro/install/dpdk.rst
@@ -460,6 +460,16 @@  affinitized accordingly.
     pmd thread on a NUMA node is only created if there is at least one DPDK
     interface from that NUMA node added to OVS.
 
+  .. note::
+   On NUMA systems PCI devices are also local to a NUMA node.  Rx queues for
+   PCI device will assigned to a pmd on it's local NUMA node if pmd-cpu-mask
+   has created a pmd thread on that NUMA node.  If not the queue will be
+   assigned to a pmd on a remote NUMA node.  This will result in reduced
+   maximum throughput on that device.  In the case such a queue assignment
+   is made a warning message will be logged: "There's no available (non-
+   isolated) pmd thread on numa node N. Queue Q on port P will be assigned to
+   the pmd on core C (numa node N'). Expect reduced performance."
+
 - QEMU vCPU thread Affinity
 
   A VM performing simple packet forwarding or running complex packet pipelines
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index b3a0806..34f1963 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -3149,10 +3149,13 @@  rr_numa_list_lookup(struct rr_numa_list *rr, int numa_id)
 }
 
 static void
-rr_numa_list_populate(struct dp_netdev *dp, struct rr_numa_list *rr)
+rr_numa_list_populate(struct dp_netdev *dp, struct rr_numa_list *rr,
+                      int *all_numa_ids, unsigned all_numa_ids_sz,
+                      int *num_ids_written)
 {
     struct dp_netdev_pmd_thread *pmd;
     struct rr_numa *numa;
+    unsigned idx = 0;
 
     hmap_init(&rr->numas);
 
@@ -3170,7 +3173,11 @@  rr_numa_list_populate(struct dp_netdev *dp, struct rr_numa_list *rr)
         numa->n_pmds++;
         numa->pmds = xrealloc(numa->pmds, numa->n_pmds * sizeof *numa->pmds);
         numa->pmds[numa->n_pmds - 1] = pmd;
+
+        all_numa_ids[idx % all_numa_ids_sz] = pmd->numa_id;
+        idx++;
     }
+    *num_ids_written = idx;
 }
 
 static struct dp_netdev_pmd_thread *
@@ -3202,8 +3209,15 @@  rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex)
 {
     struct dp_netdev_port *port;
     struct rr_numa_list rr;
+    int all_numa_ids [64];
+    int all_numa_ids_sz = sizeof all_numa_ids / sizeof all_numa_ids[0];
+    unsigned all_numa_ids_idx = 0;
+    int all_numa_ids_max_idx = 0;
+    int num_numa_ids = 0;
 
-    rr_numa_list_populate(dp, &rr);
+    rr_numa_list_populate(dp, &rr, all_numa_ids, all_numa_ids_sz,
+                          &num_numa_ids);
+    all_numa_ids_max_idx = MIN(num_numa_ids - 1, all_numa_ids_sz - 1);
 
     HMAP_FOR_EACH (port, node, &dp->ports) {
         struct rr_numa *numa;
@@ -3234,10 +3248,29 @@  rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex)
                 }
             } else if (!pinned && q->core_id == OVS_CORE_UNSPEC) {
                 if (!numa) {
-                    VLOG_WARN("There's no available (non isolated) pmd thread "
+                    if (all_numa_ids_max_idx < 0) {
+                        VLOG_ERR("There is no available (non-isolated) pmd "
+                                 "thread for port \'%s\' queue %d. This queue "
+                                 "will not be polled. Is pmd-cpu-mask set to "
+                                 "zero? Or are all PMDs isolated to other "
+                                 "queues?", netdev_get_name(port->netdev),
+                                 qid);
+                        continue;
+                    }
+                    int alt_numa_id = all_numa_ids[all_numa_ids_idx];
+                    struct rr_numa *alt_numa;
+                    alt_numa = rr_numa_list_lookup(&rr, alt_numa_id);
+                    q->pmd = rr_numa_get_pmd(alt_numa);
+                    VLOG_WARN("There's no available (non-isolated) pmd thread "
                               "on numa node %d. Queue %d on port \'%s\' will "
-                              "not be polled.",
-                              numa_id, qid, netdev_get_name(port->netdev));
+                              "be assigned to the pmd on core %d "
+                              "(numa node %d). Expect reduced performance.",
+                              numa_id, qid, netdev_get_name(port->netdev),
+                              q->pmd->core_id, q->pmd->numa_id);
+                    all_numa_ids_idx++;
+                    if (all_numa_ids_idx > all_numa_ids_max_idx) {
+                        all_numa_ids_idx = 0;
+                    }
                 } else {
                     q->pmd = rr_numa_get_pmd(numa);
                 }
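
To illustrate the scheduling change above, here is a minimal, self-contained C sketch of the fallback idea, with hypothetical names rather than the dpif-netdev code: prefer a pmd on the port's own NUMA node, and only if none is available fall back round-robin to whichever pmds exist, so a cross-NUMA queue is still polled. Note the simplification: the patch round-robins over the NUMA node ids collected in all_numa_ids and then picks a pmd within that node, whereas this sketch round-robins over the pmds directly.

    #include <stdio.h>

    /* Hypothetical, simplified model: a pmd thread is just a core id plus
     * the NUMA node that core belongs to. */
    struct pmd {
        int core_id;
        int numa_id;
    };

    /* Pick a pmd for a queue whose port lives on 'port_numa'.  A local pmd
     * is preferred; otherwise fall back round-robin over all pmds so that
     * remotely assigned queues are spread out instead of dropped. */
    static const struct pmd *
    pick_pmd(const struct pmd *pmds, int n_pmds, int port_numa, int *rr_idx)
    {
        for (int i = 0; i < n_pmds; i++) {
            if (pmds[i].numa_id == port_numa) {
                return &pmds[i];            /* Local pmd found. */
            }
        }
        if (n_pmds == 0) {
            return NULL;                    /* Nothing can poll this queue. */
        }
        const struct pmd *choice = &pmds[*rr_idx % n_pmds];
        *rr_idx = (*rr_idx + 1) % n_pmds;   /* Advance the round-robin cursor. */
        return choice;
    }

    int main(void)
    {
        /* Two pmds, both on NUMA node 0; the port is attached to node 1. */
        struct pmd pmds[] = { { .core_id = 2, .numa_id = 0 },
                              { .core_id = 4, .numa_id = 0 } };
        int rr = 0;

        for (int qid = 0; qid < 3; qid++) {
            const struct pmd *p = pick_pmd(pmds, 2, 1, &rr);
            if (p) {
                printf("queue %d -> core %d (numa %d): expect reduced performance\n",
                       qid, p->core_id, p->numa_id);
            } else {
                printf("queue %d will not be polled\n", qid);
            }
        }
        return 0;
    }

Run as-is, the three queues alternate between cores 2 and 4, mirroring the warning message the patch logs when a queue lands on a remote NUMA node. For reference, the pmd-cpu-mask discussed in the thread is set with, for example, ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=0x6, which requests pmd threads on cores 1 and 2.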