[ovs-dev,v2,5/7] dpif-netdev: Change rxq_scheduling to use rxq processing cycles.

Message ID 1500627885-503-6-git-send-email-ktraynor@redhat.com
State Superseded

Commit Message

Kevin Traynor July 21, 2017, 9:04 a.m. UTC
Previously rxqs were assigned to pmds by round robin in
port/queue order.

Now that we have the processing cycles used for existing rxqs,
use that information to try to produce a better balanced
distribution of rxqs across pmds, i.e. given multiple pmds, the
rxqs which have consumed the largest amount of processing cycles
will be placed on different pmds.

The rxqs are sorted by their processing cycles and assigned (in
sorted order) round robin across pmds.

Signed-off-by: Kevin Traynor <ktraynor@redhat.com>
---
 Documentation/howto/dpdk.rst | 10 +++++++
 lib/dpif-netdev.c            | 67 +++++++++++++++++++++++++++++++++++---------
 2 files changed, 63 insertions(+), 14 deletions(-)
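
To give a quick feel for the intended behaviour, the following standalone
sketch (illustrative only: the toy_rxq struct, cycle counts and pmd core ids
are made up for the example and are not taken from the OVS code) sorts a
handful of rxqs by the cycles they consumed, heaviest first, and then assigns
them to pmds in round robin, the same scheme this patch applies to unpinned
queues:

    /* Illustrative sketch: sort toy rxqs by measured processing cycles
     * (heaviest first) and assign them to pmds in round robin. */
    #include <stdio.h>
    #include <stdlib.h>

    struct toy_rxq {
        int qid;                       /* queue id */
        unsigned long long cycles;     /* cycles used in the last interval */
        int pmd;                       /* pmd core assigned, -1 if none */
    };

    /* Variant of the patch's rxq_cycle_sort() that also returns 0 for ties. */
    static int
    toy_rxq_cycle_sort(const void *a, const void *b)
    {
        const struct toy_rxq *qa = a;
        const struct toy_rxq *qb = b;

        if (qa->cycles == qb->cycles) {
            return 0;
        }
        return qa->cycles > qb->cycles ? -1 : 1;
    }

    int
    main(void)
    {
        struct toy_rxq rxqs[] = {
            { 0, 30000, -1 }, { 1, 80000, -1 },
            { 2, 10000, -1 }, { 3, 60000, -1 },
        };
        int pmds[] = { 3, 5, 7 };      /* available pmd cores, toy values */
        size_t n_rxqs = sizeof rxqs / sizeof rxqs[0];
        size_t n_pmds = sizeof pmds / sizeof pmds[0];

        /* Sort heaviest-first, then hand queues to pmds in round robin so
         * the most expensive queues land on different pmds. */
        qsort(rxqs, n_rxqs, sizeof rxqs[0], toy_rxq_cycle_sort);
        for (size_t i = 0; i < n_rxqs; i++) {
            rxqs[i].pmd = pmds[i % n_pmds];
            printf("rxq %d (%llu cycles) -> pmd %d\n",
                   rxqs[i].qid, rxqs[i].cycles, rxqs[i].pmd);
        }
        return 0;
    }

With these toy numbers the sorted order is q1, q3, q0, q2, so the two heaviest
queues (q1 and q3) land on different pmds and the lightest queue wraps around
to share a pmd with the heaviest.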

Comments

Stokes, Ian July 22, 2017, 2:52 p.m. UTC | #1
> Previously rxqs were assigned to pmds by round robin in port/queue order.
> 
> Now that we have the processing cycles used for existing rxqs, use that
> information to try to produce a better balanced distribution of rxqs
> across pmds, i.e. given multiple pmds, the rxqs which have consumed the
> largest amount of processing cycles will be placed on different pmds.
> 
> The rxqs are sorted by their processing cycles and assigned (in sorted
> order) round robin across pmds.
> 
> Signed-off-by: Kevin Traynor <ktraynor@redhat.com>
> ---
>  Documentation/howto/dpdk.rst | 10 +++++++
>  lib/dpif-netdev.c            | 67 +++++++++++++++++++++++++++++++++++---------
>  2 files changed, 63 insertions(+), 14 deletions(-)
> 
> diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst
> index af01d3e..d9ac8d3 100644
> --- a/Documentation/howto/dpdk.rst
> +++ b/Documentation/howto/dpdk.rst
> @@ -119,4 +119,14 @@ After that PMD threads on cores where RX queues was pinned will become
>    thread.
> 
> +If pmd-rxq-affinity is not set for rxqs, they will be assigned to pmds
> +automatically. The processing cycles that have been required for each
> +rxq will be used where known to assign rxqs with the highest
> +consumption of processing cycles to different pmds.
> +
> +Rxq to pmds assignment takes place whenever there are configuration
> +changes or can be triggered by using::
> +
> +    $ ovs-appctl dpif-netdev/pmd-rxq-rebalance
> +
I think an illustrated example of the expected assignment behavior would be beneficial here to give users a feel for what's happening under the hood.

Something simple like how 4 queues would be distributed over 3 pmds, although this change might make more sense to be rolled in with patch 6 when the pmd selection process is modified.
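
For instance (numbers purely illustrative): with four rxqs measured at 80k,
60k, 30k and 10k cycles and three non-isolated pmds A, B and C, the sorted
round robin in this patch would put the 80k and 10k rxqs on A, the 60k rxq on
B and the 30k rxq on C.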

Kevin Traynor Aug. 1, 2017, 3:51 p.m. UTC | #2
On 07/22/2017 03:52 PM, Stokes, Ian wrote:
>> Previously rxqs were assigned to pmds by round robin in port/queue order.
>>
>> Now that we have the processing cycles used for existing rxqs, use that
>> information to try to produce a better balanced distribution of rxqs
>> across pmds, i.e. given multiple pmds, the rxqs which have consumed the
>> largest amount of processing cycles will be placed on different pmds.
>>
>> The rxqs are sorted by their processing cycles and assigned (in sorted
>> order) round robin across pmds.
>>
>> Signed-off-by: Kevin Traynor <ktraynor@redhat.com>
>> ---
>>  Documentation/howto/dpdk.rst | 10 +++++++
>>  lib/dpif-netdev.c            | 67 +++++++++++++++++++++++++++++++++++---------
>>  2 files changed, 63 insertions(+), 14 deletions(-)
>>
>> diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst
>> index af01d3e..d9ac8d3 100644
>> --- a/Documentation/howto/dpdk.rst
>> +++ b/Documentation/howto/dpdk.rst
>> @@ -119,4 +119,14 @@ After that PMD threads on cores where RX queues was pinned will become
>>    thread.
>>
>> +If pmd-rxq-affinity is not set for rxqs, they will be assigned to pmds
>> +automatically. The processing cycles that have been required for each
>> +rxq will be used where known to assign rxqs with the highest
>> +consumption of processing cycles to different pmds.
>> +
>> +Rxq to pmds assignment takes place whenever there are configuration
>> +changes or can be triggered by using::
>> +
>> +    $ ovs-appctl dpif-netdev/pmd-rxq-rebalance
>> +
> I think an illustrated example of the expected assignment behavior would be beneficial here to give users a feel for what's happening under the hood.
> 
> Something simple like how 4 queues would be distributed over 3 pmds, although this change might make more sense to be rolled in with patch 6 when the pmd selection process is modified.
> 

Sure. Yeah, I agree it makes more sense when the algorithm is finalized,
so I added it there.

Patch

diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst
index af01d3e..d9ac8d3 100644
--- a/Documentation/howto/dpdk.rst
+++ b/Documentation/howto/dpdk.rst
@@ -119,4 +119,14 @@  After that PMD threads on cores where RX queues was pinned will become
   thread.
 
+If pmd-rxq-affinity is not set for rxqs, they will be assigned to pmds
+automatically. The processing cycles that have been required for each rxq
+will be used where known to assign rxqs with the highest consumption of
+processing cycles to different pmds.
+
+Rxq to pmds assignment takes place whenever there are configuration changes
+or can be triggered by using::
+
+    $ ovs-appctl dpif-netdev/pmd-rxq-rebalance
+
 QoS
 ---
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 185de9b..7663dba 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -3289,8 +3289,29 @@  rr_numa_list_destroy(struct rr_numa_list *rr)
 }
 
+/* Sort Rx Queues by the processing cycles they are consuming. */
+static int
+rxq_cycle_sort(const void *a, const void *b)
+{
+    struct dp_netdev_rxq * qa;
+    struct dp_netdev_rxq * qb;
+
+    qa = *(struct dp_netdev_rxq **) a;
+    qb = *(struct dp_netdev_rxq **) b;
+
+    if (dp_netdev_rxq_get_cycles(qa, RXQ_CYCLES_PROC_LAST) >=
+            dp_netdev_rxq_get_cycles(qb, RXQ_CYCLES_PROC_LAST)) {
+        return -1;
+    }
+
+    return 1;
+}
+
 /* Assign pmds to queues.  If 'pinned' is true, assign pmds to pinned
  * queues and marks the pmds as isolated.  Otherwise, assign non isolated
  * pmds to unpinned queues.
  *
+ * If 'pinned' is false queues will be sorted by processing cycles they are
+ * consuming and then assigned to pmds in round robin order.
+ *
  * The function doesn't touch the pmd threads, it just stores the assignment
  * in the 'pmd' member of each rxq. */
@@ -3300,18 +3321,14 @@  rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex)
     struct dp_netdev_port *port;
     struct rr_numa_list rr;
-
-    rr_numa_list_populate(dp, &rr);
+    struct dp_netdev_rxq ** rxqs = NULL;
+    int i, n_rxqs = 0;
+    struct rr_numa *numa = NULL;
+    int numa_id;
 
     HMAP_FOR_EACH (port, node, &dp->ports) {
-        struct rr_numa *numa;
-        int numa_id;
-
         if (!netdev_is_pmd(port->netdev)) {
             continue;
         }
 
-        numa_id = netdev_get_numa_id(port->netdev);
-        numa = rr_numa_list_lookup(&rr, numa_id);
-
         for (int qid = 0; qid < port->n_rxq; qid++) {
             struct dp_netdev_rxq *q = &port->rxqs[qid];
@@ -3331,17 +3348,39 @@  rxq_scheduling(struct dp_netdev *dp, bool pinned) OVS_REQUIRES(dp->port_mutex)
                 }
             } else if (!pinned && q->core_id == OVS_CORE_UNSPEC) {
-                if (!numa) {
-                    VLOG_WARN("There's no available (non isolated) pmd thread "
-                              "on numa node %d. Queue %d on port \'%s\' will "
-                              "not be polled.",
-                              numa_id, qid, netdev_get_name(port->netdev));
+                if (n_rxqs == 0) {
+                    rxqs = xmalloc(sizeof *rxqs);
                 } else {
-                    q->pmd = rr_numa_get_pmd(numa);
+                    rxqs = xrealloc(rxqs, sizeof *rxqs * (n_rxqs + 1));
                 }
+                /* Store the queue. */
+                rxqs[n_rxqs++] = q;
             }
         }
     }
 
+    if (n_rxqs > 1) {
+        /* Sort the queues in order of the processing cycles
+         * they consumed during their last pmd interval. */
+        qsort(rxqs, n_rxqs, sizeof *rxqs, rxq_cycle_sort);
+    }
+
+    rr_numa_list_populate(dp, &rr);
+    /* Assign the sorted queues to pmds in round robin. */
+    for (i = 0; i < n_rxqs; i++) {
+        numa_id = netdev_get_numa_id(rxqs[i]->port->netdev);
+        numa = rr_numa_list_lookup(&rr, numa_id);
+        if (!numa) {
+            VLOG_WARN("There's no available (non isolated) pmd thread "
+                      "on numa node %d. Queue %d on port \'%s\' will "
+                      "not be polled.",
+                      numa_id, netdev_rxq_get_queue_id(rxqs[i]->rx),
+                      netdev_get_name(rxqs[i]->port->netdev));
+            continue;
+        }
+        rxqs[i]->pmd = rr_numa_get_pmd(numa);
+    }
+
     rr_numa_list_destroy(&rr);
+    free(rxqs);
 }