diff mbox

[ovs-dev,v2,6/7] dpif-netdev: Change pmd selection order.

Message ID 1500627885-503-7-git-send-email-ktraynor@redhat.com
State Superseded
Headers show

Commit Message

Kevin Traynor July 21, 2017, 9:04 a.m. UTC
Up to his point rxqs are sorted by processing cycles they
consumed and assigned to pmds in a round robin manner.

Ian pointed out that on wrap around the most loaded pmd will be
the next one to be assigned an additional rxq and that it would be
better to reverse the pmd order when wraparound occurs.

In other words, change from assigning by rr to assigning in a forward
and reverse cycle through pmds.

Suggested-by: Ian Stokes <ian.stokes@intel.com>
Signed-off-by: Kevin Traynor <ktraynor@redhat.com>
---
 lib/dpif-netdev.c | 21 ++++++++++++++++++++-
 tests/pmd.at      |  2 +-
 2 files changed, 21 insertions(+), 2 deletions(-)

Comments

Stokes, Ian July 22, 2017, 2:52 p.m. UTC | #1
> Up to his point rxqs are sorted by processing cycles they consumed and
> assigned to pmds in a round robin manner.
> 
> Ian pointed out that on wrap around the most loaded pmd will be the next
> one to be assigned an additional rxq and that it would be better to
> reverse the pmd order when wraparound occurs.
> 
> In other words, change from assigning by rr to assigning in a forward and
> reverse cycle through pmds.
> 
> Suggested-by: Ian Stokes <ian.stokes@intel.com>
> Signed-off-by: Kevin Traynor <ktraynor@redhat.com>
> ---
>  lib/dpif-netdev.c | 21 ++++++++++++++++++++-
>  tests/pmd.at      |  2 +-
>  2 files changed, 21 insertions(+), 2 deletions(-)
> 
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 7663dba..31f0ed4
> 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -3230,4 +3230,5 @@ struct rr_numa {
> 
>      int cur_index;
> +    bool idx_inc;
>  };
> 
> @@ -3268,4 +3269,7 @@ rr_numa_list_populate(struct dp_netdev *dp, struct
> rr_numa_list *rr)
>          numa->pmds = xrealloc(numa->pmds, numa->n_pmds * sizeof *numa-
> >pmds);
>          numa->pmds[numa->n_pmds - 1] = pmd;
> +        /* At least one pmd so initialise curr_idx and idx_inc. */
> +        numa->cur_index = 0;
> +        numa->idx_inc = true;
>      }
>  }
> @@ -3274,5 +3278,20 @@ static struct dp_netdev_pmd_thread *
> rr_numa_get_pmd(struct rr_numa *numa)  {
> -    return numa->pmds[numa->cur_index++ % numa->n_pmds];
> +    int numa_idx = numa->cur_index;
> +
> +    if (numa->idx_inc == true) {
> +        if (numa->cur_index == numa->n_pmds-1) {
> +            numa->idx_inc = false;
> +        } else {
> +            numa->cur_index++;
> +        }
> +    } else {
> +        if (numa->cur_index == 0) {
> +            numa->idx_inc = true;
> +        } else {
> +            numa->cur_index--;
> +        }
> +    }
> +    return numa->pmds[numa_idx];
>  }
> 
I like the above approach, as there's a bit more complexity introduced into the numa selection process, I'm wondering does it make sense to add a VLOG_DBG message here regarding the current index and index increments/decrements?

I'm just thinking that this could be an area that has an impact on performance and could be useful for someone to help debug the assignments.

> diff --git a/tests/pmd.at b/tests/pmd.at index f95a016..73a8be1 100644
> --- a/tests/pmd.at
> +++ b/tests/pmd.at
> @@ -54,5 +54,5 @@ m4_define([CHECK_PMD_THREADS_CREATED], [
> 
>  m4_define([SED_NUMA_CORE_PATTERN], ["s/\(numa_id \)[[0-9]]*\( core_id
> \)[[0-9]]*:/\1<cleared>\2<cleared>:/"])
> -m4_define([SED_NUMA_CORE_QUEUE_PATTERN], ["s/\(numa_id \)[[0-9]]*\(
> core_id \)[[0-9]]*:/\1<cleared>\2<cleared>:/;s/\(queue-id: \)0 2 4
> 6/\1<cleared>/;s/\(queue-id: \)1 3 5 7/\1<cleared>/"])
> +m4_define([SED_NUMA_CORE_QUEUE_PATTERN], ["s/\(numa_id \)[[0-9]]*\(
> +core_id \)[[0-9]]*:/\1<cleared>\2<cleared>:/;s/\(queue-id: \)1 2 5
> +6/\1<cleared>/;s/\(queue-id: \)0 3 4 7/\1<cleared>/"])
>  m4_define([DUMMY_NUMA], [--dummy-numa="0,0,0,0"])
> 
> --
> 1.8.3.1
Kevin Traynor Aug. 1, 2017, 3:52 p.m. UTC | #2
On 07/22/2017 03:52 PM, Stokes, Ian wrote:
>> Up to his point rxqs are sorted by processing cycles they consumed and
>> assigned to pmds in a round robin manner.
>>
>> Ian pointed out that on wrap around the most loaded pmd will be the next
>> one to be assigned an additional rxq and that it would be better to
>> reverse the pmd order when wraparound occurs.
>>
>> In other words, change from assigning by rr to assigning in a forward and
>> reverse cycle through pmds.
>>
>> Suggested-by: Ian Stokes <ian.stokes@intel.com>
>> Signed-off-by: Kevin Traynor <ktraynor@redhat.com>
>> ---
>>  lib/dpif-netdev.c | 21 ++++++++++++++++++++-
>>  tests/pmd.at      |  2 +-
>>  2 files changed, 21 insertions(+), 2 deletions(-)
>>
>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 7663dba..31f0ed4
>> 100644
>> --- a/lib/dpif-netdev.c
>> +++ b/lib/dpif-netdev.c
>> @@ -3230,4 +3230,5 @@ struct rr_numa {
>>
>>      int cur_index;
>> +    bool idx_inc;
>>  };
>>
>> @@ -3268,4 +3269,7 @@ rr_numa_list_populate(struct dp_netdev *dp, struct
>> rr_numa_list *rr)
>>          numa->pmds = xrealloc(numa->pmds, numa->n_pmds * sizeof *numa-
>>> pmds);
>>          numa->pmds[numa->n_pmds - 1] = pmd;
>> +        /* At least one pmd so initialise curr_idx and idx_inc. */
>> +        numa->cur_index = 0;
>> +        numa->idx_inc = true;
>>      }
>>  }
>> @@ -3274,5 +3278,20 @@ static struct dp_netdev_pmd_thread *
>> rr_numa_get_pmd(struct rr_numa *numa)  {
>> -    return numa->pmds[numa->cur_index++ % numa->n_pmds];
>> +    int numa_idx = numa->cur_index;
>> +
>> +    if (numa->idx_inc == true) {
>> +        if (numa->cur_index == numa->n_pmds-1) {
>> +            numa->idx_inc = false;
>> +        } else {
>> +            numa->cur_index++;
>> +        }
>> +    } else {
>> +        if (numa->cur_index == 0) {
>> +            numa->idx_inc = true;
>> +        } else {
>> +            numa->cur_index--;
>> +        }
>> +    }
>> +    return numa->pmds[numa_idx];
>>  }
>>
> I like the above approach, as there's a bit more complexity introduced into the numa selection process, I'm wondering does it make sense to add a VLOG_DBG message here regarding the current index and index increments/decrements?
> 
> I'm just thinking that this could be an area that has an impact on performance and could be useful for someone to help debug the assignments.
> 

Good idea. I didn't add it here as this function just knows about the
pmd index. I've added it to the rxq_scheduling function as part of 4/6
where all the information about the pmds, queues, cycles etc. is
available and put it at info level.

>> diff --git a/tests/pmd.at b/tests/pmd.at index f95a016..73a8be1 100644
>> --- a/tests/pmd.at
>> +++ b/tests/pmd.at
>> @@ -54,5 +54,5 @@ m4_define([CHECK_PMD_THREADS_CREATED], [
>>
>>  m4_define([SED_NUMA_CORE_PATTERN], ["s/\(numa_id \)[[0-9]]*\( core_id
>> \)[[0-9]]*:/\1<cleared>\2<cleared>:/"])
>> -m4_define([SED_NUMA_CORE_QUEUE_PATTERN], ["s/\(numa_id \)[[0-9]]*\(
>> core_id \)[[0-9]]*:/\1<cleared>\2<cleared>:/;s/\(queue-id: \)0 2 4
>> 6/\1<cleared>/;s/\(queue-id: \)1 3 5 7/\1<cleared>/"])
>> +m4_define([SED_NUMA_CORE_QUEUE_PATTERN], ["s/\(numa_id \)[[0-9]]*\(
>> +core_id \)[[0-9]]*:/\1<cleared>\2<cleared>:/;s/\(queue-id: \)1 2 5
>> +6/\1<cleared>/;s/\(queue-id: \)0 3 4 7/\1<cleared>/"])
>>  m4_define([DUMMY_NUMA], [--dummy-numa="0,0,0,0"])
>>
>> --
>> 1.8.3.1
>
diff mbox

Patch

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 7663dba..31f0ed4 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -3230,4 +3230,5 @@  struct rr_numa {
 
     int cur_index;
+    bool idx_inc;
 };
 
@@ -3268,4 +3269,7 @@  rr_numa_list_populate(struct dp_netdev *dp, struct rr_numa_list *rr)
         numa->pmds = xrealloc(numa->pmds, numa->n_pmds * sizeof *numa->pmds);
         numa->pmds[numa->n_pmds - 1] = pmd;
+        /* At least one pmd so initialise curr_idx and idx_inc. */
+        numa->cur_index = 0;
+        numa->idx_inc = true;
     }
 }
@@ -3274,5 +3278,20 @@  static struct dp_netdev_pmd_thread *
 rr_numa_get_pmd(struct rr_numa *numa)
 {
-    return numa->pmds[numa->cur_index++ % numa->n_pmds];
+    int numa_idx = numa->cur_index;
+
+    if (numa->idx_inc == true) {
+        if (numa->cur_index == numa->n_pmds-1) {
+            numa->idx_inc = false;
+        } else {
+            numa->cur_index++;
+        }
+    } else {
+        if (numa->cur_index == 0) {
+            numa->idx_inc = true;
+        } else {
+            numa->cur_index--;
+        }
+    }
+    return numa->pmds[numa_idx];
 }
 
diff --git a/tests/pmd.at b/tests/pmd.at
index f95a016..73a8be1 100644
--- a/tests/pmd.at
+++ b/tests/pmd.at
@@ -54,5 +54,5 @@  m4_define([CHECK_PMD_THREADS_CREATED], [
 
 m4_define([SED_NUMA_CORE_PATTERN], ["s/\(numa_id \)[[0-9]]*\( core_id \)[[0-9]]*:/\1<cleared>\2<cleared>:/"])
-m4_define([SED_NUMA_CORE_QUEUE_PATTERN], ["s/\(numa_id \)[[0-9]]*\( core_id \)[[0-9]]*:/\1<cleared>\2<cleared>:/;s/\(queue-id: \)0 2 4 6/\1<cleared>/;s/\(queue-id: \)1 3 5 7/\1<cleared>/"])
+m4_define([SED_NUMA_CORE_QUEUE_PATTERN], ["s/\(numa_id \)[[0-9]]*\( core_id \)[[0-9]]*:/\1<cleared>\2<cleared>:/;s/\(queue-id: \)1 2 5 6/\1<cleared>/;s/\(queue-id: \)0 3 4 7/\1<cleared>/"])
 m4_define([DUMMY_NUMA], [--dummy-numa="0,0,0,0"])