[ovs-dev,v2,6/7] dpif-netdev: Change pmd selection order.

Message ID	1500627885-503-7-git-send-email-ktraynor@redhat.com
State	Superseded
Headers	show Return-Path: <ovs-dev-bounces@openvswitch.org> DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 747333B728 From: Kevin Traynor <ktraynor@redhat.com> To: dev@openvswitch.org, ian.stokes@intel.com, jan.scheurich@ericsson.com, bhanuprakash.bodireddy@intel.com, ciara.loftus@intel.com Date: Fri, 21 Jul 2017 10:04:44 +0100 Message-Id: <1500627885-503-7-git-send-email-ktraynor@redhat.com> In-Reply-To: <1500627885-503-1-git-send-email-ktraynor@redhat.com> References: <1498653773-13757-1-git-send-email-ktraynor@redhat.com> <1500627885-503-1-git-send-email-ktraynor@redhat.com> Subject: [ovs-dev] [PATCH v2 6/7] dpif-netdev: Change pmd selection order. Precedence: list MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Sender: ovs-dev-bounces@openvswitch.org Errors-To: ovs-dev-bounces@openvswitch.org

Message ID

1500627885-503-7-git-send-email-ktraynor@redhat.com

State

Superseded

Headers

DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 747333B728
From: Kevin Traynor <ktraynor@redhat.com>
To: dev@openvswitch.org, ian.stokes@intel.com, jan.scheurich@ericsson.com,
	bhanuprakash.bodireddy@intel.com, ciara.loftus@intel.com
Date: Fri, 21 Jul 2017 10:04:44 +0100
Message-Id: <1500627885-503-7-git-send-email-ktraynor@redhat.com>
In-Reply-To: <1500627885-503-1-git-send-email-ktraynor@redhat.com>
References: <1498653773-13757-1-git-send-email-ktraynor@redhat.com>
	<1500627885-503-1-git-send-email-ktraynor@redhat.com>
Subject: [ovs-dev] [PATCH v2 6/7] dpif-netdev: Change pmd selection order.
Precedence: list
MIME-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Sender: ovs-dev-bounces@openvswitch.org
Errors-To: ovs-dev-bounces@openvswitch.org

Commit Message

Kevin Traynor July 21, 2017, 9:04 a.m. UTC

Up to his point rxqs are sorted by processing cycles they
consumed and assigned to pmds in a round robin manner.

Ian pointed out that on wrap around the most loaded pmd will be
the next one to be assigned an additional rxq and that it would be
better to reverse the pmd order when wraparound occurs.

In other words, change from assigning by rr to assigning in a forward
and reverse cycle through pmds.

Suggested-by: Ian Stokes <ian.stokes@intel.com>
Signed-off-by: Kevin Traynor <ktraynor@redhat.com>
---
 lib/dpif-netdev.c | 21 ++++++++++++++++++++-
 tests/pmd.at      |  2 +-
 2 files changed, 21 insertions(+), 2 deletions(-)

Comments

Stokes, Ian July 22, 2017, 2:52 p.m. UTC | #1

> Up to his point rxqs are sorted by processing cycles they consumed and
> assigned to pmds in a round robin manner.
> 
> Ian pointed out that on wrap around the most loaded pmd will be the next
> one to be assigned an additional rxq and that it would be better to
> reverse the pmd order when wraparound occurs.
> 
> In other words, change from assigning by rr to assigning in a forward and
> reverse cycle through pmds.
> 
> Suggested-by: Ian Stokes <ian.stokes@intel.com>
> Signed-off-by: Kevin Traynor <ktraynor@redhat.com>
> ---
>  lib/dpif-netdev.c | 21 ++++++++++++++++++++-
>  tests/pmd.at      |  2 +-
>  2 files changed, 21 insertions(+), 2 deletions(-)
> 
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 7663dba..31f0ed4
> 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -3230,4 +3230,5 @@ struct rr_numa {
> 
>      int cur_index;
> +    bool idx_inc;
>  };
> 
> @@ -3268,4 +3269,7 @@ rr_numa_list_populate(struct dp_netdev *dp, struct
> rr_numa_list *rr)
>          numa->pmds = xrealloc(numa->pmds, numa->n_pmds * sizeof *numa-
> >pmds);
>          numa->pmds[numa->n_pmds - 1] = pmd;
> +        /* At least one pmd so initialise curr_idx and idx_inc. */
> +        numa->cur_index = 0;
> +        numa->idx_inc = true;
>      }
>  }
> @@ -3274,5 +3278,20 @@ static struct dp_netdev_pmd_thread *
> rr_numa_get_pmd(struct rr_numa *numa)  {
> -    return numa->pmds[numa->cur_index++ % numa->n_pmds];
> +    int numa_idx = numa->cur_index;
> +
> +    if (numa->idx_inc == true) {
> +        if (numa->cur_index == numa->n_pmds-1) {
> +            numa->idx_inc = false;
> +        } else {
> +            numa->cur_index++;
> +        }
> +    } else {
> +        if (numa->cur_index == 0) {
> +            numa->idx_inc = true;
> +        } else {
> +            numa->cur_index--;
> +        }
> +    }
> +    return numa->pmds[numa_idx];
>  }
> 
I like the above approach, as there's a bit more complexity introduced into the numa selection process, I'm wondering does it make sense to add a VLOG_DBG message here regarding the current index and index increments/decrements?

I'm just thinking that this could be an area that has an impact on performance and could be useful for someone to help debug the assignments.

> diff --git a/tests/pmd.at b/tests/pmd.at index f95a016..73a8be1 100644
> --- a/tests/pmd.at
> +++ b/tests/pmd.at
> @@ -54,5 +54,5 @@ m4_define([CHECK_PMD_THREADS_CREATED], [
> 
>  m4_define([SED_NUMA_CORE_PATTERN], ["s/\(numa_id \)[[0-9]]*\( core_id
> \)[[0-9]]*:/\1<cleared>\2<cleared>:/"])
> -m4_define([SED_NUMA_CORE_QUEUE_PATTERN], ["s/\(numa_id \)[[0-9]]*\(
> core_id \)[[0-9]]*:/\1<cleared>\2<cleared>:/;s/\(queue-id: \)0 2 4
> 6/\1<cleared>/;s/\(queue-id: \)1 3 5 7/\1<cleared>/"])
> +m4_define([SED_NUMA_CORE_QUEUE_PATTERN], ["s/\(numa_id \)[[0-9]]*\(
> +core_id \)[[0-9]]*:/\1<cleared>\2<cleared>:/;s/\(queue-id: \)1 2 5
> +6/\1<cleared>/;s/\(queue-id: \)0 3 4 7/\1<cleared>/"])
>  m4_define([DUMMY_NUMA], [--dummy-numa="0,0,0,0"])
> 
> --
> 1.8.3.1

Kevin Traynor Aug. 1, 2017, 3:52 p.m. UTC | #2

On 07/22/2017 03:52 PM, Stokes, Ian wrote:
>> Up to his point rxqs are sorted by processing cycles they consumed and
>> assigned to pmds in a round robin manner.
>>
>> Ian pointed out that on wrap around the most loaded pmd will be the next
>> one to be assigned an additional rxq and that it would be better to
>> reverse the pmd order when wraparound occurs.
>>
>> In other words, change from assigning by rr to assigning in a forward and
>> reverse cycle through pmds.
>>
>> Suggested-by: Ian Stokes <ian.stokes@intel.com>
>> Signed-off-by: Kevin Traynor <ktraynor@redhat.com>
>> ---
>>  lib/dpif-netdev.c | 21 ++++++++++++++++++++-
>>  tests/pmd.at      |  2 +-
>>  2 files changed, 21 insertions(+), 2 deletions(-)
>>
>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 7663dba..31f0ed4
>> 100644
>> --- a/lib/dpif-netdev.c
>> +++ b/lib/dpif-netdev.c
>> @@ -3230,4 +3230,5 @@ struct rr_numa {
>>
>>      int cur_index;
>> +    bool idx_inc;
>>  };
>>
>> @@ -3268,4 +3269,7 @@ rr_numa_list_populate(struct dp_netdev *dp, struct
>> rr_numa_list *rr)
>>          numa->pmds = xrealloc(numa->pmds, numa->n_pmds * sizeof *numa-
>>> pmds);
>>          numa->pmds[numa->n_pmds - 1] = pmd;
>> +        /* At least one pmd so initialise curr_idx and idx_inc. */
>> +        numa->cur_index = 0;
>> +        numa->idx_inc = true;
>>      }
>>  }
>> @@ -3274,5 +3278,20 @@ static struct dp_netdev_pmd_thread *
>> rr_numa_get_pmd(struct rr_numa *numa)  {
>> -    return numa->pmds[numa->cur_index++ % numa->n_pmds];
>> +    int numa_idx = numa->cur_index;
>> +
>> +    if (numa->idx_inc == true) {
>> +        if (numa->cur_index == numa->n_pmds-1) {
>> +            numa->idx_inc = false;
>> +        } else {
>> +            numa->cur_index++;
>> +        }
>> +    } else {
>> +        if (numa->cur_index == 0) {
>> +            numa->idx_inc = true;
>> +        } else {
>> +            numa->cur_index--;
>> +        }
>> +    }
>> +    return numa->pmds[numa_idx];
>>  }
>>
> I like the above approach, as there's a bit more complexity introduced into the numa selection process, I'm wondering does it make sense to add a VLOG_DBG message here regarding the current index and index increments/decrements?
> 
> I'm just thinking that this could be an area that has an impact on performance and could be useful for someone to help debug the assignments.
> 

Good idea. I didn't add it here as this function just knows about the
pmd index. I've added it to the rxq_scheduling function as part of 4/6
where all the information about the pmds, queues, cycles etc. is
available and put it at info level.

>> diff --git a/tests/pmd.at b/tests/pmd.at index f95a016..73a8be1 100644
>> --- a/tests/pmd.at
>> +++ b/tests/pmd.at
>> @@ -54,5 +54,5 @@ m4_define([CHECK_PMD_THREADS_CREATED], [
>>
>>  m4_define([SED_NUMA_CORE_PATTERN], ["s/\(numa_id \)[[0-9]]*\( core_id
>> \)[[0-9]]*:/\1<cleared>\2<cleared>:/"])
>> -m4_define([SED_NUMA_CORE_QUEUE_PATTERN], ["s/\(numa_id \)[[0-9]]*\(
>> core_id \)[[0-9]]*:/\1<cleared>\2<cleared>:/;s/\(queue-id: \)0 2 4
>> 6/\1<cleared>/;s/\(queue-id: \)1 3 5 7/\1<cleared>/"])
>> +m4_define([SED_NUMA_CORE_QUEUE_PATTERN], ["s/\(numa_id \)[[0-9]]*\(
>> +core_id \)[[0-9]]*:/\1<cleared>\2<cleared>:/;s/\(queue-id: \)1 2 5
>> +6/\1<cleared>/;s/\(queue-id: \)0 3 4 7/\1<cleared>/"])
>>  m4_define([DUMMY_NUMA], [--dummy-numa="0,0,0,0"])
>>
>> --
>> 1.8.3.1
>

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 7663dba..31f0ed4 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -3230,4 +3230,5 @@  struct rr_numa {
 
     int cur_index;
+    bool idx_inc;
 };
 
@@ -3268,4 +3269,7 @@  rr_numa_list_populate(struct dp_netdev *dp, struct rr_numa_list *rr)
         numa->pmds = xrealloc(numa->pmds, numa->n_pmds * sizeof *numa->pmds);
         numa->pmds[numa->n_pmds - 1] = pmd;
+        /* At least one pmd so initialise curr_idx and idx_inc. */
+        numa->cur_index = 0;
+        numa->idx_inc = true;
     }
 }
@@ -3274,5 +3278,20 @@  static struct dp_netdev_pmd_thread *
 rr_numa_get_pmd(struct rr_numa *numa)
 {
-    return numa->pmds[numa->cur_index++ % numa->n_pmds];
+    int numa_idx = numa->cur_index;
+
+    if (numa->idx_inc == true) {
+        if (numa->cur_index == numa->n_pmds-1) {
+            numa->idx_inc = false;
+        } else {
+            numa->cur_index++;
+        }
+    } else {
+        if (numa->cur_index == 0) {
+            numa->idx_inc = true;
+        } else {
+            numa->cur_index--;
+        }
+    }
+    return numa->pmds[numa_idx];
 }
 
diff --git a/tests/pmd.at b/tests/pmd.at
index f95a016..73a8be1 100644
--- a/tests/pmd.at
+++ b/tests/pmd.at
@@ -54,5 +54,5 @@  m4_define([CHECK_PMD_THREADS_CREATED], [
 
 m4_define([SED_NUMA_CORE_PATTERN], ["s/\(numa_id \)[[0-9]]*\( core_id \)[[0-9]]*:/\1<cleared>\2<cleared>:/"])
-m4_define([SED_NUMA_CORE_QUEUE_PATTERN], ["s/\(numa_id \)[[0-9]]*\( core_id \)[[0-9]]*:/\1<cleared>\2<cleared>:/;s/\(queue-id: \)0 2 4 6/\1<cleared>/;s/\(queue-id: \)1 3 5 7/\1<cleared>/"])
+m4_define([SED_NUMA_CORE_QUEUE_PATTERN], ["s/\(numa_id \)[[0-9]]*\( core_id \)[[0-9]]*:/\1<cleared>\2<cleared>:/;s/\(queue-id: \)1 2 5 6/\1<cleared>/;s/\(queue-id: \)0 3 4 7/\1<cleared>/"])
 m4_define([DUMMY_NUMA], [--dummy-numa="0,0,0,0"])

[ovs-dev,v2,6/7] dpif-netdev: Change pmd selection order.

Commit Message

Comments

Patch