diff mbox series

[net-next] openvswitch: eliminate cpu_used_mask from sw_flow

Message ID 1532678637-15079-1-git-send-email-lirongqing@baidu.com
State Changes Requested, archived
Delegated to: David Miller
Headers show
Series [net-next] openvswitch: eliminate cpu_used_mask from sw_flow | expand

Commit Message

Li RongQing July 27, 2018, 8:03 a.m. UTC
The size of struct cpumask varies with CONFIG_NR_CPUS. In some configs
CONFIG_NR_CPUS is very large, e.g. 5120, so struct cpumask takes
640 bytes; if there are thousands of flows, this consumes a lot of
memory.

cpu_used_mask has two purposes:
1: It assumes the first cpu is cpu0, which may not be true; now use
   cpumask_first(cpu_possible_mask)
2: When getting/clearing statistics, it reduces the iteration; but that
   is not a hot path, so use for_each_possible_cpu

Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
Signed-off-by: Li RongQing <lirongqing@baidu.com>
---
 net/openvswitch/flow.c       | 11 +++++------
 net/openvswitch/flow.h       |  5 ++---
 net/openvswitch/flow_table.c | 11 +++++------
 3 files changed, 12 insertions(+), 15 deletions(-)

Comments

Pravin Shelar July 28, 2018, 7:05 p.m. UTC | #1
On Fri, Jul 27, 2018 at 1:03 AM, Li RongQing <lirongqing@baidu.com> wrote:
> The size of struct cpumask varies with CONFIG_NR_CPUS, some config
> CONFIG_NR_CPUS is very larger, like 5120, struct cpumask will take
> 640 bytes, if there is thousands of flows, it will take lots of
> memory
>
I am fine with removing cpumask bitmap from flow struct.

> cpu_used_mask has two purposes
> 1: Assume first cpu as cpu0 which maybe not true; now use
>    cpumask_first(cpu_possible_mask)

I am not sure about this; most systems would have cpu zero, so why
is this change done in this patch? This adds the overhead of calculating
the first cpu when updating stats in the fast path.

> 2: when get/clear statistic, reduce the iteratation; but it
>    is not hot path, so use for_each_possible_cpu
>
David Miller July 29, 2018, 6:12 a.m. UTC | #2
From: Li RongQing <lirongqing@baidu.com>
Date: Fri, 27 Jul 2018 16:03:57 +0800

> The size of struct cpumask varies with CONFIG_NR_CPUS, some config
> CONFIG_NR_CPUS is very larger, like 5120, struct cpumask will take
> 640 bytes, if there is thousands of flows, it will take lots of
> memory
> 
> cpu_used_mask has two purposes
> 1: Assume first cpu as cpu0 which maybe not true; now use
>    cpumask_first(cpu_possible_mask)
> 2: when get/clear statistic, reduce the iteratation; but it
>    is not hot path, so use for_each_possible_cpu
> 
> Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
> Signed-off-by: Li RongQing <lirongqing@baidu.com>

This seems to completely undo the optimization done by:

commit c4b2bf6b4a35348fe6d1eb06928eb68d7b9d99a9
Author: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Date:   Mon Jul 17 23:28:06 2017 -0700

    openvswitch: Optimize operations for OvS flow_stats.

And in that commit message it states clearly that flow_free()
performance matters, and that the iteration over cpu_possible_mask in
the for() loop is the problem.

At a minimum, we can't apply this unless you explain why the
above performance issue won't be reintroduced by your change.

Thank you.
diff mbox series

Patch

diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 56b8e7167790..ad580bec00fb 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -85,7 +85,9 @@  void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
 		if (cpu == 0 && unlikely(flow->stats_last_writer != cpu))
 			flow->stats_last_writer = cpu;
 	} else {
-		stats = rcu_dereference(flow->stats[0]); /* Pre-allocated. */
+		int cpu1 = cpumask_first(cpu_possible_mask);
+
+		stats = rcu_dereference(flow->stats[cpu1]); /* Pre-allocated. */
 		spin_lock(&stats->lock);
 
 		/* If the current CPU is the only writer on the
@@ -118,7 +120,6 @@  void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
 
 					rcu_assign_pointer(flow->stats[cpu],
 							   new_stats);
-					cpumask_set_cpu(cpu, &flow->cpu_used_mask);
 					goto unlock;
 				}
 			}
@@ -145,8 +146,7 @@  void ovs_flow_stats_get(const struct sw_flow *flow,
 	*tcp_flags = 0;
 	memset(ovs_stats, 0, sizeof(*ovs_stats));
 
-	/* We open code this to make sure cpu 0 is always considered */
-	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
+	for_each_possible_cpu(cpu) {
 		struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
 
 		if (stats) {
@@ -169,8 +169,7 @@  void ovs_flow_stats_clear(struct sw_flow *flow)
 {
 	int cpu;
 
-	/* We open code this to make sure cpu 0 is always considered */
-	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
+	for_each_possible_cpu(cpu) {
 		struct flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
 
 		if (stats) {
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index c670dd24b8b7..d0ea5d6ced3e 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -223,17 +223,16 @@  struct sw_flow {
 		u32 hash;
 	} flow_table, ufid_table;
 	int stats_last_writer;		/* CPU id of the last writer on
-					 * 'stats[0]'.
+					 * 'stats[first cpu id]'.
 					 */
 	struct sw_flow_key key;
 	struct sw_flow_id id;
-	struct cpumask cpu_used_mask;
 	struct sw_flow_mask *mask;
 	struct sw_flow_actions __rcu *sf_acts;
 	struct flow_stats __rcu *stats[]; /* One for each CPU.  First one
 					   * is allocated at flow creation time,
 					   * the rest are allocated on demand
-					   * while holding the 'stats[0].lock'.
+					   * while holding the 'stats[first cpu id].lock'
 					   */
 };
 
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 80ea2a71852e..e4dbd65c308a 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -80,6 +80,7 @@  struct sw_flow *ovs_flow_alloc(void)
 {
 	struct sw_flow *flow;
 	struct flow_stats *stats;
+	int cpu = cpumask_first(cpu_possible_mask);
 
 	flow = kmem_cache_zalloc(flow_cache, GFP_KERNEL);
 	if (!flow)
@@ -90,15 +91,13 @@  struct sw_flow *ovs_flow_alloc(void)
 	/* Initialize the default stat node. */
 	stats = kmem_cache_alloc_node(flow_stats_cache,
 				      GFP_KERNEL | __GFP_ZERO,
-				      node_online(0) ? 0 : NUMA_NO_NODE);
+				      cpu_to_node(cpu));
 	if (!stats)
 		goto err;
 
 	spin_lock_init(&stats->lock);
 
-	RCU_INIT_POINTER(flow->stats[0], stats);
-
-	cpumask_set_cpu(0, &flow->cpu_used_mask);
+	RCU_INIT_POINTER(flow->stats[cpu], stats);
 
 	return flow;
 err:
@@ -142,11 +141,11 @@  static void flow_free(struct sw_flow *flow)
 		kfree(flow->id.unmasked_key);
 	if (flow->sf_acts)
 		ovs_nla_free_flow_actions((struct sw_flow_actions __force *)flow->sf_acts);
-	/* We open code this to make sure cpu 0 is always considered */
-	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask))
+	for_each_possible_cpu(cpu) {
 		if (flow->stats[cpu])
 			kmem_cache_free(flow_stats_cache,
 					(struct flow_stats __force *)flow->stats[cpu]);
+	}
 	kmem_cache_free(flow_cache, flow);
 }