
[net-next-2.6] flow: better memory management

Message ID 1284138025.24675.100.camel@edumazet-laptop
State Accepted, archived
Delegated to: David Miller

Commit Message

Eric Dumazet Sept. 10, 2010, 5 p.m. UTC
Allocate hash tables for every online CPU, not every possible one.

NUMA-aware allocations.

Don't use a full page on arches where PAGE_SIZE > 1024*sizeof(void *).

misc:
  __percpu, __read_mostly, __cpuinit annotations
  flow_compare_t is just an "unsigned long"

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 net/core/flow.c |   78 ++++++++++++++++++++++++----------------------
 1 file changed, 42 insertions(+), 36 deletions(-)
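The page-size point is pure arithmetic: with hash_shift = 10 the per-cpu table holds 1024 hlist_heads, i.e. 1024 * sizeof(void *) = 8 KB on 64-bit, while the old __get_free_pages() path rounded that up to a whole number of pages. A standalone userspace sketch of the numbers (illustrative values only, not code from the patch):

#include <stdio.h>

int main(void)
{
	unsigned long page_size = 65536;		/* e.g. 64 KB pages (ppc64) */
	unsigned long buckets   = 1UL << 10;		/* hash_shift = 10 */
	unsigned long sz = buckets * sizeof(void *);	/* one pointer per hlist_head */
	unsigned long alloc = page_size;

	/* __get_free_pages() hands out power-of-two numbers of pages */
	while (alloc < sz)
		alloc <<= 1;

	printf("exact table size: %lu bytes\n", sz);		/* 8192 on 64-bit */
	printf("page allocation : %lu bytes\n", alloc);		/* 65536 */
	printf("waste per cpu   : %lu bytes\n", alloc - sz);	/* 57344 (56 KB) */
	return 0;
}

With kzalloc_node() the allocation is exactly sz bytes and comes from the memory node of the CPU that will use it, which is the other half of the change.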




Comments

Andi Kleen Sept. 12, 2010, 10:28 p.m. UTC | #1
Eric Dumazet <eric.dumazet@gmail.com> writes:

> Allocate hash tables for every online CPU, not every possible one.

There are some setups that bring most of their CPUs online only after
boot. On those, this heuristic would be very wrong.

-Andi
Eric Dumazet Sept. 13, 2010, 5:51 a.m. UTC | #2
On Monday, 13 September 2010 at 00:28 +0200, Andi Kleen wrote:
> Eric Dumazet <eric.dumazet@gmail.com> writes:
> 
> > Allocate hash tables for every online CPU, not every possible one.
> 
> There are some setups that bring most of their CPUs online only after
> boot. On those, this heuristic would be very wrong.
> 

Why?

I don't get your argument, Andi.

I coded the following obvious thing:

At boot: allocate tables for online CPUs.

When bringing a CPU online: allocate a table for this "new" CPU.

What could be wrong with this?

On my machine, this works well and saves 16 "tables", because I have 16
online CPUs while there are 32 possible CPUs. (It's a lie, since I have
two quad-core CPUs and a total of 16 threads.)
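
For readers who don't have the 2.6-era hotplug API in their head, the shape of this two-step scheme looks roughly as follows. The my_* names are hypothetical stand-ins for the patch's flow_cache_* functions; the notifier constants and helpers are the real ones and appear in the diff below.

#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/notifier.h>

/* Hypothetical stand-ins for the patch's flow_cache_cpu_prepare()
 * and __flow_cache_shrink(); stubbed out so the sketch is complete. */
static int my_alloc_for_cpu(int cpu)
{
	/* e.g. kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu)) */
	return 0;
}

static void my_shrink_for_cpu(int cpu)
{
}

static int __cpuinit my_cpu_callback(struct notifier_block *nfb,
				     unsigned long action, void *hcpu)
{
	int cpu = (unsigned long)hcpu;

	switch (action) {
	case CPU_UP_PREPARE:			/* a CPU is about to come online */
	case CPU_UP_PREPARE_FROZEN:
		if (my_alloc_for_cpu(cpu))	/* NUMA-local allocation */
			return notifier_from_errno(-ENOMEM); /* veto the bring-up */
		break;
	case CPU_DEAD:				/* a CPU went offline */
	case CPU_DEAD_FROZEN:
		my_shrink_for_cpu(cpu);		/* drop its cached entries */
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block my_cpu_nb __cpuinitdata = {
	.notifier_call = my_cpu_callback,
};

static int __init my_module_init(void)
{
	int cpu;

	for_each_online_cpu(cpu) {		/* CPUs already up at boot */
		if (my_alloc_for_cpu(cpu))
			return -ENOMEM;
	}
	register_hotcpu_notifier(&my_cpu_nb);	/* CPUs brought up later */
	return 0;
}

Offline-but-possible CPUs therefore never get a table, which is exactly the saving described above.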





Andi Kleen Sept. 13, 2010, 8:43 a.m. UTC | #3
> 
> At boot: allocate tables for online CPUs.
> 
> When bringing a CPU online: allocate a table for this "new" CPU.

You're right, I misread the patch. I thought that part was missing.

> 
> What could be wrong with this?

Nothing, that's fine.

-Andi
Eric Dumazet Sept. 13, 2010, 9:52 a.m. UTC | #4
On Monday, 13 September 2010 at 10:43 +0200, Andi Kleen wrote:
> > 
> > At boot: allocate tables for online CPUs.
> > 
> > When bringing a CPU online: allocate a table for this "new" CPU.
> 
> You're right, I misread the patch. I thought that part was missing.
> 

Ah, OK, thanks for the clarification!



David Miller Sept. 14, 2010, 3:03 a.m. UTC | #5
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 10 Sep 2010 19:00:25 +0200

> Allocate hash tables for every online CPU, not every possible one.
> 
> NUMA-aware allocations.
> 
> Don't use a full page on arches where PAGE_SIZE > 1024*sizeof(void *).
> 
> misc:
>   __percpu, __read_mostly, __cpuinit annotations
>   flow_compare_t is just an "unsigned long"
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

Applied, thanks Eric.

> +			return notifier_from_errno(res);	

Trailing whitespace, tsk tsk :-)

Patch

diff --git a/net/core/flow.c b/net/core/flow.c
index f67dcbf..a092b9c 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -53,8 +53,7 @@  struct flow_flush_info {
 
 struct flow_cache {
 	u32				hash_shift;
-	unsigned long			order;
-	struct flow_cache_percpu	*percpu;
+	struct flow_cache_percpu __percpu *percpu;
 	struct notifier_block		hotcpu_notifier;
 	int				low_watermark;
 	int				high_watermark;
@@ -64,7 +63,7 @@  struct flow_cache {
 atomic_t flow_cache_genid = ATOMIC_INIT(0);
 EXPORT_SYMBOL(flow_cache_genid);
 static struct flow_cache flow_cache_global;
-static struct kmem_cache *flow_cachep;
+static struct kmem_cache *flow_cachep __read_mostly;
 
 static DEFINE_SPINLOCK(flow_cache_gc_lock);
 static LIST_HEAD(flow_cache_gc_list);
@@ -181,11 +180,7 @@  static u32 flow_hash_code(struct flow_cache *fc,
 		& (flow_cache_hash_size(fc) - 1));
 }
 
-#if (BITS_PER_LONG == 64)
-typedef u64 flow_compare_t;
-#else
-typedef u32 flow_compare_t;
-#endif
+typedef unsigned long flow_compare_t;
 
 /* I hear what you're saying, use memcmp.  But memcmp cannot make
  * important assumptions that we can here, such as alignment and
@@ -357,62 +352,73 @@  void flow_cache_flush(void)
 	put_online_cpus();
 }
 
-static void __init flow_cache_cpu_prepare(struct flow_cache *fc,
-					  struct flow_cache_percpu *fcp)
+static int __cpuinit flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
 {
-	fcp->hash_table = (struct hlist_head *)
-		__get_free_pages(GFP_KERNEL|__GFP_ZERO, fc->order);
-	if (!fcp->hash_table)
-		panic("NET: failed to allocate flow cache order %lu\n", fc->order);
+	struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
+	size_t sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc);
 
-	fcp->hash_rnd_recalc = 1;
-	fcp->hash_count = 0;
-	tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0);
+	if (!fcp->hash_table) {
+		fcp->hash_table = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
+		if (!fcp->hash_table) {
+			pr_err("NET: failed to allocate flow cache sz %zu\n", sz);
+			return -ENOMEM;
+		}
+		fcp->hash_rnd_recalc = 1;
+		fcp->hash_count = 0;
+		tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0);
+	}
+	return 0;
 }
 
-static int flow_cache_cpu(struct notifier_block *nfb,
+static int __cpuinit flow_cache_cpu(struct notifier_block *nfb,
 			  unsigned long action,
 			  void *hcpu)
 {
 	struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier);
-	int cpu = (unsigned long) hcpu;
+	int res, cpu = (unsigned long) hcpu;
 	struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
 
-	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
+	switch (action) {
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		res = flow_cache_cpu_prepare(fc, cpu);
+		if (res)
+			return notifier_from_errno(res);	
+		break;
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
 		__flow_cache_shrink(fc, fcp, 0);
+		break;
+	}
 	return NOTIFY_OK;
 }
 
-static int flow_cache_init(struct flow_cache *fc)
+static int __init flow_cache_init(struct flow_cache *fc)
 {
-	unsigned long order;
 	int i;
 
 	fc->hash_shift = 10;
 	fc->low_watermark = 2 * flow_cache_hash_size(fc);
 	fc->high_watermark = 4 * flow_cache_hash_size(fc);
 
-	for (order = 0;
-	     (PAGE_SIZE << order) <
-		     (sizeof(struct hlist_head)*flow_cache_hash_size(fc));
-	     order++)
-		/* NOTHING */;
-	fc->order = order;
 	fc->percpu = alloc_percpu(struct flow_cache_percpu);
+	if (!fc->percpu)
+		return -ENOMEM;
 
-	setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd,
-		    (unsigned long) fc);
-	fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
-	add_timer(&fc->rnd_timer);
-
-	for_each_possible_cpu(i)
-		flow_cache_cpu_prepare(fc, per_cpu_ptr(fc->percpu, i));
-
+	for_each_online_cpu(i) {
+		if (flow_cache_cpu_prepare(fc, i))
+			return -ENOMEM;
+	}
 	fc->hotcpu_notifier = (struct notifier_block){
 		.notifier_call = flow_cache_cpu,
 	};
 	register_hotcpu_notifier(&fc->hotcpu_notifier);
 
+	setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd,
+		    (unsigned long) fc);
+	fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
+	add_timer(&fc->rnd_timer);
+
 	return 0;
 }