diff mbox

[4/7] flow: delayed deletion of flow cache entries

Message ID 1269871964-5412-5-git-send-email-timo.teras@iki.fi
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Timo Teras March 29, 2010, 2:12 p.m. UTC
Speed up lookups by freeing flow cache entries later. This is also in
preparation to have virtual entry destructor that might do more
work.

As gc_list is more effective with a doubly linked list, the flow cache
is converted to use the common hlist and list macros where appropriate.

Signed-off-by: Timo Teras <timo.teras@iki.fi>
---
 net/core/flow.c |  112 ++++++++++++++++++++++++++++++++++++++----------------
 1 files changed, 79 insertions(+), 33 deletions(-)

Comments

Herbert Xu March 30, 2010, 12:22 p.m. UTC | #1
On Mon, Mar 29, 2010 at 05:12:41PM +0300, Timo Teras wrote:
> Speed up lookups by freeing flow cache entries later. This is also in
> preparation to have virtual entry destructor that might do more
> work.

So how does this speed up lookups exactly?

Cheers,
Timo Teras March 30, 2010, 12:32 p.m. UTC | #2
Herbert Xu wrote:
> On Mon, Mar 29, 2010 at 05:12:41PM +0300, Timo Teras wrote:
>> Speed up lookups by freeing flow cache entries later. This is also in
>> preparation to have virtual entry destructor that might do more
>> work.
> 
> So how does this speed up lookups exactly?

If flow cache regeneration or shrinking is triggered in lookup,
it would previously free it in place. Now that is deferred. But
yes, it's more useful after the next patches that call the
virtual destructor. Should have explained this better.

As said in the general description, patches 4-7 go together
and still have some problem cases. But they should show where I'm
trying to go.

I'd be interested to hear if the idea of patches 4-7 is good
or whether we could do things somehow better.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Herbert Xu March 30, 2010, 12:36 p.m. UTC | #3
On Tue, Mar 30, 2010 at 03:32:58PM +0300, Timo Teräs wrote:
> If flow cache regeneration or shrinking is triggered in lookup,
> it would previously free it in place. Now that is deferred. But
> yes, it's more useful after the next patches that call the
> virtual destructor. Should have explained this better.

Any chance you can refactor them so that this comes after the
virtual get/put patch?

That way we can evaluate this on its own merit rather than it being
a prerequisite for the more important stuff.

Thanks,
Timo Teras March 30, 2010, 12:43 p.m. UTC | #4
Herbert Xu wrote:
> On Tue, Mar 30, 2010 at 03:32:58PM +0300, Timo Teräs wrote:
>> If flow cache regeneration or shrinking is triggered in lookup,
>> it would previously free it in place. Now that is deferred. But
>> yes, it's more useful after the next patches that call the
>> virtual destructor. Should have explained this better.
> 
> Any chance you can refactor them so that this comes after the
> virtual get/put patch?
> 
> That way can evaluate this on its own merit rather than being
> a prerequisite for the more important stuff.

I thought it's not good to have possible speed regressions even
temporarily in the tree, so I figured this should go first.

But sure, I'll refactor this to be a later commit for the next
iteration.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/net/core/flow.c b/net/core/flow.c
index 104078d..760f93d 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -26,7 +26,10 @@ 
 #include <linux/security.h>
 
 struct flow_cache_entry {
-	struct flow_cache_entry	*next;
+	union {
+		struct hlist_node	hlist;
+		struct list_head	gc_list;
+	} u;
 	u16			family;
 	u8			dir;
 	u32			genid;
@@ -36,7 +39,7 @@  struct flow_cache_entry {
 };
 
 struct flow_cache_percpu {
-	struct flow_cache_entry **	hash_table;
+	struct hlist_head *		hash_table;
 	int				hash_count;
 	u32				hash_rnd;
 	int				hash_rnd_recalc;
@@ -63,6 +66,9 @@  atomic_t flow_cache_genid = ATOMIC_INIT(0);
 static struct flow_cache flow_cache_global;
 static struct kmem_cache *flow_cachep;
 
+static DEFINE_SPINLOCK(flow_cache_gc_lock);
+static LIST_HEAD(flow_cache_gc_list);
+
 #define flow_cache_hash_size(cache)	(1 << (cache)->hash_shift)
 #define FLOW_HASH_RND_PERIOD		(10 * 60 * HZ)
 
@@ -78,36 +84,62 @@  static void flow_cache_new_hashrnd(unsigned long arg)
 	add_timer(&fc->rnd_timer);
 }
 
-static void flow_entry_kill(struct flow_cache *fc,
-			    struct flow_cache_percpu *fcp,
-			    struct flow_cache_entry *fle)
+static void flow_entry_kill(struct flow_cache_entry *fle)
 {
 	if (fle->object)
 		atomic_dec(fle->object_ref);
 	kmem_cache_free(flow_cachep, fle);
-	fcp->hash_count--;
 }
 
+static void flow_cache_gc_task(struct work_struct *work)
+{
+	struct list_head gc_list;
+	struct flow_cache_entry *fce, *n;
+
+	INIT_LIST_HEAD(&gc_list);
+	spin_lock_bh(&flow_cache_gc_lock);
+	list_splice_tail_init(&flow_cache_gc_list, &gc_list);
+	spin_unlock_bh(&flow_cache_gc_lock);
+
+	list_for_each_entry_safe(fce, n, &gc_list, u.gc_list)
+		flow_entry_kill(fce);
+}
+static DECLARE_WORK(flow_cache_gc_work, flow_cache_gc_task);
+
 static void __flow_cache_shrink(struct flow_cache *fc,
 				struct flow_cache_percpu *fcp,
 				int shrink_to)
 {
-	struct flow_cache_entry *fle, **flp;
-	int i;
+	struct flow_cache_entry *fce;
+	struct hlist_node *entry, *tmp;
+	struct list_head gc_list;
+	int i, deleted = 0;
 
+	INIT_LIST_HEAD(&gc_list);
 	for (i = 0; i < flow_cache_hash_size(fc); i++) {
-		int k = 0;
-
-		flp = &fcp->hash_table[i];
-		while ((fle = *flp) != NULL && k < shrink_to) {
-			k++;
-			flp = &fle->next;
-		}
-		while ((fle = *flp) != NULL) {
-			*flp = fle->next;
-			flow_entry_kill(fc, fcp, fle);
+		int saved = 0;
+
+		hlist_for_each_entry_safe(fce, entry, tmp,
+					  &fcp->hash_table[i], u.hlist) {
+			if (saved < shrink_to) {
+				saved++;
+			} else {
+				deleted++;
+				hlist_del(&fce->u.hlist);
+				list_add_tail(&fce->u.gc_list, &gc_list);
+			}
 		}
 	}
+
+	if (deleted) {
+		fcp->hash_count -= deleted;
+
+		spin_lock_bh(&flow_cache_gc_lock);
+		list_splice_tail(&gc_list, &flow_cache_gc_list);
+		spin_unlock_bh(&flow_cache_gc_lock);
+
+		schedule_work(&flow_cache_gc_work);
+	}
 }
 
 static void flow_cache_shrink(struct flow_cache *fc,
@@ -171,7 +203,8 @@  void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
 {
 	struct flow_cache *fc = &flow_cache_global;
 	struct flow_cache_percpu *fcp;
-	struct flow_cache_entry *fle, **head;
+	struct flow_cache_entry *fle;
+	struct hlist_node *entry;
 	unsigned int hash;
 
 	local_bh_disable();
@@ -187,8 +220,7 @@  void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
 		flow_new_hash_rnd(fc, fcp);
 	hash = flow_hash_code(fc, fcp, key);
 
-	head = &fcp->hash_table[hash];
-	for (fle = *head; fle; fle = fle->next) {
+	hlist_for_each_entry(fle, entry, &fcp->hash_table[hash], u.hlist) {
 		if (fle->family == family &&
 		    fle->dir == dir &&
 		    flow_key_compare(key, &fle->key) == 0) {
@@ -211,12 +243,12 @@  void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
 
 		fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
 		if (fle) {
-			fle->next = *head;
-			*head = fle;
 			fle->family = family;
 			fle->dir = dir;
 			memcpy(&fle->key, key, sizeof(*key));
 			fle->object = NULL;
+
+			hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]);
 			fcp->hash_count++;
 		}
 	}
@@ -253,24 +285,38 @@  static void flow_cache_flush_tasklet(unsigned long data)
 	struct flow_flush_info *info = (void *)data;
 	struct flow_cache *fc = info->cache;
 	struct flow_cache_percpu *fcp;
-	int i;
+	struct flow_cache_entry *fle;
+	struct hlist_node *entry, *tmp;
+	struct list_head gc_list;
+	int i, deleted = 0;
+	unsigned genid;
 
+	INIT_LIST_HEAD(&gc_list);
 	fcp = per_cpu_ptr(fc->percpu, smp_processor_id());
 	for (i = 0; i < flow_cache_hash_size(fc); i++) {
-		struct flow_cache_entry *fle;
-
-		fle = fcp->hash_table[i];
-		for (; fle; fle = fle->next) {
-			unsigned genid = atomic_read(&flow_cache_genid);
+		hlist_for_each_entry_safe(fle, entry, tmp,
+					  &fcp->hash_table[i], u.hlist) {
+			genid = atomic_read(&flow_cache_genid);
 
 			if (!fle->object || fle->genid == genid)
 				continue;
 
-			fle->object = NULL;
-			atomic_dec(fle->object_ref);
+			deleted++;
+			hlist_del(&fle->u.hlist);
+			list_add_tail(&fle->u.gc_list, &gc_list);
 		}
 	}
 
+	if (deleted) {
+		fcp->hash_count -= deleted;
+
+		spin_lock_bh(&flow_cache_gc_lock);
+		list_splice_tail(&gc_list, &flow_cache_gc_list);
+		spin_unlock_bh(&flow_cache_gc_lock);
+
+		schedule_work(&flow_cache_gc_work);
+	}
+
 	if (atomic_dec_and_test(&info->cpuleft))
 		complete(&info->completion);
 }
@@ -312,7 +358,7 @@  void flow_cache_flush(void)
 static void __cpuinit flow_cache_cpu_prepare(struct flow_cache *fc,
 					     struct flow_cache_percpu *fcp)
 {
-	fcp->hash_table = (struct flow_cache_entry **)
+	fcp->hash_table = (struct hlist_head *)
 		__get_free_pages(GFP_KERNEL|__GFP_ZERO, fc->order);
 	fcp->hash_rnd_recalc = 1;
 	fcp->hash_count = 0;
@@ -359,7 +405,7 @@  static int flow_cache_init(struct flow_cache *fc)
 
 	for (order = 0;
 	     (PAGE_SIZE << order) <
-		     (sizeof(struct flow_cache_entry *)*flow_cache_hash_size(fc));
+		     (sizeof(struct hlist_head)*flow_cache_hash_size(fc));
 	     order++)
 		/* NOTHING */;
 	fc->order = order;