diff mbox

[v2,net-next-2.6] netfilter: ip_tables: dont block BH while reading counters

Message ID 1292518436.2883.393.camel@edumazet-laptop
State Not Applicable, archived
Delegated to: David Miller
Headers show

Commit Message

Eric Dumazet Dec. 16, 2010, 4:53 p.m. UTC
Le jeudi 16 décembre 2010 à 17:07 +0100, Eric Dumazet a écrit :

> Here is a tested version : no need for a (buggy in previous patch)
> memset() if we use vzalloc()
> 
> Note : We miss a this_cpu_write_seqcount_begin() interface.
> I'll bug lkml to get it asap.

Well, we have a faster solution:

Add a seqcount to "struct xt_info_lock"
so that we do the increment pair once per table, not once per rule.
Since we already have the seq address, there is no need for a
this_cpu_write_seqcount_begin() interface.


[PATCH v2 net-next-2.6] netfilter: ip_tables: dont block BH while reading counters

Using "iptables -L" with a lot of rules can cause excessive BH latency.
Jesper mentioned ~6 ms and was worried about frame drops.

Switch to a per_cpu seqcount scheme, so that taking a snapshot of
counters doesn't need to block BH (neither on this cpu nor on other cpus).

Reported-by: Jesper Dangaard Brouer <hawk@comx.dk>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 include/linux/netfilter/x_tables.h |    9 ++++-
 net/ipv4/netfilter/ip_tables.c     |   45 ++++++++-------------------
 2 files changed, 21 insertions(+), 33 deletions(-)



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

stephen hemminger Dec. 16, 2010, 5:31 p.m. UTC | #1
On Thu, 16 Dec 2010 17:53:56 +0100
Eric Dumazet <eric.dumazet@gmail.com> wrote:

>  	spinlock_t lock;
> +	seqcount_t seq;

Since lock and seqcount_t are associated together, why isn't this a seqlock instead?
diff mbox

Patch

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 742bec0..7027762 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -473,6 +473,7 @@  extern void xt_free_table_info(struct xt_table_info *info);
  */
 struct xt_info_lock {
 	spinlock_t lock;
+	seqcount_t seq;
 	unsigned char readers;
 };
 DECLARE_PER_CPU(struct xt_info_lock, xt_info_locks);
@@ -496,16 +497,20 @@  static inline void xt_info_rdlock_bh(void)
 
 	local_bh_disable();
 	lock = &__get_cpu_var(xt_info_locks);
-	if (likely(!lock->readers++))
+	if (likely(!lock->readers++)) {
 		spin_lock(&lock->lock);
+		write_seqcount_begin(&lock->seq);
+	}
 }
 
 static inline void xt_info_rdunlock_bh(void)
 {
 	struct xt_info_lock *lock = &__get_cpu_var(xt_info_locks);
 
-	if (likely(!--lock->readers))
+	if (likely(!--lock->readers)) {
+		write_seqcount_end(&lock->seq);
 		spin_unlock(&lock->lock);
+	}
 	local_bh_enable();
 }
 
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index a846d63..7fe3d7c 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -884,42 +884,25 @@  get_counters(const struct xt_table_info *t,
 	struct ipt_entry *iter;
 	unsigned int cpu;
 	unsigned int i;
-	unsigned int curcpu = get_cpu();
-
-	/* Instead of clearing (by a previous call to memset())
-	 * the counters and using adds, we set the counters
-	 * with data used by 'current' CPU.
-	 *
-	 * Bottom half has to be disabled to prevent deadlock
-	 * if new softirq were to run and call ipt_do_table
-	 */
-	local_bh_disable();
-	i = 0;
-	xt_entry_foreach(iter, t->entries[curcpu], t->size) {
-		SET_COUNTER(counters[i], iter->counters.bcnt,
-			    iter->counters.pcnt);
-		++i;
-	}
-	local_bh_enable();
-	/* Processing counters from other cpus, we can let bottom half enabled,
-	 * (preemption is disabled)
-	 */
 
 	for_each_possible_cpu(cpu) {
-		if (cpu == curcpu)
-			continue;
+		seqcount_t *seq = &per_cpu(xt_info_locks, cpu).seq;
+
 		i = 0;
-		local_bh_disable();
-		xt_info_wrlock(cpu);
 		xt_entry_foreach(iter, t->entries[cpu], t->size) {
-			ADD_COUNTER(counters[i], iter->counters.bcnt,
-				    iter->counters.pcnt);
+			u64 bcnt, pcnt;
+			unsigned int start;
+
+			do {
+				start = read_seqcount_begin(seq);
+				bcnt = iter->counters.bcnt;
+				pcnt = iter->counters.pcnt;
+			} while (read_seqcount_retry(seq, start));
+
+			ADD_COUNTER(counters[i], bcnt, pcnt);
 			++i; /* macro does multi eval of i */
 		}
-		xt_info_wrunlock(cpu);
-		local_bh_enable();
 	}
-	put_cpu();
 }
 
 static struct xt_counters *alloc_counters(const struct xt_table *table)
@@ -932,7 +915,7 @@  static struct xt_counters *alloc_counters(const struct xt_table *table)
 	   (other than comefrom, which userspace doesn't care
 	   about). */
 	countersize = sizeof(struct xt_counters) * private->number;
-	counters = vmalloc(countersize);
+	counters = vzalloc(countersize);
 
 	if (counters == NULL)
 		return ERR_PTR(-ENOMEM);
@@ -1203,7 +1186,7 @@  __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 	struct ipt_entry *iter;
 
 	ret = 0;
-	counters = vmalloc(num_counters * sizeof(struct xt_counters));
+	counters = vzalloc(num_counters * sizeof(struct xt_counters));
 	if (!counters) {
 		ret = -ENOMEM;
 		goto out;