Patchwork percpu: convert SNMP mibs to new infra

login
register
mail settings
Submitter Eric Dumazet
Date April 2, 2009, 8:07 a.m.
Message ID <49D4724C.7010200@cosmosbay.com>
Download mbox | patch
Permalink /patch/25520/
State RFC
Delegated to: David Miller
Headers show

Comments

Eric Dumazet - April 2, 2009, 8:07 a.m.
Ingo Molnar a écrit :
> * Tejun Heo <htejun@gmail.com> wrote:
> 
>> Hello, Eric, Ingo.
>>
>> Eric Dumazet wrote:
>>> diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
>>> index aee103b..6b82f6b 100644
>>> --- a/arch/x86/include/asm/percpu.h
>>> +++ b/arch/x86/include/asm/percpu.h
>>> @@ -135,6 +135,9 @@ do {							\
>>>  #define percpu_read(var)	percpu_from_op("mov", per_cpu__##var)
>>>  #define percpu_write(var, val)	percpu_to_op("mov", per_cpu__##var, val)
>>>  #define percpu_add(var, val)	percpu_to_op("add", per_cpu__##var, val)
>>> +#define indir_percpu_add(var, val)	percpu_to_op("add", *(var), val)
>>> +#define indir_percpu_inc(var)       percpu_to_op("add", *(var), 1)
>>> +#define indir_percpu_dec(var)       percpu_to_op("add", *(var), -1)
>>>  #define percpu_sub(var, val)	percpu_to_op("sub", per_cpu__##var, val)
>>>  #define percpu_and(var, val)	percpu_to_op("and", per_cpu__##var, val)
>>>  #define percpu_or(var, val)	percpu_to_op("or", per_cpu__##var, val)
>> The final goal is to unify static and dynamic accesses but we 
>> aren't there yet, so, for the time being, we'll need some interim 
>> solutions. I would prefer percpu_ptr_add() tho.
> 
> Yep, that's the standard naming scheme for new APIs: generic to 
> specific, left to right.
> 

Here is a second version of the patch, with percpu_ptr_xxx convention,
and more polished form (snmp_mib_free() was forgotten in previous RFC)

Thank you all

[PATCH] percpu: convert SNMP mibs to new infra

Some arches can use percpu infrastructure for safe changes to mibs.
(percpu_add() is safe against preemption and interrupts), but
we want the real thing (a single instruction), not an emulation.

On arches still using an emulation, it's better to keep the two views
per mib and preemption disable/enable

This shrinks size of mibs by 50%, but also shrinks vmlinux text size
(minimum IPV4 config)

$ size vmlinux.old vmlinux.new
   text    data     bss     dec     hex filename
4308458  561092 1728512 6598062  64adae vmlinux.old
4303834  561092 1728512 6593438  649b9e vmlinux.new



Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
---
 arch/x86/include/asm/percpu.h |    3 +++
 include/net/snmp.h            |   27 ++++++++++++++++++++++-----
 net/ipv4/af_inet.c            |   31 ++++++++++++++++++-------------
 3 files changed, 43 insertions(+), 18 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tejun Heo - April 3, 2009, 12:39 a.m.
Eric Dumazet wrote:
...
>  #define percpu_read(var)	percpu_from_op("mov", per_cpu__##var)
>  #define percpu_write(var, val)	percpu_to_op("mov", per_cpu__##var, val)
>  #define percpu_add(var, val)	percpu_to_op("add", per_cpu__##var, val)
> +#define percpu_ptr_add(var, val)	percpu_to_op("add", *(var), val)
> +#define percpu_ptr_inc(var)       percpu_ptr_add(var, 1)
> +#define percpu_ptr_dec(var)       percpu_ptr_add(var, -1)
>  #define percpu_sub(var, val)	percpu_to_op("sub", per_cpu__##var, val)
>  #define percpu_and(var, val)	percpu_to_op("and", per_cpu__##var, val)
>  #define percpu_or(var, val)	percpu_to_op("or", per_cpu__##var, val)

x86 part looks fine to me.

> diff --git a/include/net/snmp.h b/include/net/snmp.h
> index 57c9362..1ba584b 100644
> --- a/include/net/snmp.h
> +++ b/include/net/snmp.h
> @@ -123,15 +123,31 @@ struct linux_xfrm_mib {
>  };
>  
>  /* 
> - * FIXME: On x86 and some other CPUs the split into user and softirq parts
> + * On x86 and some other CPUs the split into user and softirq parts
>   * is not needed because addl $1,memory is atomic against interrupts (but 
> - * atomic_inc would be overkill because of the lock cycles). Wants new 
> - * nonlocked_atomic_inc() primitives -AK
> + * atomic_inc would be overkill because of the lock cycles).
>   */ 
> +#ifdef CONFIG_X86
> +# define SNMP_ARRAY_SZ 1
> +#else
> +# define SNMP_ARRAY_SZ 2
> +#endif

This is quite hacky but, well, for the time being...

Thanks.
Ingo Molnar - April 3, 2009, 5:10 p.m.
* Eric Dumazet <dada1@cosmosbay.com> wrote:

> Ingo Molnar a écrit :
> > * Tejun Heo <htejun@gmail.com> wrote:
> > 
> >> Hello, Eric, Ingo.
> >>
> >> Eric Dumazet wrote:
> >>> diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
> >>> index aee103b..6b82f6b 100644
> >>> --- a/arch/x86/include/asm/percpu.h
> >>> +++ b/arch/x86/include/asm/percpu.h
> >>> @@ -135,6 +135,9 @@ do {							\
> >>>  #define percpu_read(var)	percpu_from_op("mov", per_cpu__##var)
> >>>  #define percpu_write(var, val)	percpu_to_op("mov", per_cpu__##var, val)
> >>>  #define percpu_add(var, val)	percpu_to_op("add", per_cpu__##var, val)
> >>> +#define indir_percpu_add(var, val)	percpu_to_op("add", *(var), val)
> >>> +#define indir_percpu_inc(var)       percpu_to_op("add", *(var), 1)
> >>> +#define indir_percpu_dec(var)       percpu_to_op("add", *(var), -1)
> >>>  #define percpu_sub(var, val)	percpu_to_op("sub", per_cpu__##var, val)
> >>>  #define percpu_and(var, val)	percpu_to_op("and", per_cpu__##var, val)
> >>>  #define percpu_or(var, val)	percpu_to_op("or", per_cpu__##var, val)
> >> The final goal is to unify static and dynamic accesses but we 
> >> aren't there yet, so, for the time being, we'll need some interim 
> >> solutions. I would prefer percpu_ptr_add() tho.
> > 
> > Yep, that's the standard naming scheme for new APIs: generic to 
> > specific, left to right.
> > 
> 
> Here is a second version of the patch, with percpu_ptr_xxx convention,
> and more polished form (snmp_mib_free() was forgotten in previous RFC)
> 
> Thank you all
> 
> [PATCH] percpu: convert SNMP mibs to new infra
> 
> Some arches can use percpu infrastructure for safe changes to mibs.
> (percpu_add() is safe against preemption and interrupts), but
> we want the real thing (a single instruction), not an emulation.
> 
> On arches still using an emulation, it's better to keep the two views
> per mib and preemption disable/enable
> 
> This shrinks size of mibs by 50%, but also shrinks vmlinux text size
> (minimum IPV4 config)
> 
> $ size vmlinux.old vmlinux.new
>    text    data     bss     dec     hex filename
> 4308458  561092 1728512 6598062  64adae vmlinux.old
> 4303834  561092 1728512 6593438  649b9e vmlinux.new

Wow, that's pretty impressive!

> Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
> ---
>  arch/x86/include/asm/percpu.h |    3 +++

Acked-by: Ingo Molnar <mingo@elte.hu>

As far as x86 goes, feel free to pick it up into any of the 
networking trees, these bits are easily merged and it's probably 
best if the patch stays in a single piece - it looks compact enough 
and if it breaks it's going to break in networking code.

	Ingo
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index aee103b..f8081e4 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -135,6 +135,9 @@  do {							\
 #define percpu_read(var)	percpu_from_op("mov", per_cpu__##var)
 #define percpu_write(var, val)	percpu_to_op("mov", per_cpu__##var, val)
 #define percpu_add(var, val)	percpu_to_op("add", per_cpu__##var, val)
+#define percpu_ptr_add(var, val)	percpu_to_op("add", *(var), val)
+#define percpu_ptr_inc(var)       percpu_ptr_add(var, 1)
+#define percpu_ptr_dec(var)       percpu_ptr_add(var, -1)
 #define percpu_sub(var, val)	percpu_to_op("sub", per_cpu__##var, val)
 #define percpu_and(var, val)	percpu_to_op("and", per_cpu__##var, val)
 #define percpu_or(var, val)	percpu_to_op("or", per_cpu__##var, val)
diff --git a/include/net/snmp.h b/include/net/snmp.h
index 57c9362..1ba584b 100644
--- a/include/net/snmp.h
+++ b/include/net/snmp.h
@@ -123,15 +123,31 @@  struct linux_xfrm_mib {
 };
 
 /* 
- * FIXME: On x86 and some other CPUs the split into user and softirq parts
+ * On x86 and some other CPUs the split into user and softirq parts
  * is not needed because addl $1,memory is atomic against interrupts (but 
- * atomic_inc would be overkill because of the lock cycles). Wants new 
- * nonlocked_atomic_inc() primitives -AK
+ * atomic_inc would be overkill because of the lock cycles).
  */ 
+#ifdef CONFIG_X86
+# define SNMP_ARRAY_SZ 1
+#else
+# define SNMP_ARRAY_SZ 2
+#endif
+
 #define DEFINE_SNMP_STAT(type, name)	\
-	__typeof__(type) *name[2]
+	__typeof__(type) *name[SNMP_ARRAY_SZ]
 #define DECLARE_SNMP_STAT(type, name)	\
-	extern __typeof__(type) *name[2]
+	extern __typeof__(type) *name[SNMP_ARRAY_SZ]
+
+#if SNMP_ARRAY_SZ == 1
+#define SNMP_INC_STATS(mib, field)	percpu_ptr_inc(&mib[0]->mibs[field])
+#define SNMP_INC_STATS_BH(mib, field)	SNMP_INC_STATS(mib, field)
+#define SNMP_INC_STATS_USER(mib, field) SNMP_INC_STATS(mib, field)
+#define SNMP_DEC_STATS(mib, field)	percpu_ptr_dec(&mib[0]->mibs[field])
+#define SNMP_ADD_STATS_BH(mib, field, addend) 	\
+				percpu_ptr_add(&mib[0]->mibs[field], addend)
+#define SNMP_ADD_STATS_USER(mib, field, addend) 	\
+				percpu_ptr_add(&mib[0]->mibs[field], addend)
+#else
 
 #define SNMP_STAT_BHPTR(name)	(name[0])
 #define SNMP_STAT_USRPTR(name)	(name[1])
@@ -160,5 +176,6 @@  struct linux_xfrm_mib {
 		per_cpu_ptr(mib[1], get_cpu())->mibs[field] += addend; \
 		put_cpu(); \
 	} while (0)
+#endif
 
 #endif
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 7f03373..4df3a76 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1366,36 +1366,41 @@  unsigned long snmp_fold_field(void *mib[], int offt)
 
 	for_each_possible_cpu(i) {
 		res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt);
+#if SNMP_ARRAY_SZ == 2
 		res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt);
+#endif
 	}
 	return res;
 }
 EXPORT_SYMBOL_GPL(snmp_fold_field);
 
-int snmp_mib_init(void *ptr[2], size_t mibsize)
+int snmp_mib_init(void *ptr[SNMP_ARRAY_SZ], size_t mibsize)
 {
 	BUG_ON(ptr == NULL);
 	ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long long));
 	if (!ptr[0])
-		goto err0;
+		return -ENOMEM;
+#if SNMP_ARRAY_SZ == 2
 	ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long long));
-	if (!ptr[1])
-		goto err1;
+	if (!ptr[1]) {
+		free_percpu(ptr[0]);
+		ptr[0] = NULL;
+		return -ENOMEM;
+	}
+#endif
 	return 0;
-err1:
-	free_percpu(ptr[0]);
-	ptr[0] = NULL;
-err0:
-	return -ENOMEM;
 }
 EXPORT_SYMBOL_GPL(snmp_mib_init);
 
-void snmp_mib_free(void *ptr[2])
+void snmp_mib_free(void *ptr[SNMP_ARRAY_SZ])
 {
+	int i;
+
 	BUG_ON(ptr == NULL);
-	free_percpu(ptr[0]);
-	free_percpu(ptr[1]);
-	ptr[0] = ptr[1] = NULL;
+	for (i = 0 ; i < SNMP_ARRAY_SZ; i++) {
+		free_percpu(ptr[i]);
+		ptr[i] = NULL;
+	}
 }
 EXPORT_SYMBOL_GPL(snmp_mib_free);