diff mbox

net: fix /proc/net/snmp as memory corruptor

Message ID 4915295B.4050102@cosmosbay.com
State Accepted, archived
Delegated to: David Miller
Headers show

Commit Message

Eric Dumazet Nov. 8, 2008, 5:53 a.m. UTC
Alexey Dobriyan a écrit :
> On Sat, Nov 08, 2008 at 05:52:56AM +0300, Alexey Dobriyan wrote:
>> On Sat, Nov 08, 2008 at 04:02:37AM +0300, Alexey Dobriyan wrote:
>>> On Sat, Nov 08, 2008 at 01:22:08AM +0100, Eric Sesterhenn wrote:
>>>> running a bunch of network related stresstests (isic, isicng, ...) 
>>>> and trying to read all files in /proc afterwards gave me two
>>>> oopses. I was able to reproduce them on another box with
>>>> a different config. I was able to reproduce this on 2.6.24 too,
>>>> so this is no regression. The icmpsic is version 0.06. 
>>>> The minimal testcase to trigger this:
>>>>
>>>> ------------8<----------------
>>>> #!/bin/bash
>>>>
>>>> icmpsic -s 127.0.0.1 -d 127.0.0.1 -p 100000
>>>>
>>>> find /proc/net/ | xargs cat > /dev/null
>>>>
>>>> cat /proc/net/ip_mr_cache
>>>> cat /proc/net/ip_mr_vif
>>>> ------------8<----------------
>>>>
>>>>
>>>> root@computer-desktop:~/testing# cat /proc/338/net/ip_mr_cache
>>>>
>>>> [ 1572.702100] BUG: unable to handle kernel NULL pointer dereferenceat 000001c1
>>>> [ 1572.702588] IP: [<c05942c6>] ipmr_mfc_seq_show+0x26/0xf0
>>> Reproduced.
>> 	icmpsic -s 127.0.0.1 -d 127.0.0.1 -p 100000
>> 	cat /proc/net/snmp				# sic
>> 	cat /proc/net/ip_mr_cache
>>
>> mfc_cache_array is full of small integers
>>
>> 	[0] = 0x1a8
>> 	[1] = 0x1a9
>>
>> and so on.
> 
> OK, this minimally fixes mfc_cache_array corruption.
> 
> Someone was scared of 16 integers on stack. :^)

Good spot Alexey :)

This should be fixed as well, or multiple threads reading /proc/net/snmp
could get mixed results without proper locking.

ICMPMSG_MIB_MAX being 512, "int" can also be changed to "short",
so that "short out[PERLINE]" only use 32 bytes on stack. 

Frankly, snmp_fold_field() results should be cached in a local array too,
because this function can be expensive if machine has a lot of cpus.

Is 128 + 32 bytes on stack considered evil ?
Using a lock here sounds overkill, and dynamic allocation overkill too...

[PATCH] net: fix /proc/net/snmp as memory corruptor

icmpmsg_put() can happily corrupt kernel memory, using a static
table and forgetting to reset an array index in a loop.

Remove the static array since its not safe without proper locking.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
---
 net/ipv4/proc.c |   58 +++++++++++++++++++++++-----------------------
 1 files changed, 30 insertions(+), 28 deletions(-)

Comments

Alexey Dobriyan Nov. 8, 2008, 6:22 a.m. UTC | #1
On Sat, Nov 08, 2008 at 06:53:31AM +0100, Eric Dumazet wrote:
> Alexey Dobriyan a écrit :
>> On Sat, Nov 08, 2008 at 05:52:56AM +0300, Alexey Dobriyan wrote:
>>> On Sat, Nov 08, 2008 at 04:02:37AM +0300, Alexey Dobriyan wrote:
>>>> On Sat, Nov 08, 2008 at 01:22:08AM +0100, Eric Sesterhenn wrote:
>>>>> running a bunch of network related stresstests (isic, isicng, 
>>>>> ...) and trying to read all files in /proc afterwards gave me two
>>>>> oopses. I was able to reproduce them on another box with
>>>>> a different config. I was able to reproduce this on 2.6.24 too,
>>>>> so this is no regression. The icmpsic is version 0.06. The 
>>>>> minimal testcase to trigger this:
>>>>>
>>>>> ------------8<----------------
>>>>> #!/bin/bash
>>>>>
>>>>> icmpsic -s 127.0.0.1 -d 127.0.0.1 -p 100000
>>>>>
>>>>> find /proc/net/ | xargs cat > /dev/null
>>>>>
>>>>> cat /proc/net/ip_mr_cache
>>>>> cat /proc/net/ip_mr_vif
>>>>> ------------8<----------------
>>>>>
>>>>>
>>>>> root@computer-desktop:~/testing# cat /proc/338/net/ip_mr_cache
>>>>>
>>>>> [ 1572.702100] BUG: unable to handle kernel NULL pointer dereferenceat 000001c1
>>>>> [ 1572.702588] IP: [<c05942c6>] ipmr_mfc_seq_show+0x26/0xf0
>>>> Reproduced.
>>> 	icmpsic -s 127.0.0.1 -d 127.0.0.1 -p 100000
>>> 	cat /proc/net/snmp				# sic
>>> 	cat /proc/net/ip_mr_cache
>>>
>>> mfc_cache_array is full of small integers
>>>
>>> 	[0] = 0x1a8
>>> 	[1] = 0x1a9
>>>
>>> and so on.
>>
>> OK, this minimally fixes mfc_cache_array corruption.
>>
>> Someone was scared of 16 integers on stack. :^)
>
> Good spot Alexey :)
>
> This should be fixed as well, or multiple threads reading /proc/net/snmp
> could get mixed results without proper locking.

If they live in different netns, otherwise it was fine, it seems.

> ICMPMSG_MIB_MAX being 512, "int" can also be changed to "short",
> so that "short out[PERLINE]" only use 32 bytes on stack. 
>
> Frankly, snmp_fold_field() results should be cached in a local array too,
> because this function can be expensive if machine has a lot of cpus.
>
> Is 128 + 32 bytes on stack considered evil ?
> Using a lock here sounds overkill, and dynamic allocation overkill too...
>
> [PATCH] net: fix /proc/net/snmp as memory corruptor
>
> icmpmsg_put() can happily corrupt kernel memory, using a static
> table and forgetting to reset an array index in a loop.
>
> Remove the static array since its not safe without proper locking.

> diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
> index 8f5a403..a631a1f 100644
> --- a/net/ipv4/proc.c
> +++ b/net/ipv4/proc.c
> @@ -237,43 +237,45 @@ static const struct snmp_mib snmp4_net_list[] = {
>  	SNMP_MIB_SENTINEL
>  };
>  
> +static void icmpmsg_put_line(struct seq_file *seq, unsigned long *vals,
> +			     unsigned short *type, int count)
> +{
> +	int j;
> +
> +	if (count) {
> +		seq_printf(seq, "\nIcmpMsg:");
> +		for (j = 0; j < count; ++j)
> +			seq_printf(seq, " %sType%u",
> +				type[j] & 0x100 ? "Out" : "In",
> +				type[j] & 0xff);
> +		seq_printf(seq, "\nIcmpMsg:");
> +		for (j = 0; j < count; ++j)
> +			seq_printf(seq, " %lu", vals[j]);
> +	}
> +}
> +
>  static void icmpmsg_put(struct seq_file *seq)
>  {
>  #define PERLINE	16
>  
> -	int j, i, count;
> -	static int out[PERLINE];
> +	int i, count;
> +	unsigned short type[PERLINE];
> +	unsigned long vals[PERLINE], val;
>  	struct net *net = seq->private;
>  
>  	count = 0;
>  	for (i = 0; i < ICMPMSG_MIB_MAX; i++) {
> -
> -		if (snmp_fold_field((void **) net->mib.icmpmsg_statistics, i))
> -			out[count++] = i;
> -		if (count < PERLINE)
> -			continue;
> -
> -		seq_printf(seq, "\nIcmpMsg:");
> -		for (j = 0; j < PERLINE; ++j)
> -			seq_printf(seq, " %sType%u", i & 0x100 ? "Out" : "In",
> -					i & 0xff);
> -		seq_printf(seq, "\nIcmpMsg: ");
> -		for (j = 0; j < PERLINE; ++j)
> -			seq_printf(seq, " %lu",
> -				snmp_fold_field((void **) net->mib.icmpmsg_statistics,
> -				out[j]));
> -		seq_putc(seq, '\n');
> -	}
> -	if (count) {
> -		seq_printf(seq, "\nIcmpMsg:");
> -		for (j = 0; j < count; ++j)
> -			seq_printf(seq, " %sType%u", out[j] & 0x100 ? "Out" :
> -				"In", out[j] & 0xff);
> -		seq_printf(seq, "\nIcmpMsg:");
> -		for (j = 0; j < count; ++j)
> -			seq_printf(seq, " %lu", snmp_fold_field((void **)
> -				net->mib.icmpmsg_statistics, out[j]));
> +		val = snmp_fold_field((void **) net->mib.icmpmsg_statistics, i);
> +		if (val) {
> +			type[count] = i;
> +			vals[count++] = val;
> +		}
> +		if (count == PERLINE) {
> +			icmpmsg_put_line(seq, vals, type, count);
> +			count = 0;
> +		}
>  	}
> +	icmpmsg_put_line(seq, vals, type, count);
>  
>  #undef PERLINE
>  }

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexey Dobriyan Nov. 8, 2008, 6:42 a.m. UTC | #2
On Sat, Nov 08, 2008 at 06:53:31AM +0100, Eric Dumazet wrote:
> Alexey Dobriyan a écrit :
>> On Sat, Nov 08, 2008 at 05:52:56AM +0300, Alexey Dobriyan wrote:
>>> On Sat, Nov 08, 2008 at 04:02:37AM +0300, Alexey Dobriyan wrote:
>>>> On Sat, Nov 08, 2008 at 01:22:08AM +0100, Eric Sesterhenn wrote:
>>>>> running a bunch of network related stresstests (isic, isicng, 
>>>>> ...) and trying to read all files in /proc afterwards gave me two
>>>>> oopses. I was able to reproduce them on another box with
>>>>> a different config. I was able to reproduce this on 2.6.24 too,
>>>>> so this is no regression. The icmpsic is version 0.06. The 
>>>>> minimal testcase to trigger this:
>>>>>
>>>>> ------------8<----------------
>>>>> #!/bin/bash
>>>>>
>>>>> icmpsic -s 127.0.0.1 -d 127.0.0.1 -p 100000
>>>>>
>>>>> find /proc/net/ | xargs cat > /dev/null
>>>>>
>>>>> cat /proc/net/ip_mr_cache
>>>>> cat /proc/net/ip_mr_vif
>>>>> ------------8<----------------
>>>>>
>>>>>
>>>>> root@computer-desktop:~/testing# cat /proc/338/net/ip_mr_cache
>>>>>
>>>>> [ 1572.702100] BUG: unable to handle kernel NULL pointer dereferenceat 000001c1
>>>>> [ 1572.702588] IP: [<c05942c6>] ipmr_mfc_seq_show+0x26/0xf0
>>>> Reproduced.
>>> 	icmpsic -s 127.0.0.1 -d 127.0.0.1 -p 100000
>>> 	cat /proc/net/snmp				# sic
>>> 	cat /proc/net/ip_mr_cache
>>>
>>> mfc_cache_array is full of small integers
>>>
>>> 	[0] = 0x1a8
>>> 	[1] = 0x1a9
>>>
>>> and so on.
>>
>> OK, this minimally fixes mfc_cache_array corruption.
>>
>> Someone was scared of 16 integers on stack. :^)
>
> Good spot Alexey :)

This patch works too.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Sesterhenn Nov. 8, 2008, 9:48 a.m. UTC | #3
* Alexey Dobriyan (adobriyan@gmail.com) wrote:
> On Sat, Nov 08, 2008 at 06:53:31AM +0100, Eric Dumazet wrote:
> > Alexey Dobriyan a écrit :
> >> On Sat, Nov 08, 2008 at 05:52:56AM +0300, Alexey Dobriyan wrote:
> >>> On Sat, Nov 08, 2008 at 04:02:37AM +0300, Alexey Dobriyan wrote:
> >>>> On Sat, Nov 08, 2008 at 01:22:08AM +0100, Eric Sesterhenn wrote:
> >>>>> running a bunch of network related stresstests (isic, isicng, 
> >>>>> ...) and trying to read all files in /proc afterwards gave me two
> >>>>> oopses. I was able to reproduce them on another box with
> >>>>> a different config. I was able to reproduce this on 2.6.24 too,
> >>>>> so this is no regression. The icmpsic is version 0.06. The 
> >>>>> minimal testcase to trigger this:
> >>>>>
> >>>>> ------------8<----------------
> >>>>> #!/bin/bash
> >>>>>
> >>>>> icmpsic -s 127.0.0.1 -d 127.0.0.1 -p 100000
> >>>>>
> >>>>> find /proc/net/ | xargs cat > /dev/null
> >>>>>
> >>>>> cat /proc/net/ip_mr_cache
> >>>>> cat /proc/net/ip_mr_vif
> >>>>> ------------8<----------------
> >>>>>
> >>>>>
> >>>>> root@computer-desktop:~/testing# cat /proc/338/net/ip_mr_cache
> >>>>>
> >>>>> [ 1572.702100] BUG: unable to handle kernel NULL pointer dereferenceat 000001c1
> >>>>> [ 1572.702588] IP: [<c05942c6>] ipmr_mfc_seq_show+0x26/0xf0
> >>>> Reproduced.
> >>> 	icmpsic -s 127.0.0.1 -d 127.0.0.1 -p 100000
> >>> 	cat /proc/net/snmp				# sic
> >>> 	cat /proc/net/ip_mr_cache
> >>>
> >>> mfc_cache_array is full of small integers
> >>>
> >>> 	[0] = 0x1a8
> >>> 	[1] = 0x1a9
> >>>
> >>> and so on.
> >>
> >> OK, this minimally fixes mfc_cache_array corruption.
> >>
> >> Someone was scared of 16 integers on stack. :^)
> >
> > Good spot Alexey :)
> 
> This patch works too.

Wow, that was fast :-) Also verified that the patch fixes the issue.

Thanks, Eric
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Stevens Nov. 8, 2008, 7:53 p.m. UTC | #4
Oh, that was me! Thank you, Alexey!

> This should be fixed as well, or multiple threads reading /proc/net/snmp
> could get mixed results without proper locking.

I don't believe locking is an issue here. If the values
change between the first and second tests, being counters,
they are still nonzero. If they are different in different
threads, it reflects an actual change in the counter. So
I'm not sure what you're talking about here.

I don't think they should be on the stack (obviously, or
I wouldn't have written it this way). So, FWIW, I like
Alexey's fix, which is what the code should've been.

For Alexey's patch:

Acked-by: David L Stevens <dlstevens@us.ibm.com>

                                                +-DLS

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet Nov. 8, 2008, 8:46 p.m. UTC | #5
David Stevens a écrit :
> Oh, that was me! Thank you, Alexey!
> 
>> This should be fixed as well, or multiple threads reading /proc/net/snmp
>> could get mixed results without proper locking.
> 
> I don't believe locking is an issue here. If the values
> change between the first and second tests, being counters,
> they are still nonzero. If they are different in different
> threads, it reflects an actual change in the counter. So
> I'm not sure what you're talking about here.

If you are not sure what I am talking about, then you should probably
not use static variables at all. I found this fix quite obvious...

> 
> I don't think they should be on the stack (obviously, or
> I wouldn't have written it this way). So, FWIW, I like
> Alexey's fix, which is what the code should've been.

You apparently missed the fact that with your new code, we can have more than
16 different ICMP counters > 0.

thread 1 on CPU 1
-----------------

- fills 16 indexes in static table
- print them. good.

- fills *next* 16 indexes in static table
... preempted by some IRQ or something....

thread 2 on CPU 2
------------------

fills 16 indexes in static table, overwriting
the values that thread 1 was trying to put.
...

thread 1:

print the values of thread 2.
(it will probably prints a copy of its first line)

bang : User application missed some critical information.


> 
> For Alexey's patch:
> 
> Acked-by: David L Stevens <dlstevens@us.ibm.com>

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Stevens Nov. 8, 2008, 9:05 p.m. UTC | #6
> 
> If you are not sure what I am talking about, then you should probably
> not use static variables at all. I found this fix quite obvious...

        Actually, I didn't realize "out" was static -- was looking at just
the patch, and obviously missing your point.
        I don't have a problem with 16 ints on the stack (or shorts, as
you pointed out)-- I didn't want the data on the stack, which may be
64-bit ints. In your patch, you're collecting all of it on the stack
(doubling its size).
        If there is no interlocking at a higher layer (and I haven't 
looked
at this in a long time...) (ie, exclusive opens), then I agree, it
shouldn't be static.

        Why not just that? (ie, add count=0 as Alexey did and remove the
static qualifier from "out")

                                +-DLS

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet Nov. 9, 2008, 8:25 a.m. UTC | #7
David Stevens a écrit :
>> If you are not sure what I am talking about, then you should probably
>> not use static variables at all. I found this fix quite obvious...
> 
>         Actually, I didn't realize "out" was static -- was looking at just
> the patch, and obviously missing your point.
>         I don't have a problem with 16 ints on the stack (or shorts, as
> you pointed out)-- I didn't want the data on the stack, which may be
> 64-bit ints. In your patch, you're collecting all of it on the stack
> (doubling its size).
>         If there is no interlocking at a higher layer (and I haven't 
> looked
> at this in a long time...) (ie, exclusive opens), then I agree, it
> shouldn't be static.
> 
>         Why not just that? (ie, add count=0 as Alexey did and remove the
> static qualifier from "out")

Well, this patch also saves 143+64 bytes because of the cleanup.

before :
# size net/ipv4/proc.o
   text    data     bss     dec     hex filename
   5191      16      64    5271    1497 net/ipv4/proc.o

After :
# size net/ipv4/proc.o
   text    data     bss     dec     hex filename
   5048      16       0    5064    13c8 net/ipv4/proc.o

1) bug correction from Alexey
2) bug correction about using a static array without exclusion
3) minor cleanup to save 143 bytes of text and 64 bytes of data
4) save thousand of cpu cycles if many possible cpus.
5) Actually works, even if using 128 bytes on stack, in a function
   that only calls seq_printf() (we are not in the network stack)



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller Nov. 11, 2008, 5:43 a.m. UTC | #8
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Sat, 08 Nov 2008 06:53:31 +0100

> [PATCH] net: fix /proc/net/snmp as memory corruptor
> 
> icmpmsg_put() can happily corrupt kernel memory, using a static
> table and forgetting to reset an array index in a loop.
> 
> Remove the static array since its not safe without proper locking.
> 
> Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
> Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>

I've applied this version of the fix.

Thanks!
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 8f5a403..a631a1f 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -237,43 +237,45 @@  static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_SENTINEL
 };
 
+static void icmpmsg_put_line(struct seq_file *seq, unsigned long *vals,
+			     unsigned short *type, int count)
+{
+	int j;
+
+	if (count) {
+		seq_printf(seq, "\nIcmpMsg:");
+		for (j = 0; j < count; ++j)
+			seq_printf(seq, " %sType%u",
+				type[j] & 0x100 ? "Out" : "In",
+				type[j] & 0xff);
+		seq_printf(seq, "\nIcmpMsg:");
+		for (j = 0; j < count; ++j)
+			seq_printf(seq, " %lu", vals[j]);
+	}
+}
+
 static void icmpmsg_put(struct seq_file *seq)
 {
 #define PERLINE	16
 
-	int j, i, count;
-	static int out[PERLINE];
+	int i, count;
+	unsigned short type[PERLINE];
+	unsigned long vals[PERLINE], val;
 	struct net *net = seq->private;
 
 	count = 0;
 	for (i = 0; i < ICMPMSG_MIB_MAX; i++) {
-
-		if (snmp_fold_field((void **) net->mib.icmpmsg_statistics, i))
-			out[count++] = i;
-		if (count < PERLINE)
-			continue;
-
-		seq_printf(seq, "\nIcmpMsg:");
-		for (j = 0; j < PERLINE; ++j)
-			seq_printf(seq, " %sType%u", i & 0x100 ? "Out" : "In",
-					i & 0xff);
-		seq_printf(seq, "\nIcmpMsg: ");
-		for (j = 0; j < PERLINE; ++j)
-			seq_printf(seq, " %lu",
-				snmp_fold_field((void **) net->mib.icmpmsg_statistics,
-				out[j]));
-		seq_putc(seq, '\n');
-	}
-	if (count) {
-		seq_printf(seq, "\nIcmpMsg:");
-		for (j = 0; j < count; ++j)
-			seq_printf(seq, " %sType%u", out[j] & 0x100 ? "Out" :
-				"In", out[j] & 0xff);
-		seq_printf(seq, "\nIcmpMsg:");
-		for (j = 0; j < count; ++j)
-			seq_printf(seq, " %lu", snmp_fold_field((void **)
-				net->mib.icmpmsg_statistics, out[j]));
+		val = snmp_fold_field((void **) net->mib.icmpmsg_statistics, i);
+		if (val) {
+			type[count] = i;
+			vals[count++] = val;
+		}
+		if (count == PERLINE) {
+			icmpmsg_put_line(seq, vals, type, count);
+			count = 0;
+		}
 	}
+	icmpmsg_put_line(seq, vals, type, count);
 
 #undef PERLINE
 }