Message ID | 4915295B.4050102@cosmosbay.com |
---|---|
State | Accepted, archived |
Delegated to: | David Miller |
Headers | show |
On Sat, Nov 08, 2008 at 06:53:31AM +0100, Eric Dumazet wrote: > Alexey Dobriyan a écrit : >> On Sat, Nov 08, 2008 at 05:52:56AM +0300, Alexey Dobriyan wrote: >>> On Sat, Nov 08, 2008 at 04:02:37AM +0300, Alexey Dobriyan wrote: >>>> On Sat, Nov 08, 2008 at 01:22:08AM +0100, Eric Sesterhenn wrote: >>>>> running a bunch of network related stresstests (isic, isicng, >>>>> ...) and trying to read all files in /proc afterwards gave me two >>>>> oopses. I was able to reproduce them on another box with >>>>> a different config. I was able to reproduce this on 2.6.24 too, >>>>> so this is no regression. The icmpsic is version 0.06. The >>>>> minimal testcase to trigger this: >>>>> >>>>> ------------8<---------------- >>>>> #!/bin/bash >>>>> >>>>> icmpsic -s 127.0.0.1 -d 127.0.0.1 -p 100000 >>>>> >>>>> find /proc/net/ | xargs cat > /dev/null >>>>> >>>>> cat /proc/net/ip_mr_cache >>>>> cat /proc/net/ip_mr_vif >>>>> ------------8<---------------- >>>>> >>>>> >>>>> root@computer-desktop:~/testing# cat /proc/338/net/ip_mr_cache >>>>> >>>>> [ 1572.702100] BUG: unable to handle kernel NULL pointer dereferenceat 000001c1 >>>>> [ 1572.702588] IP: [<c05942c6>] ipmr_mfc_seq_show+0x26/0xf0 >>>> Reproduced. >>> icmpsic -s 127.0.0.1 -d 127.0.0.1 -p 100000 >>> cat /proc/net/snmp # sic >>> cat /proc/net/ip_mr_cache >>> >>> mfc_cache_array is full of small integers >>> >>> [0] = 0x1a8 >>> [1] = 0x1a9 >>> >>> and so on. >> >> OK, this minimally fixes mfc_cache_array corruption. >> >> Someone was scared of 16 integers on stack. :^) > > Good spot Alexey :) > > This should be fixed as well, or multiple threads reading /proc/net/snmp > could get mixed results without proper locking. If they live in different netns, otherwise it was fine, it seems. > ICMPMSG_MIB_MAX being 512, "int" can also be changed to "short", > so that "short out[PERLINE]" only use 32 bytes on stack. > > Frankly, snmp_fold_field() results should be cached in a local array too, > because this function can be expensive if machine has a lot of cpus. > > Is 128 + 32 bytes on stack considered evil ? > Using a lock here sounds overkill, and dynamic allocation overkill too... > > [PATCH] net: fix /proc/net/snmp as memory corruptor > > icmpmsg_put() can happily corrupt kernel memory, using a static > table and forgetting to reset an array index in a loop. > > Remove the static array since its not safe without proper locking. > diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c > index 8f5a403..a631a1f 100644 > --- a/net/ipv4/proc.c > +++ b/net/ipv4/proc.c > @@ -237,43 +237,45 @@ static const struct snmp_mib snmp4_net_list[] = { > SNMP_MIB_SENTINEL > }; > > +static void icmpmsg_put_line(struct seq_file *seq, unsigned long *vals, > + unsigned short *type, int count) > +{ > + int j; > + > + if (count) { > + seq_printf(seq, "\nIcmpMsg:"); > + for (j = 0; j < count; ++j) > + seq_printf(seq, " %sType%u", > + type[j] & 0x100 ? "Out" : "In", > + type[j] & 0xff); > + seq_printf(seq, "\nIcmpMsg:"); > + for (j = 0; j < count; ++j) > + seq_printf(seq, " %lu", vals[j]); > + } > +} > + > static void icmpmsg_put(struct seq_file *seq) > { > #define PERLINE 16 > > - int j, i, count; > - static int out[PERLINE]; > + int i, count; > + unsigned short type[PERLINE]; > + unsigned long vals[PERLINE], val; > struct net *net = seq->private; > > count = 0; > for (i = 0; i < ICMPMSG_MIB_MAX; i++) { > - > - if (snmp_fold_field((void **) net->mib.icmpmsg_statistics, i)) > - out[count++] = i; > - if (count < PERLINE) > - continue; > - > - seq_printf(seq, "\nIcmpMsg:"); > - for (j = 0; j < PERLINE; ++j) > - seq_printf(seq, " %sType%u", i & 0x100 ? "Out" : "In", > - i & 0xff); > - seq_printf(seq, "\nIcmpMsg: "); > - for (j = 0; j < PERLINE; ++j) > - seq_printf(seq, " %lu", > - snmp_fold_field((void **) net->mib.icmpmsg_statistics, > - out[j])); > - seq_putc(seq, '\n'); > - } > - if (count) { > - seq_printf(seq, "\nIcmpMsg:"); > - for (j = 0; j < count; ++j) > - seq_printf(seq, " %sType%u", out[j] & 0x100 ? "Out" : > - "In", out[j] & 0xff); > - seq_printf(seq, "\nIcmpMsg:"); > - for (j = 0; j < count; ++j) > - seq_printf(seq, " %lu", snmp_fold_field((void **) > - net->mib.icmpmsg_statistics, out[j])); > + val = snmp_fold_field((void **) net->mib.icmpmsg_statistics, i); > + if (val) { > + type[count] = i; > + vals[count++] = val; > + } > + if (count == PERLINE) { > + icmpmsg_put_line(seq, vals, type, count); > + count = 0; > + } > } > + icmpmsg_put_line(seq, vals, type, count); > > #undef PERLINE > } -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sat, Nov 08, 2008 at 06:53:31AM +0100, Eric Dumazet wrote: > Alexey Dobriyan a écrit : >> On Sat, Nov 08, 2008 at 05:52:56AM +0300, Alexey Dobriyan wrote: >>> On Sat, Nov 08, 2008 at 04:02:37AM +0300, Alexey Dobriyan wrote: >>>> On Sat, Nov 08, 2008 at 01:22:08AM +0100, Eric Sesterhenn wrote: >>>>> running a bunch of network related stresstests (isic, isicng, >>>>> ...) and trying to read all files in /proc afterwards gave me two >>>>> oopses. I was able to reproduce them on another box with >>>>> a different config. I was able to reproduce this on 2.6.24 too, >>>>> so this is no regression. The icmpsic is version 0.06. The >>>>> minimal testcase to trigger this: >>>>> >>>>> ------------8<---------------- >>>>> #!/bin/bash >>>>> >>>>> icmpsic -s 127.0.0.1 -d 127.0.0.1 -p 100000 >>>>> >>>>> find /proc/net/ | xargs cat > /dev/null >>>>> >>>>> cat /proc/net/ip_mr_cache >>>>> cat /proc/net/ip_mr_vif >>>>> ------------8<---------------- >>>>> >>>>> >>>>> root@computer-desktop:~/testing# cat /proc/338/net/ip_mr_cache >>>>> >>>>> [ 1572.702100] BUG: unable to handle kernel NULL pointer dereferenceat 000001c1 >>>>> [ 1572.702588] IP: [<c05942c6>] ipmr_mfc_seq_show+0x26/0xf0 >>>> Reproduced. >>> icmpsic -s 127.0.0.1 -d 127.0.0.1 -p 100000 >>> cat /proc/net/snmp # sic >>> cat /proc/net/ip_mr_cache >>> >>> mfc_cache_array is full of small integers >>> >>> [0] = 0x1a8 >>> [1] = 0x1a9 >>> >>> and so on. >> >> OK, this minimally fixes mfc_cache_array corruption. >> >> Someone was scared of 16 integers on stack. :^) > > Good spot Alexey :) This patch works too. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
* Alexey Dobriyan (adobriyan@gmail.com) wrote: > On Sat, Nov 08, 2008 at 06:53:31AM +0100, Eric Dumazet wrote: > > Alexey Dobriyan a écrit : > >> On Sat, Nov 08, 2008 at 05:52:56AM +0300, Alexey Dobriyan wrote: > >>> On Sat, Nov 08, 2008 at 04:02:37AM +0300, Alexey Dobriyan wrote: > >>>> On Sat, Nov 08, 2008 at 01:22:08AM +0100, Eric Sesterhenn wrote: > >>>>> running a bunch of network related stresstests (isic, isicng, > >>>>> ...) and trying to read all files in /proc afterwards gave me two > >>>>> oopses. I was able to reproduce them on another box with > >>>>> a different config. I was able to reproduce this on 2.6.24 too, > >>>>> so this is no regression. The icmpsic is version 0.06. The > >>>>> minimal testcase to trigger this: > >>>>> > >>>>> ------------8<---------------- > >>>>> #!/bin/bash > >>>>> > >>>>> icmpsic -s 127.0.0.1 -d 127.0.0.1 -p 100000 > >>>>> > >>>>> find /proc/net/ | xargs cat > /dev/null > >>>>> > >>>>> cat /proc/net/ip_mr_cache > >>>>> cat /proc/net/ip_mr_vif > >>>>> ------------8<---------------- > >>>>> > >>>>> > >>>>> root@computer-desktop:~/testing# cat /proc/338/net/ip_mr_cache > >>>>> > >>>>> [ 1572.702100] BUG: unable to handle kernel NULL pointer dereferenceat 000001c1 > >>>>> [ 1572.702588] IP: [<c05942c6>] ipmr_mfc_seq_show+0x26/0xf0 > >>>> Reproduced. > >>> icmpsic -s 127.0.0.1 -d 127.0.0.1 -p 100000 > >>> cat /proc/net/snmp # sic > >>> cat /proc/net/ip_mr_cache > >>> > >>> mfc_cache_array is full of small integers > >>> > >>> [0] = 0x1a8 > >>> [1] = 0x1a9 > >>> > >>> and so on. > >> > >> OK, this minimally fixes mfc_cache_array corruption. > >> > >> Someone was scared of 16 integers on stack. :^) > > > > Good spot Alexey :) > > This patch works too. Wow, that was fast :-) Also verified that the patch fixes the issue. Thanks, Eric -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Oh, that was me! Thank you, Alexey! > This should be fixed as well, or multiple threads reading /proc/net/snmp > could get mixed results without proper locking. I don't believe locking is an issue here. If the values change between the first and second tests, being counters, they are still nonzero. If they are different in different threads, it reflects an actual change in the counter. So I'm not sure what you're talking about here. I don't think they should be on the stack (obviously, or I wouldn't have written it this way). So, FWIW, I like Alexey's fix, which is what the code should've been. For Alexey's patch: Acked-by: David L Stevens <dlstevens@us.ibm.com> +-DLS -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
David Stevens a écrit : > Oh, that was me! Thank you, Alexey! > >> This should be fixed as well, or multiple threads reading /proc/net/snmp >> could get mixed results without proper locking. > > I don't believe locking is an issue here. If the values > change between the first and second tests, being counters, > they are still nonzero. If they are different in different > threads, it reflects an actual change in the counter. So > I'm not sure what you're talking about here. If you are not sure what I am talking about, then you should probably not use static variables at all. I found this fix quite obvious... > > I don't think they should be on the stack (obviously, or > I wouldn't have written it this way). So, FWIW, I like > Alexey's fix, which is what the code should've been. You apparently missed the fact that with your new code, we can have more than 16 different ICMP counters > 0. thread 1 on CPU 1 ----------------- - fills 16 indexes in static table - print them. good. - fills *next* 16 indexes in static table ... preempted by some IRQ or something.... thread 2 on CPU 2 ------------------ fills 16 indexes in static table, overwriting the values that thread 1 was trying to put. ... thread 1: print the values of thread 2. (it will probably prints a copy of its first line) bang : User application missed some critical information. > > For Alexey's patch: > > Acked-by: David L Stevens <dlstevens@us.ibm.com> -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> > If you are not sure what I am talking about, then you should probably > not use static variables at all. I found this fix quite obvious... Actually, I didn't realize "out" was static -- was looking at just the patch, and obviously missing your point. I don't have a problem with 16 ints on the stack (or shorts, as you pointed out)-- I didn't want the data on the stack, which may be 64-bit ints. In your patch, you're collecting all of it on the stack (doubling its size). If there is no interlocking at a higher layer (and I haven't looked at this in a long time...) (ie, exclusive opens), then I agree, it shouldn't be static. Why not just that? (ie, add count=0 as Alexey did and remove the static qualifier from "out") +-DLS -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
David Stevens a écrit : >> If you are not sure what I am talking about, then you should probably >> not use static variables at all. I found this fix quite obvious... > > Actually, I didn't realize "out" was static -- was looking at just > the patch, and obviously missing your point. > I don't have a problem with 16 ints on the stack (or shorts, as > you pointed out)-- I didn't want the data on the stack, which may be > 64-bit ints. In your patch, you're collecting all of it on the stack > (doubling its size). > If there is no interlocking at a higher layer (and I haven't > looked > at this in a long time...) (ie, exclusive opens), then I agree, it > shouldn't be static. > > Why not just that? (ie, add count=0 as Alexey did and remove the > static qualifier from "out") Well, this patch also saves 143+64 bytes because of the cleanup. before : # size net/ipv4/proc.o text data bss dec hex filename 5191 16 64 5271 1497 net/ipv4/proc.o After : # size net/ipv4/proc.o text data bss dec hex filename 5048 16 0 5064 13c8 net/ipv4/proc.o 1) bug correction from Alexey 2) bug correction about using a static array without exclusion 3) minor cleanup to save 143 bytes of text and 64 bytes of data 4) save thousand of cpu cycles if many possible cpus. 5) Actually works, even if using 128 bytes on stack, in a function that only calls seq_printf() (we are not in the network stack) -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
From: Eric Dumazet <dada1@cosmosbay.com> Date: Sat, 08 Nov 2008 06:53:31 +0100 > [PATCH] net: fix /proc/net/snmp as memory corruptor > > icmpmsg_put() can happily corrupt kernel memory, using a static > table and forgetting to reset an array index in a loop. > > Remove the static array since its not safe without proper locking. > > Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com> > Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> I've applied this version of the fix. Thanks! -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 8f5a403..a631a1f 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -237,43 +237,45 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_SENTINEL }; +static void icmpmsg_put_line(struct seq_file *seq, unsigned long *vals, + unsigned short *type, int count) +{ + int j; + + if (count) { + seq_printf(seq, "\nIcmpMsg:"); + for (j = 0; j < count; ++j) + seq_printf(seq, " %sType%u", + type[j] & 0x100 ? "Out" : "In", + type[j] & 0xff); + seq_printf(seq, "\nIcmpMsg:"); + for (j = 0; j < count; ++j) + seq_printf(seq, " %lu", vals[j]); + } +} + static void icmpmsg_put(struct seq_file *seq) { #define PERLINE 16 - int j, i, count; - static int out[PERLINE]; + int i, count; + unsigned short type[PERLINE]; + unsigned long vals[PERLINE], val; struct net *net = seq->private; count = 0; for (i = 0; i < ICMPMSG_MIB_MAX; i++) { - - if (snmp_fold_field((void **) net->mib.icmpmsg_statistics, i)) - out[count++] = i; - if (count < PERLINE) - continue; - - seq_printf(seq, "\nIcmpMsg:"); - for (j = 0; j < PERLINE; ++j) - seq_printf(seq, " %sType%u", i & 0x100 ? "Out" : "In", - i & 0xff); - seq_printf(seq, "\nIcmpMsg: "); - for (j = 0; j < PERLINE; ++j) - seq_printf(seq, " %lu", - snmp_fold_field((void **) net->mib.icmpmsg_statistics, - out[j])); - seq_putc(seq, '\n'); - } - if (count) { - seq_printf(seq, "\nIcmpMsg:"); - for (j = 0; j < count; ++j) - seq_printf(seq, " %sType%u", out[j] & 0x100 ? "Out" : - "In", out[j] & 0xff); - seq_printf(seq, "\nIcmpMsg:"); - for (j = 0; j < count; ++j) - seq_printf(seq, " %lu", snmp_fold_field((void **) - net->mib.icmpmsg_statistics, out[j])); + val = snmp_fold_field((void **) net->mib.icmpmsg_statistics, i); + if (val) { + type[count] = i; + vals[count++] = val; + } + if (count == PERLINE) { + icmpmsg_put_line(seq, vals, type, count); + count = 0; + } } + icmpmsg_put_line(seq, vals, type, count); #undef PERLINE }