diff mbox

ipv6 hitting route max_size

Message ID 20110607.005645.1883989402770213985.davem@davemloft.net
State Superseded, archived
Delegated to: David Miller
Headers show

Commit Message

David Miller June 7, 2011, 7:56 a.m. UTC
From: Simon Kirby <sim@hostway.ca>
Date: Mon, 6 Jun 2011 16:15:21 -0700

> Ok, makes sense, but the result is now that ipv4 loads a full Internet
> table with no adjustments, while ipv6 does not. Would it make sense to
> change 4096 to 1048576, or would it be better to count only clones of
> the actual route or something along those lines?

Simon can you give this patch a try?

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Simon Kirby June 9, 2011, 4:40 a.m. UTC | #1
On Tue, Jun 07, 2011 at 12:56:45AM -0700, David Miller wrote:

> From: Simon Kirby <sim@hostway.ca>
> Date: Mon, 6 Jun 2011 16:15:21 -0700
> 
> > Ok, makes sense, but the result is now that ipv4 loads a full Internet
> > table with no adjustments, while ipv6 does not. Would it make sense to
> > change 4096 to 1048576, or would it be better to count only clones of
> > the actual route or something along those lines?
> 
> Simon can you give this patch a try?

Didn't apply to 2.6.39, so I tried 3.0-rc2, but I get an Oops when
running the example reproduction case I gave before (

for ((i = 0;i < 4200;i++)); do ip route add unreachable 2000::$i; done

) both with and without your patch applied:

BUG: unable to handle kernel NULL pointer dereference at 00000000000000a0
IP: [<ffffffff8143e2b7>] ip6_route_add+0xe7/0x6b0
PGD 3ed7c8067 PUD 3ed5a1067 PMD 0
Oops: 0002 [#1] SMP
CPU 0
Modules linked in: nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack_ipv4 nf_defrag_ipv4 nf_conntrack tg3 e100 libphy

Pid: 8932, comm: ip Not tainted 3.0.0-rc2-amd64-net #1 To Be Filled By O.E.M. To Be Filled By O.E.M./TYAN High-End Dual AMD Opteron, S2882
RIP: 0010:[<ffffffff8143e2b7>]  [<ffffffff8143e2b7>] ip6_route_add+0xe7/0x6b0
RSP: 0018:ffff8803e59939f8  EFLAGS: 00010246
RAX: 0000000000000000 RBX: ffff8803e5993a58 RCX: 0000000000000038
RDX: 00000000000000a0 RSI: 0000000000000008 RDI: 00000000000000a0
RBP: ffffffff817b3300 R08: ffffffff816c8980 R09: 0000000000000000
R10: 0000000000000001 R11: dead000000200200 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: 00000000fffffff4
FS:  00007f5f11908700(0000) GS:ffff8803ffc00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00000000000000a0 CR3: 00000003edfdd000 CR4: 00000000000006f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process ip (pid: 8932, threadinfo ffff8803e5992000, task ffff8803ed70dfa0)
Stack:
 0000000000000000 0000000000000000 ffff8803eecd2a80 0000000000000000
 0000000000000000 ffff8803eedabe00 ffff8803ee671600 ffffffff813b8a30
 ffff8803fe00ac00 ffff8803e5993b50 0000000000000000 ffffffff8143e89c
Call Trace:
 [<ffffffff813b8a30>] ? rtnetlink_rcv+0x30/0x30
 [<ffffffff8143e89c>] ? inet6_rtm_newroute+0x1c/0x30
 [<ffffffff813cb3b9>] ? netlink_rcv_skb+0x89/0xb0
 [<ffffffff813b8a1f>] ? rtnetlink_rcv+0x1f/0x30
 [<ffffffff813cb013>] ? netlink_unicast+0x283/0x2d0
 [<ffffffff813cb930>] ? netlink_sendmsg+0x230/0x390
 [<ffffffff8139639b>] ? sock_sendmsg+0xab/0xe0
 [<ffffffff810925eb>] ? __alloc_pages_nodemask+0x10b/0x700
 [<ffffffff810a3fc2>] ? __do_fault+0x3e2/0x4c0
 [<ffffffff81395b9e>] ? move_addr_to_kernel+0x2e/0x40
 [<ffffffff813a1fd9>] ? verify_iovec+0x69/0xd0
 [<ffffffff813972e2>] ? __sys_sendmsg+0x172/0x300
 [<ffffffff81027465>] ? do_page_fault+0x1a5/0x430
 [<ffffffff813cb6be>] ? netlink_autobind+0x8e/0xd0
 [<ffffffff81395bfc>] ? move_addr_to_user+0x4c/0x60
 [<ffffffff81396f55>] ? sys_getsockname+0xd5/0xe0
 [<ffffffff81397634>] ? sys_sendmsg+0x44/0x80
 [<ffffffff814a35bb>] ? system_call_fastpath+0x16/0x1b
Code: 31 c9 31 d2 45 31 c0 31 f6 41 bf f4 ff ff ff e8 b0 2d f7 ff 48 8d 90 a0 00 00 00 49 89 c4 b9 38 00 00 00 31 c0 4d 85 e4 48 89 d7 <f3> ab 0f 84 06 03 00 00 66 41 c7 44 24 6a ff ff 31 c0 f6 43 16
RIP  [<ffffffff8143e2b7>] ip6_route_add+0xe7/0x6b0
 RSP <ffff8803e59939f8>
CR2: 00000000000000a0
---[ end trace 370907621d87fefc ]---

I don't see many changes to ip6_route_add other than c3968a857a6b6c3.
Checking shortly once I get a git tree on this box, but no ipmi and I'm
remote at the moment.

Btw, maybe rt6_alloc_clone or rt6_alloc_cow needs to clear the DST_NOCOUNT
flag from rt->dst.flags for it to count any of them? Didn't verify.

Simon-
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller June 24, 2011, 9:35 p.m. UTC | #2
From: Simon Kirby <sim@hostway.ca>
Date: Wed, 8 Jun 2011 21:40:42 -0700

> Didn't apply to 2.6.39, so I tried 3.0-rc2, but I get an Oops when
> running the example reproduction case I gave before (

I'll try to debug this.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller June 24, 2011, 9:57 p.m. UTC | #3
From: Simon Kirby <sim@hostway.ca>
Date: Wed, 8 Jun 2011 21:40:42 -0700

> Didn't apply to 2.6.39, so I tried 3.0-rc2, but I get an Oops when
> running the example reproduction case I gave before (
> 
> for ((i = 0;i < 4200;i++)); do ip route add unreachable 2000::$i; done
> 
> ) both with and without your patch applied:

I tried to reproduce this with Linus's current tree but I cannot.

Here is what I did:

-------------------- hex.c --------------------
#include <stdio.h>

/*
 * Print the hexadecimal values 0000 through 41ff (0x4200 values in all),
 * each zero-padded to four digits and followed by a space, then a final
 * newline.  The shell loop shown after this listing feeds each value into
 * "ip route add unreachable 2000::$i".
 *
 * Returns 0.
 */
int main(void)
{
	/* The %x conversion takes an unsigned int, so count with one. */
	unsigned int i;

	for (i = 0; i < 0x4200; i++) {
		printf("%04x ", i);
	}
	printf("\n");
	return 0;
}
-------------------- hex.c --------------------
bash$ gcc -o hex hex.c
bash$ for i in $(./hex); do ip route add unreachable 2000::$i; done
bash$

It takes a bit of time to run, but no crash. :-)

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/net/dst.h b/include/net/dst.h
index 7d15d23..e12ddfb 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -77,6 +77,7 @@  struct dst_entry {
 #define DST_NOPOLICY		0x0004
 #define DST_NOHASH		0x0008
 #define DST_NOCACHE		0x0010
+#define DST_NOCOUNT		0x0020
 	union {
 		struct dst_entry	*next;
 		struct rtable __rcu	*rt_next;
diff --git a/net/core/dst.c b/net/core/dst.c
index 9ccca03..6135f36 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -190,7 +190,8 @@  void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
 	dst->lastuse = jiffies;
 	dst->flags = flags;
 	dst->next = NULL;
-	dst_entries_add(ops, 1);
+	if (!(flags & DST_NOCOUNT))
+		dst_entries_add(ops, 1);
 	return dst;
 }
 EXPORT_SYMBOL(dst_alloc);
@@ -243,7 +244,8 @@  again:
 		neigh_release(neigh);
 	}
 
-	dst_entries_add(dst->ops, -1);
+	if (!(dst->flags & DST_NOCOUNT))
+		dst_entries_add(dst->ops, -1);
 
 	if (dst->ops->destroy)
 		dst->ops->destroy(dst);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index de2b1de..7fb44b0 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -228,7 +228,8 @@  static struct rt6_info ip6_blk_hole_entry_template = {
 
 /* allocate dst with ip6_dst_ops */
 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
-					     struct net_device *dev)
+					     struct net_device *dev,
+					     int flags)
 {
 	struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, 0);
 
@@ -1042,7 +1043,7 @@  struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 	if (unlikely(idev == NULL))
 		return NULL;
 
-	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev);
+	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
 	if (unlikely(rt == NULL)) {
 		in6_dev_put(idev);
 		goto out;
@@ -1214,7 +1215,7 @@  int ip6_route_add(struct fib6_config *cfg)
 		goto out;
 	}
 
-	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL);
+	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
 
 	if (rt == NULL) {
 		err = -ENOMEM;
@@ -1734,7 +1735,7 @@  static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
 {
 	struct net *net = dev_net(ort->rt6i_dev);
 	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
-					    ort->dst.dev);
+					    ort->dst.dev, 0);
 
 	if (rt) {
 		rt->dst.input = ort->dst.input;
@@ -2013,7 +2014,7 @@  struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 {
 	struct net *net = dev_net(idev->dev);
 	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
-					    net->loopback_dev);
+					    net->loopback_dev, 0);
 	struct neighbour *neigh;
 
 	if (rt == NULL) {