diff mbox series

[RFC,net-next,08/20] net/ipv6: Defer initialization of dst to data path

Message ID 20180225194730.30063-9-dsahern@gmail.com
State RFC, archived
Delegated to: David Miller
Headers show
Series net/ipv6: Separate data structures for FIB and data path | expand

Commit Message

David Ahern Feb. 25, 2018, 7:47 p.m. UTC
Defer setting dst input, output and error until fib entry is copied.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 net/ipv6/route.c | 115 +++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 74 insertions(+), 41 deletions(-)

Comments

David Miller Feb. 26, 2018, 7:17 p.m. UTC | #1
From: David Ahern <dsahern@gmail.com>
Date: Sun, 25 Feb 2018 11:47:18 -0800

> +static void ip6_rt_init_dst(struct rt6_info *rt, struct rt6_info *ort)
> +{
 ...
> +	rt->dst.error = 0;
> +	rt->dst.output = ip6_output;
 ...
> @@ -930,14 +999,12 @@ static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
>  
>  static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
>  {
> -	rt->dst.input = ort->dst.input;
> -	rt->dst.output = ort->dst.output;
> +	ip6_rt_init_dst(rt, ort);
> +
>  	rt->rt6i_dst = ort->rt6i_dst;
> -	rt->dst.error = ort->dst.error;
>  	rt->rt6i_idev = ort->rt6i_idev;
>  	if (rt->rt6i_idev)
>  		in6_dev_hold(rt->rt6i_idev);
> -	rt->dst.lastuse = jiffies;
>  	rt->rt6i_gateway = ort->fib6_nh.nh_gw;
>  	rt->rt6i_flags = ort->rt6i_flags;
>  	rt6_set_from(rt, ort);

This seems to change behavior.

In the old code, the dst error value is propagated from 'ort' into 'rt'.

Here you set it to zero and that's it.

Is it set somewhere else?

I don't think you can assume that all routes that go via this copy
path are not reject routes or other kinds that need the error code
set, if that is what you were thinking.

Thanks.
David Ahern Feb. 26, 2018, 8:20 p.m. UTC | #2
On 2/26/18 12:17 PM, David Miller wrote:
> From: David Ahern <dsahern@gmail.com>
> Date: Sun, 25 Feb 2018 11:47:18 -0800
> 
>> +static void ip6_rt_init_dst(struct rt6_info *rt, struct rt6_info *ort)
>> +{
>  ...
>> +	rt->dst.error = 0;
>> +	rt->dst.output = ip6_output;
>  ...
>> @@ -930,14 +999,12 @@ static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
>>  
>>  static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
>>  {
>> -	rt->dst.input = ort->dst.input;
>> -	rt->dst.output = ort->dst.output;
>> +	ip6_rt_init_dst(rt, ort);
>> +
>>  	rt->rt6i_dst = ort->rt6i_dst;
>> -	rt->dst.error = ort->dst.error;
>>  	rt->rt6i_idev = ort->rt6i_idev;
>>  	if (rt->rt6i_idev)
>>  		in6_dev_hold(rt->rt6i_idev);
>> -	rt->dst.lastuse = jiffies;
>>  	rt->rt6i_gateway = ort->fib6_nh.nh_gw;
>>  	rt->rt6i_flags = ort->rt6i_flags;
>>  	rt6_set_from(rt, ort);
> 
> This seems to change behavior.
> 
> In the old code, the dst error value is propagated from 'ort' into 'rt'.
> 
> Here you set it to zero and that's it.
> 
> Is it set somewhere else?
> 
> I don't think you can assume that all routes that go via this copy
> path are not reject routes or other kinds that need the error code
> set, if that is what you were thinking.

Only REJECT routes have dst->error set and the only place that happens
is ip6_route_info_create:

$ egrep -r 'dst.error = |dst->error = ' include net
net/core/dst.c:	dst->error = 0;
net/ipv6/route.c:			rt->dst.error = -EINVAL;
net/ipv6/route.c:			rt->dst.error = -EACCES;
net/ipv6/route.c:			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
net/ipv6/route.c:	rt->dst.error = ort->dst.error;


You cut the context, so I will add the diff here:

+static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct rt6_info
*ort)
+{
+	rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
+
+	switch (ort->fib6_type) {
+	case RTN_BLACKHOLE:
+		rt->dst.output = dst_discard_out;
+		rt->dst.input = dst_discard;
+		break;
+	case RTN_PROHIBIT:
+		rt->dst.output = ip6_pkt_prohibit_out;
+		rt->dst.input = ip6_pkt_prohibit;
+		break;
+	case RTN_THROW:
+	case RTN_UNREACHABLE:
+	default:
+		rt->dst.output = ip6_pkt_discard_out;
+		rt->dst.input = ip6_pkt_discard;
+		break;
+	}
+}
+
+static void ip6_rt_init_dst(struct rt6_info *rt, struct rt6_info *ort)
+{
+	if (ort->rt6i_flags & RTF_REJECT) {
+		ip6_rt_init_dst_reject(rt, ort);
+		return;
+	}
+
+	rt->dst.error = 0;
+	rt->dst.output = ip6_output;
+
+...

So for reject routes we have the above helper which is basically a code
move from ip6_route_info_create.

For non-reject routes dst.error is 0 which is the rest of ip6_rt_init_dst.
David Miller Feb. 26, 2018, 8:22 p.m. UTC | #3
From: David Ahern <dsahern@gmail.com>
Date: Mon, 26 Feb 2018 13:20:27 -0700

> +static void ip6_rt_init_dst(struct rt6_info *rt, struct rt6_info *ort)
> +{
> +	if (ort->rt6i_flags & RTF_REJECT) {
> +		ip6_rt_init_dst_reject(rt, ort);
> +		return;
> +	}
> +
> +	rt->dst.error = 0;
> +	rt->dst.output = ip6_output;
> +
> +...
> 
> So for reject routes we have the above helper which is basically a code
> move from ip6_route_info_create.
> 
> For non-reject routes dst.error is 0 which is the rest of ip6_rt_init_dst.

My bad, thanks for explaining things to me.  The flag bit test above
completely escaped my eyes for some reason. :)
diff mbox series

Patch

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ff809ee930c7..b56f56508970 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -918,6 +918,75 @@  static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
 	return dev;
 }
 
+static const int fib6_prop[RTN_MAX + 1] = {
+	[RTN_UNSPEC]	= 0,
+	[RTN_UNICAST]	= 0,
+	[RTN_LOCAL]	= 0,
+	[RTN_BROADCAST]	= 0,
+	[RTN_ANYCAST]	= 0,
+	[RTN_MULTICAST]	= 0,
+	[RTN_BLACKHOLE]	= -EINVAL,
+	[RTN_UNREACHABLE] = -EHOSTUNREACH,
+	[RTN_PROHIBIT]	= -EACCES,
+	[RTN_THROW]	= -EAGAIN,
+	[RTN_NAT]	= -EINVAL,
+	[RTN_XRESOLVE]	= -EINVAL,
+};
+
+static int ip6_rt_type_to_error(u8 fib6_type)
+{
+	return fib6_prop[fib6_type];
+}
+
+static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct rt6_info *ort)
+{
+	rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
+
+	switch (ort->fib6_type) {
+	case RTN_BLACKHOLE:
+		rt->dst.output = dst_discard_out;
+		rt->dst.input = dst_discard;
+		break;
+	case RTN_PROHIBIT:
+		rt->dst.output = ip6_pkt_prohibit_out;
+		rt->dst.input = ip6_pkt_prohibit;
+		break;
+	case RTN_THROW:
+	case RTN_UNREACHABLE:
+	default:
+		rt->dst.output = ip6_pkt_discard_out;
+		rt->dst.input = ip6_pkt_discard;
+		break;
+	}
+}
+
+static void ip6_rt_init_dst(struct rt6_info *rt, struct rt6_info *ort)
+{
+	if (ort->rt6i_flags & RTF_REJECT) {
+		ip6_rt_init_dst_reject(rt, ort);
+		return;
+	}
+
+	rt->dst.error = 0;
+	rt->dst.output = ip6_output;
+
+	if (ort->fib6_type == RTN_LOCAL) {
+		rt->dst.flags |= DST_HOST;
+		rt->dst.input = ip6_input;
+	} else if (ipv6_addr_type(&ort->rt6i_dst.addr) & IPV6_ADDR_MULTICAST) {
+		rt->dst.input = ip6_mc_input;
+	} else {
+		rt->dst.input = ip6_forward;
+	}
+
+	if (ort->fib6_nh.nh_lwtstate) {
+		rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
+		lwtunnel_set_redirect(&rt->dst);
+	}
+
+	rt->dst.lastuse = jiffies;
+}
+
 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
 {
 	BUG_ON(from->from);
@@ -930,14 +999,12 @@  static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
 
 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
 {
-	rt->dst.input = ort->dst.input;
-	rt->dst.output = ort->dst.output;
+	ip6_rt_init_dst(rt, ort);
+
 	rt->rt6i_dst = ort->rt6i_dst;
-	rt->dst.error = ort->dst.error;
 	rt->rt6i_idev = ort->rt6i_idev;
 	if (rt->rt6i_idev)
 		in6_dev_hold(rt->rt6i_idev);
-	rt->dst.lastuse = jiffies;
 	rt->rt6i_gateway = ort->fib6_nh.nh_gw;
 	rt->rt6i_flags = ort->rt6i_flags;
 	rt6_set_from(rt, ort);
@@ -2210,7 +2277,7 @@  static struct rt6_info *__ip6_route_redirect(struct net *net,
 			continue;
 		if (rt6_check_expired(rt))
 			continue;
-		if (rt->dst.error)
+		if (rt->rt6i_flags & RTF_REJECT)
 			break;
 		if (!(rt->rt6i_flags & RTF_GATEWAY))
 			continue;
@@ -2238,7 +2305,7 @@  static struct rt6_info *__ip6_route_redirect(struct net *net,
 
 	if (!rt)
 		rt = net->ipv6.ip6_null_entry;
-	else if (rt->dst.error) {
+	else if (rt->rt6i_flags & RTF_REJECT) {
 		rt = net->ipv6.ip6_null_entry;
 		goto out;
 	}
@@ -2707,15 +2774,6 @@  static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 
 	addr_type = ipv6_addr_type(&cfg->fc_dst);
 
-	if (addr_type & IPV6_ADDR_MULTICAST)
-		rt->dst.input = ip6_mc_input;
-	else if (cfg->fc_flags & RTF_LOCAL)
-		rt->dst.input = ip6_input;
-	else
-		rt->dst.input = ip6_forward;
-
-	rt->dst.output = ip6_output;
-
 	if (cfg->fc_encap) {
 		struct lwtunnel_state *lwtstate;
 
@@ -2725,7 +2783,6 @@  static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 		if (err)
 			goto out;
 		rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
-		lwtunnel_set_redirect(&rt->dst);
 	}
 
 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
@@ -2765,27 +2822,6 @@  static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 			}
 		}
 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
-		switch (cfg->fc_type) {
-		case RTN_BLACKHOLE:
-			rt->dst.error = -EINVAL;
-			rt->dst.output = dst_discard_out;
-			rt->dst.input = dst_discard;
-			break;
-		case RTN_PROHIBIT:
-			rt->dst.error = -EACCES;
-			rt->dst.output = ip6_pkt_prohibit_out;
-			rt->dst.input = ip6_pkt_prohibit;
-			break;
-		case RTN_THROW:
-		case RTN_UNREACHABLE:
-		default:
-			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
-					: (cfg->fc_type == RTN_UNREACHABLE)
-					? -EHOSTUNREACH : -ENETUNREACH;
-			rt->dst.output = ip6_pkt_discard_out;
-			rt->dst.input = ip6_pkt_discard;
-			break;
-		}
 		goto install_route;
 	}
 
@@ -3475,12 +3511,9 @@  struct rt6_info *addrconf_dst_alloc(struct net *net,
 		return ERR_PTR(-ENOMEM);
 
 	in6_dev_hold(idev);
-
-	rt->dst.flags |= DST_HOST;
-	rt->dst.input = ip6_input;
-	rt->dst.output = ip6_output;
 	rt->rt6i_idev = idev;
 
+	rt->dst.flags |= DST_HOST;
 	rt->rt6i_protocol = RTPROT_KERNEL;
 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
 	if (anycast) {