Message ID | 1430255273-3045254-5-git-send-email-kafai@fb.com |
---|---|
State | Changes Requested, archived |
Delegated to: | David Miller |
Headers | show |
On Tue, Apr 28, 2015 at 02:07:51PM -0700, Martin KaFai Lau wrote: > > -static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, > - struct sk_buff *skb, u32 mtu) > +static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, > + const struct ipv6hdr *iph, u32 mtu) > { > struct rt6_info *rt6 = (struct rt6_info *)dst; > + struct net *net; > + > + if (rt6->rt6i_flags & RTF_LOCAL) > + return; > > dst_confirm(dst); > - if (mtu < dst_mtu(dst) && (rt6->rt6i_flags & RTF_CACHE)) { > - struct net *net = dev_net(dst->dev); > + mtu = max_t(u32, mtu, IPV6_MIN_MTU); > + if (mtu >= dst_mtu(dst)) > + return; > > - rt6->rt6i_flags |= RTF_MODIFIED; > - if (mtu < IPV6_MIN_MTU) > - mtu = IPV6_MIN_MTU; > + if (!(rt6->rt6i_flags & RTF_CACHE)) { > + const struct in6_addr *daddr, *saddr; > + struct rt6_info *nrt6; > > - rt6->rt6i_pmtu = mtu; > - rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires); > + if (iph) { > + daddr = &iph->daddr; > + saddr = &iph->saddr; > + } else if (sk) { > + daddr = &sk->sk_v6_daddr; > + saddr = &inet6_sk(sk)->saddr; > + } else { > + return; > + } > + nrt6 = ip6_pmtu_rt_cache_alloc(rt6, daddr, saddr); > + if (!nrt6) > + return; > + /* ip6_ins_rt(nrt6) will bump the rt6->rt6i_node->fn_sernum > + * which will fail the next rt6_check() and invalidate the > + * sk->sk_dst_cache. > + */ > + if (ip6_ins_rt(nrt6)) { > + dst_destroy(&nrt6->dst); fib6_add() does a dst_free() on error, so calling dst_destroy() here might result in a use after free. > + return; > + } > + > + rt6 = nrt6; > + dst = &nrt6->dst; > } > + > + net = dev_net(dst->dev); > + rt6->rt6i_flags |= RTF_MODIFIED; > + rt6->rt6i_pmtu = mtu; > + rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires); The update of expires and the setting of rt6i_pmtu should happen before the route is inserted with ip6_ins_rt(). This is because fib6_add_rt2node() tries to reuse old expired routes if still in the fib tree, the necessary informations are copied from the new route before it returnes -EEXIST on the new route. If your new route has no expires value set, fib6_add_rt2node() cleans expires of the old route before it resues it. Also rt6i_pmtu should be copied to the reused route in fib6_add_rt2node(), this should be done already in your first patchset. Otherwise we might use stale pmtu informations. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Apr 29, 2015 at 01:39:18PM +0200, Steffen Klassert wrote: > On Tue, Apr 28, 2015 at 02:07:51PM -0700, Martin KaFai Lau wrote: > > + if (ip6_ins_rt(nrt6)) { > > + dst_destroy(&nrt6->dst); > > fib6_add() does a dst_free() on error, so calling dst_destroy() > here might result in a use after free. Good catch. > > > > + return; > > + } > > + > > + rt6 = nrt6; > > + dst = &nrt6->dst; > > } > > + > > + net = dev_net(dst->dev); > > + rt6->rt6i_flags |= RTF_MODIFIED; > > + rt6->rt6i_pmtu = mtu; > > + rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires); > > The update of expires and the setting of rt6i_pmtu should > happen before the route is inserted with ip6_ins_rt(). > > This is because fib6_add_rt2node() tries to reuse old > expired routes if still in the fib tree, the necessary > informations are copied from the new route before it > returnes -EEXIST on the new route. If your new route > has no expires value set, fib6_add_rt2node() cleans > expires of the old route before it resues it. > > Also rt6i_pmtu should be copied to the reused route in > fib6_add_rt2node(), this should be done already in your > first patchset. Otherwise we might use stale pmtu informations. Good catch. A similar race may also happen in the current ip6_pol_route() where it may clear the RTF_EXPIRES of the existing pmtu clone. Hence, copying rt6i_pmtu (at fib6_add_rt2node()) in the last patchset will not be right. I will do the copying and early-set-expire in this patchset instead. Thanks, ---Martin -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8bc83bc..09ab0f4 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -873,16 +873,13 @@ static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, struct flowi6 *fl6, int flags) { struct fib6_node *fn, *saved_fn; - struct rt6_info *rt, *nrt; + struct rt6_info *rt; int strict = 0; - int attempts = 3; - int err; strict |= flags & RT6_LOOKUP_F_IFACE; if (net->ipv6.devconf_all->forwarding == 0) strict |= RT6_LOOKUP_F_REACHABLE; -redo_fib6_lookup_lock: read_lock_bh(&table->tb6_lock); fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); @@ -901,46 +898,12 @@ redo_rt6_select: strict &= ~RT6_LOOKUP_F_REACHABLE; fn = saved_fn; goto redo_rt6_select; - } else { - dst_hold(&rt->dst); - read_unlock_bh(&table->tb6_lock); - goto out2; } } dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); - if (rt->rt6i_flags & RTF_CACHE) - goto out2; - - if (!rt6_is_gw_or_nonexthop(rt) || - !(rt->dst.flags & DST_HOST) || !(rt->dst.flags & RTF_LOCAL)) - nrt = ip6_pmtu_rt_cache_alloc(rt, &fl6->daddr, &fl6->saddr); - else - goto out2; - - ip6_rt_put(rt); - rt = nrt ? : net->ipv6.ip6_null_entry; - - dst_hold(&rt->dst); - if (nrt) { - err = ip6_ins_rt(nrt); - if (!err) - goto out2; - } - - if (--attempts <= 0) - goto out2; - - /* - * Race condition! In the gap, when table->tb6_lock was - * released someone could insert this route. Relookup. - */ - ip6_rt_put(rt); - goto redo_fib6_lookup_lock; - -out2: rt6_dst_from_metrics_check(rt); rt->dst.lastuse = jiffies; rt->dst.__use++; @@ -1113,22 +1076,59 @@ static void ip6_link_failure(struct sk_buff *skb) } } -static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) +static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, + const struct ipv6hdr *iph, u32 mtu) { struct rt6_info *rt6 = (struct rt6_info *)dst; + struct net *net; + + if (rt6->rt6i_flags & RTF_LOCAL) + return; dst_confirm(dst); - if (mtu < dst_mtu(dst) && (rt6->rt6i_flags & RTF_CACHE)) { - struct net *net = dev_net(dst->dev); + mtu = max_t(u32, mtu, IPV6_MIN_MTU); + if (mtu >= dst_mtu(dst)) + return; - rt6->rt6i_flags |= RTF_MODIFIED; - if (mtu < IPV6_MIN_MTU) - mtu = IPV6_MIN_MTU; + if (!(rt6->rt6i_flags & RTF_CACHE)) { + const struct in6_addr *daddr, *saddr; + struct rt6_info *nrt6; - rt6->rt6i_pmtu = mtu; - rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires); + if (iph) { + daddr = &iph->daddr; + saddr = &iph->saddr; + } else if (sk) { + daddr = &sk->sk_v6_daddr; + saddr = &inet6_sk(sk)->saddr; + } else { + return; + } + nrt6 = ip6_pmtu_rt_cache_alloc(rt6, daddr, saddr); + if (!nrt6) + return; + /* ip6_ins_rt(nrt6) will bump the rt6->rt6i_node->fn_sernum + * which will fail the next rt6_check() and invalidate the + * sk->sk_dst_cache. + */ + if (ip6_ins_rt(nrt6)) { + dst_destroy(&nrt6->dst); + return; + } + + rt6 = nrt6; + dst = &nrt6->dst; } + + net = dev_net(dst->dev); + rt6->rt6i_flags |= RTF_MODIFIED; + rt6->rt6i_pmtu = mtu; + rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires); +} + +static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) +{ + __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu); } void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, @@ -1147,7 +1147,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) - ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu)); + __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu)); dst_release(dst); } EXPORT_SYMBOL_GPL(ip6_update_pmtu);