From patchwork Tue Jul 31 01:23:31 2012 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: David Miller X-Patchwork-Id: 174127 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 81BA22C0092 for ; Tue, 31 Jul 2012 11:23:36 +1000 (EST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754401Ab2GaBXd (ORCPT ); Mon, 30 Jul 2012 21:23:33 -0400 Received: from shards.monkeyblade.net ([149.20.54.216]:60209 "EHLO shards.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754016Ab2GaBXc (ORCPT ); Mon, 30 Jul 2012 21:23:32 -0400 Received: from localhost (74-93-104-98-Washington.hfc.comcastbusiness.net [74.93.104.98]) by shards.monkeyblade.net (Postfix) with ESMTPSA id C36E8584867; Mon, 30 Jul 2012 18:23:33 -0700 (PDT) Date: Mon, 30 Jul 2012 18:23:31 -0700 (PDT) Message-Id: <20120730.182331.1434352286747934185.davem@davemloft.net> To: eric.dumazet@gmail.com CC: netdev@vger.kernel.org Subject: [PATCH 2/2] ipv4: Properly purge netdev references on uncached routes. From: David Miller X-Mailer: Mew version 6.5 on Emacs 24.1 / Mule 6.0 (HANACHIRUSATO) Mime-Version: 1.0 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org When a device is unregistered, we have to purge all of the references to it that may exist in the entire system. If a route is uncached, we currently have no way of accomplishing this. So create a global list that is scanned when a network device goes down. This mirrors the logic in net/core/dst.c's dst_ifdown(). Signed-off-by: David S. Miller --- include/net/route.h | 3 +++ net/ipv4/fib_frontend.c | 1 + net/ipv4/route.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++--- net/ipv4/xfrm4_policy.c | 1 + 4 files changed, 69 insertions(+), 4 deletions(-) diff --git a/include/net/route.h b/include/net/route.h index 8c52bc6..776a27f 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -57,6 +57,8 @@ struct rtable { /* Miscellaneous cached information */ u32 rt_pmtu; + + struct list_head rt_uncached; }; static inline bool rt_is_input_route(const struct rtable *rt) @@ -107,6 +109,7 @@ extern struct ip_rt_acct __percpu *ip_rt_acct; struct in_device; extern int ip_rt_init(void); extern void rt_cache_flush(struct net *net, int how); +extern void rt_flush_dev(struct net_device *dev); extern struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp); extern struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp, struct sock *sk); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 8732cc7..c43ae3f 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1046,6 +1046,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo if (event == NETDEV_UNREGISTER) { fib_disable_ip(dev, 2, -1); + rt_flush_dev(dev); return NOTIFY_DONE; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e2abb0d..7111fce 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -147,6 +147,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); +static void ipv4_dst_destroy(struct dst_entry *dst); static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, int how) @@ -170,6 +171,7 @@ static struct dst_ops ipv4_dst_ops = { .default_advmss = ipv4_default_advmss, .mtu = ipv4_mtu, .cow_metrics = ipv4_cow_metrics, + .destroy = ipv4_dst_destroy, .ifdown = ipv4_dst_ifdown, .negative_advice = ipv4_negative_advice, .link_failure = ipv4_link_failure, @@ -1168,9 +1170,11 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) return NULL; } -static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, +static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, __be32 daddr) { + bool ret = false; + spin_lock_bh(&fnhe_lock); if (daddr == fnhe->fnhe_daddr) { @@ -1199,14 +1203,18 @@ static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, fnhe->fnhe_rth = rt; fnhe->fnhe_stamp = jiffies; + ret = true; } spin_unlock_bh(&fnhe_lock); + + return ret; } -static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) +static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) { struct rtable *orig, *prev, **p = &nh->nh_rth_output; + bool ret = true; if (rt_is_input_route(rt)) p = &nh->nh_rth_input; @@ -1221,6 +1229,48 @@ static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) dst_release(&orig->dst); } else { dst_release(&rt->dst); + ret = false; + } + + return ret; +} + +static DEFINE_SPINLOCK(rt_uncached_lock); +static LIST_HEAD(rt_uncached_list); + +static void rt_add_uncached_list(struct rtable *rt) +{ + spin_lock_bh(&rt_uncached_lock); + list_add_tail(&rt->rt_uncached, &rt_uncached_list); + spin_unlock_bh(&rt_uncached_lock); +} + +static void ipv4_dst_destroy(struct dst_entry *dst) +{ + struct rtable *rt = (struct rtable *) dst; + + if (dst->flags & DST_NOCACHE) { + spin_lock_bh(&rt_uncached_lock); + list_del(&rt->rt_uncached); + spin_unlock_bh(&rt_uncached_lock); + } +} + +void rt_flush_dev(struct net_device *dev) +{ + if (!list_empty(&rt_uncached_list)) { + struct net *net = dev_net(dev); + struct rtable *rt; + + spin_lock_bh(&rt_uncached_lock); + list_for_each_entry(rt, &rt_uncached_list, rt_uncached) { + if (rt->dst.dev != dev) + continue; + rt->dst.dev = net->loopback_dev; + dev_hold(rt->dst.dev); + dev_put(dev); + } + spin_unlock_bh(&rt_uncached_lock); } } @@ -1236,6 +1286,8 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, struct fib_nh_exception *fnhe, struct fib_info *fi, u16 type, u32 itag) { + bool cached = false; + if (fi) { struct fib_nh *nh = &FIB_RES_NH(*res); @@ -1246,10 +1298,12 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, rt->dst.tclassid = nh->nh_tclassid; #endif if (unlikely(fnhe)) - rt_bind_exception(rt, fnhe, daddr); + cached = rt_bind_exception(rt, fnhe, daddr); else if (!(rt->dst.flags & DST_NOCACHE)) - rt_cache_route(nh, rt); + cached = rt_cache_route(nh, rt); } + if (unlikely(!cached)) + rt_add_uncached_list(rt); #ifdef CONFIG_IP_ROUTE_CLASSID #ifdef CONFIG_IP_MULTIPLE_TABLES @@ -1316,6 +1370,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_iif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + INIT_LIST_HEAD(&rth->rt_uncached); if (our) { rth->dst.input= ip_local_deliver; rth->rt_flags |= RTCF_LOCAL; @@ -1441,6 +1496,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_iif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + INIT_LIST_HEAD(&rth->rt_uncached); rth->dst.input = ip_forward; rth->dst.output = ip_output; @@ -1607,6 +1663,7 @@ local_input: rth->rt_iif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + INIT_LIST_HEAD(&rth->rt_uncached); if (res.type == RTN_UNREACHABLE) { rth->dst.input= ip_error; rth->dst.error= -err; @@ -1771,6 +1828,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_iif = orig_oif ? : 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + INIT_LIST_HEAD(&rth->rt_uncached); RT_CACHE_STAT_INC(out_slow_tot); @@ -2050,6 +2108,8 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_type = ort->rt_type; rt->rt_gateway = ort->rt_gateway; + INIT_LIST_HEAD(&rt->rt_uncached); + dst_free(new); } diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index c628184..681ea2f 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -92,6 +92,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_type = rt->rt_type; xdst->u.rt.rt_gateway = rt->rt_gateway; xdst->u.rt.rt_pmtu = rt->rt_pmtu; + INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); return 0; }