Message ID | c830c1a9fd56531a754f034deb462e650641c54c.1523626834.git.joseph.salisbury@canonical.com |
---|---|
State | New |
Headers | show |
Series | [SRU,Bionic,1/1] xfrm: reuse uncached_list to track xdsts | expand |
On 04/20/18 18:14, Joseph Salisbury wrote: > From: Xin Long <lucien.xin@gmail.com> > > BugLink: http://bugs.launchpad.net/bugs/1746474 > > In early time, when freeing a xdst, it would be inserted into > dst_garbage.list first. Then if it's refcnt was still held > somewhere, later it would be put into dst_busy_list in > dst_gc_task(). > > When one dev was being unregistered, the dev of these dsts in > dst_busy_list would be set with loopback_dev and put this dev. > So that this dev's removal wouldn't get blocked, and avoid the > kmsg warning: > > kernel:unregister_netdevice: waiting for veth0 to become \ > free. Usage count = 2 > > However after Commit 52df157f17e5 ("xfrm: take refcnt of dst > when creating struct xfrm_dst bundle"), the xdst will not be > freed with dst gc, and this warning happens. > > To fix it, we need to find these xdsts that are still held by > others when removing the dev, and free xdst's dev and set it > with loopback_dev. > > But unfortunately after flow_cache for xfrm was deleted, no > list tracks them anymore. So we need to save these xdsts > somewhere to release the xdst's dev later. > > To make this easier, this patch is to reuse uncached_list to > track xdsts, so that the dev refcnt can be released in the > event NETDEV_UNREGISTER process of fib_netdev_notifier. > > Thanks to Florian, we could move forward this fix quickly. > > Fixes: 52df157f17e5 ("xfrm: take refcnt of dst when creating struct xfrm_dst bundle") > Reported-by: Jianlin Shi <jishi@redhat.com> > Reported-by: Hangbin Liu <liuhangbin@gmail.com> > Tested-by: Eyal Birger <eyal.birger@gmail.com> > Signed-off-by: Xin Long <lucien.xin@gmail.com> > Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com> > (cherry picked from commit 510c321b557121861601f9d259aadd65aa274f35) > Signed-off-by: Joseph Salisbury <joseph.salisbury@canonical.com> Acked-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com> Clean cherry-pick, tested by bug reporter. Thanks, Kleber > --- > include/net/ip6_route.h | 3 +++ > include/net/route.h | 3 +++ > net/ipv4/route.c | 21 +++++++++++++-------- > net/ipv4/xfrm4_policy.c | 4 +++- > net/ipv6/route.c | 4 ++-- > net/ipv6/xfrm6_policy.c | 5 +++++ > 6 files changed, 29 insertions(+), 11 deletions(-) > > diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h > index 18e442e..5c9732f 100644 > --- a/include/net/ip6_route.h > +++ b/include/net/ip6_route.h > @@ -170,6 +170,9 @@ void rt6_mtu_change(struct net_device *dev, unsigned int mtu); > void rt6_remove_prefsrc(struct inet6_ifaddr *ifp); > void rt6_clean_tohost(struct net *net, struct in6_addr *gateway); > > +void rt6_uncached_list_add(struct rt6_info *rt); > +void rt6_uncached_list_del(struct rt6_info *rt); > + > static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb) > { > const struct dst_entry *dst = skb_dst(skb); > diff --git a/include/net/route.h b/include/net/route.h > index d538e6d..2762c00 100644 > --- a/include/net/route.h > +++ b/include/net/route.h > @@ -227,6 +227,9 @@ struct in_ifaddr; > void fib_add_ifaddr(struct in_ifaddr *); > void fib_del_ifaddr(struct in_ifaddr *, struct in_ifaddr *); > > +void rt_add_uncached_list(struct rtable *rt); > +void rt_del_uncached_list(struct rtable *rt); > + > static inline void ip_rt_put(struct rtable *rt) > { > /* dst_release() accepts a NULL parameter. > diff --git a/net/ipv4/route.c b/net/ipv4/route.c > index 4e153b2..8a272bc 100644 > --- a/net/ipv4/route.c > +++ b/net/ipv4/route.c > @@ -1386,7 +1386,7 @@ struct uncached_list { > > static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list); > > -static void rt_add_uncached_list(struct rtable *rt) > +void rt_add_uncached_list(struct rtable *rt) > { > struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list); > > @@ -1397,14 +1397,8 @@ static void rt_add_uncached_list(struct rtable *rt) > spin_unlock_bh(&ul->lock); > } > > -static void ipv4_dst_destroy(struct dst_entry *dst) > +void rt_del_uncached_list(struct rtable *rt) > { > - struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst); > - struct rtable *rt = (struct rtable *) dst; > - > - if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt)) > - kfree(p); > - > if (!list_empty(&rt->rt_uncached)) { > struct uncached_list *ul = rt->rt_uncached_list; > > @@ -1414,6 +1408,17 @@ static void ipv4_dst_destroy(struct dst_entry *dst) > } > } > > +static void ipv4_dst_destroy(struct dst_entry *dst) > +{ > + struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst); > + struct rtable *rt = (struct rtable *)dst; > + > + if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt)) > + kfree(p); > + > + rt_del_uncached_list(rt); > +} > + > void rt_flush_dev(struct net_device *dev) > { > struct net *net = dev_net(dev); > diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c > index 05017e2..8d33f7b 100644 > --- a/net/ipv4/xfrm4_policy.c > +++ b/net/ipv4/xfrm4_policy.c > @@ -102,6 +102,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, > xdst->u.rt.rt_pmtu = rt->rt_pmtu; > xdst->u.rt.rt_table_id = rt->rt_table_id; > INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); > + rt_add_uncached_list(&xdst->u.rt); > > return 0; > } > @@ -241,7 +242,8 @@ static void xfrm4_dst_destroy(struct dst_entry *dst) > struct xfrm_dst *xdst = (struct xfrm_dst *)dst; > > dst_destroy_metrics_generic(dst); > - > + if (xdst->u.rt.rt_uncached_list) > + rt_del_uncached_list(&xdst->u.rt); > xfrm_dst_destroy(xdst); > } > > diff --git a/net/ipv6/route.c b/net/ipv6/route.c > index a560fb1..f04fafa 100644 > --- a/net/ipv6/route.c > +++ b/net/ipv6/route.c > @@ -128,7 +128,7 @@ struct uncached_list { > > static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list); > > -static void rt6_uncached_list_add(struct rt6_info *rt) > +void rt6_uncached_list_add(struct rt6_info *rt) > { > struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list); > > @@ -139,7 +139,7 @@ static void rt6_uncached_list_add(struct rt6_info *rt) > spin_unlock_bh(&ul->lock); > } > > -static void rt6_uncached_list_del(struct rt6_info *rt) > +void rt6_uncached_list_del(struct rt6_info *rt) > { > if (!list_empty(&rt->rt6i_uncached)) { > struct uncached_list *ul = rt->rt6i_uncached_list; > diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c > index 885ade2..26b598f 100644 > --- a/net/ipv6/xfrm6_policy.c > +++ b/net/ipv6/xfrm6_policy.c > @@ -113,6 +113,9 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, > xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway; > xdst->u.rt6.rt6i_dst = rt->rt6i_dst; > xdst->u.rt6.rt6i_src = rt->rt6i_src; > + INIT_LIST_HEAD(&xdst->u.rt6.rt6i_uncached); > + rt6_uncached_list_add(&xdst->u.rt6); > + atomic_inc(&dev_net(dev)->ipv6.rt6_stats->fib_rt_uncache); > > return 0; > } > @@ -244,6 +247,8 @@ static void xfrm6_dst_destroy(struct dst_entry *dst) > if (likely(xdst->u.rt6.rt6i_idev)) > in6_dev_put(xdst->u.rt6.rt6i_idev); > dst_destroy_metrics_generic(dst); > + if (xdst->u.rt6.rt6i_uncached_list) > + rt6_uncached_list_del(&xdst->u.rt6); > xfrm_dst_destroy(xdst); > } > >
On Fri, Apr 20, 2018 at 12:14:10PM -0400, Joseph Salisbury wrote: > From: Xin Long <lucien.xin@gmail.com> > > BugLink: http://bugs.launchpad.net/bugs/1746474 > > In early time, when freeing a xdst, it would be inserted into > dst_garbage.list first. Then if it's refcnt was still held > somewhere, later it would be put into dst_busy_list in > dst_gc_task(). > > When one dev was being unregistered, the dev of these dsts in > dst_busy_list would be set with loopback_dev and put this dev. > So that this dev's removal wouldn't get blocked, and avoid the > kmsg warning: > > kernel:unregister_netdevice: waiting for veth0 to become \ > free. Usage count = 2 > > However after Commit 52df157f17e5 ("xfrm: take refcnt of dst > when creating struct xfrm_dst bundle"), the xdst will not be > freed with dst gc, and this warning happens. > > To fix it, we need to find these xdsts that are still held by > others when removing the dev, and free xdst's dev and set it > with loopback_dev. > > But unfortunately after flow_cache for xfrm was deleted, no > list tracks them anymore. So we need to save these xdsts > somewhere to release the xdst's dev later. > > To make this easier, this patch is to reuse uncached_list to > track xdsts, so that the dev refcnt can be released in the > event NETDEV_UNREGISTER process of fib_netdev_notifier. > > Thanks to Florian, we could move forward this fix quickly. > > Fixes: 52df157f17e5 ("xfrm: take refcnt of dst when creating struct xfrm_dst bundle") > Reported-by: Jianlin Shi <jishi@redhat.com> > Reported-by: Hangbin Liu <liuhangbin@gmail.com> > Tested-by: Eyal Birger <eyal.birger@gmail.com> > Signed-off-by: Xin Long <lucien.xin@gmail.com> > Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com> > (cherry picked from commit 510c321b557121861601f9d259aadd65aa274f35) > Signed-off-by: Joseph Salisbury <joseph.salisbury@canonical.com> Clean cherry pick, positive testing. Acked-by: Seth Forshee <seth.forshee@canonical.com>
Applied to bionix master-next -Stefan
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 18e442e..5c9732f 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -170,6 +170,9 @@ void rt6_mtu_change(struct net_device *dev, unsigned int mtu); void rt6_remove_prefsrc(struct inet6_ifaddr *ifp); void rt6_clean_tohost(struct net *net, struct in6_addr *gateway); +void rt6_uncached_list_add(struct rt6_info *rt); +void rt6_uncached_list_del(struct rt6_info *rt); + static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb) { const struct dst_entry *dst = skb_dst(skb); diff --git a/include/net/route.h b/include/net/route.h index d538e6d..2762c00 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -227,6 +227,9 @@ struct in_ifaddr; void fib_add_ifaddr(struct in_ifaddr *); void fib_del_ifaddr(struct in_ifaddr *, struct in_ifaddr *); +void rt_add_uncached_list(struct rtable *rt); +void rt_del_uncached_list(struct rtable *rt); + static inline void ip_rt_put(struct rtable *rt) { /* dst_release() accepts a NULL parameter. diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4e153b2..8a272bc 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1386,7 +1386,7 @@ struct uncached_list { static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list); -static void rt_add_uncached_list(struct rtable *rt) +void rt_add_uncached_list(struct rtable *rt) { struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list); @@ -1397,14 +1397,8 @@ static void rt_add_uncached_list(struct rtable *rt) spin_unlock_bh(&ul->lock); } -static void ipv4_dst_destroy(struct dst_entry *dst) +void rt_del_uncached_list(struct rtable *rt) { - struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst); - struct rtable *rt = (struct rtable *) dst; - - if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt)) - kfree(p); - if (!list_empty(&rt->rt_uncached)) { struct uncached_list *ul = rt->rt_uncached_list; @@ -1414,6 +1408,17 @@ static void ipv4_dst_destroy(struct dst_entry *dst) } } +static void ipv4_dst_destroy(struct dst_entry *dst) +{ + struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst); + struct rtable *rt = (struct rtable *)dst; + + if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt)) + kfree(p); + + rt_del_uncached_list(rt); +} + void rt_flush_dev(struct net_device *dev) { struct net *net = dev_net(dev); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 05017e2..8d33f7b 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -102,6 +102,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_pmtu = rt->rt_pmtu; xdst->u.rt.rt_table_id = rt->rt_table_id; INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); + rt_add_uncached_list(&xdst->u.rt); return 0; } @@ -241,7 +242,8 @@ static void xfrm4_dst_destroy(struct dst_entry *dst) struct xfrm_dst *xdst = (struct xfrm_dst *)dst; dst_destroy_metrics_generic(dst); - + if (xdst->u.rt.rt_uncached_list) + rt_del_uncached_list(&xdst->u.rt); xfrm_dst_destroy(xdst); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a560fb1..f04fafa 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -128,7 +128,7 @@ struct uncached_list { static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list); -static void rt6_uncached_list_add(struct rt6_info *rt) +void rt6_uncached_list_add(struct rt6_info *rt) { struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list); @@ -139,7 +139,7 @@ static void rt6_uncached_list_add(struct rt6_info *rt) spin_unlock_bh(&ul->lock); } -static void rt6_uncached_list_del(struct rt6_info *rt) +void rt6_uncached_list_del(struct rt6_info *rt) { if (!list_empty(&rt->rt6i_uncached)) { struct uncached_list *ul = rt->rt6i_uncached_list; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 885ade2..26b598f 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -113,6 +113,9 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway; xdst->u.rt6.rt6i_dst = rt->rt6i_dst; xdst->u.rt6.rt6i_src = rt->rt6i_src; + INIT_LIST_HEAD(&xdst->u.rt6.rt6i_uncached); + rt6_uncached_list_add(&xdst->u.rt6); + atomic_inc(&dev_net(dev)->ipv6.rt6_stats->fib_rt_uncache); return 0; } @@ -244,6 +247,8 @@ static void xfrm6_dst_destroy(struct dst_entry *dst) if (likely(xdst->u.rt6.rt6i_idev)) in6_dev_put(xdst->u.rt6.rt6i_idev); dst_destroy_metrics_generic(dst); + if (xdst->u.rt6.rt6i_uncached_list) + rt6_uncached_list_del(&xdst->u.rt6); xfrm_dst_destroy(xdst); }