diff mbox series

[RFC,v2,net-next,19/21] net/ipv6: separate handling of FIB entries from dst based routes

Message ID 20180319033622.16693-20-dsahern@gmail.com
State RFC, archived
Delegated to: David Miller
Headers show
Series net/ipv6: Separate data structures for FIB and data path | expand

Commit Message

David Ahern March 19, 2018, 3:36 a.m. UTC
Last step before flipping the data type for FIB entries:
- use fib6_info_alloc to create FIB entries in ip6_route_info_create
  and addrconf_dst_alloc
- use fib6_info_release in place of dst_release, ip6_rt_put and
  rt6_release
- remove the dst_hold before calling __ip6_ins_rt or ip6_del_rt
- when purging routes, drop per-cpu routes
- replace inc and dec of rt6i_ref with fib6_info_hold and fib6_info_release
- use rt->from since it points to the FIB entry
- drop references to exception bucket, fib6_metrics and per-cpu from
  dst entries (those are relevant for fib entries only)

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/ip6_fib.h   |   4 +-
 include/net/ip6_route.h |   3 +-
 net/ipv6/addrconf.c     |  18 +++--
 net/ipv6/anycast.c      |   7 +-
 net/ipv6/ip6_fib.c      |  50 ++++++++------
 net/ipv6/ip6_output.c   |   3 +-
 net/ipv6/ndisc.c        |   6 +-
 net/ipv6/route.c        | 171 +++++++++++++++++-------------------------------
 8 files changed, 110 insertions(+), 152 deletions(-)

Comments

Ido Schimmel March 24, 2018, 2:31 p.m. UTC | #1
On Sun, Mar 18, 2018 at 08:36:20PM -0700, David Ahern wrote:
> @@ -405,18 +383,9 @@ static void ip6_dst_destroy(struct dst_entry *dst)
>  		rt->rt6i_idev = NULL;
>  		in6_dev_put(idev);
>  	}
> -	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
> -	if (bucket) {
> -		rt->rt6i_exception_bucket = NULL;
> -		kfree(bucket);
> -	}
> -
> -	m = rt->fib6_metrics;
> -	if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
> -		kfree(m);

You remove this...

>  
>  	rt->from = NULL;
> -	dst_release(&from->dst);
> +	fib6_info_release(from);

Yet fib6_info_release() doesn't take care of it (unlike the IPv4
equivalent), which means you're leaking the metrics.

>  }
David Ahern March 24, 2018, 3:31 p.m. UTC | #2
On 3/24/18 8:31 AM, Ido Schimmel wrote:
> On Sun, Mar 18, 2018 at 08:36:20PM -0700, David Ahern wrote:
>> @@ -405,18 +383,9 @@ static void ip6_dst_destroy(struct dst_entry *dst)
>>  		rt->rt6i_idev = NULL;
>>  		in6_dev_put(idev);
>>  	}
>> -	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
>> -	if (bucket) {
>> -		rt->rt6i_exception_bucket = NULL;
>> -		kfree(bucket);
>> -	}
>> -
>> -	m = rt->fib6_metrics;
>> -	if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
>> -		kfree(m);
> 
> You remove this...
> 
>>  
>>  	rt->from = NULL;
>> -	dst_release(&from->dst);
>> +	fib6_info_release(from);
> 
> Yet fib6_info_release() doesn't take care of it (unlike the IPv4
> equivalent), which means you're leaking the metrics.
> 
>>  }

ok, I'll take a look. I thought I verified both paths (fib6_info and
dst) were freeing the metrics.
Ido Schimmel March 24, 2018, 4:02 p.m. UTC | #3
On Sat, Mar 24, 2018 at 09:31:02AM -0600, David Ahern wrote:
> On 3/24/18 8:31 AM, Ido Schimmel wrote:
> > On Sun, Mar 18, 2018 at 08:36:20PM -0700, David Ahern wrote:
> >> @@ -405,18 +383,9 @@ static void ip6_dst_destroy(struct dst_entry *dst)
> >>  		rt->rt6i_idev = NULL;
> >>  		in6_dev_put(idev);
> >>  	}
> >> -	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
> >> -	if (bucket) {
> >> -		rt->rt6i_exception_bucket = NULL;
> >> -		kfree(bucket);
> >> -	}
> >> -
> >> -	m = rt->fib6_metrics;
> >> -	if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
> >> -		kfree(m);
> > 
> > You remove this...
> > 
> >>  
> >>  	rt->from = NULL;
> >> -	dst_release(&from->dst);
> >> +	fib6_info_release(from);
> > 
> > Yet fib6_info_release() doesn't take care of it (unlike the IPv4
> > equivalent), which means you're leaking the metrics.
> > 
> >>  }
> 
> ok, I'll take a look. I thought I verified both paths (fib6_info and
> dst) were freeing the metrics.

I get this from kmemleak (applied your patchset on top of fe2d55d295cf):

unreferenced object 0xffff88004e2c16c8 (size 96):
comm "systemd-network", pid 1255, jiffies 4295166424 (age 957.858s)
hex dump (first 32 bytes):
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
backtrace:
ip6_route_info_create (/net/ipv6/route.c:2849) 
ip6_route_add (/net/ipv6/route.c:2975) 
inet6_rtm_newroute (/net/ipv6/route.c:4357) 
rtnetlink_rcv_msg (/net/core/rtnetlink.c:4643) 
netlink_rcv_skb (/net/netlink/af_netlink.c:2445) 
netlink_unicast (/net/netlink/af_netlink.c:1309 /net/netlink/af_netlink.c:1334) 
netlink_sendmsg (/net/netlink/af_netlink.c:1897) 
sock_sendmsg (/net/socket.c:630 /net/socket.c:639) 
SYSC_sendto (/net/socket.c:1748) 
do_syscall_64 (/arch/x86/entry/common.c:287) 
entry_SYSCALL_64_after_hwframe (/arch/x86/entry/entry_64.S:239) 
0xffffffffffffffff (/./include/asm-generic/sections.h:42)
David Ahern March 25, 2018, 2:49 p.m. UTC | #4
On 3/24/18 10:02 AM, Ido Schimmel wrote:
>>
>> ok, I'll take a look. I thought I verified both paths (fib6_info and
>> dst) were freeing the metrics.
> 
> I get this from kmemleak (applied your patchset on top of fe2d55d295cf):
> 
> unreferenced object 0xffff88004e2c16c8 (size 96):
> comm "systemd-network", pid 1255, jiffies 4295166424 (age 957.858s)
> hex dump (first 32 bytes):
> 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
> 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
> backtrace:
> ip6_route_info_create (/net/ipv6/route.c:2849) 
> ip6_route_add (/net/ipv6/route.c:2975) 
> inet6_rtm_newroute (/net/ipv6/route.c:4357) 
> rtnetlink_rcv_msg (/net/core/rtnetlink.c:4643) 
> netlink_rcv_skb (/net/netlink/af_netlink.c:2445) 
> netlink_unicast (/net/netlink/af_netlink.c:1309 /net/netlink/af_netlink.c:1334) 
> netlink_sendmsg (/net/netlink/af_netlink.c:1897) 
> sock_sendmsg (/net/socket.c:630 /net/socket.c:639) 
> SYSC_sendto (/net/socket.c:1748) 
> do_syscall_64 (/arch/x86/entry/common.c:287) 
> entry_SYSCALL_64_after_hwframe (/arch/x86/entry/entry_64.S:239) 
> 0xffffffffffffffff (/./include/asm-generic/sections.h:42) 
> 

Thanks for confirming. I'll take care of it.
diff mbox series

Patch

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 630392ae12d8..6c3d92bb3459 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -314,9 +314,7 @@  static inline u32 rt6_get_cookie(const struct rt6_info *rt)
 
 	if (rt->rt6i_flags & RTF_PCPU ||
 	    (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
-		rt = rt->from;
-
-	rt6_get_cookie_safe(rt, &cookie);
+		rt6_get_cookie_safe(rt->from, &cookie);
 
 	return cookie;
 }
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index d722fa843f31..a06931e529e1 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -114,8 +114,7 @@  static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
 				      unsigned int prefs,
 				      struct in6_addr *saddr)
 {
-	struct inet6_dev *idev =
-			rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
+	struct inet6_dev *idev = rt ? rt->rt6i_idev : NULL;
 	int err = 0;
 
 	if (rt && rt->rt6i_prefsrc.plen)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index de485b0a90d5..7dbd44958cd4 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -927,7 +927,6 @@  void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
 		pr_warn("Freeing alive inet6 address %p\n", ifp);
 		return;
 	}
-	ip6_rt_put(ifp->rt);
 
 	kfree_rcu(ifp, rcu);
 }
@@ -1114,8 +1113,8 @@  ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 	inet6addr_notifier_call_chain(NETDEV_UP, ifa);
 out:
 	if (unlikely(err < 0)) {
-		if (rt)
-			ip6_rt_put(rt);
+		fib6_info_release(rt);
+
 		if (ifa) {
 			if (ifa->idev)
 				in6_dev_put(ifa->idev);
@@ -1203,7 +1202,7 @@  cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_r
 		else {
 			if (!(rt->rt6i_flags & RTF_EXPIRES))
 				fib6_set_expires(rt, expires);
-			ip6_rt_put(rt);
+			fib6_info_release(rt);
 		}
 	}
 }
@@ -2387,8 +2386,7 @@  static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
 			continue;
 		if ((rt->rt6i_flags & noflags) != 0)
 			continue;
-		if (!dst_hold_safe(&rt->dst))
-			rt = NULL;
+		fib6_info_hold(rt);
 		break;
 	}
 out:
@@ -2700,7 +2698,7 @@  void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
 			addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
 					      dev, expires, flags, GFP_ATOMIC);
 		}
-		ip6_rt_put(rt);
+		fib6_info_release(rt);
 	}
 
 	/* Try to figure out our local address for this prefix */
@@ -3369,7 +3367,7 @@  static int fixup_permanent_addr(struct net *net,
 		ifp->rt = rt;
 		spin_unlock(&ifp->lock);
 
-		ip6_rt_put(prev);
+		fib6_info_release(prev);
 	}
 
 	if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) {
@@ -5653,8 +5651,8 @@  static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 				ip6_del_rt(net, rt);
 		}
 		if (ifp->rt) {
-			if (dst_hold_safe(&ifp->rt->dst))
-				ip6_del_rt(net, ifp->rt);
+			ip6_del_rt(net, ifp->rt);
+			ifp->rt = NULL;
 		}
 		rt_genid_bump_ipv6(net);
 		break;
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 1d9ccc815e0b..37b1a5805905 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -213,7 +213,7 @@  static void aca_put(struct ifacaddr6 *ac)
 {
 	if (refcount_dec_and_test(&ac->aca_refcnt)) {
 		in6_dev_put(ac->aca_idev);
-		dst_release(&ac->aca_rt->dst);
+		fib6_info_release(ac->aca_rt);
 		kfree(ac);
 	}
 }
@@ -231,6 +231,7 @@  static struct ifacaddr6 *aca_alloc(struct rt6_info *rt,
 	aca->aca_addr = *addr;
 	in6_dev_hold(idev);
 	aca->aca_idev = idev;
+	fib6_info_hold(rt);
 	aca->aca_rt = rt;
 	aca->aca_users = 1;
 	/* aca_tstamp should be updated upon changes */
@@ -274,7 +275,7 @@  int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
 	}
 	aca = aca_alloc(rt, addr);
 	if (!aca) {
-		ip6_rt_put(rt);
+		fib6_info_release(rt);
 		err = -ENOMEM;
 		goto out;
 	}
@@ -330,7 +331,6 @@  int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
 	write_unlock_bh(&idev->lock);
 	addrconf_leave_solict(idev, &aca->aca_addr);
 
-	dst_hold(&aca->aca_rt->dst);
 	ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
 
 	aca_put(aca);
@@ -358,7 +358,6 @@  void ipv6_ac_destroy_dev(struct inet6_dev *idev)
 
 		addrconf_leave_solict(idev, &aca->aca_addr);
 
-		dst_hold(&aca->aca_rt->dst);
 		ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
 
 		aca_put(aca);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index fe3ddbe36af4..0a3ad574318a 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -714,7 +714,7 @@  static struct fib6_node *fib6_add_1(struct net *net,
 			/* clean up an intermediate node */
 			if (!(fn->fn_flags & RTN_RTINFO)) {
 				RCU_INIT_POINTER(fn->leaf, NULL);
-				rt6_release(leaf);
+				fib6_info_release(leaf);
 			/* remove null_entry in the root node */
 			} else if (fn->fn_flags & RTN_TL_ROOT &&
 				   rcu_access_pointer(fn->leaf) ==
@@ -898,12 +898,32 @@  static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
 			if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) {
 				new_leaf = fib6_find_prefix(net, table, fn);
 				atomic_inc(&new_leaf->rt6i_ref);
+
 				rcu_assign_pointer(fn->leaf, new_leaf);
-				rt6_release(rt);
+				fib6_info_release(rt);
 			}
 			fn = rcu_dereference_protected(fn->parent,
 				    lockdep_is_held(&table->tb6_lock));
 		}
+
+		if (rt->rt6i_pcpu) {
+			int cpu;
+
+			/* release the reference to this fib entry from
+			 * all of its cached pcpu routes
+			 */
+			for_each_possible_cpu(cpu) {
+				struct rt6_info **ppcpu_rt;
+				struct rt6_info *pcpu_rt;
+
+				ppcpu_rt = per_cpu_ptr(rt->rt6i_pcpu, cpu);
+				pcpu_rt = *ppcpu_rt;
+				if (pcpu_rt) {
+					fib6_info_release(pcpu_rt->from);
+					pcpu_rt->from = NULL;
+				}
+			}
+		}
 	}
 }
 
@@ -1090,7 +1110,7 @@  static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 		fib6_purge_rt(iter, fn, info->nl_net);
 		if (rcu_access_pointer(fn->rr_ptr) == iter)
 			fn->rr_ptr = NULL;
-		rt6_release(iter);
+		fib6_info_release(iter);
 
 		if (nsiblings) {
 			/* Replacing an ECMP route, remove all siblings */
@@ -1106,7 +1126,7 @@  static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 					fib6_purge_rt(iter, fn, info->nl_net);
 					if (rcu_access_pointer(fn->rr_ptr) == iter)
 						fn->rr_ptr = NULL;
-					rt6_release(iter);
+					fib6_info_release(iter);
 					nsiblings--;
 					info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
 				} else {
@@ -1174,9 +1194,6 @@  int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 	int replace_required = 0;
 	int sernum = fib6_new_sernum(info->nl_net);
 
-	if (WARN_ON_ONCE(!atomic_read(&rt->dst.__refcnt)))
-		return -EINVAL;
-
 	if (info->nlh) {
 		if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
 			allow_create = 0;
@@ -1291,7 +1308,7 @@  int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 			if (pn_leaf == rt) {
 				pn_leaf = NULL;
 				RCU_INIT_POINTER(pn->leaf, NULL);
-				atomic_dec(&rt->rt6i_ref);
+				fib6_info_release(rt);
 			}
 			if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
 				pn_leaf = fib6_find_prefix(info->nl_net, table,
@@ -1303,7 +1320,7 @@  int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 					    info->nl_net->ipv6.fib6_null_entry;
 				}
 #endif
-				atomic_inc(&pn_leaf->rt6i_ref);
+				fib6_info_hold(pn_leaf);
 				rcu_assign_pointer(pn->leaf, pn_leaf);
 			}
 		}
@@ -1325,10 +1342,6 @@  int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 	     (fn->fn_flags & RTN_TL_ROOT &&
 	      !rcu_access_pointer(fn->leaf))))
 		fib6_repair_tree(info->nl_net, table, fn);
-	/* Always release dst as dst->__refcnt is guaranteed
-	 * to be taken before entering this function
-	 */
-	dst_release_immediate(&rt->dst);
 	return err;
 }
 
@@ -1628,7 +1641,7 @@  static struct fib6_node *fib6_repair_tree(struct net *net,
 				new_fn_leaf = net->ipv6.fib6_null_entry;
 			}
 #endif
-			atomic_inc(&new_fn_leaf->rt6i_ref);
+			fib6_info_hold(new_fn_leaf);
 			rcu_assign_pointer(fn->leaf, new_fn_leaf);
 			return pn;
 		}
@@ -1684,7 +1697,7 @@  static struct fib6_node *fib6_repair_tree(struct net *net,
 			return pn;
 
 		RCU_INIT_POINTER(pn->leaf, NULL);
-		rt6_release(pn_leaf);
+		fib6_info_release(pn_leaf);
 		fn = pn;
 	}
 }
@@ -1754,7 +1767,7 @@  static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
 	call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL);
 	if (!info->skip_notify)
 		inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
-	rt6_release(rt);
+	fib6_info_release(rt);
 }
 
 /* Need to own table->tb6_lock */
@@ -2253,9 +2266,8 @@  static int ipv6_route_seq_show(struct seq_file *seq, void *v)
 
 	dev = rt->fib6_nh.nh_dev;
 	seq_printf(seq, " %08x %08x %08x %08x %8s\n",
-		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
-		   rt->dst.__use, rt->rt6i_flags,
-		   dev ? dev->name : "");
+		   rt->rt6i_metric, atomic_read(&rt->rt6i_ref), 0,
+		   rt->rt6i_flags, dev ? dev->name : "");
 	iter->w.leaf = NULL;
 	return 0;
 }
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 2c7f09c3c39e..95c8b1ef2803 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -969,7 +969,8 @@  static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
 		if (!had_dst)
 			*dst = ip6_route_output(net, sk, fl6);
 		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
-		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
+		err = ip6_route_get_saddr(net, rt ? rt->from : NULL,
+					  &fl6->daddr,
 					  sk ? inet6_sk(sk)->srcprefs : 0,
 					  &fl6->saddr);
 		if (err)
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 9ae194cb400a..c22670f56b47 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1283,7 +1283,7 @@  static void ndisc_router_discovery(struct sk_buff *skb)
 			ND_PRINTK(0, err,
 				  "RA: %s got default router without neighbour\n",
 				  __func__);
-			ip6_rt_put(rt);
+			fib6_info_release(rt);
 			return;
 		}
 	}
@@ -1313,7 +1313,7 @@  static void ndisc_router_discovery(struct sk_buff *skb)
 			ND_PRINTK(0, err,
 				  "RA: %s got default router without neighbour\n",
 				  __func__);
-			ip6_rt_put(rt);
+			fib6_info_release(rt);
 			return;
 		}
 		neigh->flags |= NTF_ROUTER;
@@ -1499,7 +1499,7 @@  static void ndisc_router_discovery(struct sk_buff *skb)
 		ND_PRINTK(2, warn, "RA: invalid RA options\n");
 	}
 out:
-	ip6_rt_put(rt);
+	fib6_info_release(rt);
 	if (neigh)
 		neigh_release(neigh);
 }
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 8eda7d09729b..cd4210a58ab8 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -351,13 +351,11 @@  static void rt6_info_init(struct rt6_info *rt)
 	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
 	INIT_LIST_HEAD(&rt->rt6i_siblings);
 	INIT_LIST_HEAD(&rt->rt6i_uncached);
-	rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
 }
 
 /* allocate dst with ip6_dst_ops */
-static struct rt6_info *__ip6_dst_alloc(struct net *net,
-					struct net_device *dev,
-					int flags)
+struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
+			       int flags)
 {
 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
 					1, DST_OBSOLETE_FORCE_CHK, flags);
@@ -369,35 +367,15 @@  static struct rt6_info *__ip6_dst_alloc(struct net *net,
 
 	return rt;
 }
-
-struct rt6_info *ip6_dst_alloc(struct net *net,
-			       struct net_device *dev,
-			       int flags)
-{
-	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
-
-	if (rt) {
-		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
-		if (!rt->rt6i_pcpu) {
-			dst_release_immediate(&rt->dst);
-			return NULL;
-		}
-	}
-
-	return rt;
-}
 EXPORT_SYMBOL(ip6_dst_alloc);
 
 static void ip6_dst_destroy(struct dst_entry *dst)
 {
 	struct rt6_info *rt = (struct rt6_info *)dst;
-	struct rt6_exception_bucket *bucket;
 	struct rt6_info *from = rt->from;
 	struct inet6_dev *idev;
-	struct dst_metrics *m;
 
 	dst_destroy_metrics_generic(dst);
-	free_percpu(rt->rt6i_pcpu);
 	rt6_uncached_list_del(rt);
 
 	idev = rt->rt6i_idev;
@@ -405,18 +383,9 @@  static void ip6_dst_destroy(struct dst_entry *dst)
 		rt->rt6i_idev = NULL;
 		in6_dev_put(idev);
 	}
-	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
-	if (bucket) {
-		rt->rt6i_exception_bucket = NULL;
-		kfree(bucket);
-	}
-
-	m = rt->fib6_metrics;
-	if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
-		kfree(m);
 
 	rt->from = NULL;
-	dst_release(&from->dst);
+	fib6_info_release(from);
 }
 
 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -891,7 +860,7 @@  int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 		else
 			fib6_set_expires(rt, jiffies + HZ * lifetime);
 
-		ip6_rt_put(rt);
+		fib6_info_release(rt);
 	}
 	return 0;
 }
@@ -1010,11 +979,9 @@  static void ip6_rt_init_dst(struct rt6_info *rt, struct rt6_info *ort)
 
 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
 {
-	BUG_ON(from->from);
-
 	rt->rt6i_flags &= ~RTF_EXPIRES;
-	if (dst_hold_safe(&from->dst))
-		rt->from = from;
+	fib6_info_hold(from);
+	rt->from = from;
 	dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
 	if (from->fib6_metrics != &dst_default_metrics) {
 		rt->dst._metrics |= DST_METRICS_REFCOUNTED;
@@ -1085,7 +1052,7 @@  static struct rt6_info *ip6_create_rt_rcu(struct rt6_info *rt)
 	struct rt6_info *nrt;
 
 	dev = ip6_rt_get_dev_rcu(rt);
-	nrt = __ip6_dst_alloc(dev_net(dev), dev, flags);
+	nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
 	if (nrt)
 		ip6_rt_copy_init(nrt, rt);
 
@@ -1194,8 +1161,6 @@  int ip6_ins_rt(struct net *net, struct rt6_info *rt)
 {
 	struct nl_info info = {	.nl_net = net, };
 
-	/* Hold dst to account for the reference from the fib6 tree */
-	dst_hold(&rt->dst);
 	return __ip6_ins_rt(rt, &info, NULL);
 }
 
@@ -1212,7 +1177,7 @@  static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
 
 	rcu_read_lock();
 	dev = ip6_rt_get_dev_rcu(ort);
-	rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
+	rt = ip6_dst_alloc(dev_net(dev), dev, 0);
 	rcu_read_unlock();
 	if (!rt)
 		return NULL;
@@ -1247,7 +1212,7 @@  static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
 
 	rcu_read_lock();
 	dev = ip6_rt_get_dev_rcu(rt);
-	pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, flags);
+	pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
 	rcu_read_unlock();
 	if (!pcpu_rt)
 		return NULL;
@@ -1308,7 +1273,7 @@  static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
 	net = dev_net(rt6_ex->rt6i->dst.dev);
 	rt6_ex->rt6i->rt6i_node = NULL;
 	hlist_del_rcu(&rt6_ex->hlist);
-	rt6_release(rt6_ex->rt6i);
+	ip6_rt_put(rt6_ex->rt6i);
 	kfree_rcu(rt6_ex, rcu);
 	WARN_ON_ONCE(!bucket->depth);
 	bucket->depth--;
@@ -1874,17 +1839,11 @@  struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 
 		struct rt6_info *uncached_rt;
 
-		if (ip6_hold_safe(net, &f6i, true)) {
-			dst_use_noref(&f6i->dst, jiffies);
-		} else {
-			rcu_read_unlock();
-			uncached_rt = f6i;
-			goto uncached_rt_out;
-		}
+		fib6_info_hold(f6i);
 		rcu_read_unlock();
 
 		uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
-		dst_release(&rt->dst);
+		fib6_info_release(f6i);
 
 		if (uncached_rt) {
 			/* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
@@ -1897,7 +1856,6 @@  struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 			dst_hold(&uncached_rt->dst);
 		}
 
-uncached_rt_out:
 		trace_fib6_table_lookup(net, uncached_rt, table, fl6);
 		return uncached_rt;
 
@@ -1906,24 +1864,12 @@  struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 
 		struct rt6_info *pcpu_rt;
 
-		dst_use_noref(&f6i->dst, jiffies);
 		local_bh_disable();
 		pcpu_rt = rt6_get_pcpu_route(f6i);
 
-		if (!pcpu_rt) {
-			/* atomic_inc_not_zero() is needed when using rcu */
-			if (atomic_inc_not_zero(&f6i->rt6i_ref)) {
-				/* No dst_hold() on rt is needed because grabbing
-				 * rt->rt6i_ref makes sure rt can't be released.
-				 */
-				pcpu_rt = rt6_make_pcpu_route(net, f6i);
-				rt6_release(f6i);
-			} else {
-				/* rt is already removed from tree */
-				pcpu_rt = net->ipv6.ip6_null_entry;
-				dst_hold(&pcpu_rt->dst);
-			}
-		}
+		if (!pcpu_rt)
+			pcpu_rt = rt6_make_pcpu_route(net, f6i);
+
 		local_bh_enable();
 		rcu_read_unlock();
 		trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
@@ -2160,11 +2106,26 @@  struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
  *	Destination cache support functions
  */
 
+static bool fib6_check(struct rt6_info *f6i, u32 cookie)
+{
+	u32 rt_cookie = 0;
+
+	if ((f6i && !rt6_get_cookie_safe(f6i, &rt_cookie)) ||
+	     rt_cookie != cookie)
+		return false;
+
+	if (fib6_check_expired(f6i))
+		return false;
+
+	return true;
+}
+
 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
 {
 	u32 rt_cookie = 0;
 
-	if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
+	if ((rt->from && !rt6_get_cookie_safe(rt->from, &rt_cookie)) ||
+	    rt_cookie != cookie)
 		return NULL;
 
 	if (rt6_check_expired(rt))
@@ -2177,7 +2138,7 @@  static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
 {
 	if (!__rt6_check_expired(rt) &&
 	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
-	    rt6_check(rt->from, cookie))
+	    fib6_check(rt->from, cookie))
 		return &rt->dst;
 	else
 		return NULL;
@@ -2208,7 +2169,7 @@  static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
 	if (rt) {
 		if (rt->rt6i_flags & RTF_CACHE) {
 			if (rt6_check_expired(rt)) {
-				ip6_del_rt(dev_net(dst->dev), rt);
+				rt6_remove_exception_rt(rt);
 				dst = NULL;
 			}
 		} else {
@@ -2229,12 +2190,12 @@  static void ip6_link_failure(struct sk_buff *skb)
 	if (rt) {
 		if (rt->rt6i_flags & RTF_CACHE) {
 			if (dst_hold_safe(&rt->dst))
-				ip6_del_rt(dev_net(rt->dst.dev), rt);
-		} else {
+				rt6_remove_exception_rt(rt);
+		} else if (rt->from) {
 			struct fib6_node *fn;
 
 			rcu_read_lock();
-			fn = rcu_dereference(rt->rt6i_node);
+			fn = rcu_dereference(rt->from->rt6i_node);
 			if (fn && (rt->rt6i_flags & RTF_DEFAULT))
 				fn->fn_sernum = -1;
 			rcu_read_unlock();
@@ -2899,13 +2860,13 @@  static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 	if (!table)
 		goto out;
 
-	rt = ip6_dst_alloc(net, NULL,
-			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
-
-	if (!rt) {
-		err = -ENOMEM;
+	err = -ENOMEM;
+	rt = fib6_info_alloc(gfp_flags);
+	if (!rt)
 		goto out;
-	}
+
+	if (cfg->fc_flags & RTF_ADDRCONF)
+		rt->dst_nocount = true;
 
 	err = ip6_convert_metrics(net, rt, cfg);
 	if (err < 0)
@@ -2979,7 +2940,7 @@  static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 		if (err)
 			goto out;
 
-		rt->fib6_nh.nh_gw = rt->rt6i_gateway = cfg->fc_gateway;
+		rt->fib6_nh.nh_gw = cfg->fc_gateway;
 	}
 
 	err = -ENODEV;
@@ -3010,7 +2971,7 @@  static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 	    !netif_carrier_ok(dev))
 		rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
 	rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
-	rt->fib6_nh.nh_dev = rt->dst.dev = dev;
+	rt->fib6_nh.nh_dev = dev;
 	rt->rt6i_idev = idev;
 	rt->rt6i_table = table;
 
@@ -3022,9 +2983,8 @@  static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
 		dev_put(dev);
 	if (idev)
 		in6_dev_put(idev);
-	if (rt)
-		dst_release_immediate(&rt->dst);
 
+	fib6_info_release(rt);
 	return ERR_PTR(err);
 }
 
@@ -3039,6 +2999,7 @@  int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
 		return PTR_ERR(rt);
 
 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
+	fib6_info_release(rt);
 
 	return err;
 }
@@ -3060,7 +3021,7 @@  static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
 	spin_unlock_bh(&table->tb6_lock);
 
 out:
-	ip6_rt_put(rt);
+	fib6_info_release(rt);
 	return err;
 }
 
@@ -3114,7 +3075,7 @@  static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
 out_unlock:
 	spin_unlock_bh(&table->tb6_lock);
 out_put:
-	ip6_rt_put(rt);
+	fib6_info_release(rt);
 
 	if (skb) {
 		rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
@@ -3185,8 +3146,7 @@  static int ip6_route_del(struct fib6_config *cfg,
 				continue;
 			if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
 				continue;
-			if (!dst_hold_safe(&rt->dst))
-				break;
+			fib6_info_hold(rt);
 			rcu_read_unlock();
 
 			/* if gateway was specified only delete the one hop */
@@ -3454,12 +3414,9 @@  static void __rt6_purge_dflt_routers(struct net *net,
 	for_each_fib6_node_rt_rcu(&table->tb6_root) {
 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
 		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
-			if (dst_hold_safe(&rt->dst)) {
-				rcu_read_unlock();
-				ip6_del_rt(net, rt);
-			} else {
-				rcu_read_unlock();
-			}
+			fib6_info_hold(rt);
+			rcu_read_unlock();
+			ip6_del_rt(net, rt);
 			goto restart;
 		}
 	}
@@ -3609,7 +3566,7 @@  struct rt6_info *addrconf_dst_alloc(struct net *net,
 	struct net_device *dev = idev->dev;
 	struct rt6_info *rt;
 
-	rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
+	rt = fib6_info_alloc(gfp_flags);
 	if (!rt)
 		return ERR_PTR(-ENOMEM);
 
@@ -3630,8 +3587,8 @@  struct rt6_info *addrconf_dst_alloc(struct net *net,
 	}
 
 	rt->fib6_nh.nh_gw = *addr;
+	dev_hold(dev);
 	rt->fib6_nh.nh_dev = dev;
-	rt->rt6i_gateway  = *addr;
 	rt->rt6i_dst.addr = *addr;
 	rt->rt6i_dst.plen = 128;
 	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
@@ -4276,7 +4233,7 @@  static int ip6_route_multipath_add(struct fib6_config *cfg,
 		err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
 					    rt, &r_cfg);
 		if (err) {
-			dst_release_immediate(&rt->dst);
+			fib6_info_release(rt);
 			goto cleanup;
 		}
 
@@ -4293,6 +4250,8 @@  static int ip6_route_multipath_add(struct fib6_config *cfg,
 	list_for_each_entry(nh, &rt6_nh_list, next) {
 		rt_last = nh->rt6_info;
 		err = __ip6_ins_rt(nh->rt6_info, info, extack);
+		fib6_info_release(nh->rt6_info);
+
 		/* save reference to first route for notification */
 		if (!rt_notif && !err)
 			rt_notif = nh->rt6_info;
@@ -4340,7 +4299,7 @@  static int ip6_route_multipath_add(struct fib6_config *cfg,
 cleanup:
 	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
 		if (nh->rt6_info)
-			dst_release_immediate(&nh->rt6_info->dst);
+			fib6_info_release(nh->rt6_info);
 		list_del(&nh->next);
 		kfree(nh);
 	}
@@ -4763,14 +4722,6 @@  static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 		goto errout;
 	}
 
-	if (fibmatch && rt->from) {
-		struct rt6_info *ort = rt->from;
-
-		dst_hold(&ort->dst);
-		ip6_rt_put(rt);
-		rt = ort;
-	}
-
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb) {
 		ip6_rt_put(rt);
@@ -4780,12 +4731,12 @@  static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 
 	skb_dst_set(skb, &rt->dst);
 	if (fibmatch)
-		err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, iif,
+		err = rt6_fill_node(net, skb, rt->from, NULL, NULL, NULL, iif,
 				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
 				    nlh->nlmsg_seq, 0);
 	else
-		err = rt6_fill_node(net, skb, rt, dst, &fl6.daddr, &fl6.saddr,
-				    iif, RTM_NEWROUTE,
+		err = rt6_fill_node(net, skb, rt->from, dst,
+				    &fl6.daddr, &fl6.saddr, iif, RTM_NEWROUTE,
 				    NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
 				    0);
 	if (err < 0) {