diff mbox series

[RFC,net-next,16/20] net/ipv6: Cleanup exception route handling

Message ID 20180225194730.30063-17-dsahern@gmail.com
State RFC, archived
Delegated to: David Miller
Headers show
Series net/ipv6: Separate data structures for FIB and data path | expand

Commit Message

David Ahern Feb. 25, 2018, 7:47 p.m. UTC
IPv6 FIB will only contain FIB entries with exception routes added to
the FIB entry. Remove CACHE and dst checks from fib6 add and delete since
they can never happen once the data type changes.

Fixup the lookup functions to use a f6i name for fib lookups and retain
the current rt name for return variables.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 net/ipv6/ip6_fib.c |  16 +------
 net/ipv6/route.c   | 122 ++++++++++++++++++++++++++++++-----------------------
 2 files changed, 71 insertions(+), 67 deletions(-)

Comments

David Miller Feb. 26, 2018, 7:27 p.m. UTC | #1
From: David Ahern <dsahern@gmail.com>
Date: Sun, 25 Feb 2018 11:47:26 -0800

> IPv6 FIB will only contain FIB entries with exception routes added to
> the FIB entry. Remove CACHE and dst checks from fib6 add and delete since
> they can never happen once the data type changes.
> 
> Fixup the lookup functions to use a f6i name for fib lookups and retain
> the current rt name for return variables.
> 
> Signed-off-by: David Ahern <dsahern@gmail.com>

Hard to interpret what you mean in that second paragraph.

Do you mean that this invariant holds after patch #19 only?  Then you
really can't make these simplifications until then in order to keep
the patch series bisectable.

Otherwise you mean something else and I'd like to know what that is.

Either way, the commit message wording could be improved since even
a dolt like me got confused :-)
David Ahern Feb. 26, 2018, 8:25 p.m. UTC | #2
On 2/26/18 12:27 PM, David Miller wrote:
> From: David Ahern <dsahern@gmail.com>
> Date: Sun, 25 Feb 2018 11:47:26 -0800
> 
>> IPv6 FIB will only contain FIB entries with exception routes added to
>> the FIB entry. Remove CACHE and dst checks from fib6 add and delete since
>> they can never happen once the data type changes.
>>
>> Fixup the lookup functions to use a f6i name for fib lookups and retain
>> the current rt name for return variables.
>>
>> Signed-off-by: David Ahern <dsahern@gmail.com>
> 
> Hard to interpret what you mean in that second paragraph.
> 
> Do you mean that this invariant holds after patch #19 only?  Then you
> really can't make these simplifications until then in order to keep
> the patch series bisectable.
> 
> Otherwise you mean something else and I'd like to know what that is.
> 
> Either way, the commit message wording could be improved since even
> a dolt like me got confused :-)
> 

By the 20th iteration the description of the last few patches got a
little thin. ;-)

The point of patches 1-18 is to make as small a change per patch that
leads up to patch 19 being nothing more than s/rt6_info/fib6_info/ is
select data paths. A struct rename and all of the related code paths
makes for a huge patch.

Anyways, because rt6_info is used for both FIB and dst the lookup
functions can overload the rt6_info variable usually called 'rt'. This
patch introduces a new 'f6i' variable name for the result of the FIB
lookup and keeps 'rt' as the dst based return variable. 'f6i' becomes a
fib6_info in patch 19 which is why it is introduced as 'struct rt6_info
*f6i' now; avoids the additional churn in patch 19.
David Miller Feb. 26, 2018, 8:29 p.m. UTC | #3
From: David Ahern <dsahern@gmail.com>
Date: Mon, 26 Feb 2018 13:25:37 -0700

> Anyways, because rt6_info is used for both FIB and dst the lookup
> functions can overload the rt6_info variable usually called 'rt'. This
> patch introduces a new 'f6i' variable name for the result of the FIB
> lookup and keeps 'rt' as the dst based return variable. 'f6i' becomes a
> fib6_info in patch 19 which is why it is introduced as 'struct rt6_info
> *f6i' now; avoids the additional churn in patch 19.

This I understand. :)

Adjust this commit message to include some of this text and I'm
happy.

BTW, I love patch #20, those edits must have been fun to make :-)
Wei Wang Feb. 26, 2018, 10:29 p.m. UTC | #4
On Sun, Feb 25, 2018 at 11:47 AM, David Ahern <dsahern@gmail.com> wrote:
> IPv6 FIB will only contain FIB entries with exception routes added to
> the FIB entry. Remove CACHE and dst checks from fib6 add and delete since
> they can never happen once the data type changes.
>
> Fixup the lookup functions to use a f6i name for fib lookups and retain
> the current rt name for return variables.
>
> Signed-off-by: David Ahern <dsahern@gmail.com>
> ---
>  net/ipv6/ip6_fib.c |  16 +------
>  net/ipv6/route.c   | 122 ++++++++++++++++++++++++++++++-----------------------
>  2 files changed, 71 insertions(+), 67 deletions(-)
>
> diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
> index 5b03f7e8d850..63a91db61749 100644
> --- a/net/ipv6/ip6_fib.c
> +++ b/net/ipv6/ip6_fib.c
> @@ -1046,7 +1046,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
>  static void fib6_start_gc(struct net *net, struct rt6_info *rt)
>  {
>         if (!timer_pending(&net->ipv6.ip6_fib_timer) &&
> -           (rt->rt6i_flags & (RTF_EXPIRES | RTF_CACHE)))
> +           (rt->rt6i_flags & RTF_EXPIRES))
>                 mod_timer(&net->ipv6.ip6_fib_timer,
>                           jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
>  }
> @@ -1097,8 +1097,6 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,

This rt here should be f6i?

>
>         if (WARN_ON_ONCE(!atomic_read(&rt->dst.__refcnt)))
>                 return -EINVAL;
> -       if (WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE))
> -               return -EINVAL;
>
>         if (info->nlh) {
>                 if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
> @@ -1622,8 +1620,6 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
>
>         RT6_TRACE("fib6_del_route\n");
>
> -       WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE);
> -
>         /* Unlink it */
>         *rtp = rt->rt6_next;

This rt here is also f6i right?

>         rt->rt6i_node = NULL;
> @@ -1692,21 +1688,11 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)

This rt here is also f6i right?

>         struct rt6_info __rcu **rtp;
>         struct rt6_info __rcu **rtp_next;
>
> -#if RT6_DEBUG >= 2
> -       if (rt->dst.obsolete > 0) {
> -               WARN_ON(fn);
> -               return -ENOENT;
> -       }
> -#endif
>         if (!fn || rt == net->ipv6.fib6_null_entry)
>                 return -ENOENT;
>
>         WARN_ON(!(fn->fn_flags & RTN_RTINFO));
>
> -       /* remove cached dst from exception table */
> -       if (rt->rt6i_flags & RTF_CACHE)
> -               return rt6_remove_exception_rt(rt);

Could you help delete rt6_remove_exception_rt() function? I don't
think it is used anymore.

> -
>         /*
>          *      Walk the leaf entries looking for ourself
>          */
> diff --git a/net/ipv6/route.c b/net/ipv6/route.c
> index 3ea60e932eb9..19b91c60ee55 100644
> --- a/net/ipv6/route.c
> +++ b/net/ipv6/route.c
> @@ -1094,35 +1094,36 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
>                                              struct fib6_table *table,
>                                              struct flowi6 *fl6, int flags)
>  {
> -       struct rt6_info *rt, *rt_cache;
> +       struct rt6_info *f6i;
>         struct fib6_node *fn;
> +       struct rt6_info *rt;
>
>         rcu_read_lock();
>         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
>  restart:
> -       rt = rcu_dereference(fn->leaf);
> -       if (!rt) {
> -               rt = net->ipv6.fib6_null_entry;
> +       f6i = rcu_dereference(fn->leaf);
> +       if (!f6i) {
> +               f6i = net->ipv6.fib6_null_entry;
>         } else {
> -               rt = rt6_device_match(net, rt, &fl6->saddr,
> +               f6i = rt6_device_match(net, f6i, &fl6->saddr,
>                                       fl6->flowi6_oif, flags);
> -               if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
> -                       rt = rt6_multipath_select(rt, fl6,
> +               if (f6i->rt6i_nsiblings && fl6->flowi6_oif == 0)
> +                       f6i = rt6_multipath_select(f6i, fl6,
>                                                   fl6->flowi6_oif, flags);
>         }
> -       if (rt == net->ipv6.fib6_null_entry) {
> +       if (f6i == net->ipv6.fib6_null_entry) {
>                 fn = fib6_backtrack(fn, &fl6->saddr);
>                 if (fn)
>                         goto restart;
>         }
> +
>         /* Search through exception table */
> -       rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
> -       if (rt_cache) {
> -               rt = rt_cache;
> +       rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
> +       if (rt) {
>                 if (ip6_hold_safe(net, &rt, true))
>                         dst_use_noref(&rt->dst, jiffies);
>         } else {
> -               rt = ip6_create_rt_rcu(rt);
> +               rt = ip6_create_rt_rcu(f6i);
>         }
>
>         rcu_read_unlock();
> @@ -1204,9 +1205,6 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
>          *      Clone the route.
>          */
>
> -       if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
> -               ort = ort->from;
> -
>         rcu_read_lock();
>         dev = ip6_rt_get_dev_rcu(ort);
>         rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
> @@ -1432,11 +1430,6 @@ static int rt6_insert_exception(struct rt6_info *nrt,

Could you help change the second parameter name from ort to f6i for
rt6_insert_exception()? I think that makes it more readable.

>         struct rt6_exception *rt6_ex;
>         int err = 0;
>
> -       /* ort can't be a cache or pcpu route */
> -       if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
> -               ort = ort->from;
> -       WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
> -
>         spin_lock_bh(&rt6_exception_lock);
>
>         if (ort->exception_bucket_flushed) {
> @@ -1815,7 +1808,8 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
>                                int oif, struct flowi6 *fl6, int flags)
>  {
>         struct fib6_node *fn, *saved_fn;
> -       struct rt6_info *rt, *rt_cache;
> +       struct rt6_info *f6i;
> +       struct rt6_info *rt;
>         int strict = 0;
>
>         strict |= flags & RT6_LOOKUP_F_IFACE;
> @@ -1832,10 +1826,10 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
>                 oif = 0;
>
>  redo_rt6_select:
> -       rt = rt6_select(net, fn, oif, strict);
> -       if (rt->rt6i_nsiblings)
> -               rt = rt6_multipath_select(rt, fl6, oif, strict);
> -       if (rt == net->ipv6.fib6_null_entry) {
> +       f6i = rt6_select(net, fn, oif, strict);
> +       if (f6i->rt6i_nsiblings)
> +               f6i = rt6_multipath_select(f6i, fl6, oif, strict);
> +       if (f6i == net->ipv6.fib6_null_entry) {
>                 fn = fib6_backtrack(fn, &fl6->saddr);
>                 if (fn)
>                         goto redo_rt6_select;
> @@ -1847,18 +1841,17 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
>                 }
>         }
>
> -       /*Search through exception table */
> -       rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
> -       if (rt_cache)
> -               rt = rt_cache;
> -
> -       if (rt == net->ipv6.fib6_null_entry) {
> +       if (f6i == net->ipv6.fib6_null_entry) {
>                 rt = net->ipv6.ip6_null_entry;
>                 rcu_read_unlock();
>                 dst_hold(&rt->dst);
>                 trace_fib6_table_lookup(net, rt, table, fl6);
>                 return rt;
> -       } else if (rt->rt6i_flags & RTF_CACHE) {
> +       }
> +
> +       /*Search through exception table */
> +       rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);

Could you help change the first paramter name from rt to f6i in the
function definition of rt6_find_cached_rt() as well?

> +       if (rt) {
>                 if (ip6_hold_safe(net, &rt, true))
>                         dst_use_noref(&rt->dst, jiffies);
>
> @@ -1866,7 +1859,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
>                 trace_fib6_table_lookup(net, rt, table, fl6);
>                 return rt;
>         } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
> -                           !(rt->rt6i_flags & RTF_GATEWAY))) {
> +                           !(f6i->rt6i_flags & RTF_GATEWAY))) {
>                 /* Create a RTF_CACHE clone which will not be
>                  * owned by the fib6 tree.  It is for the special case where
>                  * the daddr in the skb during the neighbor look-up is different
> @@ -1875,16 +1868,16 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
>
>                 struct rt6_info *uncached_rt;
>
> -               if (ip6_hold_safe(net, &rt, true)) {
> -                       dst_use_noref(&rt->dst, jiffies);
> +               if (ip6_hold_safe(net, &f6i, true)) {
> +                       dst_use_noref(&f6i->dst, jiffies);
>                 } else {
>                         rcu_read_unlock();
> -                       uncached_rt = rt;
> +                       uncached_rt = f6i;
>                         goto uncached_rt_out;
>                 }
>                 rcu_read_unlock();
>
> -               uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
> +               uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
>                 dst_release(&rt->dst);
>
>                 if (uncached_rt) {
> @@ -1907,18 +1900,18 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
>
>                 struct rt6_info *pcpu_rt;
>
> -               dst_use_noref(&rt->dst, jiffies);
> +               dst_use_noref(&f6i->dst, jiffies);
>                 local_bh_disable();
> -               pcpu_rt = rt6_get_pcpu_route(rt);
> +               pcpu_rt = rt6_get_pcpu_route(f6i);
>
>                 if (!pcpu_rt) {
>                         /* atomic_inc_not_zero() is needed when using rcu */
> -                       if (atomic_inc_not_zero(&rt->rt6i_ref)) {
> +                       if (atomic_inc_not_zero(&f6i->rt6i_ref)) {
>                                 /* No dst_hold() on rt is needed because grabbing
>                                  * rt->rt6i_ref makes sure rt can't be released.
>                                  */
> -                               pcpu_rt = rt6_make_pcpu_route(net, rt);
> -                               rt6_release(rt);
> +                               pcpu_rt = rt6_make_pcpu_route(net, f6i);
> +                               rt6_release(f6i);
>                         } else {
>                                 /* rt is already removed from tree */
>                                 pcpu_rt = net->ipv6.ip6_null_entry;
> @@ -2296,7 +2289,8 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
>                                              int flags)
>  {
>         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
> -       struct rt6_info *rt, *rt_cache;
> +       struct rt6_info *ret = NULL, *rt_cache;
> +       struct rt6_info *rt;
>         struct fib6_node *fn;
>
>         /* Get the "current" route for this destination and
> @@ -2335,7 +2329,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
>                         if (rt_cache &&
>                             ipv6_addr_equal(&rdfl->gateway,
>                                             &rt_cache->rt6i_gateway)) {
> -                               rt = rt_cache;
> +                               ret = rt_cache;
>                                 break;
>                         }
>                         continue;
> @@ -2346,7 +2340,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
>         if (!rt)
>                 rt = net->ipv6.fib6_null_entry;
>         else if (rt->rt6i_flags & RTF_REJECT) {
> -               rt = net->ipv6.ip6_null_entry;
> +               ret = net->ipv6.ip6_null_entry;
>                 goto out;
>         }
>
> @@ -2357,12 +2351,15 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
>         }
>
>  out:
> -       ip6_hold_safe(net, &rt, true);
> +       if (ret)
> +               dst_hold(&ret->dst);
> +       else
> +               ret = ip6_create_rt_rcu(rt);
>
>         rcu_read_unlock();
>
> -       trace_fib6_table_lookup(net, rt, table, fl6);
> -       return rt;
> +       trace_fib6_table_lookup(net, ret, table, fl6);
> +       return ret;
>  };
>
>  static struct dst_entry *ip6_route_redirect(struct net *net,
> @@ -3031,6 +3028,22 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
>         return err;
>  }
>
> +static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
> +{
> +       int rc = -ESRCH;
> +
> +       if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
> +               goto out;
> +
> +       if (cfg->fc_flags & RTF_GATEWAY &&
> +           !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
> +               goto out;
> +       if (dst_hold_safe(&rt->dst))
> +               rc = rt6_remove_exception_rt(rt);
> +out:
> +       return rc;
> +}
> +
>  static int ip6_route_del(struct fib6_config *cfg,
>                          struct netlink_ext_ack *extack)
>  {
> @@ -3055,11 +3068,16 @@ static int ip6_route_del(struct fib6_config *cfg,
>         if (fn) {
>                 for_each_fib6_node_rt_rcu(fn) {
>                         if (cfg->fc_flags & RTF_CACHE) {
> +                               int rc;
> +
>                                 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
>                                                               &cfg->fc_src);

Here "rt" should also be renamed to "f6i"? Probably need to update
for_each_fib6_node_rt_rcu() macro to use f6i instead of rt.

> -                               if (!rt_cache)
> -                                       continue;
> -                               rt = rt_cache;
> +                               if (rt_cache) {
> +                                       rc = ip6_del_cached_rt(rt_cache, cfg);
> +                                       if (rc != -ESRCH)
> +                                               return rc;
> +                               }
> +                               continue;
>                         }
>                         if (cfg->fc_ifindex &&
>                             (!rt->fib6_nh.nh_dev ||
> @@ -3176,7 +3194,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
>                                      NEIGH_UPDATE_F_ISROUTER)),
>                      NDISC_REDIRECT, &ndopts);
>
> -       nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
> +       nrt = ip6_rt_cache_alloc(rt->from, &msg->dest, NULL);
>                 goto out;
>
> --
> 2.11.0
>
David Ahern Feb. 26, 2018, 11:02 p.m. UTC | #5
On 2/26/18 3:29 PM, Wei Wang wrote:
> On Sun, Feb 25, 2018 at 11:47 AM, David Ahern <dsahern@gmail.com> wrote:
>> IPv6 FIB will only contain FIB entries with exception routes added to
>> the FIB entry. Remove CACHE and dst checks from fib6 add and delete since
>> they can never happen once the data type changes.
>>
>> Fixup the lookup functions to use a f6i name for fib lookups and retain
>> the current rt name for return variables.
>>
>> Signed-off-by: David Ahern <dsahern@gmail.com>
>> ---
>>  net/ipv6/ip6_fib.c |  16 +------
>>  net/ipv6/route.c   | 122 ++++++++++++++++++++++++++++++-----------------------
>>  2 files changed, 71 insertions(+), 67 deletions(-)
>>
>> diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
>> index 5b03f7e8d850..63a91db61749 100644
>> --- a/net/ipv6/ip6_fib.c
>> +++ b/net/ipv6/ip6_fib.c
>> @@ -1046,7 +1046,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
>>  static void fib6_start_gc(struct net *net, struct rt6_info *rt)
>>  {
>>         if (!timer_pending(&net->ipv6.ip6_fib_timer) &&
>> -           (rt->rt6i_flags & (RTF_EXPIRES | RTF_CACHE)))
>> +           (rt->rt6i_flags & RTF_EXPIRES))
>>                 mod_timer(&net->ipv6.ip6_fib_timer,
>>                           jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
>>  }
>> @@ -1097,8 +1097,6 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
> 
> This rt here should be f6i?
> 
>>
>>         if (WARN_ON_ONCE(!atomic_read(&rt->dst.__refcnt)))
>>                 return -EINVAL;
>> -       if (WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE))
>> -               return -EINVAL;
>>
>>         if (info->nlh) {
>>                 if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
>> @@ -1622,8 +1620,6 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
>>
>>         RT6_TRACE("fib6_del_route\n");
>>
>> -       WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE);
>> -
>>         /* Unlink it */
>>         *rtp = rt->rt6_next;
> 
> This rt here is also f6i right?
> 
>>         rt->rt6i_node = NULL;
>> @@ -1692,21 +1688,11 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
> 
> This rt here is also f6i right?
> 
>>         struct rt6_info __rcu **rtp;
>>         struct rt6_info __rcu **rtp_next;
>>
>> -#if RT6_DEBUG >= 2
>> -       if (rt->dst.obsolete > 0) {
>> -               WARN_ON(fn);
>> -               return -ENOENT;
>> -       }
>> -#endif
>>         if (!fn || rt == net->ipv6.fib6_null_entry)
>>                 return -ENOENT;
>>
>>         WARN_ON(!(fn->fn_flags & RTN_RTINFO));
>>
>> -       /* remove cached dst from exception table */
>> -       if (rt->rt6i_flags & RTF_CACHE)
>> -               return rt6_remove_exception_rt(rt);
> 
> Could you help delete rt6_remove_exception_rt() function? I don't
> think it is used anymore.

It is still used by ip6_negative_advice, ip6_link_failure and
ip6_del_cached_rt. It can be made static; will fix.


The rest of your comments for this patch are renaming rt to f6i. My
thought is to follow up with another patch that does the rename of rt to
f6i for all fib6_info. Given how large this change is already I did not
want to add extra diffs for that. If there is agreement to fold that
part in now, I can do it.
Wei Wang Feb. 27, 2018, 12:32 a.m. UTC | #6
On Mon, Feb 26, 2018 at 3:02 PM, David Ahern <dsahern@gmail.com> wrote:
> On 2/26/18 3:29 PM, Wei Wang wrote:
>> On Sun, Feb 25, 2018 at 11:47 AM, David Ahern <dsahern@gmail.com> wrote:
>>> IPv6 FIB will only contain FIB entries with exception routes added to
>>> the FIB entry. Remove CACHE and dst checks from fib6 add and delete since
>>> they can never happen once the data type changes.
>>>
>>> Fixup the lookup functions to use a f6i name for fib lookups and retain
>>> the current rt name for return variables.
>>>
>>> Signed-off-by: David Ahern <dsahern@gmail.com>
>>> ---
>>>  net/ipv6/ip6_fib.c |  16 +------
>>>  net/ipv6/route.c   | 122 ++++++++++++++++++++++++++++++-----------------------
>>>  2 files changed, 71 insertions(+), 67 deletions(-)
>>>
>>> diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
>>> index 5b03f7e8d850..63a91db61749 100644
>>> --- a/net/ipv6/ip6_fib.c
>>> +++ b/net/ipv6/ip6_fib.c
>>> @@ -1046,7 +1046,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
>>>  static void fib6_start_gc(struct net *net, struct rt6_info *rt)
>>>  {
>>>         if (!timer_pending(&net->ipv6.ip6_fib_timer) &&
>>> -           (rt->rt6i_flags & (RTF_EXPIRES | RTF_CACHE)))
>>> +           (rt->rt6i_flags & RTF_EXPIRES))
>>>                 mod_timer(&net->ipv6.ip6_fib_timer,
>>>                           jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
>>>  }
>>> @@ -1097,8 +1097,6 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
>>
>> This rt here should be f6i?
>>
>>>
>>>         if (WARN_ON_ONCE(!atomic_read(&rt->dst.__refcnt)))
>>>                 return -EINVAL;
>>> -       if (WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE))
>>> -               return -EINVAL;
>>>
>>>         if (info->nlh) {
>>>                 if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
>>> @@ -1622,8 +1620,6 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
>>>
>>>         RT6_TRACE("fib6_del_route\n");
>>>
>>> -       WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE);
>>> -
>>>         /* Unlink it */
>>>         *rtp = rt->rt6_next;
>>
>> This rt here is also f6i right?
>>
>>>         rt->rt6i_node = NULL;
>>> @@ -1692,21 +1688,11 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
>>
>> This rt here is also f6i right?
>>
>>>         struct rt6_info __rcu **rtp;
>>>         struct rt6_info __rcu **rtp_next;
>>>
>>> -#if RT6_DEBUG >= 2
>>> -       if (rt->dst.obsolete > 0) {
>>> -               WARN_ON(fn);
>>> -               return -ENOENT;
>>> -       }
>>> -#endif
>>>         if (!fn || rt == net->ipv6.fib6_null_entry)
>>>                 return -ENOENT;
>>>
>>>         WARN_ON(!(fn->fn_flags & RTN_RTINFO));
>>>
>>> -       /* remove cached dst from exception table */
>>> -       if (rt->rt6i_flags & RTF_CACHE)
>>> -               return rt6_remove_exception_rt(rt);
>>
>> Could you help delete rt6_remove_exception_rt() function? I don't
>> think it is used anymore.
>
> It is still used by ip6_negative_advice, ip6_link_failure and
> ip6_del_cached_rt. It can be made static; will fix.
>
Right. Missed those.

>
> The rest of your comments for this patch are renaming rt to f6i. My
> thought is to follow up with another patch that does the rename of rt to
> f6i for all fib6_info. Given how large this change is already I did not
> want to add extra diffs for that. If there is agreement to fold that
> part in now, I can do it.
Sure. Sounds good to me.
diff mbox series

Patch

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 5b03f7e8d850..63a91db61749 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1046,7 +1046,7 @@  static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 static void fib6_start_gc(struct net *net, struct rt6_info *rt)
 {
 	if (!timer_pending(&net->ipv6.ip6_fib_timer) &&
-	    (rt->rt6i_flags & (RTF_EXPIRES | RTF_CACHE)))
+	    (rt->rt6i_flags & RTF_EXPIRES))
 		mod_timer(&net->ipv6.ip6_fib_timer,
 			  jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
 }
@@ -1097,8 +1097,6 @@  int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 
 	if (WARN_ON_ONCE(!atomic_read(&rt->dst.__refcnt)))
 		return -EINVAL;
-	if (WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE))
-		return -EINVAL;
 
 	if (info->nlh) {
 		if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
@@ -1622,8 +1620,6 @@  static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
 
 	RT6_TRACE("fib6_del_route\n");
 
-	WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE);
-
 	/* Unlink it */
 	*rtp = rt->rt6_next;
 	rt->rt6i_node = NULL;
@@ -1692,21 +1688,11 @@  int fib6_del(struct rt6_info *rt, struct nl_info *info)
 	struct rt6_info __rcu **rtp;
 	struct rt6_info __rcu **rtp_next;
 
-#if RT6_DEBUG >= 2
-	if (rt->dst.obsolete > 0) {
-		WARN_ON(fn);
-		return -ENOENT;
-	}
-#endif
 	if (!fn || rt == net->ipv6.fib6_null_entry)
 		return -ENOENT;
 
 	WARN_ON(!(fn->fn_flags & RTN_RTINFO));
 
-	/* remove cached dst from exception table */
-	if (rt->rt6i_flags & RTF_CACHE)
-		return rt6_remove_exception_rt(rt);
-
 	/*
 	 *	Walk the leaf entries looking for ourself
 	 */
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 3ea60e932eb9..19b91c60ee55 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1094,35 +1094,36 @@  static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 					     struct fib6_table *table,
 					     struct flowi6 *fl6, int flags)
 {
-	struct rt6_info *rt, *rt_cache;
+	struct rt6_info *f6i;
 	struct fib6_node *fn;
+	struct rt6_info *rt;
 
 	rcu_read_lock();
 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 restart:
-	rt = rcu_dereference(fn->leaf);
-	if (!rt) {
-		rt = net->ipv6.fib6_null_entry;
+	f6i = rcu_dereference(fn->leaf);
+	if (!f6i) {
+		f6i = net->ipv6.fib6_null_entry;
 	} else {
-		rt = rt6_device_match(net, rt, &fl6->saddr,
+		f6i = rt6_device_match(net, f6i, &fl6->saddr,
 				      fl6->flowi6_oif, flags);
-		if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
-			rt = rt6_multipath_select(rt, fl6,
+		if (f6i->rt6i_nsiblings && fl6->flowi6_oif == 0)
+			f6i = rt6_multipath_select(f6i, fl6,
 						  fl6->flowi6_oif, flags);
 	}
-	if (rt == net->ipv6.fib6_null_entry) {
+	if (f6i == net->ipv6.fib6_null_entry) {
 		fn = fib6_backtrack(fn, &fl6->saddr);
 		if (fn)
 			goto restart;
 	}
+
 	/* Search through exception table */
-	rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
-	if (rt_cache) {
-		rt = rt_cache;
+	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
+	if (rt) {
 		if (ip6_hold_safe(net, &rt, true))
 			dst_use_noref(&rt->dst, jiffies);
 	} else {
-		rt = ip6_create_rt_rcu(rt);
+		rt = ip6_create_rt_rcu(f6i);
 	}
 
 	rcu_read_unlock();
@@ -1204,9 +1205,6 @@  static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
 	 *	Clone the route.
 	 */
 
-	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
-		ort = ort->from;
-
 	rcu_read_lock();
 	dev = ip6_rt_get_dev_rcu(ort);
 	rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
@@ -1432,11 +1430,6 @@  static int rt6_insert_exception(struct rt6_info *nrt,
 	struct rt6_exception *rt6_ex;
 	int err = 0;
 
-	/* ort can't be a cache or pcpu route */
-	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
-		ort = ort->from;
-	WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
-
 	spin_lock_bh(&rt6_exception_lock);
 
 	if (ort->exception_bucket_flushed) {
@@ -1815,7 +1808,8 @@  struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 			       int oif, struct flowi6 *fl6, int flags)
 {
 	struct fib6_node *fn, *saved_fn;
-	struct rt6_info *rt, *rt_cache;
+	struct rt6_info *f6i;
+	struct rt6_info *rt;
 	int strict = 0;
 
 	strict |= flags & RT6_LOOKUP_F_IFACE;
@@ -1832,10 +1826,10 @@  struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 		oif = 0;
 
 redo_rt6_select:
-	rt = rt6_select(net, fn, oif, strict);
-	if (rt->rt6i_nsiblings)
-		rt = rt6_multipath_select(rt, fl6, oif, strict);
-	if (rt == net->ipv6.fib6_null_entry) {
+	f6i = rt6_select(net, fn, oif, strict);
+	if (f6i->rt6i_nsiblings)
+		f6i = rt6_multipath_select(f6i, fl6, oif, strict);
+	if (f6i == net->ipv6.fib6_null_entry) {
 		fn = fib6_backtrack(fn, &fl6->saddr);
 		if (fn)
 			goto redo_rt6_select;
@@ -1847,18 +1841,17 @@  struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 		}
 	}
 
-	/*Search through exception table */
-	rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
-	if (rt_cache)
-		rt = rt_cache;
-
-	if (rt == net->ipv6.fib6_null_entry) {
+	if (f6i == net->ipv6.fib6_null_entry) {
 		rt = net->ipv6.ip6_null_entry;
 		rcu_read_unlock();
 		dst_hold(&rt->dst);
 		trace_fib6_table_lookup(net, rt, table, fl6);
 		return rt;
-	} else if (rt->rt6i_flags & RTF_CACHE) {
+	}
+
+	/*Search through exception table */
+	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
+	if (rt) {
 		if (ip6_hold_safe(net, &rt, true))
 			dst_use_noref(&rt->dst, jiffies);
 
@@ -1866,7 +1859,7 @@  struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 		trace_fib6_table_lookup(net, rt, table, fl6);
 		return rt;
 	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
-			    !(rt->rt6i_flags & RTF_GATEWAY))) {
+			    !(f6i->rt6i_flags & RTF_GATEWAY))) {
 		/* Create a RTF_CACHE clone which will not be
 		 * owned by the fib6 tree.  It is for the special case where
 		 * the daddr in the skb during the neighbor look-up is different
@@ -1875,16 +1868,16 @@  struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 
 		struct rt6_info *uncached_rt;
 
-		if (ip6_hold_safe(net, &rt, true)) {
-			dst_use_noref(&rt->dst, jiffies);
+		if (ip6_hold_safe(net, &f6i, true)) {
+			dst_use_noref(&f6i->dst, jiffies);
 		} else {
 			rcu_read_unlock();
-			uncached_rt = rt;
+			uncached_rt = f6i;
 			goto uncached_rt_out;
 		}
 		rcu_read_unlock();
 
-		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
+		uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
 		dst_release(&rt->dst);
 
 		if (uncached_rt) {
@@ -1907,18 +1900,18 @@  struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 
 		struct rt6_info *pcpu_rt;
 
-		dst_use_noref(&rt->dst, jiffies);
+		dst_use_noref(&f6i->dst, jiffies);
 		local_bh_disable();
-		pcpu_rt = rt6_get_pcpu_route(rt);
+		pcpu_rt = rt6_get_pcpu_route(f6i);
 
 		if (!pcpu_rt) {
 			/* atomic_inc_not_zero() is needed when using rcu */
-			if (atomic_inc_not_zero(&rt->rt6i_ref)) {
+			if (atomic_inc_not_zero(&f6i->rt6i_ref)) {
 				/* No dst_hold() on rt is needed because grabbing
 				 * rt->rt6i_ref makes sure rt can't be released.
 				 */
-				pcpu_rt = rt6_make_pcpu_route(net, rt);
-				rt6_release(rt);
+				pcpu_rt = rt6_make_pcpu_route(net, f6i);
+				rt6_release(f6i);
 			} else {
 				/* rt is already removed from tree */
 				pcpu_rt = net->ipv6.ip6_null_entry;
@@ -2296,7 +2289,8 @@  static struct rt6_info *__ip6_route_redirect(struct net *net,
 					     int flags)
 {
 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
-	struct rt6_info *rt, *rt_cache;
+	struct rt6_info *ret = NULL, *rt_cache;
+	struct rt6_info *rt;
 	struct fib6_node *fn;
 
 	/* Get the "current" route for this destination and
@@ -2335,7 +2329,7 @@  static struct rt6_info *__ip6_route_redirect(struct net *net,
 			if (rt_cache &&
 			    ipv6_addr_equal(&rdfl->gateway,
 					    &rt_cache->rt6i_gateway)) {
-				rt = rt_cache;
+				ret = rt_cache;
 				break;
 			}
 			continue;
@@ -2346,7 +2340,7 @@  static struct rt6_info *__ip6_route_redirect(struct net *net,
 	if (!rt)
 		rt = net->ipv6.fib6_null_entry;
 	else if (rt->rt6i_flags & RTF_REJECT) {
-		rt = net->ipv6.ip6_null_entry;
+		ret = net->ipv6.ip6_null_entry;
 		goto out;
 	}
 
@@ -2357,12 +2351,15 @@  static struct rt6_info *__ip6_route_redirect(struct net *net,
 	}
 
 out:
-	ip6_hold_safe(net, &rt, true);
+	if (ret)
+		dst_hold(&ret->dst);
+	else
+		ret = ip6_create_rt_rcu(rt);
 
 	rcu_read_unlock();
 
-	trace_fib6_table_lookup(net, rt, table, fl6);
-	return rt;
+	trace_fib6_table_lookup(net, ret, table, fl6);
+	return ret;
 };
 
 static struct dst_entry *ip6_route_redirect(struct net *net,
@@ -3031,6 +3028,22 @@  static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
 	return err;
 }
 
+static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
+{
+	int rc = -ESRCH;
+
+	if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
+		goto out;
+
+	if (cfg->fc_flags & RTF_GATEWAY &&
+	    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
+		goto out;
+	if (dst_hold_safe(&rt->dst))
+		rc = rt6_remove_exception_rt(rt);
+out:
+	return rc;
+}
+
 static int ip6_route_del(struct fib6_config *cfg,
 			 struct netlink_ext_ack *extack)
 {
@@ -3055,11 +3068,16 @@  static int ip6_route_del(struct fib6_config *cfg,
 	if (fn) {
 		for_each_fib6_node_rt_rcu(fn) {
 			if (cfg->fc_flags & RTF_CACHE) {
+				int rc;
+
 				rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
 							      &cfg->fc_src);
-				if (!rt_cache)
-					continue;
-				rt = rt_cache;
+				if (rt_cache) {
+					rc = ip6_del_cached_rt(rt_cache, cfg);
+					if (rc != -ESRCH)
+						return rc;
+				}
+				continue;
 			}
 			if (cfg->fc_ifindex &&
 			    (!rt->fib6_nh.nh_dev ||
@@ -3176,7 +3194,7 @@  static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
 				     NEIGH_UPDATE_F_ISROUTER)),
 		     NDISC_REDIRECT, &ndopts);
 
-	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
+	nrt = ip6_rt_cache_alloc(rt->from, &msg->dest, NULL);
 	if (!nrt)
 		goto out;