Patchwork fib_rules: add .suppress operation

login
register
mail settings
Submitter Stefan Tomanek
Date July 26, 2013, 10:46 a.m.
Message ID <20130726104657.GF10216@zirkel.wertarbyte.de>
Download mbox | patch
Permalink /patch/262123/
State Superseded
Delegated to: David Miller
Headers show

Comments

Stefan Tomanek - July 26, 2013, 10:46 a.m.
This change adds a new operation to the fib_rules_ops struct; it allows the
suppression of routing decisions if certain criteria are not met by its
results.

The first implemented constraint is a minimum prefix length added to the
structures of routing rules. If a rule is added with a minimum prefix length
>0, only routes meeting this threshold will be considered. Any other (more
general) routing table entries will be ignored.

When configuring a system with multiple network uplinks and default routes, it
is often convinient to reference the main routing table multiple times - but
omitting the default route. Using this patch and a modified "ip" utility, this
can be achieved by using the following command sequence:

  $ ip route add table secuplink default via 10.42.23.1

  $ ip rule add pref 100            table main prefixlength 1
  $ ip rule add pref 150 fwmark 0xA table secuplink

With this setup, packets marked 0xA will be processed by the additional routing
table "secuplink", but only if no suitable route in the main routing table can
be found. By using a minimal prefixlength of 1, the default route (/0) of the
table "main" is hidden to packets processed by rule 100; packets traveling to
destinations with more specific routing entries are processed as usual.

Signed-off-by: Stefan Tomanek <stefan.tomanek@wertarbyte.de>
---
 include/net/fib_rules.h        |    4 ++++
 include/uapi/linux/fib_rules.h |    2 +-
 net/core/fib_rules.c           |    8 ++++++++
 net/ipv4/fib_rules.c           |    9 +++++++++
 net/ipv6/fib6_rules.c          |   12 ++++++++++++
 5 files changed, 34 insertions(+), 1 deletion(-)
Hannes Frederic Sowa - July 26, 2013, 5:05 p.m.
On Fri, Jul 26, 2013 at 12:46:57PM +0200, Stefan Tomanek wrote:
>  		if (err != -EAGAIN) {
> +			if (ops->suppress && ops->suppress(rule, arg)) {
> +				continue;
> +			}
>  			if ((arg->flags & FIB_LOOKUP_NOREF) ||
>  			    likely(atomic_inc_not_zero(&rule->refcnt))) {
>  				arg->rule = rule;
>
> [...]
>
> +static int fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg) {
> +	struct rt6_info *rt = (struct rt6_info *) arg->result;
> +	/*
> +	 * do not accept result if the route does
> +	 * not meet the required prefix length
> +	 */
> +	if (rt->rt6i_dst.plen < rule->table_prefixlen_min) {
> +		return 1;
> +	}
> +	return 0;
> +}

Urks, fib6_rule_action is broken. The switch (rule->action) does update the rt
entry but does not signal the correct error code to stop iterating the rules
in case it finds a blackhole, prohibit etc. action (it always signals
-EAGAIN).

A change in this logic could have impact to your patch as I currently
don't know how the null handling of arg->result will turn out. IPv6 does not
preinitialize arg->result as IPv4 does.

I am looking for a solution.

Thanks,

  Hannes

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hannes Frederic Sowa - July 27, 2013, 6:26 a.m.
Oh, I overlooked something.

On Fri, Jul 26, 2013 at 12:46:57PM +0200, Stefan Tomanek wrote:
> --- a/net/ipv6/fib6_rules.c
> +++ b/net/ipv6/fib6_rules.c
> @@ -111,6 +111,17 @@ out:
>  	return rt == NULL ? -EAGAIN : 0;
>  }
>  
> +static int fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg) {
> +	struct rt6_info *rt = (struct rt6_info *) arg->result;
> +	/*
> +	 * do not accept result if the route does
> +	 * not meet the required prefix length
> +	 */
> +	if (rt->rt6i_dst.plen < rule->table_prefixlen_min) {
> +		return 1;
> +	}
> +	return 0;
> +}

In case of IPv6 (we don't clone the result but merely return a reference to
the rt6_info), we also need to decrement the reference count if we suppress
the route. A ip6_rt_put should do the trick.

Greetings,

  Hannes

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hannes Frederic Sowa - July 27, 2013, 7:07 a.m.
Hm, IPv4 actually seems to have the same problem:

On Fri, Jul 26, 2013 at 12:46:57PM +0200, Stefan Tomanek wrote:
> +static int fib4_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg) {
> +	/* do not accept result if the route does not meet the required prefix length */
> +	struct fib_result *result = (struct fib_result *) arg->result;
> +	if (result->prefixlen < rule->table_prefixlen_min) {
> +		return 1;
> +	}
> +	return 0;
> +}

In case of suppressing the route, we need to decrement the reference counter
of fib_info:

if (!(fib_flags & FIB_LOOKUP_NOREF))
	fib_info_put(result->fi);	


Also:

> index e361f48..bb15664 100644
> --- a/include/net/fib_rules.h
> +++ b/include/net/fib_rules.h
> @@ -18,6 +18,7 @@ struct fib_rule {
>         u32                     pref;
>         u32                     flags;
>         u32                     table;
> +       u8                      table_prefixlen_min;
>         u8                      action;
>         u32                     target;
>         struct fib_rule __rcu   *ctarget;
> @@ -46,6 +47,8 @@ struct fib_rules_ops {   
>         int                     (*action)(struct fib_rule *,
>                                           struct flowi *, int,
>                                           struct fib_lookup_arg *);
> +       int                     (*suppress)(struct fib_rule *,
> +                                           struct fib_lookup_arg *);

          bool and true/false would be nicer.

>         int                     (*match)(struct fib_rule *,
>                                          struct flowi *, int);
>         int                     (*configure)(struct fib_rule *,
> 

Greetings,

  Hannes

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Stefan Tomanek - July 27, 2013, 10:21 a.m.
Thanks for your input, I've put together all the proposed patches in my github
branch:

https://github.com/wertarbyte/linux/compare/fib_rule_suppress

Any more ideas, before I squash it all together?
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hannes Frederic Sowa - July 27, 2013, 3:10 p.m.
On Sat, Jul 27, 2013 at 12:21:49PM +0200, Stefan Tomanek wrote:
> Thanks for your input, I've put together all the proposed patches in my github
> branch:
> 
> https://github.com/wertarbyte/linux/compare/fib_rule_suppress
> 
> Any more ideas, before I squash it all together?

This should fix the compile error (was a c&p error from me in the previous
mail).

-               if (!(fib_flags & FIB_LOOKUP_NOREF))
+               if (!(arg->flags & FIB_LOOKUP_NOREF))

This should get a considerable amount of stress testing now. ;)

Greetings,

  Hannes

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Stefan Tomanek - July 28, 2013, 10:36 a.m.
Dies schrieb Hannes Frederic Sowa (hannes@stressinduktion.org):

> This should fix the compile error (was a c&p error from me in the previous
> mail).
> 
> -               if (!(fib_flags & FIB_LOOKUP_NOREF))
> +               if (!(arg->flags & FIB_LOOKUP_NOREF))
> 
> This should get a considerable amount of stress testing now. ;)

I amended that change and made a forced push, so now the branch should be up to
date and in working condition:
https://github.com/wertarbyte/linux/compare/fib_rule_suppress
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index e361f48..bb15664 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -18,6 +18,7 @@  struct fib_rule {
 	u32			pref;
 	u32			flags;
 	u32			table;
+	u8			table_prefixlen_min;
 	u8			action;
 	u32			target;
 	struct fib_rule __rcu	*ctarget;
@@ -46,6 +47,8 @@  struct fib_rules_ops {
 	int			(*action)(struct fib_rule *,
 					  struct flowi *, int,
 					  struct fib_lookup_arg *);
+	int			(*suppress)(struct fib_rule *,
+					    struct fib_lookup_arg *);
 	int			(*match)(struct fib_rule *,
 					 struct flowi *, int);
 	int			(*configure)(struct fib_rule *,
@@ -80,6 +83,7 @@  struct fib_rules_ops {
 	[FRA_FWMARK]	= { .type = NLA_U32 }, \
 	[FRA_FWMASK]	= { .type = NLA_U32 }, \
 	[FRA_TABLE]     = { .type = NLA_U32 }, \
+	[FRA_TABLE_PREFIXLEN_MIN] = { .type = NLA_U8 }, \
 	[FRA_GOTO]	= { .type = NLA_U32 }
 
 static inline void fib_rule_get(struct fib_rule *rule)
diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h
index 51da65b..59cd31b 100644
--- a/include/uapi/linux/fib_rules.h
+++ b/include/uapi/linux/fib_rules.h
@@ -45,7 +45,7 @@  enum {
 	FRA_FLOW,	/* flow/class id */
 	FRA_UNUSED6,
 	FRA_UNUSED7,
-	FRA_UNUSED8,
+	FRA_TABLE_PREFIXLEN_MIN,
 	FRA_TABLE,	/* Extended table id */
 	FRA_FWMASK,	/* mask for netfilter mark */
 	FRA_OIFNAME,
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 2173544..1e0e1f4 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -227,6 +227,9 @@  jumped:
 			err = ops->action(rule, fl, flags, arg);
 
 		if (err != -EAGAIN) {
+			if (ops->suppress && ops->suppress(rule, arg)) {
+				continue;
+			}
 			if ((arg->flags & FIB_LOOKUP_NOREF) ||
 			    likely(atomic_inc_not_zero(&rule->refcnt))) {
 				arg->rule = rule;
@@ -337,6 +340,8 @@  static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)
 	rule->action = frh->action;
 	rule->flags = frh->flags;
 	rule->table = frh_get_table(frh, tb);
+	if (tb[FRA_TABLE_PREFIXLEN_MIN])
+		rule->table_prefixlen_min = nla_get_u8(tb[FRA_TABLE_PREFIXLEN_MIN]);
 
 	if (!tb[FRA_PRIORITY] && ops->default_pref)
 		rule->pref = ops->default_pref(ops);
@@ -523,6 +528,7 @@  static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
 			 + nla_total_size(IFNAMSIZ) /* FRA_OIFNAME */
 			 + nla_total_size(4) /* FRA_PRIORITY */
 			 + nla_total_size(4) /* FRA_TABLE */
+			 + nla_total_size(1) /* FRA_TABLE_PREFIXLEN_MIN */
 			 + nla_total_size(4) /* FRA_FWMARK */
 			 + nla_total_size(4); /* FRA_FWMASK */
 
@@ -548,6 +554,8 @@  static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
 	frh->table = rule->table;
 	if (nla_put_u32(skb, FRA_TABLE, rule->table))
 		goto nla_put_failure;
+	if (nla_put_u8(skb, FRA_TABLE_PREFIXLEN_MIN, rule->table_prefixlen_min))
+		goto nla_put_failure;
 	frh->res1 = 0;
 	frh->res2 = 0;
 	frh->action = rule->action;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 26aa65d..cb5fcc1 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -101,6 +101,14 @@  errout:
 	return err;
 }
 
+static int fib4_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg) {
+	/* do not accept result if the route does not meet the required prefix length */
+	struct fib_result *result = (struct fib_result *) arg->result;
+	if (result->prefixlen < rule->table_prefixlen_min) {
+		return 1;
+	}
+	return 0;
+}
 
 static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
 {
@@ -267,6 +275,7 @@  static const struct fib_rules_ops __net_initconst fib4_rules_ops_template = {
 	.rule_size	= sizeof(struct fib4_rule),
 	.addr_size	= sizeof(u32),
 	.action		= fib4_rule_action,
+	.suppress	= fib4_rule_suppress,
 	.match		= fib4_rule_match,
 	.configure	= fib4_rule_configure,
 	.delete		= fib4_rule_delete,
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 2e1a432..dcba659 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -111,6 +111,17 @@  out:
 	return rt == NULL ? -EAGAIN : 0;
 }
 
+static int fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg) {
+	struct rt6_info *rt = (struct rt6_info *) arg->result;
+	/*
+	 * do not accept result if the route does
+	 * not meet the required prefix length
+	 */
+	if (rt->rt6i_dst.plen < rule->table_prefixlen_min) {
+		return 1;
+	}
+	return 0;
+}
 
 static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
 {
@@ -244,6 +255,7 @@  static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
 	.addr_size		= sizeof(struct in6_addr),
 	.action			= fib6_rule_action,
 	.match			= fib6_rule_match,
+	.suppress		= fib6_rule_suppress,
 	.configure		= fib6_rule_configure,
 	.compare		= fib6_rule_compare,
 	.fill			= fib6_rule_fill,