diff mbox series

[net-next] lwtunnel: add support for multiple geneve opts

Message ID 9c4231b54baf60619c110c818ca7a6eb37a2e52e.1574156351.git.lucien.xin@gmail.com
State Accepted
Delegated to: David Miller
Headers show
Series [net-next] lwtunnel: add support for multiple geneve opts | expand

Commit Message

Xin Long Nov. 19, 2019, 9:39 a.m. UTC
geneve RFC (draft-ietf-nvo3-geneve-14) allows a geneve packet to carry
multiple geneve opts, so it's necessary for lwtunnel to support adding
multiple geneve opts in one lwtunnel route. For vxlan and erspan, only
a single option is still allowed.

With this patch, iproute2 can configure these options like this:

  # ip r a 1.1.1.0/24 encap ip id 1 geneve_opts 0:0:12121212,1:2:12121212 \
    dst 10.1.0.2 dev geneve1

  # ip r a 1.1.1.0/24 encap ip id 1 vxlan_opts 456 \
    dst 10.1.0.2 dev erspan1

  # ip r a 1.1.1.0/24 encap ip id 1 erspan_opts 1:123:0:0 \
    dst 10.1.0.2 dev erspan1

This is pretty much the same as how cls_flower and act_tunnel_key work.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
---
 net/ipv4/ip_tunnel_core.c | 111 +++++++++++++++++++++++++++++++---------------
 1 file changed, 75 insertions(+), 36 deletions(-)

Comments

David Miller Nov. 20, 2019, 6:15 p.m. UTC | #1
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 19 Nov 2019 17:39:11 +0800

> geneve RFC (draft-ietf-nvo3-geneve-14) allows a geneve packet to carry
> multiple geneve opts, so it's necessary for lwtunnel to support adding
> multiple geneve opts in one lwtunnel route. But vxlan and erspan opts
> are still only allowed to add one option.
> 
> With this patch, iproute2 could make it like:
> 
>   # ip r a 1.1.1.0/24 encap ip id 1 geneve_opts 0:0:12121212,1:2:12121212 \
>     dst 10.1.0.2 dev geneve1
> 
>   # ip r a 1.1.1.0/24 encap ip id 1 vxlan_opts 456 \
>     dst 10.1.0.2 dev erspan1
> 
>   # ip r a 1.1.1.0/24 encap ip id 1 erspan_opts 1:123:0:0 \
>     dst 10.1.0.2 dev erspan1
> 
> Which are pretty much like cls_flower and act_tunnel_key.
> 
> Signed-off-by: Xin Long <lucien.xin@gmail.com>

Applied, thanks Xin.
diff mbox series

Patch

diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index ee71e76..7d21f7e 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -251,7 +251,7 @@  erspan_opt_policy[LWTUNNEL_IP_OPT_ERSPAN_MAX + 1] = {
 };
 
 static int ip_tun_parse_opts_geneve(struct nlattr *attr,
-				    struct ip_tunnel_info *info,
+				    struct ip_tunnel_info *info, int opts_len,
 				    struct netlink_ext_ack *extack)
 {
 	struct nlattr *tb[LWTUNNEL_IP_OPT_GENEVE_MAX + 1];
@@ -273,7 +273,7 @@  static int ip_tun_parse_opts_geneve(struct nlattr *attr,
 		return -EINVAL;
 
 	if (info) {
-		struct geneve_opt *opt = ip_tunnel_info_opts(info);
+		struct geneve_opt *opt = ip_tunnel_info_opts(info) + opts_len;
 
 		memcpy(opt->opt_data, nla_data(attr), data_len);
 		opt->length = data_len / 4;
@@ -288,7 +288,7 @@  static int ip_tun_parse_opts_geneve(struct nlattr *attr,
 }
 
 static int ip_tun_parse_opts_vxlan(struct nlattr *attr,
-				   struct ip_tunnel_info *info,
+				   struct ip_tunnel_info *info, int opts_len,
 				   struct netlink_ext_ack *extack)
 {
 	struct nlattr *tb[LWTUNNEL_IP_OPT_VXLAN_MAX + 1];
@@ -303,7 +303,8 @@  static int ip_tun_parse_opts_vxlan(struct nlattr *attr,
 		return -EINVAL;
 
 	if (info) {
-		struct vxlan_metadata *md = ip_tunnel_info_opts(info);
+		struct vxlan_metadata *md =
+			ip_tunnel_info_opts(info) + opts_len;
 
 		attr = tb[LWTUNNEL_IP_OPT_VXLAN_GBP];
 		md->gbp = nla_get_u32(attr);
@@ -314,7 +315,7 @@  static int ip_tun_parse_opts_vxlan(struct nlattr *attr,
 }
 
 static int ip_tun_parse_opts_erspan(struct nlattr *attr,
-				    struct ip_tunnel_info *info,
+				    struct ip_tunnel_info *info, int opts_len,
 				    struct netlink_ext_ack *extack)
 {
 	struct nlattr *tb[LWTUNNEL_IP_OPT_ERSPAN_MAX + 1];
@@ -329,7 +330,8 @@  static int ip_tun_parse_opts_erspan(struct nlattr *attr,
 		return -EINVAL;
 
 	if (info) {
-		struct erspan_metadata *md = ip_tunnel_info_opts(info);
+		struct erspan_metadata *md =
+			ip_tunnel_info_opts(info) + opts_len;
 
 		attr = tb[LWTUNNEL_IP_OPT_ERSPAN_VER];
 		md->version = nla_get_u8(attr);
@@ -356,30 +358,57 @@  static int ip_tun_parse_opts_erspan(struct nlattr *attr,
 static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info,
 			     struct netlink_ext_ack *extack)
 {
-	struct nlattr *tb[LWTUNNEL_IP_OPTS_MAX + 1];
-	int err;
+	int err, rem, opt_len, opts_len = 0, type = 0;
+	struct nlattr *nla;
 
 	if (!attr)
 		return 0;
 
-	err = nla_parse_nested(tb, LWTUNNEL_IP_OPTS_MAX, attr,
-			       ip_opts_policy, extack);
+	err = nla_validate(nla_data(attr), nla_len(attr), LWTUNNEL_IP_OPTS_MAX,
+			   ip_opts_policy, extack);
 	if (err)
 		return err;
 
-	if (tb[LWTUNNEL_IP_OPTS_GENEVE])
-		err = ip_tun_parse_opts_geneve(tb[LWTUNNEL_IP_OPTS_GENEVE],
-					       info, extack);
-	else if (tb[LWTUNNEL_IP_OPTS_VXLAN])
-		err = ip_tun_parse_opts_vxlan(tb[LWTUNNEL_IP_OPTS_VXLAN],
-					      info, extack);
-	else if (tb[LWTUNNEL_IP_OPTS_ERSPAN])
-		err = ip_tun_parse_opts_erspan(tb[LWTUNNEL_IP_OPTS_ERSPAN],
-					       info, extack);
-	else
-		err = -EINVAL;
+	nla_for_each_attr(nla, nla_data(attr), nla_len(attr), rem) {
+		switch (nla_type(nla)) {
+		case LWTUNNEL_IP_OPTS_GENEVE:
+			if (type && type != TUNNEL_GENEVE_OPT)
+				return -EINVAL;
+			opt_len = ip_tun_parse_opts_geneve(nla, info, opts_len,
+							   extack);
+			if (opt_len < 0)
+				return opt_len;
+			opts_len += opt_len;
+			if (opts_len > IP_TUNNEL_OPTS_MAX)
+				return -EINVAL;
+			type = TUNNEL_GENEVE_OPT;
+			break;
+		case LWTUNNEL_IP_OPTS_VXLAN:
+			if (type)
+				return -EINVAL;
+			opt_len = ip_tun_parse_opts_vxlan(nla, info, opts_len,
+							  extack);
+			if (opt_len < 0)
+				return opt_len;
+			opts_len += opt_len;
+			type = TUNNEL_VXLAN_OPT;
+			break;
+		case LWTUNNEL_IP_OPTS_ERSPAN:
+			if (type)
+				return -EINVAL;
+			opt_len = ip_tun_parse_opts_erspan(nla, info, opts_len,
+							   extack);
+			if (opt_len < 0)
+				return opt_len;
+			opts_len += opt_len;
+			type = TUNNEL_ERSPAN_OPT;
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
 
-	return err;
+	return opts_len;
 }
 
 static int ip_tun_get_optlen(struct nlattr *attr,
@@ -475,18 +504,23 @@  static int ip_tun_fill_encap_opts_geneve(struct sk_buff *skb,
 {
 	struct geneve_opt *opt;
 	struct nlattr *nest;
+	int offset = 0;
 
 	nest = nla_nest_start_noflag(skb, LWTUNNEL_IP_OPTS_GENEVE);
 	if (!nest)
 		return -ENOMEM;
 
-	opt = ip_tunnel_info_opts(tun_info);
-	if (nla_put_be16(skb, LWTUNNEL_IP_OPT_GENEVE_CLASS, opt->opt_class) ||
-	    nla_put_u8(skb, LWTUNNEL_IP_OPT_GENEVE_TYPE, opt->type) ||
-	    nla_put(skb, LWTUNNEL_IP_OPT_GENEVE_DATA, opt->length * 4,
-		    opt->opt_data)) {
-		nla_nest_cancel(skb, nest);
-		return -ENOMEM;
+	while (tun_info->options_len > offset) {
+		opt = ip_tunnel_info_opts(tun_info) + offset;
+		if (nla_put_be16(skb, LWTUNNEL_IP_OPT_GENEVE_CLASS,
+				 opt->opt_class) ||
+		    nla_put_u8(skb, LWTUNNEL_IP_OPT_GENEVE_TYPE, opt->type) ||
+		    nla_put(skb, LWTUNNEL_IP_OPT_GENEVE_DATA, opt->length * 4,
+			    opt->opt_data)) {
+			nla_nest_cancel(skb, nest);
+			return -ENOMEM;
+		}
+		offset += sizeof(*opt) + opt->length * 4;
 	}
 
 	nla_nest_end(skb, nest);
@@ -602,13 +636,18 @@  static int ip_tun_opts_nlsize(struct ip_tunnel_info *info)
 
 	opt_len = nla_total_size(0);		/* LWTUNNEL_IP_OPTS */
 	if (info->key.tun_flags & TUNNEL_GENEVE_OPT) {
-		struct geneve_opt *opt = ip_tunnel_info_opts(info);
-
-		opt_len += nla_total_size(0)	/* LWTUNNEL_IP_OPTS_GENEVE */
-			   + nla_total_size(2)	/* OPT_GENEVE_CLASS */
-			   + nla_total_size(1)	/* OPT_GENEVE_TYPE */
-			   + nla_total_size(opt->length * 4);
-						/* OPT_GENEVE_DATA */
+		struct geneve_opt *opt;
+		int offset = 0;
+
+		opt_len += nla_total_size(0);	/* LWTUNNEL_IP_OPTS_GENEVE */
+		while (info->options_len > offset) {
+			opt = ip_tunnel_info_opts(info) + offset;
+			opt_len += nla_total_size(2)	/* OPT_GENEVE_CLASS */
+				   + nla_total_size(1)	/* OPT_GENEVE_TYPE */
+				   + nla_total_size(opt->length * 4);
+							/* OPT_GENEVE_DATA */
+			offset += sizeof(*opt) + opt->length * 4;
+		}
 	} else if (info->key.tun_flags & TUNNEL_VXLAN_OPT) {
 		opt_len += nla_total_size(0)	/* LWTUNNEL_IP_OPTS_VXLAN */
 			   + nla_total_size(4);	/* OPT_VXLAN_GBP */