diff mbox

[net-next,1/2] Support outside netns for tunnels.

Message ID 1451933147-17266-1-git-send-email-saurabh@cplanenetworks.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Saurabh Mohan Jan. 4, 2016, 6:45 p.m. UTC
This patch enhances a tunnel interface, like gre, to have the tunnel
encap/decap be in the context of a network namespace that is different from
the namespace of the tunnel interface.

From userspace this feature may be configured using the new 'onetns' keyword:
ip netns exec custa ip link add dev tun1 type gre local 10.0.0.1 \
 remote 10.0.0.2 onetns outside 

In the above example the tunnel would be in the 'custa' namespace and the 
tunnel endpoints would be in the 'outside' namespace.

Also, I propose using the netns name 'global' to specify the global namespace.

If this patch set is accepted then I will add support for other tunnels as 
well.

Signed-off-by: Saurabh Mohan <saurabh@cplanenetworks.com>
---
 include/uapi/linux/if_tunnel.h | 19 +++++++++++++++++++
 net/ipv4/ip_tunnel.c           | 24 +++++++++++++++++++++---
 2 files changed, 40 insertions(+), 3 deletions(-)

Comments

Tom Herbert Jan. 4, 2016, 7:47 p.m. UTC | #1
On Mon, Jan 4, 2016 at 10:45 AM, Saurabh Mohan
<saurabh@cplanenetworks.com> wrote:
>
> This patch enchances a tunnel interface, like gre, to have the tunnel
> encap/decap be in the context of a network namespace that is different from
> the namespace of the tunnel interface.
>
> From userspace this feature may be configured using the new 'onetns' keyword:
> ip netns exec custa ip link add dev tun1 type gre local 10.0.0.1 \
>  remote 10.0.0.2 onetns outside
>
> In the above example the tunnel would be in the 'custa' namespace and the
> tunnel endpoints would be in the 'outside' namespace.
>
> Also, proposing the use of netns name 'global' to specify the global namespace.
>
> If this patch set is accepted then I will add support for other tunnels as
> well.
>
This might be interesting. Can you please add a 0/n patch that
describes the motivation for this? In particular, I would like to know
whether this has a positive impact on ns performance, for example if we
are somehow eliminating indirection.

Thanks,
Tom

> Signed-off-by: Saurabh Mohan <saurabh@cplanenetworks.com>
> ---
>  include/uapi/linux/if_tunnel.h | 19 +++++++++++++++++++
>  net/ipv4/ip_tunnel.c           | 24 +++++++++++++++++++++---
>  2 files changed, 40 insertions(+), 3 deletions(-)
>
> diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
> index af4de90..2e43753 100644
> --- a/include/uapi/linux/if_tunnel.h
> +++ b/include/uapi/linux/if_tunnel.h
> @@ -3,6 +3,7 @@
>
>  #include <linux/types.h>
>  #include <asm/byteorder.h>
> +#include <linux/limits.h>
>
>
>  #define SIOCGETTUNNEL   (SIOCDEVPRIVATE + 0)
> @@ -27,6 +28,14 @@
>  #define GRE_FLAGS      __cpu_to_be16(0x00F8)
>  #define GRE_VERSION    __cpu_to_be16(0x0007)
>
> +struct o_netns_parm {
> +       __u8                    o_netns_flag;
> +       __u32                   o_netns_fd;
> +       char                    netns[NAME_MAX];
> +};
> +#define TUNNEL_ONETNS_FLAG_GLOBAL      (1<<0)
> +#define TUNNEL_ONETNS_FLAG_NETNS       (1<<1)
> +
>  struct ip_tunnel_parm {
>         char                    name[IFNAMSIZ];
>         int                     link;
> @@ -35,6 +44,7 @@ struct ip_tunnel_parm {
>         __be32                  i_key;
>         __be32                  o_key;
>         struct iphdr            iph;
> +       struct o_netns_parm     o_net;
>  };
>
>  enum {
> @@ -57,6 +67,9 @@ enum {
>         IFLA_IPTUN_ENCAP_FLAGS,
>         IFLA_IPTUN_ENCAP_SPORT,
>         IFLA_IPTUN_ENCAP_DPORT,
> +       IFLA_IPTUN_ONETNS_FLAGS,
> +       IFLA_IPTUN_ONETNS_FD,
> +       IFLA_IPTUN_ONETNS_NAME,
>         __IFLA_IPTUN_MAX,
>  };
>  #define IFLA_IPTUN_MAX (__IFLA_IPTUN_MAX - 1)
> @@ -113,6 +126,9 @@ enum {
>         IFLA_GRE_ENCAP_SPORT,
>         IFLA_GRE_ENCAP_DPORT,
>         IFLA_GRE_COLLECT_METADATA,
> +       IFLA_GRE_ONETNS_FLAGS,
> +       IFLA_GRE_ONETNS_FD,
> +       IFLA_GRE_ONETNS_NAME,
>         __IFLA_GRE_MAX,
>  };
>
> @@ -128,6 +144,9 @@ enum {
>         IFLA_VTI_OKEY,
>         IFLA_VTI_LOCAL,
>         IFLA_VTI_REMOTE,
> +       IFLA_VTI_ONETNS_FLAGS,
> +       IFLA_VTI_ONETNS_FD,
> +       IFLA_VTI_ONETNS_NAME,
>         __IFLA_VTI_MAX,
>  };
>
> diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
> index c7bd72e..f8dd717 100644
> --- a/net/ipv4/ip_tunnel.c
> +++ b/net/ipv4/ip_tunnel.c
> @@ -259,6 +259,16 @@ static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
>         return &itn->tunnels[h];
>  }
>
> +static struct net *ip_tunnel_get_onet(struct net *inet,
> +                                     struct ip_tunnel_parm *parms)
> +{
> +       if (parms->o_net.o_netns_flag == 0)
> +               return inet;
> +       if (parms->o_net.o_netns_flag & TUNNEL_ONETNS_FLAG_GLOBAL)
> +               return &init_net;
> +       return get_net_ns_by_fd(parms->o_net.o_netns_fd);
> +}
> +
>  static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
>  {
>         struct hlist_head *head = ip_bucket(itn, &t->parms);
> @@ -330,7 +340,7 @@ static struct net_device *__ip_tunnel_create(struct net *net,
>
>         tunnel = netdev_priv(dev);
>         tunnel->parms = *parms;
> -       tunnel->net = net;
> +       tunnel->net = ip_tunnel_get_onet(net, &tunnel->parms);
>
>         err = register_netdevice(dev);
>         if (err)
> @@ -818,6 +828,14 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
>         t->parms.iph.daddr = p->iph.daddr;
>         t->parms.i_key = p->i_key;
>         t->parms.o_key = p->o_key;
> +       if (strcmp(p->o_net.netns, t->parms.o_net.netns)) {
> +               /* change the itn */
> +               struct net *o_net = ip_tunnel_get_onet(dev_net(dev), p);
> +
> +               itn = net_generic(o_net, t->ip_tnl_net_id);
> +               t->parms.o_net = p->o_net;
> +               t->net = o_net;
> +       }
>         if (dev->type != ARPHRD_ETHER) {
>                 memcpy(dev->dev_addr, &p->iph.saddr, 4);
>                 memcpy(dev->broadcast, &p->iph.daddr, 4);
> @@ -1071,7 +1089,7 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
>                       struct ip_tunnel_parm *p)
>  {
>         struct ip_tunnel *nt;
> -       struct net *net = dev_net(dev);
> +       struct net *net = ip_tunnel_get_onet(dev_net(dev), p);
>         struct ip_tunnel_net *itn;
>         int mtu;
>         int err;
> @@ -1169,7 +1187,7 @@ int ip_tunnel_init(struct net_device *dev)
>         }
>
>         tunnel->dev = dev;
> -       tunnel->net = dev_net(dev);
> +       tunnel->net = ip_tunnel_get_onet(dev_net(dev), &tunnel->parms);
>         strcpy(tunnel->parms.name, dev->name);
>         iph->version            = 4;
>         iph->ihl                = 5;
> --
> 1.9.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Joe Perches Jan. 4, 2016, 7:54 p.m. UTC | #2
On Mon, 2016-01-04 at 11:47 -0800, Tom Herbert wrote:
> On Mon, Jan 4, 2016 at 10:45 AM, Saurabh Mohan
> <saurabh@cplanenetworks.com> wrote:
> > 
> > This patch enchances a tunnel interface, like gre, to have the tunnel
> > encap/decap be in the context of a network namespace that is different from
> > the namespace of the tunnel interface.
> > 
> > From userspace this feature may be configured using the new 'onetns' keyword:
> > ip netns exec custa ip link add dev tun1 type gre local 10.0.0.1 \
> >  remote 10.0.0.2 onetns outside
> > 
> > In the above example the tunnel would be in the 'custa' namespace and the
> > tunnel endpoints would be in the 'outside' namespace.
> > 
> > Also, proposing the use of netns name 'global' to specify the global namespace.
> > 
> > If this patch set is accepted then I will add support for other tunnels as
> > well.
> > 
> This might be interesting. Can you please ad a 0/n patch that
> describes the motivation for this, in particular I would like to know
> if this has a positive impact on ns performance if somehow we are
> eliminating indirection.
[]
> > diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
[]
> > @@ -3,6 +3,7 @@
> > 
> >  #include 
> >  #include 
> > +#include 
> > 
> > 
> >  #define SIOCGETTUNNEL   (SIOCDEVPRIVATE + 0)
> > @@ -27,6 +28,14 @@
> >  #define GRE_FLAGS      __cpu_to_be16(0x00F8)
> >  #define GRE_VERSION    __cpu_to_be16(0x0007)
> > 
> > +struct o_netns_parm {
> > +       __u8                    o_netns_flag;
> > +       __u32                   o_netns_fd;
> > +       char                    netns[NAME_MAX];
> > +};

Trivia:

It could eliminate a few padding bytes if the
o_netns_fd and o_netns_flag fields were reversed.

Also, netns[NAME_MAX] is normally declared as netns[NAME_MAX + 1],
to leave room for the terminating NUL.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nicolas Dichtel Jan. 5, 2016, 4:47 p.m. UTC | #3
Le 04/01/2016 19:45, Saurabh Mohan a écrit :
>
> This patch enchances a tunnel interface, like gre, to have the tunnel
> encap/decap be in the context of a network namespace that is different from
> the namespace of the tunnel interface.
>
>  From userspace this feature may be configured using the new 'onetns' keyword:
> ip netns exec custa ip link add dev tun1 type gre local 10.0.0.1 \
>   remote 10.0.0.2 onetns outside
>
> In the above example the tunnel would be in the 'custa' namespace and the
> tunnel endpoints would be in the 'outside' namespace.
What is the difference with the following commands?

ip netns exec outside ip link add dev tun1 type gre local 10.0.0.1 \
    remote 10.0.0.2
ip netns exec outside ip link set tun1 netns custa

or

ip netns exec custa ip netns set outside 1234
ip netns exec custa ip link add tun1 link-netnsid 1234 type gre local 10.0.0.1 \
    remote 10.0.0.2


Regards,
Nicolas
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Saurabh Mohan Jan. 7, 2016, 6:59 p.m. UTC | #4
On 01/05/2016 08:47 AM, Nicolas Dichtel wrote:
> Le 04/01/2016 19:45, Saurabh Mohan a écrit :
>>
>> This patch enchances a tunnel interface, like gre, to have the tunnel
>> encap/decap be in the context of a network namespace that is different from
>> the namespace of the tunnel interface.
>>
>>   From userspace this feature may be configured using the new 'onetns' keyword:
>> ip netns exec custa ip link add dev tun1 type gre local 10.0.0.1 \
>>    remote 10.0.0.2 onetns outside
>>
>> In the above example the tunnel would be in the 'custa' namespace and the
>> tunnel endpoints would be in the 'outside' namespace.
> What is the difference with the following commands?
>
> ip netns exec outside ip link add dev tun1 type gre local 10.0.0.1 \
>      remote 10.0.0.2
> ip netns exec outside ip link set tun1 netns custa
>
> or
>
> ip exec custa ip netns set outside 1234
> ip exec custa ip link add tun1 link-netnsid 1234 type gre local 10.0.0.1 \
>      remote 10.0.0.2
>
>

These methods would be functionally equivalent to what this patch does.
There is no point in adding a third way to do the same thing.
diff mbox

Patch

diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index af4de90..2e43753 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -3,6 +3,7 @@ 
 
 #include <linux/types.h>
 #include <asm/byteorder.h>
+#include <linux/limits.h>
 
 
 #define SIOCGETTUNNEL   (SIOCDEVPRIVATE + 0)
@@ -27,6 +28,14 @@ 
 #define GRE_FLAGS	__cpu_to_be16(0x00F8)
 #define GRE_VERSION	__cpu_to_be16(0x0007)
 
+struct o_netns_parm {
+	__u8			o_netns_flag;
+	__u32			o_netns_fd;
+	char			netns[NAME_MAX];
+};
+#define TUNNEL_ONETNS_FLAG_GLOBAL	(1<<0)
+#define TUNNEL_ONETNS_FLAG_NETNS	(1<<1)
+
 struct ip_tunnel_parm {
 	char			name[IFNAMSIZ];
 	int			link;
@@ -35,6 +44,7 @@  struct ip_tunnel_parm {
 	__be32			i_key;
 	__be32			o_key;
 	struct iphdr		iph;
+	struct o_netns_parm	o_net;
 };
 
 enum {
@@ -57,6 +67,9 @@  enum {
 	IFLA_IPTUN_ENCAP_FLAGS,
 	IFLA_IPTUN_ENCAP_SPORT,
 	IFLA_IPTUN_ENCAP_DPORT,
+	IFLA_IPTUN_ONETNS_FLAGS,
+	IFLA_IPTUN_ONETNS_FD,
+	IFLA_IPTUN_ONETNS_NAME,
 	__IFLA_IPTUN_MAX,
 };
 #define IFLA_IPTUN_MAX	(__IFLA_IPTUN_MAX - 1)
@@ -113,6 +126,9 @@  enum {
 	IFLA_GRE_ENCAP_SPORT,
 	IFLA_GRE_ENCAP_DPORT,
 	IFLA_GRE_COLLECT_METADATA,
+	IFLA_GRE_ONETNS_FLAGS,
+	IFLA_GRE_ONETNS_FD,
+	IFLA_GRE_ONETNS_NAME,
 	__IFLA_GRE_MAX,
 };
 
@@ -128,6 +144,9 @@  enum {
 	IFLA_VTI_OKEY,
 	IFLA_VTI_LOCAL,
 	IFLA_VTI_REMOTE,
+	IFLA_VTI_ONETNS_FLAGS,
+	IFLA_VTI_ONETNS_FD,
+	IFLA_VTI_ONETNS_NAME,
 	__IFLA_VTI_MAX,
 };
 
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index c7bd72e..f8dd717 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -259,6 +259,16 @@  static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
 	return &itn->tunnels[h];
 }
 
+static struct net *ip_tunnel_get_onet(struct net *inet,
+				      struct ip_tunnel_parm *parms)
+{
+	if (parms->o_net.o_netns_flag == 0)
+		return inet;
+	if (parms->o_net.o_netns_flag & TUNNEL_ONETNS_FLAG_GLOBAL)
+		return &init_net;
+	return get_net_ns_by_fd(parms->o_net.o_netns_fd);
+}
+
 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
 {
 	struct hlist_head *head = ip_bucket(itn, &t->parms);
@@ -330,7 +340,7 @@  static struct net_device *__ip_tunnel_create(struct net *net,
 
 	tunnel = netdev_priv(dev);
 	tunnel->parms = *parms;
-	tunnel->net = net;
+	tunnel->net = ip_tunnel_get_onet(net, &tunnel->parms);
 
 	err = register_netdevice(dev);
 	if (err)
@@ -818,6 +828,14 @@  static void ip_tunnel_update(struct ip_tunnel_net *itn,
 	t->parms.iph.daddr = p->iph.daddr;
 	t->parms.i_key = p->i_key;
 	t->parms.o_key = p->o_key;
+	if (strcmp(p->o_net.netns, t->parms.o_net.netns)) {
+		/* change the itn */
+		struct net *o_net = ip_tunnel_get_onet(dev_net(dev), p);
+
+		itn = net_generic(o_net, t->ip_tnl_net_id);
+		t->parms.o_net = p->o_net;
+		t->net = o_net;
+	}
 	if (dev->type != ARPHRD_ETHER) {
 		memcpy(dev->dev_addr, &p->iph.saddr, 4);
 		memcpy(dev->broadcast, &p->iph.daddr, 4);
@@ -1071,7 +1089,7 @@  int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
 		      struct ip_tunnel_parm *p)
 {
 	struct ip_tunnel *nt;
-	struct net *net = dev_net(dev);
+	struct net *net = ip_tunnel_get_onet(dev_net(dev), p);
 	struct ip_tunnel_net *itn;
 	int mtu;
 	int err;
@@ -1169,7 +1187,7 @@  int ip_tunnel_init(struct net_device *dev)
 	}
 
 	tunnel->dev = dev;
-	tunnel->net = dev_net(dev);
+	tunnel->net = ip_tunnel_get_onet(dev_net(dev), &tunnel->parms);
 	strcpy(tunnel->parms.name, dev->name);
 	iph->version		= 4;
 	iph->ihl		= 5;