diff mbox

[ovs-dev,RFC] datapath: allow tunnels to be created with rtnetlink

Message ID 79BBBFE6CB6C9B488C1A45ACD284F5195F1001BC@SHSMSX103.ccr.corp.intel.com
State RFC
Headers show

Commit Message

Yang, Yi Dec. 6, 2016, 7:17 a.m. UTC
Hi, guys

This patch isn't updated from June on, Cascardo said he/Eric is still working on this, but six months passed, we don't see any following patch for this, now I have time to revisit it, for case 3 Pravin mentioned, I can make it work by applying the below patch [1] against https://mail.openvswitch.org/pipermail/ovs-dev/2016-June/316881.html, but it only can create vxlan port, it will also create vxlan port even if I create vxlan-gpe port by cmd "sudo ovs-vsctl add-port br-int vxlan_gpe1 -- set interface vxlan_gpe1 type=vxlan options:remote_ip=flow options:key=flow options:dst_port=4790 options:exts=gpe", the reason is very simple, vxlan_configure_exts in datapath/vport-vxlan.c will be called to set vxlan configuration, but it can't recognize vxlangpe extension and flags, you guys told me datapath/vport-vxlan.c and datapath/linux/compat/include/linux/openvswitch.h are compatibility code, we can't change them, but for case 3, we have to change them like patch [2], I know we shoul
 d change them on Linux net-next kernel first, here I just show you we have to do so for case 3 Pravin mentioned, I'm happy to hear you have better way for this.

So my advice about this is we can push patch [2] to Linux net-next first, then apply patch series https://mail.openvswitch.org/pipermail/ovs-dev/2016-June/316879.html from Cascardo and apply [1], that can cover all the cases Pravin mentioned.

This patch has blocked us too long, we look forward to seeing progress on this. 


[1]:
From f40b6fec09e1f9ad14e50ba224f46b1b9657399c Mon Sep 17 00:00:00 2001
From: Yi Yang <yi.y.yang@intel.com>
Date: Tue, 6 Dec 2016 12:39:41 +0800
Subject: [PATCH] Use ovs compat modules if USE_UPSTREAM_TUNNEL is defined

Signed-off-by: Yi Yang <yi.y.yang@intel.com>
---
 lib/dpif-netlink.c | 4 ++++
 1 file changed, 4 insertions(+)


-----Original Message-----
From: dev [mailto:dev-bounces@openvswitch.org] On Behalf Of Pravin Shelar
Sent: Friday, October 21, 2016 1:31 AM
To: Thadeu Lima de Souza Cascardo <cascardo@redhat.com>
Cc: ovs dev <dev@openvswitch.org>; Eric Garver <e@erig.me>
Subject: Re: [ovs-dev] [RFC PATCH] datapath: allow tunnels to be created with rtnetlink

On Tue, Sep 20, 2016 at 7:01 AM, Thadeu Lima de Souza Cascardo <cascardo@redhat.com> wrote:
> In order to use rtnetlink to create new tunnel vports, the backported 
> tunnels require some code that was removed from their upstream 
> version, mainly the necessary code for newlink and for start_xmit.
>
> This patch adds back the necessary code for VXLAN, GRE and Geneve 
> tunnels.
>
> Signed-off-by: Eric Garver <e@erig.me>
> Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@redhat.com>
> ---
>  datapath/linux/Modules.mk                       |   1 +
>  datapath/linux/compat/geneve.c                  |  15 +--
>  datapath/linux/compat/include/linux/if_tunnel.h |  71 ++++++++++++
>  datapath/linux/compat/ip_gre.c                  |  65 ++++++++---
>  datapath/linux/compat/vxlan.c                   | 147 +++++++++++++++++++++---
>  5 files changed, 261 insertions(+), 38 deletions(-)  create mode 
> 100644 datapath/linux/compat/include/linux/if_tunnel.h
>
> diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk 
> index 26f6d22..ad7d14a 100644
> --- a/datapath/linux/Modules.mk
> +++ b/datapath/linux/Modules.mk
> @@ -38,6 +38,7 @@ openvswitch_headers += \
>         linux/compat/include/linux/if.h \
>         linux/compat/include/linux/if_ether.h \
>         linux/compat/include/linux/if_link.h \
> +       linux/compat/include/linux/if_tunnel.h \
>         linux/compat/include/linux/if_vlan.h \
>         linux/compat/include/linux/in.h \
>         linux/compat/include/linux/jiffies.h \ diff --git 
> a/datapath/linux/compat/geneve.c b/datapath/linux/compat/geneve.c 
> index 0c5b58a..79bb0ba 100644
> --- a/datapath/linux/compat/geneve.c
> +++ b/datapath/linux/compat/geneve.c
> @@ -1112,9 +1112,8 @@ tx_error:
>  }
>  #endif
>
> -netdev_tx_t rpl_geneve_xmit(struct sk_buff *skb)
> +static netdev_tx_t geneve_dev_xmit(struct sk_buff *skb, struct 
> +net_device *dev)
>  {
> -       struct net_device *dev = skb->dev;
>         struct geneve_dev *geneve = netdev_priv(dev);
>         struct ip_tunnel_info *info = NULL;
>
> @@ -1128,18 +1127,12 @@ netdev_tx_t rpl_geneve_xmit(struct sk_buff 
> *skb)  #endif
>         return geneve_xmit_skb(skb, dev, info);  } 
> -EXPORT_SYMBOL_GPL(rpl_geneve_xmit);
>
> -static netdev_tx_t geneve_dev_xmit(struct sk_buff *skb, struct 
> net_device *dev)
> +netdev_tx_t rpl_geneve_xmit(struct sk_buff *skb)
>  {
> -       /* Drop All packets coming from networking stack. OVS-CB is
> -        * not initialized for these packets.
> -        */
> -
> -       dev_kfree_skb(skb);
> -       dev->stats.tx_dropped++;
> -       return NETDEV_TX_OK;
> +       return geneve_dev_xmit(skb, skb->dev);
>  }
This would crash kernel.
OVS compat code expect dst entry in skb-cb, packet coming from kernel would not have it.

> +EXPORT_SYMBOL_GPL(rpl_geneve_xmit);
>
>  static int __geneve_change_mtu(struct net_device *dev, int new_mtu, 
> bool strict)  { diff --git 
> a/datapath/linux/compat/include/linux/if_tunnel.h 
> b/datapath/linux/compat/include/linux/if_tunnel.h
> new file mode 100644
> index 0000000..476fe3c
> --- /dev/null
> +++ b/datapath/linux/compat/include/linux/if_tunnel.h
> @@ -0,0 +1,71 @@
> +#ifndef _LINUX_IF_TUNNEL_WRAPPER_H
> +#define _LINUX_IF_TUNNEL_WRAPPER_H
> +
> +#include_next<linux/if_tunnel.h>
> +
> +/* GRE section */
> +enum {
> +#define IFLA_GRE_UNSPEC rpl_IFLA_GRE_UNSPEC
> +       IFLA_GRE_UNSPEC,
> +
> +#define IFLA_GRE_LINK rpl_IFLA_GRE_LINK
> +       IFLA_GRE_LINK,
> +
> +#define IFLA_GRE_IFLAGS rpl_IFLA_GRE_IFLAGS
> +       IFLA_GRE_IFLAGS,
> +
> +#define IFLA_GRE_OFLAGS rpl_IFLA_GRE_OFLAGS
> +       IFLA_GRE_OFLAGS,
> +
> +#define IFLA_GRE_IKEY rpl_IFLA_GRE_IKEY
> +       IFLA_GRE_IKEY,
> +
> +#define IFLA_GRE_OKEY rpl_IFLA_GRE_OKEY
> +       IFLA_GRE_OKEY,
> +
> +#define IFLA_GRE_LOCAL rpl_IFLA_GRE_LOCAL
> +       IFLA_GRE_LOCAL,
> +
> +#define IFLA_GRE_REMOTE rpl_IFLA_GRE_REMOTE
> +       IFLA_GRE_REMOTE,
> +
> +#define IFLA_GRE_TTL rpl_IFLA_GRE_TTL
> +       IFLA_GRE_TTL,
> +
> +#define IFLA_GRE_TOS rpl_IFLA_GRE_TOS
> +       IFLA_GRE_TOS,
> +
> +#define IFLA_GRE_PMTUDISC rpl_IFLA_GRE_PMTUDISC
> +       IFLA_GRE_PMTUDISC,
> +
> +#define IFLA_GRE_ENCAP_LIMIT rpl_IFLA_GRE_ENCAP_LIMIT
> +       IFLA_GRE_ENCAP_LIMIT,
> +
> +#define IFLA_GRE_FLOWINFO rpl_IFLA_GRE_FLOWINFO
> +       IFLA_GRE_FLOWINFO,
> +
> +#define IFLA_GRE_FLAGS rpl_IFLA_GRE_FLAGS
> +       IFLA_GRE_FLAGS,
> +
> +#define IFLA_GRE_ENCAP_TYPE rpl_IFLA_GRE_ENCAP_TYPE
> +       IFLA_GRE_ENCAP_TYPE,
> +
> +#define IFLA_GRE_ENCAP_FLAGS rpl_IFLA_GRE_ENCAP_FLAGS
> +       IFLA_GRE_ENCAP_FLAGS,
> +
> +#define IFLA_GRE_ENCAP_SPORT rpl_IFLA_GRE_ENCAP_SPORT
> +       IFLA_GRE_ENCAP_SPORT,
> +
> +#define IFLA_GRE_ENCAP_DPORT rpl_IFLA_GRE_ENCAP_DPORT
> +       IFLA_GRE_ENCAP_DPORT,
> +
> +#define IFLA_GRE_COLLECT_METADATA rpl_IFLA_GRE_COLLECT_METADATA
> +       IFLA_GRE_COLLECT_METADATA,
> +
> +#define __IFLA_GRE_MAX rpl__IFLA_GRE_MAX
> +       __IFLA_GRE_MAX
> +};
> +#undef IFLA_GRE_MAX
> +#define IFLA_GRE_MAX   (__IFLA_GRE_MAX - 1)
> +

After thinking about the actual issue of using netdevices for tunnel port this is what I think.
These are tree different implementations of OVS tunnel.

Case 1. OVS kernel module is upstream. It is straight forward to tunnel devices on upstream kernel module. STT and lisp are not available.
Case 2. OVS kernel module is out of tree. In this case OVS has compat code and USE_UPSTREAM_TUNNEL is defined. We are using upstream kernel modules for geneve, gre and vxlan, for rest of vport. (stt and lisp) we are using netdevices from compat code.
Case 3. OVS kernel module is out of tree and not using upstream tunnel devices. we have to fallback to  OVS compat code for all tunnel modules.

Now to detect these cases userspace could probe for tunnel device "ovs_geneve" or "ovs_vxlan" if it exist it is case 3, and userspace vswitchd has to use existing vport APIs. Otherwise we could use netdev based tunnel devices. And create tunnel devices for each type of tunnel port.

Comments

Eric Garver Dec. 6, 2016, 2:38 p.m. UTC | #1
On Tue, Dec 06, 2016 at 07:17:20AM +0000, Yang, Yi Y wrote:
> Hi, guys

Hi Yi,

> This patch isn't updated from June on, Cascardo said he/Eric is still
> working on this, but six months passed, we don't see any following

Work is still ongoing. There was delay due to some debate about how and
when to prefer out-of-tree vs in-tree tunnels.

> patch for this, now I have time to revisit it, for case 3 Pravin
> mentioned, I can make it work by applying the below patch [1] against
> https://mail.openvswitch.org/pipermail/ovs-dev/2016-June/316881.html,
> but it only can create vxlan port, it will also create vxlan port even
> if I create vxlan-gpe port by cmd "sudo ovs-vsctl add-port br-int
> vxlan_gpe1 -- set interface vxlan_gpe1 type=vxlan
> options:remote_ip=flow options:key=flow options:dst_port=4790
> options:exts=gpe", the reason is very simple, vxlan_configure_exts in
> datapath/vport-vxlan.c will be called to set vxlan configuration, but
> it can't recognize vxlangpe extension and flags, you guys told me
> datapath/vport-vxlan.c and
> datapath/linux/compat/include/linux/openvswitch.h are compatibility
> code, we can't change them, but for case 3, we have to change them
> like patch [2], I know we should change them on Linux net-next kernel
> first, here I just show you we have to do so for case 3 Pravin
> mentioned, I'm happy to hear you have better way for this.
> 
> So my advice about this is we can push patch [2] to Linux net-next
> first, then apply patch series
> https://mail.openvswitch.org/pipermail/ovs-dev/2016-June/316879.html
> from Cascardo and apply [1], that can cover all the cases Pravin
> mentioned.

As Cascardo and Jesse mentioned back in June [1], we should not be
adding new features to this interface. GPE has been backported to the
out-of-tree VXLAN code. The only part that remains is userspace changes
to create with rtnetlink, which is still being worked on.

[1] https://mail.openvswitch.org/pipermail/ovs-dev/2016-June/316777.html

> This patch has blocked us too long, we look forward to seeing progress on this. 
> 
> 
> [1]:
> From f40b6fec09e1f9ad14e50ba224f46b1b9657399c Mon Sep 17 00:00:00 2001
> From: Yi Yang <yi.y.yang@intel.com>
> Date: Tue, 6 Dec 2016 12:39:41 +0800
> Subject: [PATCH] Use ovs compat modules if USE_UPSTREAM_TUNNEL is defined
> 
> Signed-off-by: Yi Yang <yi.y.yang@intel.com>
> ---
>  lib/dpif-netlink.c | 4 ++++
>  1 file changed, 4 insertions(+)
> 
> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
> index 0d03334..2286c3e 100644
> --- a/lib/dpif-netlink.c
> +++ b/lib/dpif-netlink.c
> @@ -1062,6 +1062,9 @@ dpif_netlink_port_query__(const struct dpif_netlink *dpif, odp_port_t port_no,
>  static int
>  dpif_netlink_port_create(struct netdev *netdev)
>  {
> +#ifndef USE_UPSTREAM_TUNNEL
> +    return EOPNOTSUPP;
> +#else
>      switch (netdev_to_ovs_vport_type(netdev_get_type(netdev))) {
>      case OVS_VPORT_TYPE_VXLAN:
>          return netdev_vxlan_create(netdev);
> @@ -1077,6 +1080,7 @@ dpif_netlink_port_create(struct netdev *netdev)
>          return EOPNOTSUPP;
>      }
>      return 0;
> +#endif
>  }
> 
>  static int
> --
> 2.1.0
> 
> 
> [2]:
> 
> diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h
> index 12260d8..17e21cb 100644
> --- a/datapath/linux/compat/include/linux/openvswitch.h
> +++ b/datapath/linux/compat/include/linux/openvswitch.h
> @@ -291,6 +291,7 @@ enum ovs_vport_attr {
>  enum {
>         OVS_VXLAN_EXT_UNSPEC,
>         OVS_VXLAN_EXT_GBP,      /* Flag or __u32 */
> +       OVS_VXLAN_EXT_GPE,      /* Flag or __u32 */
>         __OVS_VXLAN_EXT_MAX,
>  };
> 
> diff --git a/datapath/vport-vxlan.c b/datapath/vport-vxlan.c
> index 11965c0..a5882ed 100644
> --- a/datapath/vport-vxlan.c
> +++ b/datapath/vport-vxlan.c
> @@ -54,11 +54,26 @@ static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
>                 nla_nest_end(skb, exts);
>         }
> 
> +       if (vxlan->flags & VXLAN_F_GPE) {
> +               struct nlattr *exts;
> +
> +               exts = nla_nest_start(skb, OVS_TUNNEL_ATTR_EXTENSION);
> +               if (!exts)
> +                       return -EMSGSIZE;
> +
> +               if (vxlan->flags & VXLAN_F_GPE &&
> +                   nla_put_flag(skb, OVS_VXLAN_EXT_GPE))
> +                       return -EMSGSIZE;
> +
> +               nla_nest_end(skb, exts);
> +       }
> +
>         return 0;
>  }
> 
>  static const struct nla_policy exts_policy[OVS_VXLAN_EXT_MAX + 1] = {
>         [OVS_VXLAN_EXT_GBP]     = { .type = NLA_FLAG, },
> +       [OVS_VXLAN_EXT_GPE]     = { .type = NLA_FLAG, },
>  };
> 
>  static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr,
> @@ -76,6 +91,8 @@ static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr,
> 
>         if (exts[OVS_VXLAN_EXT_GBP])
>                 conf->flags |= VXLAN_F_GBP;
> +       if (exts[OVS_VXLAN_EXT_GPE])
> +               conf->flags |= VXLAN_F_GPE;
> 
>         return 0;
>  }
Jiri Benc Dec. 6, 2016, 2:51 p.m. UTC | #2
On Tue, 6 Dec 2016 07:17:20 +0000, Yang, Yi Y wrote:
> So my advice about this is we can push patch [2] to Linux net-next
> first, then apply patch series
> https://mail.openvswitch.org/pipermail/ovs-dev/2016-June/316879.html
> from Cascardo and apply [1], that can cover all the cases Pravin
> mentioned.

There's no reason to add this to the genetlink interface when we're
going to switch to rtnetlink for tunnel configuration.

NACKed-by: Jiri Benc <jbenc@redhat.com>
Pravin Shelar Dec. 6, 2016, 5:46 p.m. UTC | #3
On Mon, Dec 5, 2016 at 11:17 PM, Yang, Yi Y <yi.y.yang@intel.com> wrote:
> Hi, guys
>
> This patch isn't updated from June on, Cascardo said he/Eric is still working on this, but six months passed, we don't see any following patch for this, now I have time to revisit it, for case 3 Pravin mentioned, I can make it work by applying the below patch [1] against https://mail.openvswitch.org/pipermail/ovs-dev/2016-June/316881.html, but it only can create vxlan port, it will also create vxlan port even if I create vxlan-gpe port by cmd "sudo ovs-vsctl add-port br-int vxlan_gpe1 -- set interface vxlan_gpe1 type=vxlan options:remote_ip=flow options:key=flow options:dst_port=4790 options:exts=gpe", the reason is very simple, vxlan_configure_exts in datapath/vport-vxlan.c will be called to set vxlan configuration, but it can't recognize vxlangpe extension and flags, you guys told me datapath/vport-vxlan.c and datapath/linux/compat/include/linux/openvswitch.h are compatibility code, we can't change them, but for case 3, we have to change them like patch [2], I know we sho
 uld change them on Linux net-next kernel first, here I just show you we have to do so for case 3 Pravin mentioned, I'm happy to hear you have better way for this.
>
> So my advice about this is we can push patch [2] to Linux net-next first, then apply patch series https://mail.openvswitch.org/pipermail/ovs-dev/2016-June/316879.html from Cascardo and apply [1], that can cover all the cases Pravin mentioned.
>
> This patch has blocked us too long, we look forward to seeing progress on this.
>
>
> [1]:
> From f40b6fec09e1f9ad14e50ba224f46b1b9657399c Mon Sep 17 00:00:00 2001
> From: Yi Yang <yi.y.yang@intel.com>
> Date: Tue, 6 Dec 2016 12:39:41 +0800
> Subject: [PATCH] Use ovs compat modules if USE_UPSTREAM_TUNNEL is defined
>
> Signed-off-by: Yi Yang <yi.y.yang@intel.com>
> ---
>  lib/dpif-netlink.c | 4 ++++
>  1 file changed, 4 insertions(+)
>
> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
> index 0d03334..2286c3e 100644
> --- a/lib/dpif-netlink.c
> +++ b/lib/dpif-netlink.c
> @@ -1062,6 +1062,9 @@ dpif_netlink_port_query__(const struct dpif_netlink *dpif, odp_port_t port_no,
>  static int
>  dpif_netlink_port_create(struct netdev *netdev)
>  {
> +#ifndef USE_UPSTREAM_TUNNEL
> +    return EOPNOTSUPP;
> +#else
We can not use USE_UPSTREAM_TUNNEL macro symbol in userspace. The
vswitchd should not be tied to specific kernel version.

>      switch (netdev_to_ovs_vport_type(netdev_get_type(netdev))) {
>      case OVS_VPORT_TYPE_VXLAN:
>          return netdev_vxlan_create(netdev);
> @@ -1077,6 +1080,7 @@ dpif_netlink_port_create(struct netdev *netdev)
>          return EOPNOTSUPP;
>      }
>      return 0;
> +#endif
>  }
>
>  static int
> --
> 2.1.0
>
>
Yang, Yi Dec. 7, 2016, 12:24 a.m. UTC | #4
On Tue, Dec 06, 2016 at 09:46:04AM -0800, Pravin Shelar wrote:
> >  lib/dpif-netlink.c | 4 ++++
> >  1 file changed, 4 insertions(+)
> >
> > diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
> > index 0d03334..2286c3e 100644
> > --- a/lib/dpif-netlink.c
> > +++ b/lib/dpif-netlink.c
> > @@ -1062,6 +1062,9 @@ dpif_netlink_port_query__(const struct dpif_netlink *dpif, odp_port_t port_no,
> >  static int
> >  dpif_netlink_port_create(struct netdev *netdev)
> >  {
> > +#ifndef USE_UPSTREAM_TUNNEL
> > +    return EOPNOTSUPP;
> > +#else
> We can not use USE_UPSTREAM_TUNNEL macro symbol in userspace. The
> vswitchd should not be tied to specific kernel version.
>
Pravin, can you help point out how we can do this better? Frankly
speaking, I don't know how to do this without such macro help, maybe
it is best to use some code to dynamically detect, but I don't know
how to implement this, I hope to get your guide about this.
Yang, Yi Dec. 7, 2016, 12:47 a.m. UTC | #5
On Tue, Dec 06, 2016 at 09:38:09AM -0500, Eric Garver wrote:
> On Tue, Dec 06, 2016 at 07:17:20AM +0000, Yang, Yi Y wrote:
> > Hi, guys
> 
> Hi Yi,
> 
> > This patch isn't updated from June on, Cascardo said he/Eric is still
> > working on this, but six months passed, we don't see any following
> 
> Work is still ongoing. There was delay due to some debate about how and
> when to prefer out-of-tree vs in-tree tunnels.

I'd like to know how you will handle 3 cases Pravin mentioned

"""
Case 1. OVS kernel module is upstream. It is straight forward to
tunnel devices on upstream kernel module. STT and lisp are not
available.
Case 2. OVS kernel module is out of tree. In this case OVS has compat
code and USE_UPSTREAM_TUNNEL is defined. We are using upstream kernel
modules for geneve, gre and vxlan, for rest of vport. (stt and lisp)
we are using netdevices from compat code.
Case 3. OVS kernel module is out of tree and not using upstream tunnel
devices. we have to fallback to  OVS compat code for all tunnel
modules.
"""

According to the below Cascardo's reply, it seems those old patches can
handle all the cases, but my test confirmed we can't create vxlan-gpe if
we don't change the compatibility code, I want to hear your idea about
this.

"""
So, in summary, we drop this patch, submit what we had before, make sure
it
works in the following scenarions:

1) upstream ovs and tunnels are used;
  1a) metadata tunnels can be created, those are used;
  1b) we use compat vports if the configuration allows that;

2) out-of-tree ovs and out-of-tree tunnels are used;
   we make sure using rtnetlink will fail and compat vport is used;
   NOTE: this should work even with the old out-of-tree code that named
         drivers as vxlan instead of ovs_vxlan.

3) out-of-tree ovs and upstream/in-tree tunnels are used;
   it should work just like with upstream ovs, unless the out-of-tree
code does
   not support metadata tunnels, in which case, it should fallback to
compat
   code.
"""

> 
> > 
> > So my advice about this is we can push patch [2] to Linux net-next
> > first, then apply patch series
> > https://mail.openvswitch.org/pipermail/ovs-dev/2016-June/316879.html
> > from Cascardo and apply [1], that can cover all the cases Pravin
> > mentioned.
> 
> As Cascardo and Jesse mentioned back in June [1], we should not be
> adding new features to this interface. GPE has been backported to the
> out-of-tree VXLAN code. The only part that remains is userspace changes
> to create with rtnetlink, which is still being worked on.

I notice if we fallback to ovs compat modules to create vxlan, it will
use generic netlink but not rtnetlink, do you meam you're changing
generic netlink in function dpif_netlink_vport_transact in lib/dpif-netlink.c to rtnetlink?

I want to know when you have a test patch available, I can help test
even implement it.
>
Yang, Yi Dec. 7, 2016, 1:03 a.m. UTC | #6
On Tue, Dec 06, 2016 at 03:51:03PM +0100, Jiri Benc wrote:
> On Tue, 6 Dec 2016 07:17:20 +0000, Yang, Yi Y wrote:
> > So my advice about this is we can push patch [2] to Linux net-next
> > first, then apply patch series
> > https://mail.openvswitch.org/pipermail/ovs-dev/2016-June/316879.html
> > from Cascardo and apply [1], that can cover all the cases Pravin
> > mentioned.
> 
> There's no reason to add this to the genetlink interface when we're
> going to switch to rtnetlink for tunnel configuration.
>
But ovs userspace use genetlink to create vxlan tunnel port when we build ovs to use its own compat modules, this is case 3 Pravin, how do you think we can handle this?

Now the dilemma is we can't create vxlangpe port although it has been in
ovs, I know it is ok if we use upstream tunnel code, but in many cases,
Linux distribution didn't include recent kernels, we have to use ovs'
compat modules, Pravin's comments also addressed this.

So anybody will change current genetlink in ovs to rtnetlink? I will try
to change it if nobody will fix it. Another issue, I think ovs will send
rtnetlink message to kernel, right? Can ovs handle rtnetlink message if
we use ovs compat modules instead of upstream kernel modules?
Jiri Benc Dec. 7, 2016, 8:37 a.m. UTC | #7
On Wed, 7 Dec 2016 09:03:39 +0800, Yang, Yi wrote:
> But ovs userspace use genetlink to create vxlan tunnel port when we
> build ovs to use its own compat modules, this is case 3 Pravin, how
> do you think we can handle this?

Where's the problem? I don't see any.

> Now the dilemma is we can't create vxlangpe port although it has been
> in ovs, I know it is ok if we use upstream tunnel code, but in many
> cases, Linux distribution didn't include recent kernels, we have to
> use ovs' compat modules, Pravin's comments also addressed this.

There's no problem in using VXLAN-GPE in the out of tree driver using
rtnetlink. As Eric wrote, we're working on this.

> So anybody will change current genetlink in ovs to rtnetlink? I will
> try to change it if nobody will fix it. Another issue, I think ovs
> will send rtnetlink message to kernel, right? Can ovs handle
> rtnetlink message if we use ovs compat modules instead of upstream
> kernel modules?

I don't see any problem.

We know how to handle all of the cases. Eric took over the work from
Thadeu and he has been working on the patches. Nobody prevents you from
submitting your own patches; but please do your own research in such
case first. By asking tons of question you're not speeding things up,
as we'll be spending time explaining instead of writing and testing the
code.

 Jiri
Pravin Shelar Dec. 8, 2016, 12:28 a.m. UTC | #8
On Tue, Dec 6, 2016 at 4:24 PM, Yang, Yi <yi.y.yang@intel.com> wrote:
> On Tue, Dec 06, 2016 at 09:46:04AM -0800, Pravin Shelar wrote:
>> >  lib/dpif-netlink.c | 4 ++++
>> >  1 file changed, 4 insertions(+)
>> >
>> > diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
>> > index 0d03334..2286c3e 100644
>> > --- a/lib/dpif-netlink.c
>> > +++ b/lib/dpif-netlink.c
>> > @@ -1062,6 +1062,9 @@ dpif_netlink_port_query__(const struct dpif_netlink *dpif, odp_port_t port_no,
>> >  static int
>> >  dpif_netlink_port_create(struct netdev *netdev)
>> >  {
>> > +#ifndef USE_UPSTREAM_TUNNEL
>> > +    return EOPNOTSUPP;
>> > +#else
>> We can not use USE_UPSTREAM_TUNNEL macro symbol in userspace. The
>> vswitchd should not be tied to specific kernel version.
>>
> Pravin, can you help point out how we can do this better? Frankly
> speaking, I don't know how to do this without such macro help, maybe
> it is best to use some code to dynamically detect, but I don't know
> how to implement this, I hope to get your guide about this.

I have explained it in same thread. I do not want to copy paste it
here. Can you comment on the specific msg that I have sent so that we
can have better conversation.
Pravin Shelar Dec. 8, 2016, 12:35 a.m. UTC | #9
On Wed, Dec 7, 2016 at 12:37 AM, Jiri Benc <jbenc@redhat.com> wrote:
> On Wed, 7 Dec 2016 09:03:39 +0800, Yang, Yi wrote:
>> But ovs userspace use genetlink to create vxlan tunnel port when we
>> build ovs to use its own compat modules, this is case 3 Pravin, how
>> do you think we can handle this?
>
> Where's the problem? I don't see any.
>
>> Now the dilemma is we can't create vxlangpe port although it has been
>> in ovs, I know it is ok if we use upstream tunnel code, but in many
>> cases, Linux distribution didn't include recent kernels, we have to
>> use ovs' compat modules, Pravin's comments also addressed this.
>
> There's no problem in using VXLAN-GPE in the out of tree driver using
> rtnetlink. As Eric wrote, we're working on this.
>

In compat mode, OVS tunnel devices are not used in same way as LWT,
since OVS do support kernel version that does not have core LWT
support. Therefore we have to use legacy vport APIs to create these
ports. There might be a way to configure the device, once it is
created, using rtnetlink API but would complicate the code. So I think
in such cases like GPE we could to add code to the legacy code.
Jiri Benc Dec. 8, 2016, 9:14 a.m. UTC | #10
On Wed, 7 Dec 2016 16:35:59 -0800, Pravin Shelar wrote:
> In compat mode, OVS tunnel devices are not used in same way as LWT,
> since OVS do support kernel version that does not have core LWT
> support. Therefore we have to use legacy vport APIs to create these
> ports.

I see. Yes, that's unfortunate.

> There might be a way to configure the device, once it is
> created, using rtnetlink API but would complicate the code. So I think
> in such cases like GPE we could to add code to the legacy code.

Could we just support the newest shiniest features only with lwtunnel
capable kernel? Kernel 4.3 is out for more than a year already, that's
a long time. And several more months will pass before this is available
in an Open vSwitch release.

What about:
- always preferring the out of tree module (whatever capabilities it
  has)
	- first try rtnetlink
	- if it fails, try genetlink
	- if it fails (but the out of tree module is there), just don't
	  bother with kernel
- then try the in kernel module
	- first rtnetlink
	- if it fails then genetlink

This way, we would accommodate most of the stuff. With old kernels,
VXLAN-GPE wouldn't be available even with the out of tree module (but
I think that's it, I can't think of any other feature unavailable at
this point; of course, more may be added in the future).

Is this really that bad? It's (relatively) simply to implement, the out
of tree module does not diverge from the in kernel one too much, and I
don't think the requirement for lwtunnels capable kernel for the newest
features is unreasonable.

Thanks,

 Jiri
Eric Garver Dec. 8, 2016, 8:24 p.m. UTC | #11
On Wed, Dec 07, 2016 at 08:47:56AM +0800, Yang, Yi wrote:
> On Tue, Dec 06, 2016 at 09:38:09AM -0500, Eric Garver wrote:
> > On Tue, Dec 06, 2016 at 07:17:20AM +0000, Yang, Yi Y wrote:
> > > Hi, guys
> > 
> > Hi Yi,
> > 
> > > This patch isn't updated from June on, Cascardo said he/Eric is still
> > > working on this, but six months passed, we don't see any following
> > 
> > Work is still ongoing. There was delay due to some debate about how and
> > when to prefer out-of-tree vs in-tree tunnels.
> 
> I'd like to know how you will handle 3 cases Pravin mentioned
> 
> """
> Case 1. OVS kernel module is upstream. It is straight forward to
> tunnel devices on upstream kernel module. STT and lisp are not
> available.
> Case 2. OVS kernel module is out of tree. In this case OVS has compat
> code and USE_UPSTREAM_TUNNEL is defined. We are using upstream kernel
> modules for geneve, gre and vxlan, for rest of vport. (stt and lisp)
> we are using netdevices from compat code.
> Case 3. OVS kernel module is out of tree and not using upstream tunnel
> devices. we have to fallback to  OVS compat code for all tunnel
> modules.
> """
> 
> According to the below Cascardo's reply, it seems those old patches can
> handle all the cases, but my test confirmed we can't create vxlan-gpe if
> we don't change the compatibility code, I want to hear your idea about
> this.
> 
> """
> So, in summary, we drop this patch, submit what we had before, make sure
> it
> works in the following scenarions:
> 
> 1) upstream ovs and tunnels are used;
>   1a) metadata tunnels can be created, those are used;
>   1b) we use compat vports if the configuration allows that;
> 
> 2) out-of-tree ovs and out-of-tree tunnels are used;
>    we make sure using rtnetlink will fail and compat vport is used;
>    NOTE: this should work even with the old out-of-tree code that named
>          drivers as vxlan instead of ovs_vxlan.
> 
> 3) out-of-tree ovs and upstream/in-tree tunnels are used;
>    it should work just like with upstream ovs, unless the out-of-tree
> code does
>    not support metadata tunnels, in which case, it should fallback to
> compat
>    code.
> """
> 
> > 
> > > 
> > > So my advice about this is we can push patch [2] to Linux net-next
> > > first, then apply patch series
> > > https://mail.openvswitch.org/pipermail/ovs-dev/2016-June/316879.html
> > > from Cascardo and apply [1], that can cover all the cases Pravin
> > > mentioned.
> > 
> > As Cascardo and Jesse mentioned back in June [1], we should not be
> > adding new features to this interface. GPE has been backported to the
> > out-of-tree VXLAN code. The only part that remains is userspace changes
> > to create with rtnetlink, which is still being worked on.
> 
> I notice if we fallback to ovs compat modules to create vxlan, it will
> use generic netlink but not rtnetlink, do you meam you're changing
> generic netlink in function dpif_netlink_vport_transact in lib/dpif-netlink.c to rtnetlink?

If using out-of-tree modules we will try to create using rtnetlink
before falling back to genetlink.

> I want to know when you have a test patch available, I can help test
> even implement it.

That would be appreciated.
Yang, Yi Dec. 9, 2016, 12:47 a.m. UTC | #12
On Thu, Dec 08, 2016 at 03:24:35PM -0500, Eric Garver wrote:
> On Wed, Dec 07, 2016 at 08:47:56AM +0800, Yang, Yi wrote:
> > 
> > I notice if we fallback to ovs compat modules to create vxlan, it will
> > use generic netlink but not rtnetlink, do you meam you're changing
> > generic netlink in function dpif_netlink_vport_transact in lib/dpif-netlink.c to rtnetlink?
> 
> If using out-of-tree modules we will try to create using rtnetlink
> before falling back to genetlink.
> 
> > I want to know when you have a test patch available, I can help test
> > even implement it.
> 
> That would be appreciated.

Thank you, look forward to seeing your patches available ASAP :-)
Pravin Shelar Dec. 9, 2016, 6:49 a.m. UTC | #13
On Thu, Dec 8, 2016 at 1:14 AM, Jiri Benc <jbenc@redhat.com> wrote:
> On Wed, 7 Dec 2016 16:35:59 -0800, Pravin Shelar wrote:
>> In compat mode, OVS tunnel devices are not used in same way as LWT,
>> since OVS do support kernel version that does not have core LWT
>> support. Therefore we have to use legacy vport APIs to create these
>> ports.
>
> I see. Yes, that's unfortunate.
>
>> There might be a way to configure the device, once it is
>> created, using rtnetlink API but would complicate the code. So I think
>> in such cases like GPE we could to add code to the legacy code.
>
> Could we just support the newest shiniest features only with lwtunnel
> capable kernel? Kernel 4.3 is out for more than a year already, that's
> a long time. And several more months will pass before this is available
> in an Open vSwitch release.
>
OVS out of tree kernel module is using compat tunnel code upto kernel
4.5 kernel even thought LWT is available in these kernels. This is due
to missing features on these kernel which are backported to OVS
module. In future we could bump up requirements of kernel again.
Therefore I think we could add compat code for GPE given it is not
that complicated.
Jiri Benc Dec. 9, 2016, 8:43 a.m. UTC | #14
On Thu, 8 Dec 2016 22:49:56 -0800, Pravin Shelar wrote:
> OVS out of tree kernel module is using compat tunnel code upto kernel
> 4.5 kernel even thought LWT is available in these kernels. This is due
> to missing features on these kernel which are backported to OVS
> module. In future we could bump up requirements of kernel again.
> Therefore I think we could add compat code for GPE given it is not
> that complicated.

What I'm concerned about is not so much the code in the out of tree
module but the added code that would have to try genetlink for
VXLAN-GPE. I hoped to use rtnetlink only for this; adding genetlink
would required quite a bit of code (also because it will have to be
tried only for the out of tree module but never for the in kernel one;
this differs from what we'll do for VXLAN itself).

We'd end up with three different handling for different features:

(1) genetlink+rtnetlink for both out of tree and in tree module (for
    e.g. plain VXLAN)
(2) genetlink+rtnetlink for out of tree module, rtnetlink for in three
    module (for VXLAN-GPE)
(3) rtnetlink for both out of tree and in tree module (new VXLAN
    features added in the future)

I hoped to avoid this. Especially given that (2) covers only very
limited time period.

But if you insist...

 Jiri
Pravin Shelar Dec. 9, 2016, 11:12 p.m. UTC | #15
On Fri, Dec 9, 2016 at 12:43 AM, Jiri Benc <jbenc@redhat.com> wrote:
> On Thu, 8 Dec 2016 22:49:56 -0800, Pravin Shelar wrote:
>> OVS out of tree kernel module is using compat tunnel code upto kernel
>> 4.5 kernel even thought LWT is available in these kernels. This is due
>> to missing features on these kernel which are backported to OVS
>> module. In future we could bump up requirements of kernel again.
>> Therefore I think we could add compat code for GPE given it is not
>> that complicated.
>
> What I'm concerned about is not so much the code in the out of tree
> module but the added code that would have to try genetlink for
> VXLAN-GPE. I hoped to use rtnetlink only for this; adding genetlink
> would required quite a bit of code (also because it will have to be
> tried only for the out of tree module but never for the in kernel one;
> this differs from what we'll do for VXLAN itself).
>
> We'd end up with three different handling for different features:
>
> (1) genetlink+rtnetlink for both out of tree and in tree module (for
>     e.g. plain VXLAN)
> (2) genetlink+rtnetlink for out of tree module, rtnetlink for in three
>     module (for VXLAN-GPE)
> (3) rtnetlink for both out of tree and in tree module (new VXLAN
>     features added in the future)
>
I see where the confusion is.

If you are using OVS tunnel compat code then you can not use LWT
interfaces even if kernel supports LWT. So you just need to detect
what out of tree kernel module is using and accordingly use genetlink
or rtnetlink interface.
If you read this thread you would see discussion related to this and
you would be able to avoid this complexity.

So from userspace you just need to detect if you can create ovs_geneve
interface if it is successfull use existing genetlink code as it is
for ALL tunnel type, if it fails move to rtnetlink interface for ALL
tunnel types if that fails too then fall back to existing genetlink
interface. This works irrespective of OVS kernel module.

Therefore is not much difference even if we add compat interface for VXLAN-GPE.
Eric Garver Jan. 9, 2017, 9:53 p.m. UTC | #16
On Fri, Dec 09, 2016 at 03:12:39PM -0800, Pravin Shelar wrote:
> On Fri, Dec 9, 2016 at 12:43 AM, Jiri Benc <jbenc@redhat.com> wrote:
> > On Thu, 8 Dec 2016 22:49:56 -0800, Pravin Shelar wrote:
> >> OVS out of tree kernel module is using compat tunnel code upto kernel
> >> 4.5 kernel even thought LWT is available in these kernels. This is due
> >> to missing features on these kernel which are backported to OVS
> >> module. In future we could bump up requirements of kernel again.
> >> Therefore I think we could add compat code for GPE given it is not
> >> that complicated.
> >
> > What I'm concerned about is not so much the code in the out of tree
> > module but the added code that would have to try genetlink for
> > VXLAN-GPE. I hoped to use rtnetlink only for this; adding genetlink
> > would required quite a bit of code (also because it will have to be
> > tried only for the out of tree module but never for the in kernel one;
> > this differs from what we'll do for VXLAN itself).
> >
> > We'd end up with three different handling for different features:
> >
> > (1) genetlink+rtnetlink for both out of tree and in tree module (for
> >     e.g. plain VXLAN)
> > (2) genetlink+rtnetlink for out of tree module, rtnetlink for in three
> >     module (for VXLAN-GPE)
> > (3) rtnetlink for both out of tree and in tree module (new VXLAN
> >     features added in the future)
> >
> I see where the confusion is.
> 
> If you are using OVS tunnel compat code then you can not use LWT
> interfaces even if kernel supports LWT. So you just need to detect
> what out of tree kernel module is using and accordingly use genetlink
> or rtnetlink interface.
> If you read this thread you would see discussion related to this and
> you would be able to avoid this complexity.
> 
> So from userspace you just need to detect if you can create ovs_geneve
> interface if it is successfull use existing genetlink code as it is

I don't follow why probing for ovs_geneve should be our indication to
try genetlink first. Can you elaborate?

> for ALL tunnel type, if it fails move to rtnetlink interface for ALL
> tunnel types if that fails too then fall back to existing genetlink
> interface. This works irrespective of OVS kernel module.
> 
> Therefore is not much difference even if we add compat interface for VXLAN-GPE.
Pravin Shelar Jan. 10, 2017, 1:59 p.m. UTC | #17
On Tue, Jan 10, 2017 at 3:23 AM, Eric Garver <e@erig.me> wrote:
> On Fri, Dec 09, 2016 at 03:12:39PM -0800, Pravin Shelar wrote:
>> On Fri, Dec 9, 2016 at 12:43 AM, Jiri Benc <jbenc@redhat.com> wrote:
>> > On Thu, 8 Dec 2016 22:49:56 -0800, Pravin Shelar wrote:
>> >> OVS out of tree kernel module is using compat tunnel code upto kernel
>> >> 4.5 kernel even thought LWT is available in these kernels. This is due
>> >> to missing features on these kernel which are backported to OVS
>> >> module. In future we could bump up requirements of kernel again.
>> >> Therefore I think we could add compat code for GPE given it is not
>> >> that complicated.
>> >
>> > What I'm concerned about is not so much the code in the out of tree
>> > module but the added code that would have to try genetlink for
>> > VXLAN-GPE. I hoped to use rtnetlink only for this; adding genetlink
>> > would required quite a bit of code (also because it will have to be
>> > tried only for the out of tree module but never for the in kernel one;
>> > this differs from what we'll do for VXLAN itself).
>> >
>> > We'd end up with three different handling for different features:
>> >
>> > (1) genetlink+rtnetlink for both out of tree and in tree module (for
>> >     e.g. plain VXLAN)
>> > (2) genetlink+rtnetlink for out of tree module, rtnetlink for in three
>> >     module (for VXLAN-GPE)
>> > (3) rtnetlink for both out of tree and in tree module (new VXLAN
>> >     features added in the future)
>> >
>> I see where the confusion is.
>>
>> If you are using OVS tunnel compat code then you can not use LWT
>> interfaces even if kernel supports LWT. So you just need to detect
>> what out of tree kernel module is using and accordingly use genetlink
>> or rtnetlink interface.
>> If you read this thread you would see discussion related to this and
>> you would be able to avoid this complexity.
>>
>> So from userspace you just need to detect if you can create ovs_geneve
>> interface if it is successfull use existing genetlink code as it is
>
> I don't follow why probing for ovs_geneve should be our indication to
> try genetlink first. Can you elaborate?
>

OVS kernel module has compile time checks for various kernel features,
if any of required tunnel feature is missing OVS kernel module
compiles in support for its own tunnel implementation. This compat
tunnel implementation is exposed as ovs_* tunnel device.
Therefore if ovs_geneve device availability shows that current kernel
tunnel device does not support all features and we should use OVS
compat tunnel implementation. To use compat-tunnel implementation we
have to use gnetlink interface. OVS compat tunnels code do not support
LWT interface.

>> for ALL tunnel type, if it fails move to rtnetlink interface for ALL
>> tunnel types if that fails too then fall back to existing genetlink
>> interface. This works irrespective of OVS kernel module.
>>
>> Therefore is not much difference even if we add compat interface for VXLAN-GPE.
Jiri Benc Jan. 10, 2017, 2:44 p.m. UTC | #18
On Tue, 10 Jan 2017 19:29:21 +0530, Pravin Shelar wrote:
> OVS kernel module has compile time checks for various kernel features,
> if any of required tunnel feature is missing OVS kernel module
> compiles in support for its own tunnel implementation. This compat
> tunnel implementation is exposed as ovs_* tunnel device.
> Therefore if ovs_geneve device availability shows that current kernel
> tunnel device does not support all features and we should use OVS
> compat tunnel implementation. To use compat-tunnel implementation we
> have to use gnetlink interface. OVS compat tunnels code do not support
> LWT interface.

I've been trying to wrap my head around this for some time already but
I'm afraid I may still not understand what you mean.

By "exposed as ovs_* tunnel device", do you mean rtnetlink kind? If so,
then the steps to create the interface (a geneve tunnel in this example)
would be:

create an ovs_geneve interface using rtnetlink
if successful {
	delete the created interface
	create geneve interface using genetlink
	if successful {
		done
	} else {
		fail
	}
} else {
	create lwtunnel geneve interface using rtnetlink
	check parameters of the created interface
	if it is lwtunnel {
		done
	} else {
		delete the created interface
		create geneve interface using genetlink
		if successful {
			done
		} else {
			fail
		}
	}
}

Is it what you have in mind?

 Jiri
Jiri Benc Jan. 10, 2017, 3:26 p.m. UTC | #19
On Tue, 10 Jan 2017 15:44:07 +0100, Jiri Benc wrote:
> create an ovs_geneve interface using rtnetlink

This can be done just once. The pseudocode at startup thus would be:

create an ovs_geneve interface using rtnetlink
if successful {
	delete the created interface
	set out_of_tree flag
}

And interface creation:

if not out_of_tree {
	create lwtunnel geneve interface using rtnetlink
	check parameters of the created interface
	if it is lwtunnel {
		done, exit
	} else {
		delete the created interface
	}
}
create geneve interface using genetlink
if successful {
	done
} else {
	fail
}

Is that what you want? If so, should the out_of_tree flag be queried
and set per tunnel type, or just based globally on ovs_geneve (or a
different kind)?

 Jiri
Pravin Shelar Jan. 10, 2017, 3:29 p.m. UTC | #20
On Tue, Jan 10, 2017 at 8:14 PM, Jiri Benc <jbenc@redhat.com> wrote:
> On Tue, 10 Jan 2017 19:29:21 +0530, Pravin Shelar wrote:
>> OVS kernel module has compile time checks for various kernel features,
>> if any of required tunnel feature is missing OVS kernel module
>> compiles in support for its own tunnel implementation. This compat
>> tunnel implementation is exposed as ovs_* tunnel device.
>> Therefore if ovs_geneve device availability shows that current kernel
>> tunnel device does not support all features and we should use OVS
>> compat tunnel implementation. To use compat-tunnel implementation we
>> have to use gnetlink interface. OVS compat tunnels code do not support
>> LWT interface.
>
> I've been trying to wrap my head around this for some time already but
> I'm afraid I may still not understand what you mean.
>
> By "exposed as ovs_* tunnel device", do you mean rtnetlink kind? If so,
> then the steps to create the interface (a geneve tunnel in this example)
> would be:
>
> create an ovs_geneve interface using rtnetlink
> if successful {
>         delete the created interface
>         create geneve interface using genetlink
>         if successful {
>                 done
>         } else {
>                 fail
>         }
> } else {
>         create lwtunnel geneve interface using rtnetlink
>         check parameters of the created interface
>         if it is lwtunnel {
>                 done
>         } else {
>                 delete the created interface
>                 create geneve interface using genetlink
>                 if successful {
>                         done
>                 } else {
>                         fail
>                 }
>         }
> }
>
> Is it what you have in mind?
>
Yes. This looks good to me.

Once it is done for very first tunnel device we do not need to repeat
it for any other tunnel device irrespective of the type. If OVS is
using LWT for one type then all other tunnel type are managed over LWT
interface and Vice Versa.
Eric Garver Jan. 10, 2017, 3:49 p.m. UTC | #21
On Tue, Jan 10, 2017 at 04:26:10PM +0100, Jiri Benc wrote:
> On Tue, 10 Jan 2017 15:44:07 +0100, Jiri Benc wrote:
> > create an ovs_geneve interface using rtnetlink
> 
> This can be done just once. The pseudocode at startup thus would be:
> 
> create an ovs_geneve interface using rtnetlink
> if successful {
> 	delete the created interface
> 	set out_of_tree flag
> }
> 
> And interface creation:
> 
> if not out_of_tree {
> 	create lwtunnel geneve interface using rtnetlink
> 	check parameters of the created interface
> 	if it is lwtunnel {
> 		done, exit
> 	} else {
> 		delete the created interface
> 	}
> }
> create geneve interface using genetlink
> if successful {
> 	done
> } else {
> 	fail
> }
> 
> Is that what you want? If so, should the out_of_tree flag be queried
> and set per tunnel type, or just based globally on ovs_geneve (or a
> different kind)?
> 

With Pravin's last feedback I think we can do the lwt probe on init as
well.

== On startup/init ==

create an ovs_geneve interface using rtnetlink
if successful {
 	check parameters of the created interface
 	if it is lwtunnel {
        set lwt flag
    }

	delete the created interface
	set out_of_tree flag
}

== On interface creation ==

if not out-of-tree and is lwt {
 	create lwtunnel geneve interface using rtnetlink
    if successful {
        done, exit
    }
}
create geneve interface using genetlink
if successful {
	done
} else {
	fail
}
Jiri Benc Jan. 10, 2017, 4 p.m. UTC | #22
On Tue, 10 Jan 2017 10:49:15 -0500, Eric Garver wrote:
> With Pravin's last feedback I think we can do the lwt probe on init as
> well.

Yes, we can.

> == On startup/init ==
> 
> create an ovs_geneve interface using rtnetlink
> if successful {
>  	check parameters of the created interface
>  	if it is lwtunnel {
>         set lwt flag
>     }
> 
> 	delete the created interface
> 	set out_of_tree flag
> }

Although this would need to be a bit more complex, as the lwt probe
needs to be done on tunnel without the ovs_ prefix, i.e.:

create an ovs_geneve interface using rtnetlink
if successful {
	delete the created interface
	set out_of_tree flag
} else {
	create a geneve interface using rtnetlink
 	check parameters of the created interface
 	if it is lwtunnel {
        	set lwt flag
	}
	delete the created interface
}

And note that we'd still have to read back parameters of the created
interface on each interface creation even with the lwt flag set (as the
kernel may not support VXLAN-GPE, for example). I'm not sure we save
much by doing the lwt check on startup. But it can be done.

 Jiri
diff mbox

Patch

diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 0d03334..2286c3e 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -1062,6 +1062,9 @@  dpif_netlink_port_query__(const struct dpif_netlink *dpif, odp_port_t port_no,
 static int
 dpif_netlink_port_create(struct netdev *netdev)
 {
+#ifndef USE_UPSTREAM_TUNNEL
+    return EOPNOTSUPP;
+#else
     switch (netdev_to_ovs_vport_type(netdev_get_type(netdev))) {
     case OVS_VPORT_TYPE_VXLAN:
         return netdev_vxlan_create(netdev);
@@ -1077,6 +1080,7 @@  dpif_netlink_port_create(struct netdev *netdev)
         return EOPNOTSUPP;
     }
     return 0;
+#endif
 }

 static int
--
2.1.0


[2]:

diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h
index 12260d8..17e21cb 100644
--- a/datapath/linux/compat/include/linux/openvswitch.h
+++ b/datapath/linux/compat/include/linux/openvswitch.h
@@ -291,6 +291,7 @@  enum ovs_vport_attr {
 enum {
        OVS_VXLAN_EXT_UNSPEC,
        OVS_VXLAN_EXT_GBP,      /* Flag or __u32 */
+       OVS_VXLAN_EXT_GPE,      /* Flag or __u32 */
        __OVS_VXLAN_EXT_MAX,
 };

diff --git a/datapath/vport-vxlan.c b/datapath/vport-vxlan.c
index 11965c0..a5882ed 100644
--- a/datapath/vport-vxlan.c
+++ b/datapath/vport-vxlan.c
@@ -54,11 +54,26 @@  static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
                nla_nest_end(skb, exts);
        }

+       if (vxlan->flags & VXLAN_F_GPE) {
+               struct nlattr *exts;
+
+               exts = nla_nest_start(skb, OVS_TUNNEL_ATTR_EXTENSION);
+               if (!exts)
+                       return -EMSGSIZE;
+
+               if (vxlan->flags & VXLAN_F_GPE &&
+                   nla_put_flag(skb, OVS_VXLAN_EXT_GPE))
+                       return -EMSGSIZE;
+
+               nla_nest_end(skb, exts);
+       }
+
        return 0;
 }

 static const struct nla_policy exts_policy[OVS_VXLAN_EXT_MAX + 1] = {
        [OVS_VXLAN_EXT_GBP]     = { .type = NLA_FLAG, },
+       [OVS_VXLAN_EXT_GPE]     = { .type = NLA_FLAG, },
 };

 static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr,
@@ -76,6 +91,8 @@  static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr,

        if (exts[OVS_VXLAN_EXT_GBP])
                conf->flags |= VXLAN_F_GBP;
+       if (exts[OVS_VXLAN_EXT_GPE])
+               conf->flags |= VXLAN_F_GPE;

        return 0;
 }