diff mbox series

[ovs-dev,net-next] openvswitch: Make metadata_dst tunnel work in IP_TUNNEL_INFO_BRIDGE mode

Message ID 1553164451-904-1-git-send-email-wenxu@ucloud.cn
State Awaiting Upstream
Headers show
Series [ovs-dev,net-next] openvswitch: Make metadata_dst tunnel work in IP_TUNNEL_INFO_BRIDGE mode | expand

Commit Message

wenxu March 21, 2019, 10:34 a.m. UTC
From: wenxu <wenxu@ucloud.cn>

There is currently no support for the multicast/broadcast aspects
of VXLAN in ovs. In the datapath flow the tun_dst must be specified.
But in the IP_TUNNEL_INFO_BRIDGE mode the tun_dst cannot be specified,
and the packet can be forwarded through the fdb of the vxlan device. In
this mode the broadcast/multicast packet can be sent in the
following way in ovs.

ovs-vsctl add-port br0 vxlan -- set in vxlan type=vxlan \
	options:key=1000 options:remote_ip=flow
ovs-ofctl add-flow br0 in_port=LOCAL,dl_dst=ff:ff:ff:ff:ff:ff,\
	action=output:vxlan

bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.1 \
	src_vni 1000 vni 1000 self
bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.2 \
src_vni 1000 vni 1000 self

Signed-off-by: wenxu <wenxu@ucloud.cn>
---
 include/uapi/linux/openvswitch.h |  1 +
 net/openvswitch/flow_netlink.c   | 32 ++++++++++++++++++++++++++------
 2 files changed, 27 insertions(+), 6 deletions(-)

Comments

Pravin Shelar March 23, 2019, 7:50 a.m. UTC | #1
On Thu, Mar 21, 2019 at 3:34 AM <wenxu@ucloud.cn> wrote:
>
> From: wenxu <wenxu@ucloud.cn>
>
> There is currently no support for the multicasti/broadcst aspects
> of VXLAN in ovs. In the datapath flow the tun_dst must specific.
> But in the IP_TUNNEL_INFO_BRIDGE mode the tun_dst can not be specific.
> And the packet can forward through the fdb of vxlan devcice. In
> this mode the broadcast/multicast packet can be sent through the
> following ways in ovs.
>
> ovs-vsctl add-port br0 vxlan -- set in vxlan type=vxlan \
>         options:key=1000 options:remote_ip=flow
> ovs-ofctl add-flow br0 in_port=LOCAL,dl_dst=ff:ff:ff:ff:ff:ff,\
>         action=output:vxlan
>
> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.1 \
>         src_vni 1000 vni 1000 self
> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.2 \
> src_vni 1000 vni 1000 self
>
This would make the datapath a bit complicated; can you give an example of such a use-case?

> Signed-off-by: wenxu <wenxu@ucloud.cn>
> ---
>  include/uapi/linux/openvswitch.h |  1 +
>  net/openvswitch/flow_netlink.c   | 32 ++++++++++++++++++++++++++------
>  2 files changed, 27 insertions(+), 6 deletions(-)
>
> diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
> index dbe0cbe..696a308 100644
> --- a/include/uapi/linux/openvswitch.h
> +++ b/include/uapi/linux/openvswitch.h
> @@ -364,6 +364,7 @@ enum ovs_tunnel_key_attr {
>         OVS_TUNNEL_KEY_ATTR_IPV6_DST,           /* struct in6_addr dst IPv6 address. */
>         OVS_TUNNEL_KEY_ATTR_PAD,
>         OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,        /* struct erspan_metadata */
> +       OVS_TUNNEL_KEY_ATTR_NO_IPV4_DST,        /* No argument. No dst IP address. */
>         __OVS_TUNNEL_KEY_ATTR_MAX
>  };
>
> diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
> index 691da85..033df5c 100644
> --- a/net/openvswitch/flow_netlink.c
> +++ b/net/openvswitch/flow_netlink.c
> @@ -403,6 +403,7 @@ size_t ovs_key_attr_size(void)
>         [OVS_TUNNEL_KEY_ATTR_IPV6_SRC]      = { .len = sizeof(struct in6_addr) },
>         [OVS_TUNNEL_KEY_ATTR_IPV6_DST]      = { .len = sizeof(struct in6_addr) },
>         [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS]   = { .len = OVS_ATTR_VARIABLE },
> +       [OVS_TUNNEL_KEY_ATTR_NO_IPV4_DST]   = { .len = 0 },
>  };
>
>  static const struct ovs_len_tbl
> @@ -663,7 +664,7 @@ static int erspan_tun_opt_from_nlattr(const struct nlattr *a,
>
>  static int ip_tun_from_nlattr(const struct nlattr *attr,
>                               struct sw_flow_match *match, bool is_mask,
> -                             bool log)
> +                             bool log, bool *no_ipv4_dst)
>  {
>         bool ttl = false, ipv4 = false, ipv6 = false;
>         __be16 tun_flags = 0;
> @@ -671,6 +672,9 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
>         struct nlattr *a;
>         int rem;
>
> +       if (no_ipv4_dst)
> +               *no_ipv4_dst = false;
> +
>         nla_for_each_nested(a, attr, rem) {
>                 int type = nla_type(a);
>                 int err;
> @@ -782,6 +786,12 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
>                         tun_flags |= TUNNEL_ERSPAN_OPT;
>                         opts_type = type;
>                         break;
> +               case OVS_TUNNEL_KEY_ATTR_NO_IPV4_DST:
> +                       if (no_ipv4_dst) {
> +                               *no_ipv4_dst = true;
> +                               ipv4 = true;
> +                       }
> +                       break;
>                 default:
>                         OVS_NLERR(log, "Unknown IP tunnel attribute %d",
>                                   type);
> @@ -812,9 +822,16 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
>                         OVS_NLERR(log, "IP tunnel dst address not specified");
>                         return -EINVAL;
>                 }
> -               if (ipv4 && !match->key->tun_key.u.ipv4.dst) {
> -                       OVS_NLERR(log, "IPv4 tunnel dst address is zero");
> -                       return -EINVAL;
> +               if (ipv4) {
> +                       bool no_dst = no_ipv4_dst ? *no_ipv4_dst : false;
> +
> +                       if (no_dst && match->key->tun_key.u.ipv4.dst) {
> +                               OVS_NLERR(log, "IPv4 tunnel dst address is not zero");
> +                               return -EINVAL;
> +                       } else if (!no_dst && !match->key->tun_key.u.ipv4.dst) {
> +                               OVS_NLERR(log, "IPv4 tunnel dst address is zero");
> +                               return -EINVAL;
> +                       }
>                 }
>                 if (ipv6 && ipv6_addr_any(&match->key->tun_key.u.ipv6.dst)) {
>                         OVS_NLERR(log, "IPv6 tunnel dst address is zero");
> @@ -1178,7 +1195,7 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
>         }
>         if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
>                 if (ip_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
> -                                      is_mask, log) < 0)
> +                                      is_mask, log, NULL) < 0)
Please pass non-null value here to have complete validation of tunnel
parameters.

>                         return -EINVAL;
>                 *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
>         }
> @@ -2551,10 +2568,11 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
>         struct nlattr *a;
>         int err = 0, start, opts_type;
>         __be16 dst_opt_type;
> +       bool no_ipv4_dst;
>
>         dst_opt_type = 0;
>         ovs_match_init(&match, &key, true, NULL);
> -       opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log);
> +       opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log, &no_ipv4_dst);
>         if (opts_type < 0)
>                 return opts_type;
>
> @@ -2605,6 +2623,8 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
>         tun_info->mode = IP_TUNNEL_INFO_TX;
>         if (key.tun_proto == AF_INET6)
>                 tun_info->mode |= IP_TUNNEL_INFO_IPV6;
> +       else if (key.tun_proto == AF_INET && no_ipv4_dst)
> +               tun_info->mode |= IP_TUNNEL_INFO_BRIDGE;
>         tun_info->key = key.tun_key;
>
>         /* We need to store the options in the action itself since
> --
> 1.8.3.1
>
wenxu March 23, 2019, 9:18 a.m. UTC | #2
On 2019/3/23 下午3:50, Pravin Shelar wrote:
> On Thu, Mar 21, 2019 at 3:34 AM <wenxu@ucloud.cn> wrote:
>> From: wenxu <wenxu@ucloud.cn>
>>
>> There is currently no support for the multicasti/broadcst aspects
>> of VXLAN in ovs. In the datapath flow the tun_dst must specific.
>> But in the IP_TUNNEL_INFO_BRIDGE mode the tun_dst can not be specific.
>> And the packet can forward through the fdb of vxlan devcice. In
>> this mode the broadcast/multicast packet can be sent through the
>> following ways in ovs.
>>
>> ovs-vsctl add-port br0 vxlan -- set in vxlan type=vxlan \
>>         options:key=1000 options:remote_ip=flow
>> ovs-ofctl add-flow br0 in_port=LOCAL,dl_dst=ff:ff:ff:ff:ff:ff,\
>>         action=output:vxlan
>>
>> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.1 \
>>         src_vni 1000 vni 1000 self
>> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.2 \
>> src_vni 1000 vni 1000 self
>>
> This would make datapath bit complicated, can you give example of such use-case?
>
There is currently no support for the multicast/broadcast aspects
of VXLAN in ovs. To get around the lack of multicast support, it is possible to pre-provision MAC to IP address mappings either manually or from a controller.

With this patch we can achieve this through the fdb of the lower vxlan
device.

For example, three servers connect with vxlan.
server1 IP 10.0.0.1 tunnel IP  172.168.0.1 vni 1000
server2 IP 10.0.0.2 tunnel IP  172.168.0.2 vni 1000
server3 IP 10.0.0.3 tunnel IP  172.168.0.3 vni 1000

All broadcast ARP requests from server1 can be sent to vxlan_sys_4789
in IP_TUNNEL_INFO_BRIDGE mode. The broadcast packet can then be sent through
the fdb table of the vxlan device as follows:

bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.1 \
        src_vni 1000 vni 1000 self
bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.2 \
src_vni 1000 vni 1000 self


Not any for multicast case. This patch make ovs vxlan tunnel using the fdb
table of the lower vxlan device.
wenxu March 23, 2019, 9:28 a.m. UTC | #3
On 2019/3/23 下午3:50, Pravin Shelar wrote:
> On Thu, Mar 21, 2019 at 3:34 AM <wenxu@ucloud.cn> wrote:
>> From: wenxu <wenxu@ucloud.cn>
>>
>> There is currently no support for the multicasti/broadcst aspects
>> of VXLAN in ovs. In the datapath flow the tun_dst must specific.
>> But in the IP_TUNNEL_INFO_BRIDGE mode the tun_dst can not be specific.
>> And the packet can forward through the fdb of vxlan devcice. In
>> this mode the broadcast/multicast packet can be sent through the
>> following ways in ovs.
>>
>> ovs-vsctl add-port br0 vxlan -- set in vxlan type=vxlan \
>>         options:key=1000 options:remote_ip=flow
>> ovs-ofctl add-flow br0 in_port=LOCAL,dl_dst=ff:ff:ff:ff:ff:ff,\
>>         action=output:vxlan
>>
>> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.1 \
>>         src_vni 1000 vni 1000 self
>> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.2 \
>> src_vni 1000 vni 1000 self
>>
> This would make datapath bit complicated, can you give example of such use-case?
>

There is currently no support for the multicast aspects
of VXLAN in ovs.
With this patch we can achieve this through the fdb of the lower vxlan
device.

For example, three servers connect with vxlan.
server1 IP 10.0.0.1 tunnel IP  172.168.0.1 vni 1000
server2 IP 10.0.0.2 tunnel IP  172.168.0.2 vni 1000
server3 IP 10.0.0.3 tunnel IP  172.168.0.3 vni 1000

All broadcast ARP requests from server1 can be sent to vxlan_sys_4789
in IP_TUNNEL_INFO_BRIDGE mode. The broadcast packet can then be sent through
the fdb table of the vxlan device as follows:

bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.1 \
        src_vni 1000 vni 1000 self
bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.2 \
src_vni 1000 vni 1000 self


Not any for multicast case. This patch make ovs vxlan tunnel using the fdb
table of lower vxlan device.
wenxu March 23, 2019, 10:01 a.m. UTC | #4
On 2019/3/23 下午3:50, Pravin Shelar wrote:
>
>> diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
>> index 691da85..033df5c 100644
>> --- a/net/openvswitch/flow_netlink.c
>> +++ b/net/openvswitch/flow_netlink.c
>> @@ -403,6 +403,7 @@ size_t ovs_key_attr_size(void)
>>         [OVS_TUNNEL_KEY_ATTR_IPV6_SRC]      = { .len = sizeof(struct in6_addr) },
>>         [OVS_TUNNEL_KEY_ATTR_IPV6_DST]      = { .len = sizeof(struct in6_addr) },
>>         [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS]   = { .len = OVS_ATTR_VARIABLE },
>> +       [OVS_TUNNEL_KEY_ATTR_NO_IPV4_DST]   = { .len = 0 },
>>  };
>>
>>  static const struct ovs_len_tbl
>> @@ -663,7 +664,7 @@ static int erspan_tun_opt_from_nlattr(const struct nlattr *a,
>>
>>  static int ip_tun_from_nlattr(const struct nlattr *attr,
>>                               struct sw_flow_match *match, bool is_mask,
>> -                             bool log)
>> +                             bool log, bool *no_ipv4_dst)
>>  {
>>         bool ttl = false, ipv4 = false, ipv6 = false;
>>         __be16 tun_flags = 0;
>> @@ -671,6 +672,9 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
>>         struct nlattr *a;
>>         int rem;
>>
>> +       if (no_ipv4_dst)
>> +               *no_ipv4_dst = false;
>> +
>>         nla_for_each_nested(a, attr, rem) {
>>                 int type = nla_type(a);
>>                 int err;
>> @@ -782,6 +786,12 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
>>                         tun_flags |= TUNNEL_ERSPAN_OPT;
>>                         opts_type = type;
>>                         break;
>> +               case OVS_TUNNEL_KEY_ATTR_NO_IPV4_DST:
>> +                       if (no_ipv4_dst) {
>> +                               *no_ipv4_dst = true;
>> +                               ipv4 = true;
>> +                       }
>> +                       break;
>>                 default:
>>                         OVS_NLERR(log, "Unknown IP tunnel attribute %d",
>>                                   type);
>> @@ -812,9 +822,16 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
>>                         OVS_NLERR(log, "IP tunnel dst address not specified");
>>                         return -EINVAL;
>>                 }
>> -               if (ipv4 && !match->key->tun_key.u.ipv4.dst) {
>> -                       OVS_NLERR(log, "IPv4 tunnel dst address is zero");
>> -                       return -EINVAL;
>> +               if (ipv4) {
>> +                       bool no_dst = no_ipv4_dst ? *no_ipv4_dst : false;
>> +
>> +                       if (no_dst && match->key->tun_key.u.ipv4.dst) {
>> +                               OVS_NLERR(log, "IPv4 tunnel dst address is not zero");
>> +                               return -EINVAL;
>> +                       } else if (!no_dst && !match->key->tun_key.u.ipv4.dst) {
>> +                               OVS_NLERR(log, "IPv4 tunnel dst address is zero");
>> +                               return -EINVAL;
>> +                       }
>>                 }
>>                 if (ipv6 && ipv6_addr_any(&match->key->tun_key.u.ipv6.dst)) {
>>                         OVS_NLERR(log, "IPv6 tunnel dst address is zero");
>> @@ -1178,7 +1195,7 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
>>         }
>>         if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
>>                 if (ip_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
>> -                                      is_mask, log) < 0)
>> +                                      is_mask, log, NULL) < 0)
> Please pass non-null value here to have complete validation of tunnel
> parameters.
>
The new attr OVS_TUNNEL_KEY_ATTR_NO_IPV4_DST only matters in the OVS_KEY_ATTR_TUNNEL of an action.

It does not matter in the OVS_KEY_ATTR_TUNNEL of a match, so we pass NULL to keep the original algorithm.
Pravin Shelar March 23, 2019, 9:39 p.m. UTC | #5
On Sat, Mar 23, 2019 at 2:18 AM wenxu <wenxu@ucloud.cn> wrote:
>
> On 2019/3/23 下午3:50, Pravin Shelar wrote:
>
> On Thu, Mar 21, 2019 at 3:34 AM <wenxu@ucloud.cn> wrote:
>
> From: wenxu <wenxu@ucloud.cn>
>
> There is currently no support for the multicasti/broadcst aspects
> of VXLAN in ovs. In the datapath flow the tun_dst must specific.
> But in the IP_TUNNEL_INFO_BRIDGE mode the tun_dst can not be specific.
> And the packet can forward through the fdb of vxlan devcice. In
> this mode the broadcast/multicast packet can be sent through the
> following ways in ovs.
>
> ovs-vsctl add-port br0 vxlan -- set in vxlan type=vxlan \
>         options:key=1000 options:remote_ip=flow
> ovs-ofctl add-flow br0 in_port=LOCAL,dl_dst=ff:ff:ff:ff:ff:ff,\
>         action=output:vxlan
>
> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.1 \
>         src_vni 1000 vni 1000 self
> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.2 \
> src_vni 1000 vni 1000 self
>
> This would make datapath bit complicated, can you give example of such use-case?
>
> There is currently no support for the multicast/broadcast aspects
> of VXLAN in ovs. To get around the lack of multicast support, it is possible to
> pre-provision MAC to IP address mappings either manually or from a controller.
>
> With this patch we can achieve this through the fdb of the lower vxlan
> device.
>
> For example. three severs connects with vxlan.
> server1 IP 10.0.0.1 tunnel IP  172.168.0.1 vni 1000
> server2 IP 10.0.0.2 tunnel IP  172.168.0.2 vni 1000
> server3 IP 10.0.0.3 tunnel IP  172.168.0.3 vni 1000
>
> All the broadcast arp request from server1, can be send to vxlan_sys_4789
> in IP_TUNNEL_INFO_BRIDGE mode. Then the broadcast packet can send through
> the fdb table in the vxlan device as following:
>
> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.1 \
>         src_vni 1000 vni 1000 self
> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.2 \
> src_vni 1000 vni 1000 self
>
>
> Not any for multicast case. This patch make ovs vxlan tunnel using the fdb
> table of lower vxlan device.

Have you tried OVS mac learning?
wenxu March 24, 2019, 7:03 a.m. UTC | #6
On 2019/3/24 上午5:39, Pravin Shelar wrote:
> On Sat, Mar 23, 2019 at 2:18 AM wenxu <wenxu@ucloud.cn> wrote:
>> On 2019/3/23 下午3:50, Pravin Shelar wrote:
>>
>> On Thu, Mar 21, 2019 at 3:34 AM <wenxu@ucloud.cn> wrote:
>>
>> From: wenxu <wenxu@ucloud.cn>
>>
>> There is currently no support for the multicasti/broadcst aspects
>> of VXLAN in ovs. In the datapath flow the tun_dst must specific.
>> But in the IP_TUNNEL_INFO_BRIDGE mode the tun_dst can not be specific.
>> And the packet can forward through the fdb of vxlan devcice. In
>> this mode the broadcast/multicast packet can be sent through the
>> following ways in ovs.
>>
>> ovs-vsctl add-port br0 vxlan -- set in vxlan type=vxlan \
>>         options:key=1000 options:remote_ip=flow
>> ovs-ofctl add-flow br0 in_port=LOCAL,dl_dst=ff:ff:ff:ff:ff:ff,\
>>         action=output:vxlan
>>
>> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.1 \
>>         src_vni 1000 vni 1000 self
>> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.2 \
>> src_vni 1000 vni 1000 self
>>
>> This would make datapath bit complicated, can you give example of such use-case?
>>
>> There is currently no support for the multicast/broadcast aspects
>> of VXLAN in ovs. To get around the lack of multicast support, it is possible to
>> pre-provision MAC to IP address mappings either manually or from a controller.
>>
>> With this patch we can achieve this through the fdb of the lower vxlan
>> device.
>>
>> For example. three severs connects with vxlan.
>> server1 IP 10.0.0.1 tunnel IP  172.168.0.1 vni 1000
>> server2 IP 10.0.0.2 tunnel IP  172.168.0.2 vni 1000
>> server3 IP 10.0.0.3 tunnel IP  172.168.0.3 vni 1000
>>
>> All the broadcast arp request from server1, can be send to vxlan_sys_4789
>> in IP_TUNNEL_INFO_BRIDGE mode. Then the broadcast packet can send through
>> the fdb table in the vxlan device as following:
>>
>> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.1 \
>>         src_vni 1000 vni 1000 self
>> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.2 \
>> src_vni 1000 vni 1000 self
>>
>>
>> Not any for multicast case. This patch make ovs vxlan tunnel using the fdb
>> table of lower vxlan device.
> Have you tried OVS mac learning?
>
The key point is that it lets the ovs vxlan tunnel make use of the fdb table of the lower vxlan device.

The fdb table can be configured statically or populated by mac learning from outside.

For the broadcast example: in ovs, this can only be achieved through multiple output actions that simulate the broadcast.

ovs-ofctl add-flow br0 in_port=server1,dl_dst=ff:ff:ff:ff:ff:ff,actions=set_field:172.168.0.1->tun_dst,output:vxlan,\

    set_field:172.168.0.2->tun_dst,output:vxlan.

But there are limits on the number of output actions.
Pravin Shelar March 24, 2019, 6:46 p.m. UTC | #7
On Sun, Mar 24, 2019 at 12:03 AM wenxu <wenxu@ucloud.cn> wrote:
>
> On 2019/3/24 上午5:39, Pravin Shelar wrote:
> > On Sat, Mar 23, 2019 at 2:18 AM wenxu <wenxu@ucloud.cn> wrote:
> >> On 2019/3/23 下午3:50, Pravin Shelar wrote:
> >>
> >> On Thu, Mar 21, 2019 at 3:34 AM <wenxu@ucloud.cn> wrote:
> >>
> >> From: wenxu <wenxu@ucloud.cn>
> >>
> >> There is currently no support for the multicasti/broadcst aspects
> >> of VXLAN in ovs. In the datapath flow the tun_dst must specific.
> >> But in the IP_TUNNEL_INFO_BRIDGE mode the tun_dst can not be specific.
> >> And the packet can forward through the fdb of vxlan devcice. In
> >> this mode the broadcast/multicast packet can be sent through the
> >> following ways in ovs.
> >>
> >> ovs-vsctl add-port br0 vxlan -- set in vxlan type=vxlan \
> >>         options:key=1000 options:remote_ip=flow
> >> ovs-ofctl add-flow br0 in_port=LOCAL,dl_dst=ff:ff:ff:ff:ff:ff,\
> >>         action=output:vxlan
> >>
> >> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.1 \
> >>         src_vni 1000 vni 1000 self
> >> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.2 \
> >> src_vni 1000 vni 1000 self
> >>
> >> This would make datapath bit complicated, can you give example of such use-case?
> >>
> >> There is currently no support for the multicast/broadcast aspects
> >> of VXLAN in ovs. To get around the lack of multicast support, it is possible to
> >> pre-provision MAC to IP address mappings either manually or from a controller.
> >>
> >> With this patch we can achieve this through the fdb of the lower vxlan
> >> device.
> >>
> >> For example. three severs connects with vxlan.
> >> server1 IP 10.0.0.1 tunnel IP  172.168.0.1 vni 1000
> >> server2 IP 10.0.0.2 tunnel IP  172.168.0.2 vni 1000
> >> server3 IP 10.0.0.3 tunnel IP  172.168.0.3 vni 1000
> >>
> >> All the broadcast arp request from server1, can be send to vxlan_sys_4789
> >> in IP_TUNNEL_INFO_BRIDGE mode. Then the broadcast packet can send through
> >> the fdb table in the vxlan device as following:
> >>
> >> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.1 \
> >>         src_vni 1000 vni 1000 self
> >> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.2 \
> >> src_vni 1000 vni 1000 self
> >>
> >>
> >> Not any for multicast case. This patch make ovs vxlan tunnel using the fdb
> >> table of lower vxlan device.
> > Have you tried OVS mac learning?
> >
> The key point is that it makes ovs vxlan tunnel can make use of the fdb table of lower vxlan device.
>
> The fdb table can be configurable or mac learning from outside.
>
> For the broadcast example.  In the ovs, it can only achieve this through multiple output actions to simulate the broadcast.
>
> ovs-ofctl add-flow br0 in_port=server1,dl_dst=ff:ff:ff:ff:ff:ff,actions=set_field:172.168.0.1->tun_dst,output:vxlan,\
>
>     set_field:172.168.0.2->tun_dst,output:vxlan.
>
> But there are some limits for the number of output actions.
>
I was referring to mac-learning feature in OVS i.e. using learn
action. I wanted to see if there is something that you are not able to
do with OVS learn action.
Tonghao Zhang March 25, 2019, 1:18 a.m. UTC | #8
On Sat, Mar 23, 2019 at 5:28 PM wenxu <wenxu@ucloud.cn> wrote:
>
> On 2019/3/23 下午3:50, Pravin Shelar wrote:
> > On Thu, Mar 21, 2019 at 3:34 AM <wenxu@ucloud.cn> wrote:
> >> From: wenxu <wenxu@ucloud.cn>
> >>
> >> There is currently no support for the multicasti/broadcst aspects
> >> of VXLAN in ovs. In the datapath flow the tun_dst must specific.
> >> But in the IP_TUNNEL_INFO_BRIDGE mode the tun_dst can not be specific.
> >> And the packet can forward through the fdb of vxlan devcice. In
> >> this mode the broadcast/multicast packet can be sent through the
> >> following ways in ovs.
> >>
> >> ovs-vsctl add-port br0 vxlan -- set in vxlan type=vxlan \
> >>         options:key=1000 options:remote_ip=flow
> >> ovs-ofctl add-flow br0 in_port=LOCAL,dl_dst=ff:ff:ff:ff:ff:ff,\
> >>         action=output:vxlan
> >>
> >> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.1 \
> >>         src_vni 1000 vni 1000 self
> >> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.2 \
> >> src_vni 1000 vni 1000 self
> >>
> > This would make datapath bit complicated, can you give example of such use-case?
> >
>
> There is currently no support for the multicast aspects
> of VXLAN in ovs.
> With this patch we can achieve this through the fdb of the lower vxlan
> device.
You can create multiple vxlan devices in ovs, for example:
ovs-vsctl add-port br0 vxlan0 -- set in vxlan0 type=vxlan
options:key=1000 options:remote_ip= 172.168.0.1
ovs-vsctl add-port br0 vxlan1 -- set in vxlan1 type=vxlan
options:key=1000 options:remote_ip= 172.168.0.2

so the ff:ff:ff:ff:ff:ff packets will be sent to the vxlan0 and vxlan1 ports.
> For example. three severs connects with vxlan.
> server1 IP 10.0.0.1 tunnel IP  172.168.0.1 vni 1000
> server2 IP 10.0.0.2 tunnel IP  172.168.0.2 vni 1000
> server3 IP 10.0.0.3 tunnel IP  172.168.0.3 vni 1000
>
> All the broadcast arp request from server1, can be send to vxlan_sys_4789
> in IP_TUNNEL_INFO_BRIDGE mode. Then the broadcast packet can send through
> the fdb table in the vxlan device as following:
>
> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.1 \
>         src_vni 1000 vni 1000 self
> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.2 \
> src_vni 1000 vni 1000 self
>
>
> Not any for multicast case. This patch make ovs vxlan tunnel using the fdb
> table of lower vxlan device.
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
wenxu March 25, 2019, 1:24 a.m. UTC | #9
On 2019/3/25 上午2:46, Pravin Shelar wrote:
> On Sun, Mar 24, 2019 at 12:03 AM wenxu <wenxu@ucloud.cn> wrote:
>> On 2019/3/24 上午5:39, Pravin Shelar wrote:
>>> On Sat, Mar 23, 2019 at 2:18 AM wenxu <wenxu@ucloud.cn> wrote:
>>>> On 2019/3/23 下午3:50, Pravin Shelar wrote:
>>>>
>>>> On Thu, Mar 21, 2019 at 3:34 AM <wenxu@ucloud.cn> wrote:
>>>>
>>>> From: wenxu <wenxu@ucloud.cn>
>>>>
>>>> There is currently no support for the multicasti/broadcst aspects
>>>> of VXLAN in ovs. In the datapath flow the tun_dst must specific.
>>>> But in the IP_TUNNEL_INFO_BRIDGE mode the tun_dst can not be specific.
>>>> And the packet can forward through the fdb of vxlan devcice. In
>>>> this mode the broadcast/multicast packet can be sent through the
>>>> following ways in ovs.
>>>>
>>>> ovs-vsctl add-port br0 vxlan -- set in vxlan type=vxlan \
>>>>         options:key=1000 options:remote_ip=flow
>>>> ovs-ofctl add-flow br0 in_port=LOCAL,dl_dst=ff:ff:ff:ff:ff:ff,\
>>>>         action=output:vxlan
>>>>
>>>> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.1 \
>>>>         src_vni 1000 vni 1000 self
>>>> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.2 \
>>>> src_vni 1000 vni 1000 self
>>>>
>>>> This would make datapath bit complicated, can you give example of such use-case?
>>>>
>>>> There is currently no support for the multicast/broadcast aspects
>>>> of VXLAN in ovs. To get around the lack of multicast support, it is possible to
>>>> pre-provision MAC to IP address mappings either manually or from a controller.
>>>>
>>>> With this patch we can achieve this through the fdb of the lower vxlan
>>>> device.
>>>>
>>>> For example. three severs connects with vxlan.
>>>> server1 IP 10.0.0.1 tunnel IP  172.168.0.1 vni 1000
>>>> server2 IP 10.0.0.2 tunnel IP  172.168.0.2 vni 1000
>>>> server3 IP 10.0.0.3 tunnel IP  172.168.0.3 vni 1000
>>>>
>>>> All the broadcast arp request from server1, can be send to vxlan_sys_4789
>>>> in IP_TUNNEL_INFO_BRIDGE mode. Then the broadcast packet can send through
>>>> the fdb table in the vxlan device as following:
>>>>
>>>> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.1 \
>>>>         src_vni 1000 vni 1000 self
>>>> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.2 \
>>>> src_vni 1000 vni 1000 self
>>>>
>>>>
>>>> Not any for multicast case. This patch make ovs vxlan tunnel using the fdb
>>>> table of lower vxlan device.
>>> Have you tried OVS mac learning?
>>>
>> The key point is that it makes ovs vxlan tunnel can make use of the fdb table of lower vxlan device.
>>
>> The fdb table can be configurable or mac learning from outside.
>>
>> For the broadcast example.  In the ovs, it can only achieve this through multiple output actions to simulate the broadcast.
>>
>> ovs-ofctl add-flow br0 in_port=server1,dl_dst=ff:ff:ff:ff:ff:ff,actions=set_field:172.168.0.1->tun_dst,output:vxlan,\
>>
>>     set_field:172.168.0.2->tun_dst,output:vxlan.
>>
>> But there are some limits for the number of output actions.
>>
> I was referring to mac-learning feature in OVS i.e. using learn
> action. I wanted to see if there is something that you are not able to
> do with OVS learn action.
>
The OVS mac learn action only works for a specific vxlan tunnel port (fixed tun_dst, tun_id), like the following.

ovs-vsctl set in vxlan options:remote_ip=172.168.0.1 options:key=1000

(This is the same problem as for the Linux bridge, which solves it by using IP_TUNNEL_INFO_BRIDGE mode together

with the fdb of the lower vxlan device.)


But it does not work for the flow-based tunnel (remote_ip=flow). There will be a huge number of tunnel peers.

It's hard to manage the tunnel ports with the specific mode.
Tonghao Zhang March 25, 2019, 1:47 a.m. UTC | #10
On Mon, Mar 25, 2019 at 9:24 AM wenxu <wenxu@ucloud.cn> wrote:
>
> On 2019/3/25 上午2:46, Pravin Shelar wrote:
> > On Sun, Mar 24, 2019 at 12:03 AM wenxu <wenxu@ucloud.cn> wrote:
> >> On 2019/3/24 上午5:39, Pravin Shelar wrote:
> >>> On Sat, Mar 23, 2019 at 2:18 AM wenxu <wenxu@ucloud.cn> wrote:
> >>>> On 2019/3/23 下午3:50, Pravin Shelar wrote:
> >>>>
> >>>> On Thu, Mar 21, 2019 at 3:34 AM <wenxu@ucloud.cn> wrote:
> >>>>
> >>>> From: wenxu <wenxu@ucloud.cn>
> >>>>
> >>>> There is currently no support for the multicasti/broadcst aspects
> >>>> of VXLAN in ovs. In the datapath flow the tun_dst must specific.
> >>>> But in the IP_TUNNEL_INFO_BRIDGE mode the tun_dst can not be specific.
> >>>> And the packet can forward through the fdb of vxlan devcice. In
> >>>> this mode the broadcast/multicast packet can be sent through the
> >>>> following ways in ovs.
> >>>>
> >>>> ovs-vsctl add-port br0 vxlan -- set in vxlan type=vxlan \
> >>>>         options:key=1000 options:remote_ip=flow
> >>>> ovs-ofctl add-flow br0 in_port=LOCAL,dl_dst=ff:ff:ff:ff:ff:ff,\
> >>>>         action=output:vxlan
> >>>>
> >>>> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.1 \
> >>>>         src_vni 1000 vni 1000 self
> >>>> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.2 \
> >>>> src_vni 1000 vni 1000 self
> >>>>
> >>>> This would make datapath bit complicated, can you give example of such use-case?
> >>>>
> >>>> There is currently no support for the multicast/broadcast aspects
> >>>> of VXLAN in ovs. To get around the lack of multicast support, it is possible to
> >>>> pre-provision MAC to IP address mappings either manually or from a controller.
> >>>>
> >>>> With this patch we can achieve this through the fdb of the lower vxlan
> >>>> device.
> >>>>
> >>>> For example. three severs connects with vxlan.
> >>>> server1 IP 10.0.0.1 tunnel IP  172.168.0.1 vni 1000
> >>>> server2 IP 10.0.0.2 tunnel IP  172.168.0.2 vni 1000
> >>>> server3 IP 10.0.0.3 tunnel IP  172.168.0.3 vni 1000
> >>>>
> >>>> All the broadcast arp request from server1, can be send to vxlan_sys_4789
> >>>> in IP_TUNNEL_INFO_BRIDGE mode. Then the broadcast packet can send through
> >>>> the fdb table in the vxlan device as following:
> >>>>
> >>>> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.1 \
> >>>>         src_vni 1000 vni 1000 self
> >>>> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.2 \
> >>>> src_vni 1000 vni 1000 self
> >>>>
> >>>>
> >>>> Not any for multicast case. This patch make ovs vxlan tunnel using the fdb
> >>>> table of lower vxlan device.
> >>> Have you tried OVS mac learning?
> >>>
> >> The key point is that it makes ovs vxlan tunnel can make use of the fdb table of lower vxlan device.
> >>
> >> The fdb table can be configurable or mac learning from outside.
> >>
> >> For the broadcast example.  In the ovs, it can only achieve this through multiple output actions to simulate the broadcast.
> >>
> >> ovs-ofctl add-flow br0 in_port=server1,dl_dst=ff:ff:ff:ff:ff:ff,actions=set_field:172.168.0.1->tun_dst,output:vxlan,\
> >>
> >>     set_field:172.168.0.2->tun_dst,output:vxlan.
> >>
> >> But there are some limits for the number of output actions.
> >>
> > I was referring to mac-learning feature in OVS i.e. using learn
> > action. I wanted to see if there is something that you are not able to
> > do with OVS learn action.
> >
> Ovs mac learn action is only work for the specific vxlan tunnel port( fixed tun_dst, tun_id) like following.
>
> ovs-vsctl set in vxlan options:remote_ip=172.168.0.1 options:key=1000
>
> ( This is the same problem for Linux bridge, It achieve this through IP_TUNNEL_INFO_BRIDGE mode work
>
> with the fdb of lower vxlan device)
>
>
> But it is not work for the flow based tunnel (remote_ip=flow),  There will be huge number of the tunnel peer.
One question: why do you flood the ff:ff:ff:ff:ff:ff packets to many
tunnel peers? In the cloud, this should be avoided (e.g. arp), because we
use the control plane to direct the packets only to the related vm/host, not
to all vms/hosts.
> It's hard to manage the tunnel port with the specific mode.
>
>
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
wenxu March 25, 2019, 3:22 a.m. UTC | #11
On 3/25/2019 9:47 AM, Tonghao Zhang wrote:
> On Mon, Mar 25, 2019 at 9:24 AM wenxu <wenxu@ucloud.cn> wrote:
>> On 2019/3/25 上午2:46, Pravin Shelar wrote:
>>> On Sun, Mar 24, 2019 at 12:03 AM wenxu <wenxu@ucloud.cn> wrote:
>>>> On 2019/3/24 上午5:39, Pravin Shelar wrote:
>>>>> On Sat, Mar 23, 2019 at 2:18 AM wenxu <wenxu@ucloud.cn> wrote:
>>>>>> On 2019/3/23 下午3:50, Pravin Shelar wrote:
>>>>>>
>>>>>> On Thu, Mar 21, 2019 at 3:34 AM <wenxu@ucloud.cn> wrote:
>>>>>>
>>>>>> From: wenxu <wenxu@ucloud.cn>
>>>>>>
>>>>>> There is currently no support for the multicasti/broadcst aspects
>>>>>> of VXLAN in ovs. In the datapath flow the tun_dst must specific.
>>>>>> But in the IP_TUNNEL_INFO_BRIDGE mode the tun_dst can not be specific.
>>>>>> And the packet can forward through the fdb of vxlan devcice. In
>>>>>> this mode the broadcast/multicast packet can be sent through the
>>>>>> following ways in ovs.
>>>>>>
>>>>>> ovs-vsctl add-port br0 vxlan -- set in vxlan type=vxlan \
>>>>>>         options:key=1000 options:remote_ip=flow
>>>>>> ovs-ofctl add-flow br0 in_port=LOCAL,dl_dst=ff:ff:ff:ff:ff:ff,\
>>>>>>         action=output:vxlan
>>>>>>
>>>>>> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.1 \
>>>>>>         src_vni 1000 vni 1000 self
>>>>>> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.2 \
>>>>>> src_vni 1000 vni 1000 self
>>>>>>
>>>>>> This would make datapath bit complicated, can you give example of such use-case?
>>>>>>
>>>>>> There is currently no support for the multicast/broadcast aspects
>>>>>> of VXLAN in ovs. To get around the lack of multicast support, it is possible to
>>>>>> pre-provision MAC to IP address mappings either manually or from a controller.
>>>>>>
>>>>>> With this patch we can achieve this through the fdb of the lower vxlan
>>>>>> device.
>>>>>>
>>>>>> For example. three severs connects with vxlan.
>>>>>> server1 IP 10.0.0.1 tunnel IP  172.168.0.1 vni 1000
>>>>>> server2 IP 10.0.0.2 tunnel IP  172.168.0.2 vni 1000
>>>>>> server3 IP 10.0.0.3 tunnel IP  172.168.0.3 vni 1000
>>>>>>
>>>>>> All the broadcast arp request from server1, can be send to vxlan_sys_4789
>>>>>> in IP_TUNNEL_INFO_BRIDGE mode. Then the broadcast packet can send through
>>>>>> the fdb table in the vxlan device as following:
>>>>>>
>>>>>> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.1 \
>>>>>>         src_vni 1000 vni 1000 self
>>>>>> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.2 \
>>>>>> src_vni 1000 vni 1000 self
>>>>>>
>>>>>>
>>>>>> Not any for multicast case. This patch make ovs vxlan tunnel using the fdb
>>>>>> table of lower vxlan device.
>>>>> Have you tried OVS mac learning?
>>>>>
>>>> The key point is that it makes ovs vxlan tunnel can make use of the fdb table of lower vxlan device.
>>>>
>>>> The fdb table can be configurable or mac learning from outside.
>>>>
>>>> For the broadcast example.  In the ovs, it can only achieve this through multiple output actions to simulate the broadcast.
>>>>
>>>> ovs-ofctl add-flow br0 in_port=server1,dl_dst=ff:ff:ff:ff:ff:ff,actions=set_field:172.168.0.1->tun_dst,output:vxlan,\
>>>>
>>>>     set_field:172.168.0.2->tun_dst,output:vxlan.
>>>>
>>>> But there are some limits for the number of output actions.
>>>>
>>> I was referring to mac-learning feature in OVS i.e. using learn
>>> action. I wanted to see if there is something that you are not able to
>>> do with OVS learn action.
>>>
>> Ovs mac learn action is only work for the specific vxlan tunnel port( fixed tun_dst, tun_id) like following.
>>
>> ovs-vsctl set in vxlan options:remote_ip=172.168.0.1 options:key=1000
>>
>> ( This is the same problem for Linux bridge, It achieve this through IP_TUNNEL_INFO_BRIDGE mode work
>>
>> with the fdb of lower vxlan device)
>>
>>
>> But it is not work for the flow based tunnel (remote_ip=flow),  There will be huge number of the tunnel peer.
> One question, why do you flood the ff:ff:ff:ff:ff:ff packets to  many
> tunnel peer, in the cloud, this should be avoid(e.g arp). Because we
> use the control plane to control the packets to vm/host related, not
> all vm/host
>
> It's not only for ARP; some users also need the IP broadcast service. Anyway, it's a use-case.
Pravin Shelar March 25, 2019, 7:22 p.m. UTC | #12
On Sun, Mar 24, 2019 at 6:24 PM wenxu <wenxu@ucloud.cn> wrote:
>
> On 2019/3/25 上午2:46, Pravin Shelar wrote:
> > On Sun, Mar 24, 2019 at 12:03 AM wenxu <wenxu@ucloud.cn> wrote:
> >> On 2019/3/24 上午5:39, Pravin Shelar wrote:
> >>> On Sat, Mar 23, 2019 at 2:18 AM wenxu <wenxu@ucloud.cn> wrote:
> >>>> On 2019/3/23 下午3:50, Pravin Shelar wrote:
> >>>>
> >>>> On Thu, Mar 21, 2019 at 3:34 AM <wenxu@ucloud.cn> wrote:
> >>>>
> >>>> From: wenxu <wenxu@ucloud.cn>
> >>>>
> >>>> There is currently no support for the multicasti/broadcst aspects
> >>>> of VXLAN in ovs. In the datapath flow the tun_dst must specific.
> >>>> But in the IP_TUNNEL_INFO_BRIDGE mode the tun_dst can not be specific.
> >>>> And the packet can forward through the fdb of vxlan devcice. In
> >>>> this mode the broadcast/multicast packet can be sent through the
> >>>> following ways in ovs.
> >>>>
> >>>> ovs-vsctl add-port br0 vxlan -- set in vxlan type=vxlan \
> >>>>         options:key=1000 options:remote_ip=flow
> >>>> ovs-ofctl add-flow br0 in_port=LOCAL,dl_dst=ff:ff:ff:ff:ff:ff,\
> >>>>         action=output:vxlan
> >>>>
> >>>> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.1 \
> >>>>         src_vni 1000 vni 1000 self
> >>>> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.2 \
> >>>> src_vni 1000 vni 1000 self
> >>>>
> >>>> This would make datapath bit complicated, can you give example of such use-case?
> >>>>
> >>>> There is currently no support for the multicast/broadcast aspects
> >>>> of VXLAN in ovs. To get around the lack of multicast support, it is possible to
> >>>> pre-provision MAC to IP address mappings either manually or from a controller.
> >>>>
> >>>> With this patch we can achieve this through the fdb of the lower vxlan
> >>>> device.
> >>>>
> >>>> For example. three severs connects with vxlan.
> >>>> server1 IP 10.0.0.1 tunnel IP  172.168.0.1 vni 1000
> >>>> server2 IP 10.0.0.2 tunnel IP  172.168.0.2 vni 1000
> >>>> server3 IP 10.0.0.3 tunnel IP  172.168.0.3 vni 1000
> >>>>
> >>>> All the broadcast arp request from server1, can be send to vxlan_sys_4789
> >>>> in IP_TUNNEL_INFO_BRIDGE mode. Then the broadcast packet can send through
> >>>> the fdb table in the vxlan device as following:
> >>>>
> >>>> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.1 \
> >>>>         src_vni 1000 vni 1000 self
> >>>> bridge fdb append ff:ff:ff:ff:ff:ff dev vxlan_sys_4789 dst 172.168.0.2 \
> >>>> src_vni 1000 vni 1000 self
> >>>>
> >>>>
> >>>> Not any for multicast case. This patch make ovs vxlan tunnel using the fdb
> >>>> table of lower vxlan device.
> >>> Have you tried OVS mac learning?
> >>>
> >> The key point is that it makes ovs vxlan tunnel can make use of the fdb table of lower vxlan device.
> >>
> >> The fdb table can be configurable or mac learning from outside.
> >>
> >> For the broadcast example.  In the ovs, it can only achieve this through multiple output actions to simulate the broadcast.
> >>
> >> ovs-ofctl add-flow br0 in_port=server1,dl_dst=ff:ff:ff:ff:ff:ff,actions=set_field:172.168.0.1->tun_dst,output:vxlan,\
> >>
> >>     set_field:172.168.0.2->tun_dst,output:vxlan.
> >>
> >> But there are some limits for the number of output actions.
> >>
> > I was referring to mac-learning feature in OVS i.e. using learn
> > action. I wanted to see if there is something that you are not able to
> > do with OVS learn action.
> >
> Ovs mac learn action is only work for the specific vxlan tunnel port( fixed tun_dst, tun_id) like following.
>
> ovs-vsctl set in vxlan options:remote_ip=172.168.0.1 options:key=1000
>
> ( This is the same problem for Linux bridge, It achieve this through IP_TUNNEL_INFO_BRIDGE mode work
>
> with the fdb of lower vxlan device)
>
>
> But it is not work for the flow based tunnel (remote_ip=flow),  There will be huge number of the tunnel peer.
>
> It's hard to manage the tunnel port with the specific mode.
>
>
OK, so it is hard to use the OVS learn action, but it is doable. Given
that IP_TUNNEL_INFO_BRIDGE does not add too much complexity to OVS, I am
fine with adding this feature.

Thanks.
diff mbox series

Patch

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index dbe0cbe..696a308 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -364,6 +364,7 @@  enum ovs_tunnel_key_attr {
 	OVS_TUNNEL_KEY_ATTR_IPV6_DST,		/* struct in6_addr dst IPv6 address. */
 	OVS_TUNNEL_KEY_ATTR_PAD,
 	OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,	/* struct erspan_metadata */
+	OVS_TUNNEL_KEY_ATTR_NO_IPV4_DST,	/* No argument. No dst IP address. */
 	__OVS_TUNNEL_KEY_ATTR_MAX
 };
 
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 691da85..033df5c 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -403,6 +403,7 @@  size_t ovs_key_attr_size(void)
 	[OVS_TUNNEL_KEY_ATTR_IPV6_SRC]      = { .len = sizeof(struct in6_addr) },
 	[OVS_TUNNEL_KEY_ATTR_IPV6_DST]      = { .len = sizeof(struct in6_addr) },
 	[OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS]   = { .len = OVS_ATTR_VARIABLE },
+	[OVS_TUNNEL_KEY_ATTR_NO_IPV4_DST]   = { .len = 0 },
 };
 
 static const struct ovs_len_tbl
@@ -663,7 +664,7 @@  static int erspan_tun_opt_from_nlattr(const struct nlattr *a,
 
 static int ip_tun_from_nlattr(const struct nlattr *attr,
 			      struct sw_flow_match *match, bool is_mask,
-			      bool log)
+			      bool log, bool *no_ipv4_dst)
 {
 	bool ttl = false, ipv4 = false, ipv6 = false;
 	__be16 tun_flags = 0;
@@ -671,6 +672,9 @@  static int ip_tun_from_nlattr(const struct nlattr *attr,
 	struct nlattr *a;
 	int rem;
 
+	if (no_ipv4_dst)
+		*no_ipv4_dst = false;
+
 	nla_for_each_nested(a, attr, rem) {
 		int type = nla_type(a);
 		int err;
@@ -782,6 +786,12 @@  static int ip_tun_from_nlattr(const struct nlattr *attr,
 			tun_flags |= TUNNEL_ERSPAN_OPT;
 			opts_type = type;
 			break;
+		case OVS_TUNNEL_KEY_ATTR_NO_IPV4_DST:
+			if (no_ipv4_dst) {
+				*no_ipv4_dst = true;
+				ipv4 = true;
+			}
+			break;
 		default:
 			OVS_NLERR(log, "Unknown IP tunnel attribute %d",
 				  type);
@@ -812,9 +822,16 @@  static int ip_tun_from_nlattr(const struct nlattr *attr,
 			OVS_NLERR(log, "IP tunnel dst address not specified");
 			return -EINVAL;
 		}
-		if (ipv4 && !match->key->tun_key.u.ipv4.dst) {
-			OVS_NLERR(log, "IPv4 tunnel dst address is zero");
-			return -EINVAL;
+		if (ipv4) {
+			bool no_dst = no_ipv4_dst ? *no_ipv4_dst : false;
+
+			if (no_dst && match->key->tun_key.u.ipv4.dst) {
+				OVS_NLERR(log, "IPv4 tunnel dst address is not zero");
+				return -EINVAL;
+			} else if (!no_dst && !match->key->tun_key.u.ipv4.dst) {
+				OVS_NLERR(log, "IPv4 tunnel dst address is zero");
+				return -EINVAL;
+			}
 		}
 		if (ipv6 && ipv6_addr_any(&match->key->tun_key.u.ipv6.dst)) {
 			OVS_NLERR(log, "IPv6 tunnel dst address is zero");
@@ -1178,7 +1195,7 @@  static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
 	}
 	if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
 		if (ip_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
-				       is_mask, log) < 0)
+				       is_mask, log, NULL) < 0)
 			return -EINVAL;
 		*attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
 	}
@@ -2551,10 +2568,11 @@  static int validate_and_copy_set_tun(const struct nlattr *attr,
 	struct nlattr *a;
 	int err = 0, start, opts_type;
 	__be16 dst_opt_type;
+	bool no_ipv4_dst;
 
 	dst_opt_type = 0;
 	ovs_match_init(&match, &key, true, NULL);
-	opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log);
+	opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log, &no_ipv4_dst);
 	if (opts_type < 0)
 		return opts_type;
 
@@ -2605,6 +2623,8 @@  static int validate_and_copy_set_tun(const struct nlattr *attr,
 	tun_info->mode = IP_TUNNEL_INFO_TX;
 	if (key.tun_proto == AF_INET6)
 		tun_info->mode |= IP_TUNNEL_INFO_IPV6;
+	else if (key.tun_proto == AF_INET && no_ipv4_dst)
+		tun_info->mode |= IP_TUNNEL_INFO_BRIDGE;
 	tun_info->key = key.tun_key;
 
 	/* We need to store the options in the action itself since