diff mbox series

[ovs-dev,08/11] netdev-offload-dpdk: Support tnl/push using vxlan encap attribute

Message ID 20200518154026.18059-9-elibr@mellanox.com
State Changes Requested
Headers show
Series netdev datapath offload: Support IPv6 and VXLAN encap | expand

Commit Message

Eli Britstein May 18, 2020, 3:40 p.m. UTC
For DPDK, there is the RAW_ENCAP attribute, which gets a raw buffer of the
encapsulation header. For a specific protocol, such as VXLAN, there is a
more specific attribute, VXLAN_ENCAP, which gets the parsed fields of
the outer header. In case the tunnel type is VXLAN, parse the header
and use the specific attribute, with a fallback to RAW_ENCAP.

Signed-off-by: Eli Britstein <elibr@mellanox.com>
Reviewed-by: Roni Bar Yanai <roniba@mellanox.com>
---
 lib/netdev-offload-dpdk.c | 123 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 121 insertions(+), 2 deletions(-)

Comments

Sriharsha Basavapatna May 20, 2020, 9:12 a.m. UTC | #1
On Mon, May 18, 2020 at 9:10 PM Eli Britstein <elibr@mellanox.com> wrote:
>
> For DPDK, there is the RAW_ENCAP attribute which gets raw buffer of the
> encapsulation header. For specific protocol, such as vxlan, there is a
> more specific attribute, VXLAN_ENCAP, which gets the parsed fields of
> the outer header. In case tunnel type is vxlan, parse the header
> and use the specific attribute, with fallback to RAW_ENCAP.
>
> Signed-off-by: Eli Britstein <elibr@mellanox.com>
> Reviewed-by: Roni Bar Yanai <roniba@mellanox.com>
> ---
>  lib/netdev-offload-dpdk.c | 123 +++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 121 insertions(+), 2 deletions(-)
>
> diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
> index 38f8f00ee..de313e40b 100644
> --- a/lib/netdev-offload-dpdk.c
> +++ b/lib/netdev-offload-dpdk.c
> @@ -359,6 +359,25 @@ dump_flow_pattern(struct ds *s, const struct rte_flow_item *item)
>          } else {
>              ds_put_cstr(s, "  Mask = null\n");
>          }
> +    } else if (item->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
> +        const struct rte_flow_item_vxlan *vxlan_spec = item->spec;
> +        const struct rte_flow_item_vxlan *vxlan_mask = item->mask;
> +
> +        ds_put_cstr(s, "rte flow vxlan pattern:\n");
> +        if (vxlan_spec) {
> +            ds_put_format(s, "  Spec: flags=0x%x, vni=%"PRIu32"\n",
> +                          vxlan_spec->flags,
> +                          ntohl(*(ovs_be32 *)vxlan_spec->vni) >> 8);
> +        } else {
> +            ds_put_cstr(s, "  Spec = null\n");
> +        }
> +        if (vxlan_mask) {
> +            ds_put_format(s, "  Mask: flags=0x%x, vni=0x%06"PRIx32"\n",
> +                          vxlan_mask->flags,
> +                          ntohl(*(ovs_be32 *)vxlan_mask->vni) >> 8);
> +        } else {
> +            ds_put_cstr(s, "  Mask = null\n");
> +        }
>      } else {
>          ds_put_format(s, "unknown rte flow pattern (%d)\n", item->type);
>      }
> @@ -486,6 +505,14 @@ dump_flow_action(struct ds *s, const struct rte_flow_action *actions)
>          } else {
>              ds_put_cstr(s, "  Raw-encap = null\n");
>          }
> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP) {
> +        const struct rte_flow_action_vxlan_encap *vxlan_encap = actions->conf;
> +        const struct rte_flow_item *items = vxlan_encap->definition;
> +
> +        ds_put_cstr(s, "rte flow vxlan-encap action:\n");
> +        while (items && items->type != RTE_FLOW_ITEM_TYPE_END) {
> +            dump_flow_pattern(s, items++);
> +        }
>      } else {
>          ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
>      }
> @@ -1129,6 +1156,93 @@ parse_set_actions(struct flow_actions *actions,
>      return 0;
>  }
>
> +/* Maximum number of items in struct rte_flow_action_vxlan_encap.
> + * ETH / IPv4(6) / UDP / VXLAN / END
> + */
> +#define ACTION_VXLAN_ENCAP_ITEMS_NUM 5

Max number of items as per rte_flow.h is 6 (including vlan header).

> +
> +static int
> +add_vxlan_encap_action(struct flow_actions *actions,
> +                       const void *header)
> +{
> +    const struct eth_header *eth;
> +    const struct udp_header *udp;
> +    struct vxlan_data {
> +        struct rte_flow_action_vxlan_encap conf;
> +        struct rte_flow_item items[0];
> +    } *vxlan_data;
> +    BUILD_ASSERT_DECL(offsetof(struct vxlan_data, conf) == 0);
> +    const void *vxlan;
> +    const void *l3;
> +    const void *l4;
> +    int field;
> +
> +    vxlan_data = xzalloc(sizeof *vxlan_data +
> +                         sizeof(struct rte_flow_item) *
> +                         ACTION_VXLAN_ENCAP_ITEMS_NUM);
> +    field = 0;
> +
> +    eth = header;

Copy the header provided by OVS (nlattr) before passing it to RTE ?
also the headers are being set directly as the 'spec' below, as
opposed to rte_flow_item structures for each item below. This is ok
since these are standard headers and so the formats match.  Maybe type
cast it to the respective rte_flow_item type at least while assigning
to indicate it explicitly ?  I'm fine with the rest of the changes in
this patch.

Thanks,
-Harsha


> +    /* Ethernet */
> +    vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_ETH;
> +    vxlan_data->items[field].spec = eth;
> +    vxlan_data->items[field].mask = &rte_flow_item_eth_mask;
> +    field++;
> +
> +    l3 = eth + 1;
> +    /* IP */
> +    if (eth->eth_type == htons(ETH_TYPE_IP)) {
> +        /* IPv4 */
> +        const struct ip_header *ip = l3;
> +
> +        vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_IPV4;
> +        vxlan_data->items[field].spec = ip;
> +        vxlan_data->items[field].mask = &rte_flow_item_ipv4_mask;
> +
> +        if (ip->ip_proto != IPPROTO_UDP) {
> +            goto err;
> +        }
> +        l4 = (ip + 1);
> +    } else if (eth->eth_type == htons(ETH_TYPE_IPV6)) {
> +        const struct ovs_16aligned_ip6_hdr *ip6 = l3;
> +
> +        vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_IPV6;
> +        vxlan_data->items[field].spec = ip6;
> +        vxlan_data->items[field].mask = &rte_flow_item_ipv6_mask;
> +
> +        if (ip6->ip6_nxt != IPPROTO_UDP) {
> +            goto err;
> +        }
> +        l4 = (ip6 + 1);
> +    } else {
> +        goto err;
> +    }
> +    field++;
> +
> +    udp = (const struct udp_header *)l4;
> +    vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_UDP;
> +    vxlan_data->items[field].spec = udp;
> +    vxlan_data->items[field].mask = &rte_flow_item_udp_mask;
> +    field++;
> +
> +    vxlan = (udp + 1);
> +    vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_VXLAN;
> +    vxlan_data->items[field].spec = vxlan;
> +    vxlan_data->items[field].mask = &rte_flow_item_vxlan_mask;
> +    field++;
> +
> +    vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_END;
> +
> +    vxlan_data->conf.definition = vxlan_data->items;
> +
> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP, vxlan_data);
> +
> +    return 0;
> +err:
> +    free(vxlan_data);
> +    return -1;
> +}
> +
>  static int
>  parse_clone_actions(struct netdev *netdev,
>                      struct flow_actions *actions,
> @@ -1143,9 +1257,14 @@ parse_clone_actions(struct netdev *netdev,
>
>          if (clone_type == OVS_ACTION_ATTR_TUNNEL_PUSH) {
>              const struct ovs_action_push_tnl *tnl_push = nl_attr_get(ca);
> -            struct rte_flow_action_raw_encap *raw_encap =
> -                xzalloc(sizeof *raw_encap);
> +            struct rte_flow_action_raw_encap *raw_encap;
> +
> +            if (tnl_push->tnl_type == OVS_VPORT_TYPE_VXLAN &&
> +                !add_vxlan_encap_action(actions, tnl_push->header)) {
> +                continue;
> +            }
>
> +            raw_encap = xzalloc(sizeof *raw_encap);
>              raw_encap->data = (uint8_t *)tnl_push->header;
>              raw_encap->preserve = NULL;
>              raw_encap->size = tnl_push->header_len;
> --
> 2.14.5
>
Eli Britstein May 20, 2020, 10:08 a.m. UTC | #2
On 5/20/2020 12:12 PM, Sriharsha Basavapatna wrote:
> On Mon, May 18, 2020 at 9:10 PM Eli Britstein <elibr@mellanox.com> wrote:
>> For DPDK, there is the RAW_ENCAP attribute which gets raw buffer of the
>> encapsulation header. For specific protocol, such as vxlan, there is a
>> more specific attribute, VXLAN_ENCAP, which gets the parsed fields of
>> the outer header. In case tunnel type is vxlan, parse the header
>> and use the specific attribute, with fallback to RAW_ENCAP.
>>
>> Signed-off-by: Eli Britstein <elibr@mellanox.com>
>> Reviewed-by: Roni Bar Yanai <roniba@mellanox.com>
>> ---
>>   lib/netdev-offload-dpdk.c | 123 +++++++++++++++++++++++++++++++++++++++++++++-
>>   1 file changed, 121 insertions(+), 2 deletions(-)
>>
>> diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
>> index 38f8f00ee..de313e40b 100644
>> --- a/lib/netdev-offload-dpdk.c
>> +++ b/lib/netdev-offload-dpdk.c
>> @@ -359,6 +359,25 @@ dump_flow_pattern(struct ds *s, const struct rte_flow_item *item)
>>           } else {
>>               ds_put_cstr(s, "  Mask = null\n");
>>           }
>> +    } else if (item->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
>> +        const struct rte_flow_item_vxlan *vxlan_spec = item->spec;
>> +        const struct rte_flow_item_vxlan *vxlan_mask = item->mask;
>> +
>> +        ds_put_cstr(s, "rte flow vxlan pattern:\n");
>> +        if (vxlan_spec) {
>> +            ds_put_format(s, "  Spec: flags=0x%x, vni=%"PRIu32"\n",
>> +                          vxlan_spec->flags,
>> +                          ntohl(*(ovs_be32 *)vxlan_spec->vni) >> 8);
>> +        } else {
>> +            ds_put_cstr(s, "  Spec = null\n");
>> +        }
>> +        if (vxlan_mask) {
>> +            ds_put_format(s, "  Mask: flags=0x%x, vni=0x%06"PRIx32"\n",
>> +                          vxlan_mask->flags,
>> +                          ntohl(*(ovs_be32 *)vxlan_mask->vni) >> 8);
>> +        } else {
>> +            ds_put_cstr(s, "  Mask = null\n");
>> +        }
>>       } else {
>>           ds_put_format(s, "unknown rte flow pattern (%d)\n", item->type);
>>       }
>> @@ -486,6 +505,14 @@ dump_flow_action(struct ds *s, const struct rte_flow_action *actions)
>>           } else {
>>               ds_put_cstr(s, "  Raw-encap = null\n");
>>           }
>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP) {
>> +        const struct rte_flow_action_vxlan_encap *vxlan_encap = actions->conf;
>> +        const struct rte_flow_item *items = vxlan_encap->definition;
>> +
>> +        ds_put_cstr(s, "rte flow vxlan-encap action:\n");
>> +        while (items && items->type != RTE_FLOW_ITEM_TYPE_END) {
>> +            dump_flow_pattern(s, items++);
>> +        }
>>       } else {
>>           ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
>>       }
>> @@ -1129,6 +1156,93 @@ parse_set_actions(struct flow_actions *actions,
>>       return 0;
>>   }
>>
>> +/* Maximum number of items in struct rte_flow_action_vxlan_encap.
>> + * ETH / IPv4(6) / UDP / VXLAN / END
>> + */
>> +#define ACTION_VXLAN_ENCAP_ITEMS_NUM 5
> Max number of items as per rte_flow.h is 6 (including vlan header).

That's right from DPDK's point of view. However, I didn't see that
OVS-DPDK supports that.

I followed the format_odp_tnl_push_header function in lib/odp-util.c:8591. 
If VLAN is supported somehow, it will fall back to the raw_encap method.

>> +
>> +static int
>> +add_vxlan_encap_action(struct flow_actions *actions,
>> +                       const void *header)
>> +{
>> +    const struct eth_header *eth;
>> +    const struct udp_header *udp;
>> +    struct vxlan_data {
>> +        struct rte_flow_action_vxlan_encap conf;
>> +        struct rte_flow_item items[0];
>> +    } *vxlan_data;
>> +    BUILD_ASSERT_DECL(offsetof(struct vxlan_data, conf) == 0);
>> +    const void *vxlan;
>> +    const void *l3;
>> +    const void *l4;
>> +    int field;
>> +
>> +    vxlan_data = xzalloc(sizeof *vxlan_data +
>> +                         sizeof(struct rte_flow_item) *
>> +                         ACTION_VXLAN_ENCAP_ITEMS_NUM);
>> +    field = 0;
>> +
>> +    eth = header;
> Copy the header provided by OVS (nlattr) before passing it to RTE ?
There is no need. See comment in previous commit.
> also the headers are being set directly as the 'spec' below, as
> opposed to rte_flow_item structures for each item below. This is ok
> since these are standard headers and so the formats match.  Maybe type
> cast it to the respective rte_flow_item type at least while assigning
> to indicate it explicitly ?  I'm fine with the rest of the changes in
> this patch.
"spec" and "mask" are "const void *" by DPDK. What would such a cast serve?
>
> Thanks,
> -Harsha
>
>
>> +    /* Ethernet */
>> +    vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_ETH;
>> +    vxlan_data->items[field].spec = eth;
>> +    vxlan_data->items[field].mask = &rte_flow_item_eth_mask;
>> +    field++;
>> +
>> +    l3 = eth + 1;
>> +    /* IP */
>> +    if (eth->eth_type == htons(ETH_TYPE_IP)) {
>> +        /* IPv4 */
>> +        const struct ip_header *ip = l3;
>> +
>> +        vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_IPV4;
>> +        vxlan_data->items[field].spec = ip;
>> +        vxlan_data->items[field].mask = &rte_flow_item_ipv4_mask;
>> +
>> +        if (ip->ip_proto != IPPROTO_UDP) {
>> +            goto err;
>> +        }
>> +        l4 = (ip + 1);
>> +    } else if (eth->eth_type == htons(ETH_TYPE_IPV6)) {
>> +        const struct ovs_16aligned_ip6_hdr *ip6 = l3;
>> +
>> +        vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_IPV6;
>> +        vxlan_data->items[field].spec = ip6;
>> +        vxlan_data->items[field].mask = &rte_flow_item_ipv6_mask;
>> +
>> +        if (ip6->ip6_nxt != IPPROTO_UDP) {
>> +            goto err;
>> +        }
>> +        l4 = (ip6 + 1);
>> +    } else {
>> +        goto err;
>> +    }
>> +    field++;
>> +
>> +    udp = (const struct udp_header *)l4;
>> +    vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_UDP;
>> +    vxlan_data->items[field].spec = udp;
>> +    vxlan_data->items[field].mask = &rte_flow_item_udp_mask;
>> +    field++;
>> +
>> +    vxlan = (udp + 1);
>> +    vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_VXLAN;
>> +    vxlan_data->items[field].spec = vxlan;
>> +    vxlan_data->items[field].mask = &rte_flow_item_vxlan_mask;
>> +    field++;
>> +
>> +    vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_END;
>> +
>> +    vxlan_data->conf.definition = vxlan_data->items;
>> +
>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP, vxlan_data);
>> +
>> +    return 0;
>> +err:
>> +    free(vxlan_data);
>> +    return -1;
>> +}
>> +
>>   static int
>>   parse_clone_actions(struct netdev *netdev,
>>                       struct flow_actions *actions,
>> @@ -1143,9 +1257,14 @@ parse_clone_actions(struct netdev *netdev,
>>
>>           if (clone_type == OVS_ACTION_ATTR_TUNNEL_PUSH) {
>>               const struct ovs_action_push_tnl *tnl_push = nl_attr_get(ca);
>> -            struct rte_flow_action_raw_encap *raw_encap =
>> -                xzalloc(sizeof *raw_encap);
>> +            struct rte_flow_action_raw_encap *raw_encap;
>> +
>> +            if (tnl_push->tnl_type == OVS_VPORT_TYPE_VXLAN &&
>> +                !add_vxlan_encap_action(actions, tnl_push->header)) {
>> +                continue;
>> +            }
>>
>> +            raw_encap = xzalloc(sizeof *raw_encap);
>>               raw_encap->data = (uint8_t *)tnl_push->header;
>>               raw_encap->preserve = NULL;
>>               raw_encap->size = tnl_push->header_len;
>> --
>> 2.14.5
>>
Sriharsha Basavapatna May 20, 2020, 6:17 p.m. UTC | #3
On Wed, May 20, 2020 at 3:38 PM Eli Britstein <elibr@mellanox.com> wrote:
>
>
> On 5/20/2020 12:12 PM, Sriharsha Basavapatna wrote:
> > On Mon, May 18, 2020 at 9:10 PM Eli Britstein <elibr@mellanox.com> wrote:
> >> For DPDK, there is the RAW_ENCAP attribute which gets raw buffer of the
> >> encapsulation header. For specific protocol, such as vxlan, there is a
> >> more specific attribute, VXLAN_ENCAP, which gets the parsed fields of
> >> the outer header. In case tunnel type is vxlan, parse the header
> >> and use the specific attribute, with fallback to RAW_ENCAP.
> >>
> >> Signed-off-by: Eli Britstein <elibr@mellanox.com>
> >> Reviewed-by: Roni Bar Yanai <roniba@mellanox.com>
> >> ---
> >>   lib/netdev-offload-dpdk.c | 123 +++++++++++++++++++++++++++++++++++++++++++++-
> >>   1 file changed, 121 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
> >> index 38f8f00ee..de313e40b 100644
> >> --- a/lib/netdev-offload-dpdk.c
> >> +++ b/lib/netdev-offload-dpdk.c
> >> @@ -359,6 +359,25 @@ dump_flow_pattern(struct ds *s, const struct rte_flow_item *item)
> >>           } else {
> >>               ds_put_cstr(s, "  Mask = null\n");
> >>           }
> >> +    } else if (item->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
> >> +        const struct rte_flow_item_vxlan *vxlan_spec = item->spec;
> >> +        const struct rte_flow_item_vxlan *vxlan_mask = item->mask;
> >> +
> >> +        ds_put_cstr(s, "rte flow vxlan pattern:\n");
> >> +        if (vxlan_spec) {
> >> +            ds_put_format(s, "  Spec: flags=0x%x, vni=%"PRIu32"\n",
> >> +                          vxlan_spec->flags,
> >> +                          ntohl(*(ovs_be32 *)vxlan_spec->vni) >> 8);
> >> +        } else {
> >> +            ds_put_cstr(s, "  Spec = null\n");
> >> +        }
> >> +        if (vxlan_mask) {
> >> +            ds_put_format(s, "  Mask: flags=0x%x, vni=0x%06"PRIx32"\n",
> >> +                          vxlan_mask->flags,
> >> +                          ntohl(*(ovs_be32 *)vxlan_mask->vni) >> 8);
> >> +        } else {
> >> +            ds_put_cstr(s, "  Mask = null\n");
> >> +        }
> >>       } else {
> >>           ds_put_format(s, "unknown rte flow pattern (%d)\n", item->type);
> >>       }
> >> @@ -486,6 +505,14 @@ dump_flow_action(struct ds *s, const struct rte_flow_action *actions)
> >>           } else {
> >>               ds_put_cstr(s, "  Raw-encap = null\n");
> >>           }
> >> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP) {
> >> +        const struct rte_flow_action_vxlan_encap *vxlan_encap = actions->conf;
> >> +        const struct rte_flow_item *items = vxlan_encap->definition;
> >> +
> >> +        ds_put_cstr(s, "rte flow vxlan-encap action:\n");
> >> +        while (items && items->type != RTE_FLOW_ITEM_TYPE_END) {
> >> +            dump_flow_pattern(s, items++);
> >> +        }
> >>       } else {
> >>           ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
> >>       }
> >> @@ -1129,6 +1156,93 @@ parse_set_actions(struct flow_actions *actions,
> >>       return 0;
> >>   }
> >>
> >> +/* Maximum number of items in struct rte_flow_action_vxlan_encap.
> >> + * ETH / IPv4(6) / UDP / VXLAN / END
> >> + */
> >> +#define ACTION_VXLAN_ENCAP_ITEMS_NUM 5
> > Max number of items as per rte_flow.h is 6 (including vlan header).
>
> That's right from DPDK point of view. However, I didn't see OVS-DPDK
> supports that.
>
> I followed format_odp_tnl_push_header function in lib/odp-util.c:8591.
> If VLAN is supported somehow, it will fallback to the raw_encap method.
>
> >> +
> >> +static int
> >> +add_vxlan_encap_action(struct flow_actions *actions,
> >> +                       const void *header)
> >> +{
> >> +    const struct eth_header *eth;
> >> +    const struct udp_header *udp;
> >> +    struct vxlan_data {
> >> +        struct rte_flow_action_vxlan_encap conf;
> >> +        struct rte_flow_item items[0];
> >> +    } *vxlan_data;
> >> +    BUILD_ASSERT_DECL(offsetof(struct vxlan_data, conf) == 0);
> >> +    const void *vxlan;
> >> +    const void *l3;
> >> +    const void *l4;
> >> +    int field;
> >> +
> >> +    vxlan_data = xzalloc(sizeof *vxlan_data +
> >> +                         sizeof(struct rte_flow_item) *
> >> +                         ACTION_VXLAN_ENCAP_ITEMS_NUM);
> >> +    field = 0;
> >> +
> >> +    eth = header;
> > Copy the header provided by OVS (nlattr) before passing it to RTE ?
> There is no need. See comment in previous commit.
> > also the headers are being set directly as the 'spec' below, as
> > opposed to rte_flow_item structures for each item below. This is ok
> > since these are standard headers and so the formats match.  Maybe type
> > cast it to the respective rte_flow_item type at least while assigning
> > to indicate it explicitly ?  I'm fine with the rest of the changes in
> > this patch.
> "spec" and "mask" are "const void *" by DPDK. what will such casting serve?
I was thinking they were pointers to specific data types (e.g.
rte_flow_item_eth *); no need to change it then.
> >
> > Thanks,
> > -Harsha
> >
> >
> >> +    /* Ethernet */
> >> +    vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_ETH;
> >> +    vxlan_data->items[field].spec = eth;
> >> +    vxlan_data->items[field].mask = &rte_flow_item_eth_mask;
> >> +    field++;
> >> +
> >> +    l3 = eth + 1;
> >> +    /* IP */
> >> +    if (eth->eth_type == htons(ETH_TYPE_IP)) {
> >> +        /* IPv4 */
> >> +        const struct ip_header *ip = l3;
> >> +
> >> +        vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_IPV4;
> >> +        vxlan_data->items[field].spec = ip;
> >> +        vxlan_data->items[field].mask = &rte_flow_item_ipv4_mask;
> >> +
> >> +        if (ip->ip_proto != IPPROTO_UDP) {
> >> +            goto err;
> >> +        }
> >> +        l4 = (ip + 1);
> >> +    } else if (eth->eth_type == htons(ETH_TYPE_IPV6)) {
> >> +        const struct ovs_16aligned_ip6_hdr *ip6 = l3;
> >> +
> >> +        vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_IPV6;
> >> +        vxlan_data->items[field].spec = ip6;
> >> +        vxlan_data->items[field].mask = &rte_flow_item_ipv6_mask;
> >> +
> >> +        if (ip6->ip6_nxt != IPPROTO_UDP) {
> >> +            goto err;
> >> +        }
> >> +        l4 = (ip6 + 1);
> >> +    } else {
> >> +        goto err;
> >> +    }
> >> +    field++;
> >> +
> >> +    udp = (const struct udp_header *)l4;
> >> +    vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_UDP;
> >> +    vxlan_data->items[field].spec = udp;
> >> +    vxlan_data->items[field].mask = &rte_flow_item_udp_mask;
> >> +    field++;
> >> +
> >> +    vxlan = (udp + 1);
> >> +    vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_VXLAN;
> >> +    vxlan_data->items[field].spec = vxlan;
> >> +    vxlan_data->items[field].mask = &rte_flow_item_vxlan_mask;
> >> +    field++;
> >> +
> >> +    vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_END;
> >> +
> >> +    vxlan_data->conf.definition = vxlan_data->items;
> >> +
> >> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP, vxlan_data);
> >> +
> >> +    return 0;
> >> +err:
> >> +    free(vxlan_data);
> >> +    return -1;
> >> +}
> >> +
> >>   static int
> >>   parse_clone_actions(struct netdev *netdev,
> >>                       struct flow_actions *actions,
> >> @@ -1143,9 +1257,14 @@ parse_clone_actions(struct netdev *netdev,
> >>
> >>           if (clone_type == OVS_ACTION_ATTR_TUNNEL_PUSH) {
> >>               const struct ovs_action_push_tnl *tnl_push = nl_attr_get(ca);
> >> -            struct rte_flow_action_raw_encap *raw_encap =
> >> -                xzalloc(sizeof *raw_encap);
> >> +            struct rte_flow_action_raw_encap *raw_encap;
> >> +
> >> +            if (tnl_push->tnl_type == OVS_VPORT_TYPE_VXLAN &&
> >> +                !add_vxlan_encap_action(actions, tnl_push->header)) {
> >> +                continue;
> >> +            }
> >>
> >> +            raw_encap = xzalloc(sizeof *raw_encap);
> >>               raw_encap->data = (uint8_t *)tnl_push->header;
> >>               raw_encap->preserve = NULL;
> >>               raw_encap->size = tnl_push->header_len;
> >> --
> >> 2.14.5
> >>
diff mbox series

Patch

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index 38f8f00ee..de313e40b 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -359,6 +359,25 @@  dump_flow_pattern(struct ds *s, const struct rte_flow_item *item)
         } else {
             ds_put_cstr(s, "  Mask = null\n");
         }
+    } else if (item->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
+        const struct rte_flow_item_vxlan *vxlan_spec = item->spec;
+        const struct rte_flow_item_vxlan *vxlan_mask = item->mask;
+
+        ds_put_cstr(s, "rte flow vxlan pattern:\n");
+        if (vxlan_spec) {
+            ds_put_format(s, "  Spec: flags=0x%x, vni=%"PRIu32"\n",
+                          vxlan_spec->flags,
+                          ntohl(*(ovs_be32 *)vxlan_spec->vni) >> 8);
+        } else {
+            ds_put_cstr(s, "  Spec = null\n");
+        }
+        if (vxlan_mask) {
+            ds_put_format(s, "  Mask: flags=0x%x, vni=0x%06"PRIx32"\n",
+                          vxlan_mask->flags,
+                          ntohl(*(ovs_be32 *)vxlan_mask->vni) >> 8);
+        } else {
+            ds_put_cstr(s, "  Mask = null\n");
+        }
     } else {
         ds_put_format(s, "unknown rte flow pattern (%d)\n", item->type);
     }
@@ -486,6 +505,14 @@  dump_flow_action(struct ds *s, const struct rte_flow_action *actions)
         } else {
             ds_put_cstr(s, "  Raw-encap = null\n");
         }
+    } else if (actions->type == RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP) {
+        const struct rte_flow_action_vxlan_encap *vxlan_encap = actions->conf;
+        const struct rte_flow_item *items = vxlan_encap->definition;
+
+        ds_put_cstr(s, "rte flow vxlan-encap action:\n");
+        while (items && items->type != RTE_FLOW_ITEM_TYPE_END) {
+            dump_flow_pattern(s, items++);
+        }
     } else {
         ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
     }
@@ -1129,6 +1156,93 @@  parse_set_actions(struct flow_actions *actions,
     return 0;
 }
 
+/* Maximum number of items in struct rte_flow_action_vxlan_encap.
+ * ETH / IPv4(6) / UDP / VXLAN / END
+ */
+#define ACTION_VXLAN_ENCAP_ITEMS_NUM 5
+
+static int
+add_vxlan_encap_action(struct flow_actions *actions,
+                       const void *header)
+{
+    const struct eth_header *eth;
+    const struct udp_header *udp;
+    struct vxlan_data {
+        struct rte_flow_action_vxlan_encap conf;
+        struct rte_flow_item items[0];
+    } *vxlan_data;
+    BUILD_ASSERT_DECL(offsetof(struct vxlan_data, conf) == 0);
+    const void *vxlan;
+    const void *l3;
+    const void *l4;
+    int field;
+
+    vxlan_data = xzalloc(sizeof *vxlan_data +
+                         sizeof(struct rte_flow_item) *
+                         ACTION_VXLAN_ENCAP_ITEMS_NUM);
+    field = 0;
+
+    eth = header;
+    /* Ethernet */
+    vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_ETH;
+    vxlan_data->items[field].spec = eth;
+    vxlan_data->items[field].mask = &rte_flow_item_eth_mask;
+    field++;
+
+    l3 = eth + 1;
+    /* IP */
+    if (eth->eth_type == htons(ETH_TYPE_IP)) {
+        /* IPv4 */
+        const struct ip_header *ip = l3;
+
+        vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_IPV4;
+        vxlan_data->items[field].spec = ip;
+        vxlan_data->items[field].mask = &rte_flow_item_ipv4_mask;
+
+        if (ip->ip_proto != IPPROTO_UDP) {
+            goto err;
+        }
+        l4 = (ip + 1);
+    } else if (eth->eth_type == htons(ETH_TYPE_IPV6)) {
+        const struct ovs_16aligned_ip6_hdr *ip6 = l3;
+
+        vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_IPV6;
+        vxlan_data->items[field].spec = ip6;
+        vxlan_data->items[field].mask = &rte_flow_item_ipv6_mask;
+
+        if (ip6->ip6_nxt != IPPROTO_UDP) {
+            goto err;
+        }
+        l4 = (ip6 + 1);
+    } else {
+        goto err;
+    }
+    field++;
+
+    udp = (const struct udp_header *)l4;
+    vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_UDP;
+    vxlan_data->items[field].spec = udp;
+    vxlan_data->items[field].mask = &rte_flow_item_udp_mask;
+    field++;
+
+    vxlan = (udp + 1);
+    vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_VXLAN;
+    vxlan_data->items[field].spec = vxlan;
+    vxlan_data->items[field].mask = &rte_flow_item_vxlan_mask;
+    field++;
+
+    vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_END;
+
+    vxlan_data->conf.definition = vxlan_data->items;
+
+    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP, vxlan_data);
+
+    return 0;
+err:
+    free(vxlan_data);
+    return -1;
+}
+
 static int
 parse_clone_actions(struct netdev *netdev,
                     struct flow_actions *actions,
@@ -1143,9 +1257,14 @@  parse_clone_actions(struct netdev *netdev,
 
         if (clone_type == OVS_ACTION_ATTR_TUNNEL_PUSH) {
             const struct ovs_action_push_tnl *tnl_push = nl_attr_get(ca);
-            struct rte_flow_action_raw_encap *raw_encap =
-                xzalloc(sizeof *raw_encap);
+            struct rte_flow_action_raw_encap *raw_encap;
+
+            if (tnl_push->tnl_type == OVS_VPORT_TYPE_VXLAN &&
+                !add_vxlan_encap_action(actions, tnl_push->header)) {
+                continue;
+            }
 
+            raw_encap = xzalloc(sizeof *raw_encap);
             raw_encap->data = (uint8_t *)tnl_push->header;
             raw_encap->preserve = NULL;
             raw_encap->size = tnl_push->header_len;