diff mbox

[ovs-dev] dpif-netilnk: convert packet_type netlink attribute

Message ID AM2PR07MB1042FAC9DE7F5B1E53E912C78AD60@AM2PR07MB1042.eurprd07.prod.outlook.com
State Superseded
Headers show

Commit Message

Zoltan Balogh July 3, 2017, 2:27 p.m. UTC
By introducing packet type-aware pipeline, match on ethertype was
removed when packet type is not Ethernet. As pointed out by Eric Garver,
this could have a side effect on the kernel datapath:
https://patchwork.ozlabs.org/patch/782991/

This patch does approach the problem from a different perspective.
Instead of re-introducing the unconditional match on Ethernet type in all
megaflow entries, as suggested by Eric, mapping of packet_type != PT_ETH to
dl_type could be handled in the put_exclude_packet_type() in dpif-netlink.c.

Put_exclude_packet_type() filters the packet_type netlink attribute out,
before all attributes are sent from userspace to kernel. This commit modifies
the put_exclude_packet_type() function to do a proper conversation and add the
missing OVS_KEY_ATTR_ETHERTYPE attribute if it's needed.

Signed-off-by: Zoltán Balogh <zoltan.balogh@ericsson.com>
Reported-by: Eric Garver <e@erig.me>
---
 lib/dpif-netlink.c | 34 ++++++++++++++++++++++++++++------
 1 file changed, 28 insertions(+), 6 deletions(-)

Comments

Eric Garver July 3, 2017, 6:51 p.m. UTC | #1
On Mon, Jul 03, 2017 at 02:27:18PM +0000, Zoltán Balogh wrote:
> By introducing packet type-aware pipeline, match on ethertype was
> removed when packet type is not Ethernet. As pointed out by Eric Garver,
> this could have a side effect on the kernel datapath:
> https://patchwork.ozlabs.org/patch/782991/
> 
> This patch does approach the problem from a different perspective.
> Instead of re-introducing the unconditional match on Ethernet type in all
> megaflow entries, as suggested by Eric, mapping of packet_type != PT_ETH to
> dl_type could be handled in the put_exclude_packet_type() in dpif-netlink.c.
> 
> Put_exclude_packet_type() filters the packet_type netlink attribute out,
> before all attributes are sent from userspace to kernel. This commit modifies
> the put_exclude_packet_type() function to do a proper conversation and add the
> missing OVS_KEY_ATTR_ETHERTYPE attribute if it's needed.
> 
> Signed-off-by: Zoltán Balogh <zoltan.balogh@ericsson.com>
> Reported-by: Eric Garver <e@erig.me>
> ---
>  lib/dpif-netlink.c | 34 ++++++++++++++++++++++++++++------
>  1 file changed, 28 insertions(+), 6 deletions(-)
> 
> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
> index 562f6134c..d8051a09e 100644
> --- a/lib/dpif-netlink.c
> +++ b/lib/dpif-netlink.c
> @@ -3434,11 +3434,11 @@ dpif_netlink_flow_from_ofpbuf(struct dpif_netlink_flow *flow,
>  
>  
>  /*
> - * If PACKET_TYPE attribute is present in 'data', it filters PACKET_TYPE out,
> - * then puts 'data' to 'buf'.
> + * If PACKET_TYPE attribute is present in 'data', converts it to ETHERNET and
> + * ETHERTYPE attributes, then puts 'data' to 'buf'.
>   */
>  static void
> -put_exclude_packet_type(struct ofpbuf *buf, uint16_t type,
> +put_convert_packet_type(struct ofpbuf *buf, uint16_t type,
>                          const struct nlattr *data, uint16_t data_len)
>  {
>      const struct nlattr *packet_type;
> @@ -3446,8 +3446,9 @@ put_exclude_packet_type(struct ofpbuf *buf, uint16_t type,
>      packet_type = nl_attr_find__(data, data_len, OVS_KEY_ATTR_PACKET_TYPE);
>  
>      if (packet_type) {
> -        /* exclude PACKET_TYPE Netlink attribute. */
> +        /* Convert PACKET_TYPE Netlink attribute. */
>          ovs_assert(NLA_ALIGN(packet_type->nla_len) == NL_A_U32_SIZE);
> +        ovs_be32 value = nl_attr_get_be32(packet_type);
>          size_t packet_type_len = NL_A_U32_SIZE;
>          size_t first_chunk_size = (uint8_t *)packet_type - (uint8_t *)data;
>          size_t second_chunk_size = data_len - first_chunk_size
> @@ -3455,10 +3456,31 @@ put_exclude_packet_type(struct ofpbuf *buf, uint16_t type,
>          uint8_t *first_attr = NULL;
>          struct nlattr *next_attr = nl_attr_next(packet_type);
>  
> +        bool ethernet_present = nl_attr_find__(data, data_len,
> +                                               OVS_KEY_ATTR_ETHERNET);
> +
>          first_attr = nl_msg_put_unspec_uninit(buf, type,
>                                                data_len - packet_type_len);
>          memcpy(first_attr, data, first_chunk_size);
>          memcpy(first_attr + first_chunk_size, next_attr, second_chunk_size);
> +
> +        /* Put OVS_KEY_ATTR_ETHERTYPE if needed. */
> +        if (ntohl(value) == PT_ETH) {

I don't think this works for the mask case. I think value will be
OVS_BE32_MAX.

> +            ovs_assert(ethernet_present);
> +        } else {
> +            const struct nlattr *eth_type;
> +            ovs_be16 ns_type = pt_ns_type_be(value);
> +
> +            ovs_assert(ethernet_present == false);
> +
> +            eth_type = nl_attr_find__(data, data_len, OVS_KEY_ATTR_ETHERTYPE);
> +            if (eth_type) {
> +                ovs_assert(nl_attr_get_be16(eth_type) == ns_type);
> +            } else {
> +                nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, ns_type);

This will have to put OVS_BE16_MAX for mask case.

> +            }
> +        }
> +
>      } else {
>          nl_msg_put_unspec(buf, type, data, data_len);
>      }
> @@ -3489,11 +3511,11 @@ dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow *flow,
>      }
>      if (!flow->ufid_terse || !flow->ufid_present) {
>          if (flow->key_len) {
> -            put_exclude_packet_type(buf, OVS_FLOW_ATTR_KEY, flow->key,
> +            put_convert_packet_type(buf, OVS_FLOW_ATTR_KEY, flow->key,
>                                             flow->key_len);
>          }
>          if (flow->mask_len) {
> -            put_exclude_packet_type(buf, OVS_FLOW_ATTR_MASK, flow->mask,
> +            put_convert_packet_type(buf, OVS_FLOW_ATTR_MASK, flow->mask,
>                                             flow->mask_len);
>          }
>          if (flow->actions || flow->actions_len) {
> -- 
> 2.11.0
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Eric Garver July 3, 2017, 8:20 p.m. UTC | #2
On Mon, Jul 03, 2017 at 02:27:18PM +0000, Zoltán Balogh wrote:
> By introducing packet type-aware pipeline, match on ethertype was
> removed when packet type is not Ethernet. As pointed out by Eric Garver,
> this could have a side effect on the kernel datapath:
> https://patchwork.ozlabs.org/patch/782991/
> 
> This patch does approach the problem from a different perspective.
> Instead of re-introducing the unconditional match on Ethernet type in all
> megaflow entries, as suggested by Eric, mapping of packet_type != PT_ETH to
> dl_type could be handled in the put_exclude_packet_type() in dpif-netlink.c.
> 
> Put_exclude_packet_type() filters the packet_type netlink attribute out,
> before all attributes are sent from userspace to kernel. This commit modifies
> the put_exclude_packet_type() function to do a proper conversation and add the
> missing OVS_KEY_ATTR_ETHERTYPE attribute if it's needed.
> 
> Signed-off-by: Zoltán Balogh <zoltan.balogh@ericsson.com>
> Reported-by: Eric Garver <e@erig.me>
> ---
>  lib/dpif-netlink.c | 34 ++++++++++++++++++++++++++++------
>  1 file changed, 28 insertions(+), 6 deletions(-)
> 

[..]

This patch's subject has a typo. s/dpif-netilnk/dpif-netlink/
Yang, Yi July 4, 2017, 12:09 a.m. UTC | #3
Zoltan, I tested this patch for net-next, it will result in failure by ovs_assert, in addition, we need to handle  OVS_FLOW_ATTR_MASK and OVS_FLOW_ATTR_KEY differently, to be important, we need to verify it in kernel mode. From my test and verification, this patch can't work, instead, it can work without this patch.

-----Original Message-----
From: ovs-dev-bounces@openvswitch.org [mailto:ovs-dev-bounces@openvswitch.org] On Behalf Of Eric Garver
Sent: Tuesday, July 4, 2017 2:52 AM
To: Zoltán Balogh <zoltan.balogh@ericsson.com>
Cc: 'dev@openvswitch.org' <dev@openvswitch.org>
Subject: Re: [ovs-dev] [PATCH] dpif-netilnk: convert packet_type netlink attribute

On Mon, Jul 03, 2017 at 02:27:18PM +0000, Zoltán Balogh wrote:
> By introducing packet type-aware pipeline, match on ethertype was 
> removed when packet type is not Ethernet. As pointed out by Eric 
> Garver, this could have a side effect on the kernel datapath:
> https://patchwork.ozlabs.org/patch/782991/
> 
> This patch does approach the problem from a different perspective.
> Instead of re-introducing the unconditional match on Ethernet type in 
> all megaflow entries, as suggested by Eric, mapping of packet_type != 
> PT_ETH to dl_type could be handled in the put_exclude_packet_type() in dpif-netlink.c.
> 
> Put_exclude_packet_type() filters the packet_type netlink attribute 
> out, before all attributes are sent from userspace to kernel. This 
> commit modifies the put_exclude_packet_type() function to do a proper 
> conversation and add the missing OVS_KEY_ATTR_ETHERTYPE attribute if it's needed.
> 
> Signed-off-by: Zoltán Balogh <zoltan.balogh@ericsson.com>
> Reported-by: Eric Garver <e@erig.me>
> ---
>  lib/dpif-netlink.c | 34 ++++++++++++++++++++++++++++------
>  1 file changed, 28 insertions(+), 6 deletions(-)
> 
> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index 
> 562f6134c..d8051a09e 100644
> --- a/lib/dpif-netlink.c
> +++ b/lib/dpif-netlink.c
> @@ -3434,11 +3434,11 @@ dpif_netlink_flow_from_ofpbuf(struct 
> dpif_netlink_flow *flow,
>  
>  
>  /*
> - * If PACKET_TYPE attribute is present in 'data', it filters 
> PACKET_TYPE out,
> - * then puts 'data' to 'buf'.
> + * If PACKET_TYPE attribute is present in 'data', converts it to 
> + ETHERNET and
> + * ETHERTYPE attributes, then puts 'data' to 'buf'.
>   */
>  static void
> -put_exclude_packet_type(struct ofpbuf *buf, uint16_t type,
> +put_convert_packet_type(struct ofpbuf *buf, uint16_t type,
>                          const struct nlattr *data, uint16_t data_len)  
> {
>      const struct nlattr *packet_type; @@ -3446,8 +3446,9 @@ 
> put_exclude_packet_type(struct ofpbuf *buf, uint16_t type,
>      packet_type = nl_attr_find__(data, data_len, 
> OVS_KEY_ATTR_PACKET_TYPE);
>  
>      if (packet_type) {
> -        /* exclude PACKET_TYPE Netlink attribute. */
> +        /* Convert PACKET_TYPE Netlink attribute. */
>          ovs_assert(NLA_ALIGN(packet_type->nla_len) == NL_A_U32_SIZE);
> +        ovs_be32 value = nl_attr_get_be32(packet_type);
>          size_t packet_type_len = NL_A_U32_SIZE;
>          size_t first_chunk_size = (uint8_t *)packet_type - (uint8_t *)data;
>          size_t second_chunk_size = data_len - first_chunk_size @@ 
> -3455,10 +3456,31 @@ put_exclude_packet_type(struct ofpbuf *buf, uint16_t type,
>          uint8_t *first_attr = NULL;
>          struct nlattr *next_attr = nl_attr_next(packet_type);
>  
> +        bool ethernet_present = nl_attr_find__(data, data_len,
> +                                               
> + OVS_KEY_ATTR_ETHERNET);
> +
>          first_attr = nl_msg_put_unspec_uninit(buf, type,
>                                                data_len - packet_type_len);
>          memcpy(first_attr, data, first_chunk_size);
>          memcpy(first_attr + first_chunk_size, next_attr, 
> second_chunk_size);
> +
> +        /* Put OVS_KEY_ATTR_ETHERTYPE if needed. */
> +        if (ntohl(value) == PT_ETH) {

I don't think this works for the mask case. I think value will be OVS_BE32_MAX.

> +            ovs_assert(ethernet_present);
> +        } else {
> +            const struct nlattr *eth_type;
> +            ovs_be16 ns_type = pt_ns_type_be(value);
> +
> +            ovs_assert(ethernet_present == false);
> +
> +            eth_type = nl_attr_find__(data, data_len, OVS_KEY_ATTR_ETHERTYPE);
> +            if (eth_type) {
> +                ovs_assert(nl_attr_get_be16(eth_type) == ns_type);
> +            } else {
> +                nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, 
> + ns_type);

This will have to put OVS_BE16_MAX for mask case.

> +            }
> +        }
> +
>      } else {
>          nl_msg_put_unspec(buf, type, data, data_len);
>      }
> @@ -3489,11 +3511,11 @@ dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow *flow,
>      }
>      if (!flow->ufid_terse || !flow->ufid_present) {
>          if (flow->key_len) {
> -            put_exclude_packet_type(buf, OVS_FLOW_ATTR_KEY, flow->key,
> +            put_convert_packet_type(buf, OVS_FLOW_ATTR_KEY, 
> + flow->key,
>                                             flow->key_len);
>          }
>          if (flow->mask_len) {
> -            put_exclude_packet_type(buf, OVS_FLOW_ATTR_MASK, flow->mask,
> +            put_convert_packet_type(buf, OVS_FLOW_ATTR_MASK, 
> + flow->mask,
>                                             flow->mask_len);
>          }
>          if (flow->actions || flow->actions_len) {
> --
> 2.11.0
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Zoltan Balogh July 4, 2017, 7:23 a.m. UTC | #4
Thank you for reviewing!
I'm going to fix it.
/Zoltan

> -----Original Message-----
> From: Eric Garver [mailto:e@erig.me]
> Sent: Monday, July 03, 2017 8:52 PM
> To: Zoltán Balogh <zoltan.balogh@ericsson.com>
> Cc: 'dev@openvswitch.org' <dev@openvswitch.org>
> Subject: Re: [ovs-dev] [PATCH] dpif-netilnk: convert packet_type netlink attribute
> 
> On Mon, Jul 03, 2017 at 02:27:18PM +0000, Zoltán Balogh wrote:
> > By introducing packet type-aware pipeline, match on ethertype was
> > removed when packet type is not Ethernet. As pointed out by Eric Garver,
> > this could have a side effect on the kernel datapath:
> > https://patchwork.ozlabs.org/patch/782991/
> >
> > This patch does approach the problem from a different perspective.
> > Instead of re-introducing the unconditional match on Ethernet type in all
> > megaflow entries, as suggested by Eric, mapping of packet_type != PT_ETH to
> > dl_type could be handled in the put_exclude_packet_type() in dpif-netlink.c.
> >
> > Put_exclude_packet_type() filters the packet_type netlink attribute out,
> > before all attributes are sent from userspace to kernel. This commit modifies
> > the put_exclude_packet_type() function to do a proper conversation and add the
> > missing OVS_KEY_ATTR_ETHERTYPE attribute if it's needed.
> >
> > Signed-off-by: Zoltán Balogh <zoltan.balogh@ericsson.com>
> > Reported-by: Eric Garver <e@erig.me>
> > ---
> >  lib/dpif-netlink.c | 34 ++++++++++++++++++++++++++++------
> >  1 file changed, 28 insertions(+), 6 deletions(-)
> >
> > diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
> > index 562f6134c..d8051a09e 100644
> > --- a/lib/dpif-netlink.c
> > +++ b/lib/dpif-netlink.c
> > @@ -3434,11 +3434,11 @@ dpif_netlink_flow_from_ofpbuf(struct dpif_netlink_flow *flow,
> >
> >
> >  /*
> > - * If PACKET_TYPE attribute is present in 'data', it filters PACKET_TYPE out,
> > - * then puts 'data' to 'buf'.
> > + * If PACKET_TYPE attribute is present in 'data', converts it to ETHERNET and
> > + * ETHERTYPE attributes, then puts 'data' to 'buf'.
> >   */
> >  static void
> > -put_exclude_packet_type(struct ofpbuf *buf, uint16_t type,
> > +put_convert_packet_type(struct ofpbuf *buf, uint16_t type,
> >                          const struct nlattr *data, uint16_t data_len)
> >  {
> >      const struct nlattr *packet_type;
> > @@ -3446,8 +3446,9 @@ put_exclude_packet_type(struct ofpbuf *buf, uint16_t type,
> >      packet_type = nl_attr_find__(data, data_len, OVS_KEY_ATTR_PACKET_TYPE);
> >
> >      if (packet_type) {
> > -        /* exclude PACKET_TYPE Netlink attribute. */
> > +        /* Convert PACKET_TYPE Netlink attribute. */
> >          ovs_assert(NLA_ALIGN(packet_type->nla_len) == NL_A_U32_SIZE);
> > +        ovs_be32 value = nl_attr_get_be32(packet_type);
> >          size_t packet_type_len = NL_A_U32_SIZE;
> >          size_t first_chunk_size = (uint8_t *)packet_type - (uint8_t *)data;
> >          size_t second_chunk_size = data_len - first_chunk_size
> > @@ -3455,10 +3456,31 @@ put_exclude_packet_type(struct ofpbuf *buf, uint16_t type,
> >          uint8_t *first_attr = NULL;
> >          struct nlattr *next_attr = nl_attr_next(packet_type);
> >
> > +        bool ethernet_present = nl_attr_find__(data, data_len,
> > +                                               OVS_KEY_ATTR_ETHERNET);
> > +
> >          first_attr = nl_msg_put_unspec_uninit(buf, type,
> >                                                data_len - packet_type_len);
> >          memcpy(first_attr, data, first_chunk_size);
> >          memcpy(first_attr + first_chunk_size, next_attr, second_chunk_size);
> > +
> > +        /* Put OVS_KEY_ATTR_ETHERTYPE if needed. */
> > +        if (ntohl(value) == PT_ETH) {
> 
> I don't think this works for the mask case. I think value will be
> OVS_BE32_MAX.
> 
> > +            ovs_assert(ethernet_present);
> > +        } else {
> > +            const struct nlattr *eth_type;
> > +            ovs_be16 ns_type = pt_ns_type_be(value);
> > +
> > +            ovs_assert(ethernet_present == false);
> > +
> > +            eth_type = nl_attr_find__(data, data_len, OVS_KEY_ATTR_ETHERTYPE);
> > +            if (eth_type) {
> > +                ovs_assert(nl_attr_get_be16(eth_type) == ns_type);
> > +            } else {
> > +                nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, ns_type);
> 
> This will have to put OVS_BE16_MAX for mask case.
> 
> > +            }
> > +        }
> > +
> >      } else {
> >          nl_msg_put_unspec(buf, type, data, data_len);
> >      }
> > @@ -3489,11 +3511,11 @@ dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow *flow,
> >      }
> >      if (!flow->ufid_terse || !flow->ufid_present) {
> >          if (flow->key_len) {
> > -            put_exclude_packet_type(buf, OVS_FLOW_ATTR_KEY, flow->key,
> > +            put_convert_packet_type(buf, OVS_FLOW_ATTR_KEY, flow->key,
> >                                             flow->key_len);
> >          }
> >          if (flow->mask_len) {
> > -            put_exclude_packet_type(buf, OVS_FLOW_ATTR_MASK, flow->mask,
> > +            put_convert_packet_type(buf, OVS_FLOW_ATTR_MASK, flow->mask,
> >                                             flow->mask_len);
> >          }
> >          if (flow->actions || flow->actions_len) {
> > --
> > 2.11.0
> >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Zoltan Balogh July 4, 2017, 7:24 a.m. UTC | #5
Thank you for testing!
I'm going to fix it.
/Zoltan

> -----Original Message-----
> From: Yang, Yi Y [mailto:yi.y.yang@intel.com]
> Sent: Tuesday, July 04, 2017 2:10 AM
> To: Eric Garver <e@erig.me>; Zoltán Balogh <zoltan.balogh@ericsson.com>
> Cc: 'dev@openvswitch.org' <dev@openvswitch.org>
> Subject: RE: [ovs-dev] [PATCH] dpif-netilnk: convert packet_type netlink attribute
> 
> Zoltan, I tested this patch for net-next, it will result in failure by ovs_assert, in addition, we need to handle
> OVS_FLOW_ATTR_MASK and OVS_FLOW_ATTR_KEY differently, to be important, we need to verify it in kernel mode. From my
> test and verification, this patch can't work, instead, it can work without this patch.
> 
> -----Original Message-----
> From: ovs-dev-bounces@openvswitch.org [mailto:ovs-dev-bounces@openvswitch.org] On Behalf Of Eric Garver
> Sent: Tuesday, July 4, 2017 2:52 AM
> To: Zoltán Balogh <zoltan.balogh@ericsson.com>
> Cc: 'dev@openvswitch.org' <dev@openvswitch.org>
> Subject: Re: [ovs-dev] [PATCH] dpif-netilnk: convert packet_type netlink attribute
> 
> On Mon, Jul 03, 2017 at 02:27:18PM +0000, Zoltán Balogh wrote:
> > By introducing packet type-aware pipeline, match on ethertype was
> > removed when packet type is not Ethernet. As pointed out by Eric
> > Garver, this could have a side effect on the kernel datapath:
> > https://patchwork.ozlabs.org/patch/782991/
> >
> > This patch does approach the problem from a different perspective.
> > Instead of re-introducing the unconditional match on Ethernet type in
> > all megaflow entries, as suggested by Eric, mapping of packet_type !=
> > PT_ETH to dl_type could be handled in the put_exclude_packet_type() in dpif-netlink.c.
> >
> > Put_exclude_packet_type() filters the packet_type netlink attribute
> > out, before all attributes are sent from userspace to kernel. This
> > commit modifies the put_exclude_packet_type() function to do a proper
> > conversation and add the missing OVS_KEY_ATTR_ETHERTYPE attribute if it's needed.
> >
> > Signed-off-by: Zoltán Balogh <zoltan.balogh@ericsson.com>
> > Reported-by: Eric Garver <e@erig.me>
> > ---
> >  lib/dpif-netlink.c | 34 ++++++++++++++++++++++++++++------
> >  1 file changed, 28 insertions(+), 6 deletions(-)
> >
> > diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index
> > 562f6134c..d8051a09e 100644
> > --- a/lib/dpif-netlink.c
> > +++ b/lib/dpif-netlink.c
> > @@ -3434,11 +3434,11 @@ dpif_netlink_flow_from_ofpbuf(struct
> > dpif_netlink_flow *flow,
> >
> >
> >  /*
> > - * If PACKET_TYPE attribute is present in 'data', it filters
> > PACKET_TYPE out,
> > - * then puts 'data' to 'buf'.
> > + * If PACKET_TYPE attribute is present in 'data', converts it to
> > + ETHERNET and
> > + * ETHERTYPE attributes, then puts 'data' to 'buf'.
> >   */
> >  static void
> > -put_exclude_packet_type(struct ofpbuf *buf, uint16_t type,
> > +put_convert_packet_type(struct ofpbuf *buf, uint16_t type,
> >                          const struct nlattr *data, uint16_t data_len)
> > {
> >      const struct nlattr *packet_type; @@ -3446,8 +3446,9 @@
> > put_exclude_packet_type(struct ofpbuf *buf, uint16_t type,
> >      packet_type = nl_attr_find__(data, data_len,
> > OVS_KEY_ATTR_PACKET_TYPE);
> >
> >      if (packet_type) {
> > -        /* exclude PACKET_TYPE Netlink attribute. */
> > +        /* Convert PACKET_TYPE Netlink attribute. */
> >          ovs_assert(NLA_ALIGN(packet_type->nla_len) == NL_A_U32_SIZE);
> > +        ovs_be32 value = nl_attr_get_be32(packet_type);
> >          size_t packet_type_len = NL_A_U32_SIZE;
> >          size_t first_chunk_size = (uint8_t *)packet_type - (uint8_t *)data;
> >          size_t second_chunk_size = data_len - first_chunk_size @@
> > -3455,10 +3456,31 @@ put_exclude_packet_type(struct ofpbuf *buf, uint16_t type,
> >          uint8_t *first_attr = NULL;
> >          struct nlattr *next_attr = nl_attr_next(packet_type);
> >
> > +        bool ethernet_present = nl_attr_find__(data, data_len,
> > +
> > + OVS_KEY_ATTR_ETHERNET);
> > +
> >          first_attr = nl_msg_put_unspec_uninit(buf, type,
> >                                                data_len - packet_type_len);
> >          memcpy(first_attr, data, first_chunk_size);
> >          memcpy(first_attr + first_chunk_size, next_attr,
> > second_chunk_size);
> > +
> > +        /* Put OVS_KEY_ATTR_ETHERTYPE if needed. */
> > +        if (ntohl(value) == PT_ETH) {
> 
> I don't think this works for the mask case. I think value will be OVS_BE32_MAX.
> 
> > +            ovs_assert(ethernet_present);
> > +        } else {
> > +            const struct nlattr *eth_type;
> > +            ovs_be16 ns_type = pt_ns_type_be(value);
> > +
> > +            ovs_assert(ethernet_present == false);
> > +
> > +            eth_type = nl_attr_find__(data, data_len, OVS_KEY_ATTR_ETHERTYPE);
> > +            if (eth_type) {
> > +                ovs_assert(nl_attr_get_be16(eth_type) == ns_type);
> > +            } else {
> > +                nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE,
> > + ns_type);
> 
> This will have to put OVS_BE16_MAX for mask case.
> 
> > +            }
> > +        }
> > +
> >      } else {
> >          nl_msg_put_unspec(buf, type, data, data_len);
> >      }
> > @@ -3489,11 +3511,11 @@ dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow *flow,
> >      }
> >      if (!flow->ufid_terse || !flow->ufid_present) {
> >          if (flow->key_len) {
> > -            put_exclude_packet_type(buf, OVS_FLOW_ATTR_KEY, flow->key,
> > +            put_convert_packet_type(buf, OVS_FLOW_ATTR_KEY,
> > + flow->key,
> >                                             flow->key_len);
> >          }
> >          if (flow->mask_len) {
> > -            put_exclude_packet_type(buf, OVS_FLOW_ATTR_MASK, flow->mask,
> > +            put_convert_packet_type(buf, OVS_FLOW_ATTR_MASK,
> > + flow->mask,
> >                                             flow->mask_len);
> >          }
> >          if (flow->actions || flow->actions_len) {
> > --
> > 2.11.0
> >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Zoltan Balogh July 4, 2017, 9:25 p.m. UTC | #6
Hi Eric and Yi,

I tried to fix the issues. I posted v2 to the list. Could you have a look at it and test it, please?
https://patchwork.ozlabs.org/patch/784317/

Best regards,
Zoltan

> -----Original Message-----
> From: Yang, Yi Y [mailto:yi.y.yang@intel.com]
> Sent: Tuesday, July 04, 2017 2:10 AM
> To: Eric Garver <e@erig.me>; Zoltán Balogh <zoltan.balogh@ericsson.com>
> Cc: 'dev@openvswitch.org' <dev@openvswitch.org>
> Subject: RE: [ovs-dev] [PATCH] dpif-netilnk: convert packet_type netlink attribute
> 
> Zoltan, I tested this patch for net-next, it will result in failure by ovs_assert, in addition, we need to handle
> OVS_FLOW_ATTR_MASK and OVS_FLOW_ATTR_KEY differently, to be important, we need to verify it in kernel mode. From my
> test and verification, this patch can't work, instead, it can work without this patch.
> 
> -----Original Message-----
> From: ovs-dev-bounces@openvswitch.org [mailto:ovs-dev-bounces@openvswitch.org] On Behalf Of Eric Garver
> Sent: Tuesday, July 4, 2017 2:52 AM
> To: Zoltán Balogh <zoltan.balogh@ericsson.com>
> Cc: 'dev@openvswitch.org' <dev@openvswitch.org>
> Subject: Re: [ovs-dev] [PATCH] dpif-netilnk: convert packet_type netlink attribute
> 
> On Mon, Jul 03, 2017 at 02:27:18PM +0000, Zoltán Balogh wrote:
> > By introducing packet type-aware pipeline, match on ethertype was
> > removed when packet type is not Ethernet. As pointed out by Eric
> > Garver, this could have a side effect on the kernel datapath:
> > https://patchwork.ozlabs.org/patch/782991/
> >
> > This patch does approach the problem from a different perspective.
> > Instead of re-introducing the unconditional match on Ethernet type in
> > all megaflow entries, as suggested by Eric, mapping of packet_type !=
> > PT_ETH to dl_type could be handled in the put_exclude_packet_type() in dpif-netlink.c.
> >
> > Put_exclude_packet_type() filters the packet_type netlink attribute
> > out, before all attributes are sent from userspace to kernel. This
> > commit modifies the put_exclude_packet_type() function to do a proper
> > conversation and add the missing OVS_KEY_ATTR_ETHERTYPE attribute if it's needed.
> >
> > Signed-off-by: Zoltán Balogh <zoltan.balogh@ericsson.com>
> > Reported-by: Eric Garver <e@erig.me>
> > ---
> >  lib/dpif-netlink.c | 34 ++++++++++++++++++++++++++++------
> >  1 file changed, 28 insertions(+), 6 deletions(-)
> >
> > diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index
> > 562f6134c..d8051a09e 100644
> > --- a/lib/dpif-netlink.c
> > +++ b/lib/dpif-netlink.c
> > @@ -3434,11 +3434,11 @@ dpif_netlink_flow_from_ofpbuf(struct
> > dpif_netlink_flow *flow,
> >
> >
> >  /*
> > - * If PACKET_TYPE attribute is present in 'data', it filters
> > PACKET_TYPE out,
> > - * then puts 'data' to 'buf'.
> > + * If PACKET_TYPE attribute is present in 'data', converts it to
> > + ETHERNET and
> > + * ETHERTYPE attributes, then puts 'data' to 'buf'.
> >   */
> >  static void
> > -put_exclude_packet_type(struct ofpbuf *buf, uint16_t type,
> > +put_convert_packet_type(struct ofpbuf *buf, uint16_t type,
> >                          const struct nlattr *data, uint16_t data_len)
> > {
> >      const struct nlattr *packet_type; @@ -3446,8 +3446,9 @@
> > put_exclude_packet_type(struct ofpbuf *buf, uint16_t type,
> >      packet_type = nl_attr_find__(data, data_len,
> > OVS_KEY_ATTR_PACKET_TYPE);
> >
> >      if (packet_type) {
> > -        /* exclude PACKET_TYPE Netlink attribute. */
> > +        /* Convert PACKET_TYPE Netlink attribute. */
> >          ovs_assert(NLA_ALIGN(packet_type->nla_len) == NL_A_U32_SIZE);
> > +        ovs_be32 value = nl_attr_get_be32(packet_type);
> >          size_t packet_type_len = NL_A_U32_SIZE;
> >          size_t first_chunk_size = (uint8_t *)packet_type - (uint8_t *)data;
> >          size_t second_chunk_size = data_len - first_chunk_size @@
> > -3455,10 +3456,31 @@ put_exclude_packet_type(struct ofpbuf *buf, uint16_t type,
> >          uint8_t *first_attr = NULL;
> >          struct nlattr *next_attr = nl_attr_next(packet_type);
> >
> > +        bool ethernet_present = nl_attr_find__(data, data_len,
> > +
> > + OVS_KEY_ATTR_ETHERNET);
> > +
> >          first_attr = nl_msg_put_unspec_uninit(buf, type,
> >                                                data_len - packet_type_len);
> >          memcpy(first_attr, data, first_chunk_size);
> >          memcpy(first_attr + first_chunk_size, next_attr,
> > second_chunk_size);
> > +
> > +        /* Put OVS_KEY_ATTR_ETHERTYPE if needed. */
> > +        if (ntohl(value) == PT_ETH) {
> 
> I don't think this works for the mask case. I think value will be OVS_BE32_MAX.
> 
> > +            ovs_assert(ethernet_present);
> > +        } else {
> > +            const struct nlattr *eth_type;
> > +            ovs_be16 ns_type = pt_ns_type_be(value);
> > +
> > +            ovs_assert(ethernet_present == false);
> > +
> > +            eth_type = nl_attr_find__(data, data_len, OVS_KEY_ATTR_ETHERTYPE);
> > +            if (eth_type) {
> > +                ovs_assert(nl_attr_get_be16(eth_type) == ns_type);
> > +            } else {
> > +                nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE,
> > + ns_type);
> 
> This will have to put OVS_BE16_MAX for mask case.
> 
> > +            }
> > +        }
> > +
> >      } else {
> >          nl_msg_put_unspec(buf, type, data, data_len);
> >      }
> > @@ -3489,11 +3511,11 @@ dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow *flow,
> >      }
> >      if (!flow->ufid_terse || !flow->ufid_present) {
> >          if (flow->key_len) {
> > -            put_exclude_packet_type(buf, OVS_FLOW_ATTR_KEY, flow->key,
> > +            put_convert_packet_type(buf, OVS_FLOW_ATTR_KEY,
> > + flow->key,
> >                                             flow->key_len);
> >          }
> >          if (flow->mask_len) {
> > -            put_exclude_packet_type(buf, OVS_FLOW_ATTR_MASK, flow->mask,
> > +            put_convert_packet_type(buf, OVS_FLOW_ATTR_MASK,
> > + flow->mask,
> >                                             flow->mask_len);
> >          }
> >          if (flow->actions || flow->actions_len) {
> > --
> > 2.11.0
> >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Yang, Yi July 6, 2017, 9:04 a.m. UTC | #7
Hi, Zoltan

I checked odp_flow_key_from_flow__ in lib/odp-util.c, it has correctly handled PACKET_TYPE and ETHERNET, ETHERTYPE has had correct value no matter packet_type is PT_ETH or other L3 types, so I think the original put_exclude_packet_type has done the right thing, the only one thing to be filtered is PACKET_TYPE. I verified net-next and ovs compat mode with NSH patches, both of them can work with the original put_exclude_packet_type.

The only issue as Eric mentioned is we should change odp_flow_key_from_flow__ to put mask for ETHERTYPE if packet_type is L3 type. But I didn't see "match_validate complain" in my test.

    nl_msg_put_be32(buf, OVS_KEY_ATTR_PACKET_TYPE, data->packet_type);

    if (OVS_UNLIKELY(parms->probe)) {
        max_vlans = FLOW_MAX_VLAN_HEADERS;
    } else {
        max_vlans = MIN(parms->support.max_vlan_headers, flow_vlan_limit);
    }

    /* Conditionally add L2 attributes for Ethernet packets */
    if (flow->packet_type == htonl(PT_ETH)) {
        eth_key = nl_msg_put_unspec_uninit(buf, OVS_KEY_ATTR_ETHERNET,
                                           sizeof *eth_key);
        get_ethernet_key(data, eth_key);

        for (int encaps = 0; encaps < max_vlans; encaps++) {
            ovs_be16 tpid = flow->vlans[encaps].tpid;

            if (flow->vlans[encaps].tci == htons(0)) {
                if (eth_type_vlan(flow->dl_type)) {
                    /* If VLAN was truncated the tpid is in dl_type */
                    tpid = flow->dl_type;
                } else {
                    break;
                }
            }

            if (export_mask) {
                nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, OVS_BE16_MAX);
            } else {
                nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, tpid);
            }
            nl_msg_put_be16(buf, OVS_KEY_ATTR_VLAN, data->vlans[encaps].tci);
            encap[encaps] = nl_msg_start_nested(buf, OVS_KEY_ATTR_ENCAP);
            if (flow->vlans[encaps].tci == htons(0)) {
                goto unencap;
            }
        }
    }

    if (ntohs(flow->dl_type) < ETH_TYPE_MIN) {
        /* For backwards compatibility with kernels that don't support
         * wildcarding, the following convention is used to encode the
         * OVS_KEY_ATTR_ETHERTYPE for key and mask:
         *
         *   key      mask    matches
         * -------- --------  -------
         *  >0x5ff   0xffff   Specified Ethernet II Ethertype.
         *  >0x5ff      0     Any Ethernet II or non-Ethernet II frame.
         *  <none>   0xffff   Any non-Ethernet II frame (except valid
         *                    802.3 SNAP packet with valid eth_type).
         */
        if (export_mask) {
            nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, OVS_BE16_MAX);
        }
        goto unencap;
    }

    nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, data->dl_type);

-----Original Message-----
From: Zoltán Balogh [mailto:zoltan.balogh@ericsson.com] 
Sent: Wednesday, July 5, 2017 5:25 AM
To: Yang, Yi Y <yi.y.yang@intel.com>; Eric Garver <e@erig.me>
Cc: 'dev@openvswitch.org' <dev@openvswitch.org>; Jan Scheurich <jan.scheurich@ericsson.com>
Subject: RE: [ovs-dev] [PATCH] dpif-netilnk: convert packet_type netlink attribute

Hi Eric and Yi,

I tried to fix the issues. I posted v2 to the list. Could you have a look at it and test it, please?
https://patchwork.ozlabs.org/patch/784317/

Best regards,
Zoltan

> -----Original Message-----
> From: Yang, Yi Y [mailto:yi.y.yang@intel.com]
> Sent: Tuesday, July 04, 2017 2:10 AM
> To: Eric Garver <e@erig.me>; Zoltán Balogh 
> <zoltan.balogh@ericsson.com>
> Cc: 'dev@openvswitch.org' <dev@openvswitch.org>
> Subject: RE: [ovs-dev] [PATCH] dpif-netilnk: convert packet_type 
> netlink attribute
> 
> Zoltan, I tested this patch for net-next, it will result in failure by 
> ovs_assert, in addition, we need to handle OVS_FLOW_ATTR_MASK and 
> OVS_FLOW_ATTR_KEY differently, to be important, we need to verify it in kernel mode. From my test and verification, this patch can't work, instead, it can work without this patch.
> 
> -----Original Message-----
> From: ovs-dev-bounces@openvswitch.org 
> [mailto:ovs-dev-bounces@openvswitch.org] On Behalf Of Eric Garver
> Sent: Tuesday, July 4, 2017 2:52 AM
> To: Zoltán Balogh <zoltan.balogh@ericsson.com>
> Cc: 'dev@openvswitch.org' <dev@openvswitch.org>
> Subject: Re: [ovs-dev] [PATCH] dpif-netilnk: convert packet_type 
> netlink attribute
> 
> On Mon, Jul 03, 2017 at 02:27:18PM +0000, Zoltán Balogh wrote:
> > By introducing packet type-aware pipeline, match on ethertype was 
> > removed when packet type is not Ethernet. As pointed out by Eric 
> > Garver, this could have a side effect on the kernel datapath:
> > https://patchwork.ozlabs.org/patch/782991/
> >
> > This patch does approach the problem from a different perspective.
> > Instead of re-introducing the unconditional match on Ethernet type 
> > in all megaflow entries, as suggested by Eric, mapping of 
> > packet_type != PT_ETH to dl_type could be handled in the put_exclude_packet_type() in dpif-netlink.c.
> >
> > Put_exclude_packet_type() filters the packet_type netlink attribute 
> > out, before all attributes are sent from userspace to kernel. This 
> > commit modifies the put_exclude_packet_type() function to do a 
> > proper conversation and add the missing OVS_KEY_ATTR_ETHERTYPE attribute if it's needed.
> >
> > Signed-off-by: Zoltán Balogh <zoltan.balogh@ericsson.com>
> > Reported-by: Eric Garver <e@erig.me>
> > ---
> >  lib/dpif-netlink.c | 34 ++++++++++++++++++++++++++++------
> >  1 file changed, 28 insertions(+), 6 deletions(-)
> >
> > diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index 
> > 562f6134c..d8051a09e 100644
> > --- a/lib/dpif-netlink.c
> > +++ b/lib/dpif-netlink.c
> > @@ -3434,11 +3434,11 @@ dpif_netlink_flow_from_ofpbuf(struct
> > dpif_netlink_flow *flow,
> >
> >
> >  /*
> > - * If PACKET_TYPE attribute is present in 'data', it filters 
> > PACKET_TYPE out,
> > - * then puts 'data' to 'buf'.
> > + * If PACKET_TYPE attribute is present in 'data', converts it to 
> > + ETHERNET and
> > + * ETHERTYPE attributes, then puts 'data' to 'buf'.
> >   */
> >  static void
> > -put_exclude_packet_type(struct ofpbuf *buf, uint16_t type,
> > +put_convert_packet_type(struct ofpbuf *buf, uint16_t type,
> >                          const struct nlattr *data, uint16_t 
> > data_len) {
> >      const struct nlattr *packet_type; @@ -3446,8 +3446,9 @@ 
> > put_exclude_packet_type(struct ofpbuf *buf, uint16_t type,
> >      packet_type = nl_attr_find__(data, data_len, 
> > OVS_KEY_ATTR_PACKET_TYPE);
> >
> >      if (packet_type) {
> > -        /* exclude PACKET_TYPE Netlink attribute. */
> > +        /* Convert PACKET_TYPE Netlink attribute. */
> >          ovs_assert(NLA_ALIGN(packet_type->nla_len) == 
> > NL_A_U32_SIZE);
> > +        ovs_be32 value = nl_attr_get_be32(packet_type);
> >          size_t packet_type_len = NL_A_U32_SIZE;
> >          size_t first_chunk_size = (uint8_t *)packet_type - (uint8_t *)data;
> >          size_t second_chunk_size = data_len - first_chunk_size @@
> > -3455,10 +3456,31 @@ put_exclude_packet_type(struct ofpbuf *buf, uint16_t type,
> >          uint8_t *first_attr = NULL;
> >          struct nlattr *next_attr = nl_attr_next(packet_type);
> >
> > +        bool ethernet_present = nl_attr_find__(data, data_len,
> > +
> > + OVS_KEY_ATTR_ETHERNET);
> > +
> >          first_attr = nl_msg_put_unspec_uninit(buf, type,
> >                                                data_len - packet_type_len);
> >          memcpy(first_attr, data, first_chunk_size);
> >          memcpy(first_attr + first_chunk_size, next_attr, 
> > second_chunk_size);
> > +
> > +        /* Put OVS_KEY_ATTR_ETHERTYPE if needed. */
> > +        if (ntohl(value) == PT_ETH) {
> 
> I don't think this works for the mask case. I think value will be OVS_BE32_MAX.
> 
> > +            ovs_assert(ethernet_present);
> > +        } else {
> > +            const struct nlattr *eth_type;
> > +            ovs_be16 ns_type = pt_ns_type_be(value);
> > +
> > +            ovs_assert(ethernet_present == false);
> > +
> > +            eth_type = nl_attr_find__(data, data_len, OVS_KEY_ATTR_ETHERTYPE);
> > +            if (eth_type) {
> > +                ovs_assert(nl_attr_get_be16(eth_type) == ns_type);
> > +            } else {
> > +                nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, 
> > + ns_type);
> 
> This will have to put OVS_BE16_MAX for mask case.
> 
> > +            }
> > +        }
> > +
> >      } else {
> >          nl_msg_put_unspec(buf, type, data, data_len);
> >      }
> > @@ -3489,11 +3511,11 @@ dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow *flow,
> >      }
> >      if (!flow->ufid_terse || !flow->ufid_present) {
> >          if (flow->key_len) {
> > -            put_exclude_packet_type(buf, OVS_FLOW_ATTR_KEY, flow->key,
> > +            put_convert_packet_type(buf, OVS_FLOW_ATTR_KEY,
> > + flow->key,
> >                                             flow->key_len);
> >          }
> >          if (flow->mask_len) {
> > -            put_exclude_packet_type(buf, OVS_FLOW_ATTR_MASK, flow->mask,
> > +            put_convert_packet_type(buf, OVS_FLOW_ATTR_MASK,
> > + flow->mask,
> >                                             flow->mask_len);
> >          }
> >          if (flow->actions || flow->actions_len) {
> > --
> > 2.11.0
> >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Eric Garver July 6, 2017, 1:01 p.m. UTC | #8
On Thu, Jul 06, 2017 at 09:04:04AM +0000, Yang, Yi Y wrote:
> Hi, Zoltan
> 
> I checked odp_flow_key_from_flow__ in lib/odp-util.c, it has correctly
> handled PACKET_TYPE and ETHERNET, ETHERTYPE has had correct value no
> matter packet_type is PT_ETH or other L3 types, so I think the
> original put_exclude_packet_type has done the right thing, the only
> one thing to be filtered is PACKET_TYPE. I verified net-next and ovs
> compat mode with NSH patches, both of them can work with the original
> put_exclude_packet_type.
> 
> The only issue as Eric mentioned is we should change
> odp_flow_key_from_flow__ to put mask for ETHERTYPE if packet_type is

This is what my original patch did, but it was suggested to isolate the
all the packet_type workarounds into the same function,
put_exclude_packet_type().

> L3 type. But I didn't see "match_validate complain" in my test.

I see the failure for the new test cases that I've added, layer3 tunnels
for kernel datapath.

You can see that work-in-progress here (includes Zoltan's patch):

  https://github.com/erig0/ovs/tree/temp-layer3-rtnetlink

> 
>     nl_msg_put_be32(buf, OVS_KEY_ATTR_PACKET_TYPE, data->packet_type);
> 
>     if (OVS_UNLIKELY(parms->probe)) {
>         max_vlans = FLOW_MAX_VLAN_HEADERS;
>     } else {
>         max_vlans = MIN(parms->support.max_vlan_headers, flow_vlan_limit);
>     }
> 
>     /* Conditionally add L2 attributes for Ethernet packets */
>     if (flow->packet_type == htonl(PT_ETH)) {
>         eth_key = nl_msg_put_unspec_uninit(buf, OVS_KEY_ATTR_ETHERNET,
>                                            sizeof *eth_key);
>         get_ethernet_key(data, eth_key);
> 
>         for (int encaps = 0; encaps < max_vlans; encaps++) {
>             ovs_be16 tpid = flow->vlans[encaps].tpid;
> 
>             if (flow->vlans[encaps].tci == htons(0)) {
>                 if (eth_type_vlan(flow->dl_type)) {
>                     /* If VLAN was truncated the tpid is in dl_type */
>                     tpid = flow->dl_type;
>                 } else {
>                     break;
>                 }
>             }
> 
>             if (export_mask) {
>                 nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, OVS_BE16_MAX);
>             } else {
>                 nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, tpid);
>             }
>             nl_msg_put_be16(buf, OVS_KEY_ATTR_VLAN, data->vlans[encaps].tci);
>             encap[encaps] = nl_msg_start_nested(buf, OVS_KEY_ATTR_ENCAP);
>             if (flow->vlans[encaps].tci == htons(0)) {
>                 goto unencap;
>             }
>         }
>     }
> 
>     if (ntohs(flow->dl_type) < ETH_TYPE_MIN) {
>         /* For backwards compatibility with kernels that don't support
>          * wildcarding, the following convention is used to encode the
>          * OVS_KEY_ATTR_ETHERTYPE for key and mask:
>          *
>          *   key      mask    matches
>          * -------- --------  -------
>          *  >0x5ff   0xffff   Specified Ethernet II Ethertype.
>          *  >0x5ff      0     Any Ethernet II or non-Ethernet II frame.
>          *  <none>   0xffff   Any non-Ethernet II frame (except valid
>          *                    802.3 SNAP packet with valid eth_type).
>          */
>         if (export_mask) {
>             nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, OVS_BE16_MAX);
>         }
>         goto unencap;
>     }
> 
>     nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, data->dl_type);
> 
> -----Original Message-----
> From: Zoltán Balogh [mailto:zoltan.balogh@ericsson.com] 
> Sent: Wednesday, July 5, 2017 5:25 AM
> To: Yang, Yi Y <yi.y.yang@intel.com>; Eric Garver <e@erig.me>
> Cc: 'dev@openvswitch.org' <dev@openvswitch.org>; Jan Scheurich <jan.scheurich@ericsson.com>
> Subject: RE: [ovs-dev] [PATCH] dpif-netilnk: convert packet_type netlink attribute
> 
> Hi Eric and Yi,
> 
> I tried to fix the issues. I posted v2 to the list. Could you have a look at it and test it, please?
> https://patchwork.ozlabs.org/patch/784317/
> 
> Best regards,
> Zoltan
> 
> > -----Original Message-----
> > From: Yang, Yi Y [mailto:yi.y.yang@intel.com]
> > Sent: Tuesday, July 04, 2017 2:10 AM
> > To: Eric Garver <e@erig.me>; Zoltán Balogh 
> > <zoltan.balogh@ericsson.com>
> > Cc: 'dev@openvswitch.org' <dev@openvswitch.org>
> > Subject: RE: [ovs-dev] [PATCH] dpif-netilnk: convert packet_type 
> > netlink attribute
> > 
> > Zoltan, I tested this patch for net-next, it will result in failure by 
> > ovs_assert, in addition, we need to handle OVS_FLOW_ATTR_MASK and 
> > OVS_FLOW_ATTR_KEY differently, to be important, we need to verify it in kernel mode. From my test and verification, this patch can't work, instead, it can work without this patch.
> > 
> > -----Original Message-----
> > From: ovs-dev-bounces@openvswitch.org 
> > [mailto:ovs-dev-bounces@openvswitch.org] On Behalf Of Eric Garver
> > Sent: Tuesday, July 4, 2017 2:52 AM
> > To: Zoltán Balogh <zoltan.balogh@ericsson.com>
> > Cc: 'dev@openvswitch.org' <dev@openvswitch.org>
> > Subject: Re: [ovs-dev] [PATCH] dpif-netilnk: convert packet_type 
> > netlink attribute
> > 
> > On Mon, Jul 03, 2017 at 02:27:18PM +0000, Zoltán Balogh wrote:
> > > By introducing packet type-aware pipeline, match on ethertype was 
> > > removed when packet type is not Ethernet. As pointed out by Eric 
> > > Garver, this could have a side effect on the kernel datapath:
> > > https://patchwork.ozlabs.org/patch/782991/
> > >
> > > This patch does approach the problem from a different perspective.
> > > Instead of re-introducing the unconditional match on Ethernet type 
> > > in all megaflow entries, as suggested by Eric, mapping of 
> > > packet_type != PT_ETH to dl_type could be handled in the put_exclude_packet_type() in dpif-netlink.c.
> > >
> > > Put_exclude_packet_type() filters the packet_type netlink attribute 
> > > out, before all attributes are sent from userspace to kernel. This 
> > > commit modifies the put_exclude_packet_type() function to do a 
> > > proper conversation and add the missing OVS_KEY_ATTR_ETHERTYPE attribute if it's needed.
> > >
> > > Signed-off-by: Zoltán Balogh <zoltan.balogh@ericsson.com>
> > > Reported-by: Eric Garver <e@erig.me>
> > > ---
> > >  lib/dpif-netlink.c | 34 ++++++++++++++++++++++++++++------
> > >  1 file changed, 28 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index 
> > > 562f6134c..d8051a09e 100644
> > > --- a/lib/dpif-netlink.c
> > > +++ b/lib/dpif-netlink.c
> > > @@ -3434,11 +3434,11 @@ dpif_netlink_flow_from_ofpbuf(struct
> > > dpif_netlink_flow *flow,
> > >
> > >
> > >  /*
> > > - * If PACKET_TYPE attribute is present in 'data', it filters 
> > > PACKET_TYPE out,
> > > - * then puts 'data' to 'buf'.
> > > + * If PACKET_TYPE attribute is present in 'data', converts it to 
> > > + ETHERNET and
> > > + * ETHERTYPE attributes, then puts 'data' to 'buf'.
> > >   */
> > >  static void
> > > -put_exclude_packet_type(struct ofpbuf *buf, uint16_t type,
> > > +put_convert_packet_type(struct ofpbuf *buf, uint16_t type,
> > >                          const struct nlattr *data, uint16_t 
> > > data_len) {
> > >      const struct nlattr *packet_type; @@ -3446,8 +3446,9 @@ 
> > > put_exclude_packet_type(struct ofpbuf *buf, uint16_t type,
> > >      packet_type = nl_attr_find__(data, data_len, 
> > > OVS_KEY_ATTR_PACKET_TYPE);
> > >
> > >      if (packet_type) {
> > > -        /* exclude PACKET_TYPE Netlink attribute. */
> > > +        /* Convert PACKET_TYPE Netlink attribute. */
> > >          ovs_assert(NLA_ALIGN(packet_type->nla_len) == 
> > > NL_A_U32_SIZE);
> > > +        ovs_be32 value = nl_attr_get_be32(packet_type);
> > >          size_t packet_type_len = NL_A_U32_SIZE;
> > >          size_t first_chunk_size = (uint8_t *)packet_type - (uint8_t *)data;
> > >          size_t second_chunk_size = data_len - first_chunk_size @@
> > > -3455,10 +3456,31 @@ put_exclude_packet_type(struct ofpbuf *buf, uint16_t type,
> > >          uint8_t *first_attr = NULL;
> > >          struct nlattr *next_attr = nl_attr_next(packet_type);
> > >
> > > +        bool ethernet_present = nl_attr_find__(data, data_len,
> > > +
> > > + OVS_KEY_ATTR_ETHERNET);
> > > +
> > >          first_attr = nl_msg_put_unspec_uninit(buf, type,
> > >                                                data_len - packet_type_len);
> > >          memcpy(first_attr, data, first_chunk_size);
> > >          memcpy(first_attr + first_chunk_size, next_attr, 
> > > second_chunk_size);
> > > +
> > > +        /* Put OVS_KEY_ATTR_ETHERTYPE if needed. */
> > > +        if (ntohl(value) == PT_ETH) {
> > 
> > I don't think this works for the mask case. I think value will be OVS_BE32_MAX.
> > 
> > > +            ovs_assert(ethernet_present);
> > > +        } else {
> > > +            const struct nlattr *eth_type;
> > > +            ovs_be16 ns_type = pt_ns_type_be(value);
> > > +
> > > +            ovs_assert(ethernet_present == false);
> > > +
> > > +            eth_type = nl_attr_find__(data, data_len, OVS_KEY_ATTR_ETHERTYPE);
> > > +            if (eth_type) {
> > > +                ovs_assert(nl_attr_get_be16(eth_type) == ns_type);
> > > +            } else {
> > > +                nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, 
> > > + ns_type);
> > 
> > This will have to put OVS_BE16_MAX for mask case.
> > 
> > > +            }
> > > +        }
> > > +
> > >      } else {
> > >          nl_msg_put_unspec(buf, type, data, data_len);
> > >      }
> > > @@ -3489,11 +3511,11 @@ dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow *flow,
> > >      }
> > >      if (!flow->ufid_terse || !flow->ufid_present) {
> > >          if (flow->key_len) {
> > > -            put_exclude_packet_type(buf, OVS_FLOW_ATTR_KEY, flow->key,
> > > +            put_convert_packet_type(buf, OVS_FLOW_ATTR_KEY,
> > > + flow->key,
> > >                                             flow->key_len);
> > >          }
> > >          if (flow->mask_len) {
> > > -            put_exclude_packet_type(buf, OVS_FLOW_ATTR_MASK, flow->mask,
> > > +            put_convert_packet_type(buf, OVS_FLOW_ATTR_MASK,
> > > + flow->mask,
> > >                                             flow->mask_len);
> > >          }
> > >          if (flow->actions || flow->actions_len) {
> > > --
> > > 2.11.0
> > >
> > > _______________________________________________
> > > dev mailing list
> > > dev@openvswitch.org
> > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
diff mbox

Patch

diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 562f6134c..d8051a09e 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -3434,11 +3434,11 @@  dpif_netlink_flow_from_ofpbuf(struct dpif_netlink_flow *flow,
 
 
 /*
- * If PACKET_TYPE attribute is present in 'data', it filters PACKET_TYPE out,
- * then puts 'data' to 'buf'.
+ * If PACKET_TYPE attribute is present in 'data', converts it to ETHERNET and
+ * ETHERTYPE attributes, then puts 'data' to 'buf'.
  */
 static void
-put_exclude_packet_type(struct ofpbuf *buf, uint16_t type,
+put_convert_packet_type(struct ofpbuf *buf, uint16_t type,
                         const struct nlattr *data, uint16_t data_len)
 {
     const struct nlattr *packet_type;
@@ -3446,8 +3446,9 @@  put_exclude_packet_type(struct ofpbuf *buf, uint16_t type,
     packet_type = nl_attr_find__(data, data_len, OVS_KEY_ATTR_PACKET_TYPE);
 
     if (packet_type) {
-        /* exclude PACKET_TYPE Netlink attribute. */
+        /* Convert PACKET_TYPE Netlink attribute. */
         ovs_assert(NLA_ALIGN(packet_type->nla_len) == NL_A_U32_SIZE);
+        ovs_be32 value = nl_attr_get_be32(packet_type);
         size_t packet_type_len = NL_A_U32_SIZE;
         size_t first_chunk_size = (uint8_t *)packet_type - (uint8_t *)data;
         size_t second_chunk_size = data_len - first_chunk_size
@@ -3455,10 +3456,31 @@  put_exclude_packet_type(struct ofpbuf *buf, uint16_t type,
         uint8_t *first_attr = NULL;
         struct nlattr *next_attr = nl_attr_next(packet_type);
 
+        bool ethernet_present = nl_attr_find__(data, data_len,
+                                               OVS_KEY_ATTR_ETHERNET);
+
         first_attr = nl_msg_put_unspec_uninit(buf, type,
                                               data_len - packet_type_len);
         memcpy(first_attr, data, first_chunk_size);
         memcpy(first_attr + first_chunk_size, next_attr, second_chunk_size);
+
+        /* Put OVS_KEY_ATTR_ETHERTYPE if needed. */
+        if (ntohl(value) == PT_ETH) {
+            ovs_assert(ethernet_present);
+        } else {
+            const struct nlattr *eth_type;
+            ovs_be16 ns_type = pt_ns_type_be(value);
+
+            ovs_assert(ethernet_present == false);
+
+            eth_type = nl_attr_find__(data, data_len, OVS_KEY_ATTR_ETHERTYPE);
+            if (eth_type) {
+                ovs_assert(nl_attr_get_be16(eth_type) == ns_type);
+            } else {
+                nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, ns_type);
+            }
+        }
+
     } else {
         nl_msg_put_unspec(buf, type, data, data_len);
     }
@@ -3489,11 +3511,11 @@  dpif_netlink_flow_to_ofpbuf(const struct dpif_netlink_flow *flow,
     }
     if (!flow->ufid_terse || !flow->ufid_present) {
         if (flow->key_len) {
-            put_exclude_packet_type(buf, OVS_FLOW_ATTR_KEY, flow->key,
+            put_convert_packet_type(buf, OVS_FLOW_ATTR_KEY, flow->key,
                                            flow->key_len);
         }
         if (flow->mask_len) {
-            put_exclude_packet_type(buf, OVS_FLOW_ATTR_MASK, flow->mask,
+            put_convert_packet_type(buf, OVS_FLOW_ATTR_MASK, flow->mask,
                                            flow->mask_len);
         }
         if (flow->actions || flow->actions_len) {