[v2] Change in Openvswitch to support MPLS label depth of 3 in ingress direction
diff mbox series

Message ID 1571580702-18476-1-git-send-email-martinvarghesenokia@gmail.com
State Changes Requested
Delegated to: David Miller
Headers show
Series
  • [v2] Change in Openvswitch to support MPLS label depth of 3 in ingress direction
Related show

Commit Message

Martin Varghese Oct. 20, 2019, 2:11 p.m. UTC
From: Martin Varghese <martin.varghese@nokia.com>

The openvswitch was supporting a MPLS label depth of 1 in the ingress
direction though the userspace OVS supports a max depth of 3 labels.
This change enables openvswitch module to support a max depth of
3 labels in the ingress.

Signed-off-by: Martin Varghese <martin.varghese@nokia.com>
---
Changes in v2
   - Moved MPLS count validation from datapath to configuration.
   - Fixed set mpls function.

 net/openvswitch/actions.c      |  2 +-
 net/openvswitch/flow.c         | 20 ++++++++++-----
 net/openvswitch/flow.h         |  9 ++++---
 net/openvswitch/flow_netlink.c | 57 +++++++++++++++++++++++++++++++++---------
 4 files changed, 66 insertions(+), 22 deletions(-)

Comments

Pravin Shelar Oct. 22, 2019, 7:03 a.m. UTC | #1
On Sun, Oct 20, 2019 at 7:12 AM Martin Varghese
<martinvarghesenokia@gmail.com> wrote:
>
> From: Martin Varghese <martin.varghese@nokia.com>
>
> The openvswitch was supporting a MPLS label depth of 1 in the ingress
> direction though the userspace OVS supports a max depth of 3 labels.
> This change enables openvswitch module to support a max depth of
> 3 labels in the ingress.
>
> Signed-off-by: Martin Varghese <martin.varghese@nokia.com>
> ---
> Changes in v2
>    - Moved MPLS count validation from datapath to configuration.
>    - Fixed set mpls function.
>
This patch looks pretty close now.

>  net/openvswitch/actions.c      |  2 +-
>  net/openvswitch/flow.c         | 20 ++++++++++-----
>  net/openvswitch/flow.h         |  9 ++++---
>  net/openvswitch/flow_netlink.c | 57 +++++++++++++++++++++++++++++++++---------
>  4 files changed, 66 insertions(+), 22 deletions(-)
>
...
> diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
> index d7559c6..21de061 100644
> --- a/net/openvswitch/flow_netlink.c
> +++ b/net/openvswitch/flow_netlink.c
> @@ -424,7 +424,7 @@ size_t ovs_key_attr_size(void)
>         [OVS_KEY_ATTR_DP_HASH]   = { .len = sizeof(u32) },
>         [OVS_KEY_ATTR_TUNNEL]    = { .len = OVS_ATTR_NESTED,
>                                      .next = ovs_tunnel_key_lens, },
> -       [OVS_KEY_ATTR_MPLS]      = { .len = sizeof(struct ovs_key_mpls) },
> +       [OVS_KEY_ATTR_MPLS]      = { .len = OVS_ATTR_VARIABLE },
>         [OVS_KEY_ATTR_CT_STATE]  = { .len = sizeof(u32) },
>         [OVS_KEY_ATTR_CT_ZONE]   = { .len = sizeof(u16) },
>         [OVS_KEY_ATTR_CT_MARK]   = { .len = sizeof(u32) },
> @@ -1628,10 +1628,25 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
>
>         if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
>                 const struct ovs_key_mpls *mpls_key;
> +               u32 hdr_len;
> +               u32 label_count, label_count_mask, i;
>
>                 mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
> -               SW_FLOW_KEY_PUT(match, mpls.top_lse,
> -                               mpls_key->mpls_lse, is_mask);
> +               hdr_len = nla_len(a[OVS_KEY_ATTR_MPLS]);
> +               label_count = hdr_len / sizeof(struct ovs_key_mpls);
> +
> +               if (label_count == 0 || label_count > MPLS_LABEL_DEPTH ||
> +                   hdr_len % sizeof(struct ovs_key_mpls))
> +                       return -EINVAL;
> +
> +               label_count_mask =  GENMASK(label_count - 1, 0);
> +
> +               for (i = 0 ; i < label_count; i++)
> +                       SW_FLOW_KEY_PUT(match, mpls.lse[i],
> +                                       mpls_key[i].mpls_lse, is_mask);
> +
> +               SW_FLOW_KEY_PUT(match, mpls.num_labels_mask,
> +                               label_count_mask, is_mask);
>
>                 attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
>          }
> @@ -2114,13 +2129,18 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
>                 ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
>                 ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
>         } else if (eth_p_mpls(swkey->eth.type)) {
> +               u8 i, num_labels;
>                 struct ovs_key_mpls *mpls_key;
>
> -               nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
> +               num_labels = hweight_long(output->mpls.num_labels_mask);
> +               nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS,
> +                                 num_labels * sizeof(*mpls_key));
>                 if (!nla)
>                         goto nla_put_failure;
> +
>                 mpls_key = nla_data(nla);
> -               mpls_key->mpls_lse = output->mpls.top_lse;
> +               for (i = 0; i < num_labels; i++)
> +                       mpls_key[i].mpls_lse = output->mpls.lse[i];
>         }
>
>         if ((swkey->eth.type == htons(ETH_P_IP) ||
> @@ -2957,6 +2977,10 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
>         u8 mac_proto = ovs_key_mac_proto(key);
>         const struct nlattr *a;
>         int rem, err;
> +       u32 mpls_label_count = 0;
> +
> +       if (eth_p_mpls(eth_type))
> +               mpls_label_count = hweight_long(key->mpls.num_labels_mask);
>
The MPLS push and pop action could be part of nested actions in
sample, so the count needs to be global count across such nested
actions. have a look at validate_and_copy_sample().

>         nla_for_each_nested(a, attr, rem) {
>                 /* Expected argument lengths, (u32)-1 for variable length. */
> @@ -3065,25 +3089,34 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
>                              !eth_p_mpls(eth_type)))
>                                 return -EINVAL;
>                         eth_type = mpls->mpls_ethertype;
> +                       mpls_label_count++;
>                         break;
>                 }
>
> -               case OVS_ACTION_ATTR_POP_MPLS:
> +               case OVS_ACTION_ATTR_POP_MPLS: {
> +                       __be16  proto;
>                         if (vlan_tci & htons(VLAN_CFI_MASK) ||
>                             !eth_p_mpls(eth_type))
>                                 return -EINVAL;
>
> -                       /* Disallow subsequent L2.5+ set and mpls_pop actions
> -                        * as there is no check here to ensure that the new
> -                        * eth_type is valid and thus set actions could
> -                        * write off the end of the packet or otherwise
> -                        * corrupt it.
> +                       /* Disallow subsequent L2.5+ set actions as there is
> +                        * no check here to ensure that the new eth type is
> +                        * valid and thus set actions could write off the
> +                        * end of the packet or otherwise corrupt it.
>                          *
>                          * Support for these actions is planned using packet
>                          * recirculation.
>                          */
This comment needs updated.


> -                       eth_type = htons(0);
> +                       proto = nla_get_be16(a);
> +                       mpls_label_count--;
> +
> +                       if (!eth_p_mpls(proto) || !mpls_label_count)
> +                               eth_type = htons(0);
> +                       else
> +                               eth_type =  proto;
> +
>                         break;
> +               }
>
>                 case OVS_ACTION_ATTR_SET:
>                         err = validate_set(a, key, sfa,
> --
> 1.8.3.1
>
Martin Varghese Oct. 22, 2019, 12:51 p.m. UTC | #2
On Tue, Oct 22, 2019 at 12:03:49AM -0700, Pravin Shelar wrote:
> On Sun, Oct 20, 2019 at 7:12 AM Martin Varghese
> <martinvarghesenokia@gmail.com> wrote:
> >
> > From: Martin Varghese <martin.varghese@nokia.com>
> >
> > The openvswitch was supporting a MPLS label depth of 1 in the ingress
> > direction though the userspace OVS supports a max depth of 3 labels.
> > This change enables openvswitch module to support a max depth of
> > 3 labels in the ingress.
> >
> > Signed-off-by: Martin Varghese <martin.varghese@nokia.com>
> > ---
> > Changes in v2
> >    - Moved MPLS count validation from datapath to configuration.
> >    - Fixed set mpls function.
> >
> This patch looks pretty close now.
> 
> >  net/openvswitch/actions.c      |  2 +-
> >  net/openvswitch/flow.c         | 20 ++++++++++-----
> >  net/openvswitch/flow.h         |  9 ++++---
> >  net/openvswitch/flow_netlink.c | 57 +++++++++++++++++++++++++++++++++---------
> >  4 files changed, 66 insertions(+), 22 deletions(-)
> >
> ...
> > diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
> > index d7559c6..21de061 100644
> > --- a/net/openvswitch/flow_netlink.c
> > +++ b/net/openvswitch/flow_netlink.c
> > @@ -424,7 +424,7 @@ size_t ovs_key_attr_size(void)
> >         [OVS_KEY_ATTR_DP_HASH]   = { .len = sizeof(u32) },
> >         [OVS_KEY_ATTR_TUNNEL]    = { .len = OVS_ATTR_NESTED,
> >                                      .next = ovs_tunnel_key_lens, },
> > -       [OVS_KEY_ATTR_MPLS]      = { .len = sizeof(struct ovs_key_mpls) },
> > +       [OVS_KEY_ATTR_MPLS]      = { .len = OVS_ATTR_VARIABLE },
> >         [OVS_KEY_ATTR_CT_STATE]  = { .len = sizeof(u32) },
> >         [OVS_KEY_ATTR_CT_ZONE]   = { .len = sizeof(u16) },
> >         [OVS_KEY_ATTR_CT_MARK]   = { .len = sizeof(u32) },
> > @@ -1628,10 +1628,25 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
> >
> >         if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
> >                 const struct ovs_key_mpls *mpls_key;
> > +               u32 hdr_len;
> > +               u32 label_count, label_count_mask, i;
> >
> >                 mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
> > -               SW_FLOW_KEY_PUT(match, mpls.top_lse,
> > -                               mpls_key->mpls_lse, is_mask);
> > +               hdr_len = nla_len(a[OVS_KEY_ATTR_MPLS]);
> > +               label_count = hdr_len / sizeof(struct ovs_key_mpls);
> > +
> > +               if (label_count == 0 || label_count > MPLS_LABEL_DEPTH ||
> > +                   hdr_len % sizeof(struct ovs_key_mpls))
> > +                       return -EINVAL;
> > +
> > +               label_count_mask =  GENMASK(label_count - 1, 0);
> > +
> > +               for (i = 0 ; i < label_count; i++)
> > +                       SW_FLOW_KEY_PUT(match, mpls.lse[i],
> > +                                       mpls_key[i].mpls_lse, is_mask);
> > +
> > +               SW_FLOW_KEY_PUT(match, mpls.num_labels_mask,
> > +                               label_count_mask, is_mask);
> >
> >                 attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
> >          }
> > @@ -2114,13 +2129,18 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
> >                 ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
> >                 ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
> >         } else if (eth_p_mpls(swkey->eth.type)) {
> > +               u8 i, num_labels;
> >                 struct ovs_key_mpls *mpls_key;
> >
> > -               nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
> > +               num_labels = hweight_long(output->mpls.num_labels_mask);
> > +               nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS,
> > +                                 num_labels * sizeof(*mpls_key));
> >                 if (!nla)
> >                         goto nla_put_failure;
> > +
> >                 mpls_key = nla_data(nla);
> > -               mpls_key->mpls_lse = output->mpls.top_lse;
> > +               for (i = 0; i < num_labels; i++)
> > +                       mpls_key[i].mpls_lse = output->mpls.lse[i];
> >         }
> >
> >         if ((swkey->eth.type == htons(ETH_P_IP) ||
> > @@ -2957,6 +2977,10 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
> >         u8 mac_proto = ovs_key_mac_proto(key);
> >         const struct nlattr *a;
> >         int rem, err;
> > +       u32 mpls_label_count = 0;
> > +
> > +       if (eth_p_mpls(eth_type))
> > +               mpls_label_count = hweight_long(key->mpls.num_labels_mask);
> >
> The MPLS push and pop action could be part of nested actions in
> sample, so the count needs to be global count across such nested
> actions. have a look at validate_and_copy_sample().
> 

can we embed the mpls_label_count in struct sw_flow_actions? 

> >         nla_for_each_nested(a, attr, rem) {
> >                 /* Expected argument lengths, (u32)-1 for variable length. */
> > @@ -3065,25 +3089,34 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
> >                              !eth_p_mpls(eth_type)))
> >                                 return -EINVAL;
> >                         eth_type = mpls->mpls_ethertype;
> > +                       mpls_label_count++;
> >                         break;
> >                 }
> >
> > -               case OVS_ACTION_ATTR_POP_MPLS:
> > +               case OVS_ACTION_ATTR_POP_MPLS: {
> > +                       __be16  proto;
> >                         if (vlan_tci & htons(VLAN_CFI_MASK) ||
> >                             !eth_p_mpls(eth_type))
> >                                 return -EINVAL;
> >
> > -                       /* Disallow subsequent L2.5+ set and mpls_pop actions
> > -                        * as there is no check here to ensure that the new
> > -                        * eth_type is valid and thus set actions could
> > -                        * write off the end of the packet or otherwise
> > -                        * corrupt it.
> > +                       /* Disallow subsequent L2.5+ set actions as there is
> > +                        * no check here to ensure that the new eth type is
> > +                        * valid and thus set actions could write off the
> > +                        * end of the packet or otherwise corrupt it.
> >                          *
> >                          * Support for these actions is planned using packet
> >                          * recirculation.
> >                          */
> This comment needs updated.
> 
> 

Disallow subsequent L2.5+ set and mpls_pop actions
once the last MPLS label in the packet is popped
as there is no check here to ensure that the new
eth_type is valid and thus set actions could
write off the end of the packet or otherwise
corrupt it.
 
Support for these actions is planned using packet
recirculation.
> > -                       eth_type = htons(0);
> > +                       proto = nla_get_be16(a);
> > +                       mpls_label_count--;
> > +
> > +                       if (!eth_p_mpls(proto) || !mpls_label_count)
> > +                               eth_type = htons(0);
> > +                       else
> > +                               eth_type =  proto;
> > +
> >                         break;
> > +               }
> >
> >                 case OVS_ACTION_ATTR_SET:
> >                         err = validate_set(a, key, sfa,
> > --
> > 1.8.3.1
> >
Martin Varghese Oct. 22, 2019, 3:29 p.m. UTC | #3
On Tue, Oct 22, 2019 at 12:03:49AM -0700, Pravin Shelar wrote:
> On Sun, Oct 20, 2019 at 7:12 AM Martin Varghese
> <martinvarghesenokia@gmail.com> wrote:
> >
> > From: Martin Varghese <martin.varghese@nokia.com>
> >
> > The openvswitch was supporting a MPLS label depth of 1 in the ingress
> > direction though the userspace OVS supports a max depth of 3 labels.
> > This change enables openvswitch module to support a max depth of
> > 3 labels in the ingress.
> >
> > Signed-off-by: Martin Varghese <martin.varghese@nokia.com>
> > ---
> > Changes in v2
> >    - Moved MPLS count validation from datapath to configuration.
> >    - Fixed set mpls function.
> >
> This patch looks pretty close now.
> 
> >  net/openvswitch/actions.c      |  2 +-
> >  net/openvswitch/flow.c         | 20 ++++++++++-----
> >  net/openvswitch/flow.h         |  9 ++++---
> >  net/openvswitch/flow_netlink.c | 57 +++++++++++++++++++++++++++++++++---------
> >  4 files changed, 66 insertions(+), 22 deletions(-)
> >
> ...
> > diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
> > index d7559c6..21de061 100644
> > --- a/net/openvswitch/flow_netlink.c
> > +++ b/net/openvswitch/flow_netlink.c
> > @@ -424,7 +424,7 @@ size_t ovs_key_attr_size(void)
> >         [OVS_KEY_ATTR_DP_HASH]   = { .len = sizeof(u32) },
> >         [OVS_KEY_ATTR_TUNNEL]    = { .len = OVS_ATTR_NESTED,
> >                                      .next = ovs_tunnel_key_lens, },
> > -       [OVS_KEY_ATTR_MPLS]      = { .len = sizeof(struct ovs_key_mpls) },
> > +       [OVS_KEY_ATTR_MPLS]      = { .len = OVS_ATTR_VARIABLE },
> >         [OVS_KEY_ATTR_CT_STATE]  = { .len = sizeof(u32) },
> >         [OVS_KEY_ATTR_CT_ZONE]   = { .len = sizeof(u16) },
> >         [OVS_KEY_ATTR_CT_MARK]   = { .len = sizeof(u32) },
> > @@ -1628,10 +1628,25 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
> >
> >         if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
> >                 const struct ovs_key_mpls *mpls_key;
> > +               u32 hdr_len;
> > +               u32 label_count, label_count_mask, i;
> >
> >                 mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
> > -               SW_FLOW_KEY_PUT(match, mpls.top_lse,
> > -                               mpls_key->mpls_lse, is_mask);
> > +               hdr_len = nla_len(a[OVS_KEY_ATTR_MPLS]);
> > +               label_count = hdr_len / sizeof(struct ovs_key_mpls);
> > +
> > +               if (label_count == 0 || label_count > MPLS_LABEL_DEPTH ||
> > +                   hdr_len % sizeof(struct ovs_key_mpls))
> > +                       return -EINVAL;
> > +
> > +               label_count_mask =  GENMASK(label_count - 1, 0);
> > +
> > +               for (i = 0 ; i < label_count; i++)
> > +                       SW_FLOW_KEY_PUT(match, mpls.lse[i],
> > +                                       mpls_key[i].mpls_lse, is_mask);
> > +
> > +               SW_FLOW_KEY_PUT(match, mpls.num_labels_mask,
> > +                               label_count_mask, is_mask);
> >
> >                 attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
> >          }
> > @@ -2114,13 +2129,18 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
> >                 ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
> >                 ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
> >         } else if (eth_p_mpls(swkey->eth.type)) {
> > +               u8 i, num_labels;
> >                 struct ovs_key_mpls *mpls_key;
> >
> > -               nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
> > +               num_labels = hweight_long(output->mpls.num_labels_mask);
> > +               nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS,
> > +                                 num_labels * sizeof(*mpls_key));
> >                 if (!nla)
> >                         goto nla_put_failure;
> > +
> >                 mpls_key = nla_data(nla);
> > -               mpls_key->mpls_lse = output->mpls.top_lse;
> > +               for (i = 0; i < num_labels; i++)
> > +                       mpls_key[i].mpls_lse = output->mpls.lse[i];
> >         }
> >
> >         if ((swkey->eth.type == htons(ETH_P_IP) ||
> > @@ -2957,6 +2977,10 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
> >         u8 mac_proto = ovs_key_mac_proto(key);
> >         const struct nlattr *a;
> >         int rem, err;
> > +       u32 mpls_label_count = 0;
> > +
> > +       if (eth_p_mpls(eth_type))
> > +               mpls_label_count = hweight_long(key->mpls.num_labels_mask);
> >
> The MPLS push and pop action could be part of nested actions in
> sample, so the count needs to be global count across such nested
> actions. have a look at validate_and_copy_sample().
>
Embedding mpls_label_count in struct sw_flow_actions will not work for clone

I guess we need to move the below code to ovs_nla_copy_actions and extend the  arguments of __ovs_nla_copy_actions to take mpls_label_count also
if (eth_p_mpls(eth_type))
                mpls_label_count = hweight_long(key->mpls.num_labels_mask)

 
> >         nla_for_each_nested(a, attr, rem) {
> >                 /* Expected argument lengths, (u32)-1 for variable length. */
> > @@ -3065,25 +3089,34 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
> >                              !eth_p_mpls(eth_type)))
> >                                 return -EINVAL;
> >                         eth_type = mpls->mpls_ethertype;
> > +                       mpls_label_count++;
> >                         break;
> >                 }
> >
> > -               case OVS_ACTION_ATTR_POP_MPLS:
> > +               case OVS_ACTION_ATTR_POP_MPLS: {
> > +                       __be16  proto;
> >                         if (vlan_tci & htons(VLAN_CFI_MASK) ||
> >                             !eth_p_mpls(eth_type))
> >                                 return -EINVAL;
> >
> > -                       /* Disallow subsequent L2.5+ set and mpls_pop actions
> > -                        * as there is no check here to ensure that the new
> > -                        * eth_type is valid and thus set actions could
> > -                        * write off the end of the packet or otherwise
> > -                        * corrupt it.
> > +                       /* Disallow subsequent L2.5+ set actions as there is
> > +                        * no check here to ensure that the new eth type is
> > +                        * valid and thus set actions could write off the
> > +                        * end of the packet or otherwise corrupt it.
> >                          *
> >                          * Support for these actions is planned using packet
> >                          * recirculation.
> >                          */
> This comment needs updated.
> 
> 
> > -                       eth_type = htons(0);
> > +                       proto = nla_get_be16(a);
> > +                       mpls_label_count--;
> > +
> > +                       if (!eth_p_mpls(proto) || !mpls_label_count)
> > +                               eth_type = htons(0);
> > +                       else
> > +                               eth_type =  proto;
> > +
> >                         break;
> > +               }
> >
> >                 case OVS_ACTION_ATTR_SET:
> >                         err = validate_set(a, key, sfa,
> > --
> > 1.8.3.1
> >
Pravin Shelar Oct. 23, 2019, 3:59 a.m. UTC | #4
On Tue, Oct 22, 2019 at 8:29 AM Martin Varghese
<martinvarghesenokia@gmail.com> wrote:
>
> On Tue, Oct 22, 2019 at 12:03:49AM -0700, Pravin Shelar wrote:
> > On Sun, Oct 20, 2019 at 7:12 AM Martin Varghese
> > <martinvarghesenokia@gmail.com> wrote:
> > >
> > > From: Martin Varghese <martin.varghese@nokia.com>
> > >
> > > The openvswitch was supporting a MPLS label depth of 1 in the ingress
> > > direction though the userspace OVS supports a max depth of 3 labels.
> > > This change enables openvswitch module to support a max depth of
> > > 3 labels in the ingress.
> > >
> > > Signed-off-by: Martin Varghese <martin.varghese@nokia.com>
> > > ---
> > > Changes in v2
> > >    - Moved MPLS count validation from datapath to configuration.
> > >    - Fixed set mpls function.
> > >
> > This patch looks pretty close now.
> >
> > >  net/openvswitch/actions.c      |  2 +-
> > >  net/openvswitch/flow.c         | 20 ++++++++++-----
> > >  net/openvswitch/flow.h         |  9 ++++---
> > >  net/openvswitch/flow_netlink.c | 57 +++++++++++++++++++++++++++++++++---------
> > >  4 files changed, 66 insertions(+), 22 deletions(-)
> > >
> > ...
> > > diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
> > > index d7559c6..21de061 100644
> > > --- a/net/openvswitch/flow_netlink.c
> > > +++ b/net/openvswitch/flow_netlink.c
> > > @@ -424,7 +424,7 @@ size_t ovs_key_attr_size(void)
> > >         [OVS_KEY_ATTR_DP_HASH]   = { .len = sizeof(u32) },
> > >         [OVS_KEY_ATTR_TUNNEL]    = { .len = OVS_ATTR_NESTED,
> > >                                      .next = ovs_tunnel_key_lens, },
> > > -       [OVS_KEY_ATTR_MPLS]      = { .len = sizeof(struct ovs_key_mpls) },
> > > +       [OVS_KEY_ATTR_MPLS]      = { .len = OVS_ATTR_VARIABLE },
> > >         [OVS_KEY_ATTR_CT_STATE]  = { .len = sizeof(u32) },
> > >         [OVS_KEY_ATTR_CT_ZONE]   = { .len = sizeof(u16) },
> > >         [OVS_KEY_ATTR_CT_MARK]   = { .len = sizeof(u32) },
> > > @@ -1628,10 +1628,25 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
> > >
> > >         if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
> > >                 const struct ovs_key_mpls *mpls_key;
> > > +               u32 hdr_len;
> > > +               u32 label_count, label_count_mask, i;
> > >
> > >                 mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
> > > -               SW_FLOW_KEY_PUT(match, mpls.top_lse,
> > > -                               mpls_key->mpls_lse, is_mask);
> > > +               hdr_len = nla_len(a[OVS_KEY_ATTR_MPLS]);
> > > +               label_count = hdr_len / sizeof(struct ovs_key_mpls);
> > > +
> > > +               if (label_count == 0 || label_count > MPLS_LABEL_DEPTH ||
> > > +                   hdr_len % sizeof(struct ovs_key_mpls))
> > > +                       return -EINVAL;
> > > +
> > > +               label_count_mask =  GENMASK(label_count - 1, 0);
> > > +
> > > +               for (i = 0 ; i < label_count; i++)
> > > +                       SW_FLOW_KEY_PUT(match, mpls.lse[i],
> > > +                                       mpls_key[i].mpls_lse, is_mask);
> > > +
> > > +               SW_FLOW_KEY_PUT(match, mpls.num_labels_mask,
> > > +                               label_count_mask, is_mask);
> > >
> > >                 attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
> > >          }
> > > @@ -2114,13 +2129,18 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
> > >                 ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
> > >                 ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
> > >         } else if (eth_p_mpls(swkey->eth.type)) {
> > > +               u8 i, num_labels;
> > >                 struct ovs_key_mpls *mpls_key;
> > >
> > > -               nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
> > > +               num_labels = hweight_long(output->mpls.num_labels_mask);
> > > +               nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS,
> > > +                                 num_labels * sizeof(*mpls_key));
> > >                 if (!nla)
> > >                         goto nla_put_failure;
> > > +
> > >                 mpls_key = nla_data(nla);
> > > -               mpls_key->mpls_lse = output->mpls.top_lse;
> > > +               for (i = 0; i < num_labels; i++)
> > > +                       mpls_key[i].mpls_lse = output->mpls.lse[i];
> > >         }
> > >
> > >         if ((swkey->eth.type == htons(ETH_P_IP) ||
> > > @@ -2957,6 +2977,10 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
> > >         u8 mac_proto = ovs_key_mac_proto(key);
> > >         const struct nlattr *a;
> > >         int rem, err;
> > > +       u32 mpls_label_count = 0;
> > > +
> > > +       if (eth_p_mpls(eth_type))
> > > +               mpls_label_count = hweight_long(key->mpls.num_labels_mask);
> > >
> > The MPLS push and pop action could be part of nested actions in
> > sample, so the count needs to be global count across such nested
> > actions. have a look at validate_and_copy_sample().
> >
> Embedding mpls_label_count in struct sw_flow_actions will not work for clone
>
> I guess we need to move the below code to ovs_nla_copy_actions and extend the  arguments of __ovs_nla_copy_actions to take mpls_label_count also
> if (eth_p_mpls(eth_type))
>                 mpls_label_count = hweight_long(key->mpls.num_labels_mask)
>
>
I am not suggesting changing sw_flow_actions, You can define count
variable in ovs_nla_copy_actions() and pass it as a pointer to nested
function. That can be used to keep track of MPLS labels at all nested
actions.
Martin Varghese Oct. 23, 2019, 5:04 a.m. UTC | #5
On Tue, Oct 22, 2019 at 08:59:46PM -0700, Pravin Shelar wrote:
> On Tue, Oct 22, 2019 at 8:29 AM Martin Varghese
> <martinvarghesenokia@gmail.com> wrote:
> >
> > On Tue, Oct 22, 2019 at 12:03:49AM -0700, Pravin Shelar wrote:
> > > On Sun, Oct 20, 2019 at 7:12 AM Martin Varghese
> > > <martinvarghesenokia@gmail.com> wrote:
> > > >
> > > > From: Martin Varghese <martin.varghese@nokia.com>
> > > >
> > > > The openvswitch was supporting a MPLS label depth of 1 in the ingress
> > > > direction though the userspace OVS supports a max depth of 3 labels.
> > > > This change enables openvswitch module to support a max depth of
> > > > 3 labels in the ingress.
> > > >
> > > > Signed-off-by: Martin Varghese <martin.varghese@nokia.com>
> > > > ---
> > > > Changes in v2
> > > >    - Moved MPLS count validation from datapath to configuration.
> > > >    - Fixed set mpls function.
> > > >
> > > This patch looks pretty close now.
> > >
> > > >  net/openvswitch/actions.c      |  2 +-
> > > >  net/openvswitch/flow.c         | 20 ++++++++++-----
> > > >  net/openvswitch/flow.h         |  9 ++++---
> > > >  net/openvswitch/flow_netlink.c | 57 +++++++++++++++++++++++++++++++++---------
> > > >  4 files changed, 66 insertions(+), 22 deletions(-)
> > > >
> > > ...
> > > > diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
> > > > index d7559c6..21de061 100644
> > > > --- a/net/openvswitch/flow_netlink.c
> > > > +++ b/net/openvswitch/flow_netlink.c
> > > > @@ -424,7 +424,7 @@ size_t ovs_key_attr_size(void)
> > > >         [OVS_KEY_ATTR_DP_HASH]   = { .len = sizeof(u32) },
> > > >         [OVS_KEY_ATTR_TUNNEL]    = { .len = OVS_ATTR_NESTED,
> > > >                                      .next = ovs_tunnel_key_lens, },
> > > > -       [OVS_KEY_ATTR_MPLS]      = { .len = sizeof(struct ovs_key_mpls) },
> > > > +       [OVS_KEY_ATTR_MPLS]      = { .len = OVS_ATTR_VARIABLE },
> > > >         [OVS_KEY_ATTR_CT_STATE]  = { .len = sizeof(u32) },
> > > >         [OVS_KEY_ATTR_CT_ZONE]   = { .len = sizeof(u16) },
> > > >         [OVS_KEY_ATTR_CT_MARK]   = { .len = sizeof(u32) },
> > > > @@ -1628,10 +1628,25 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
> > > >
> > > >         if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
> > > >                 const struct ovs_key_mpls *mpls_key;
> > > > +               u32 hdr_len;
> > > > +               u32 label_count, label_count_mask, i;
> > > >
> > > >                 mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
> > > > -               SW_FLOW_KEY_PUT(match, mpls.top_lse,
> > > > -                               mpls_key->mpls_lse, is_mask);
> > > > +               hdr_len = nla_len(a[OVS_KEY_ATTR_MPLS]);
> > > > +               label_count = hdr_len / sizeof(struct ovs_key_mpls);
> > > > +
> > > > +               if (label_count == 0 || label_count > MPLS_LABEL_DEPTH ||
> > > > +                   hdr_len % sizeof(struct ovs_key_mpls))
> > > > +                       return -EINVAL;
> > > > +
> > > > +               label_count_mask =  GENMASK(label_count - 1, 0);
> > > > +
> > > > +               for (i = 0 ; i < label_count; i++)
> > > > +                       SW_FLOW_KEY_PUT(match, mpls.lse[i],
> > > > +                                       mpls_key[i].mpls_lse, is_mask);
> > > > +
> > > > +               SW_FLOW_KEY_PUT(match, mpls.num_labels_mask,
> > > > +                               label_count_mask, is_mask);
> > > >
> > > >                 attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
> > > >          }
> > > > @@ -2114,13 +2129,18 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
> > > >                 ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
> > > >                 ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
> > > >         } else if (eth_p_mpls(swkey->eth.type)) {
> > > > +               u8 i, num_labels;
> > > >                 struct ovs_key_mpls *mpls_key;
> > > >
> > > > -               nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
> > > > +               num_labels = hweight_long(output->mpls.num_labels_mask);
> > > > +               nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS,
> > > > +                                 num_labels * sizeof(*mpls_key));
> > > >                 if (!nla)
> > > >                         goto nla_put_failure;
> > > > +
> > > >                 mpls_key = nla_data(nla);
> > > > -               mpls_key->mpls_lse = output->mpls.top_lse;
> > > > +               for (i = 0; i < num_labels; i++)
> > > > +                       mpls_key[i].mpls_lse = output->mpls.lse[i];
> > > >         }
> > > >
> > > >         if ((swkey->eth.type == htons(ETH_P_IP) ||
> > > > @@ -2957,6 +2977,10 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
> > > >         u8 mac_proto = ovs_key_mac_proto(key);
> > > >         const struct nlattr *a;
> > > >         int rem, err;
> > > > +       u32 mpls_label_count = 0;
> > > > +
> > > > +       if (eth_p_mpls(eth_type))
> > > > +               mpls_label_count = hweight_long(key->mpls.num_labels_mask);
> > > >
> > > The MPLS push and pop action could be part of nested actions in
> > > sample, so the count needs to be global count across such nested
> > > actions. have a look at validate_and_copy_sample().
> > >
> > Embedding mpls_label_count in struct sw_flow_actions will not work for clone
> >
> > I guess we need to move the below code to ovs_nla_copy_actions and extend the  arguments of __ovs_nla_copy_actions to take mpls_label_count also
> > if (eth_p_mpls(eth_type))
> >                 mpls_label_count = hweight_long(key->mpls.num_labels_mask)
> >
> >
> I am not suggesting changing sw_flow_actions, You can define count
> variable in ovs_nla_copy_actions() and pass it as a pointer to nested
> function. That can be used to keep track of MPLS labels at all nested
> actions.

Actions clone & sample does a clone of SKB correct?
Hence shouldn't they maintain a seperate mpls_label count for each nested action set
I assume instead of passing as pointer from ovs_nla_copy_actions ,if passed by value it should 
solve the problem.Need to try that though.
William Tu Oct. 24, 2019, 8:47 p.m. UTC | #6
On Sun, Oct 20, 2019 at 07:41:42PM +0530, Martin Varghese wrote:
> From: Martin Varghese <martin.varghese@nokia.com>
> 
> The openvswitch was supporting a MPLS label depth of 1 in the ingress
> direction though the userspace OVS supports a max depth of 3 labels.
> This change enables openvswitch module to support a max depth of
> 3 labels in the ingress.
> 

Hi Martin,
Thanks for the patch. I have one comment below.

> Signed-off-by: Martin Varghese <martin.varghese@nokia.com>
> ---
> Changes in v2
>    - Moved MPLS count validation from datapath to configuration.
>    - Fixed set mpls function.
> 
>  net/openvswitch/actions.c      |  2 +-
>  net/openvswitch/flow.c         | 20 ++++++++++-----
>  net/openvswitch/flow.h         |  9 ++++---
>  net/openvswitch/flow_netlink.c | 57 +++++++++++++++++++++++++++++++++---------
>  4 files changed, 66 insertions(+), 22 deletions(-)
> 
> diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
> index 3572e11..f3125d7 100644
> --- a/net/openvswitch/actions.c
> +++ b/net/openvswitch/actions.c
> @@ -199,7 +199,7 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
>  	if (err)
>  		return err;
>  
> -	flow_key->mpls.top_lse = lse;
> +	flow_key->mpls.lse[0] = lse;
>  	return 0;
>  }
>  
> diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
> index dca3b1e..c101355 100644
> --- a/net/openvswitch/flow.c
> +++ b/net/openvswitch/flow.c
> @@ -699,27 +699,35 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
>  			memset(&key->ipv4, 0, sizeof(key->ipv4));
>  		}
>  	} else if (eth_p_mpls(key->eth.type)) {
> -		size_t stack_len = MPLS_HLEN;
> +		u8 label_count = 1;
>  
> +		memset(&key->mpls, 0, sizeof(key->mpls));
>  		skb_set_inner_network_header(skb, skb->mac_len);
>  		while (1) {
>  			__be32 lse;
>  
> -			error = check_header(skb, skb->mac_len + stack_len);
> +			error = check_header(skb, skb->mac_len +
> +					     label_count * MPLS_HLEN);
>  			if (unlikely(error))
>  				return 0;
>  
>  			memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN);
>  
> -			if (stack_len == MPLS_HLEN)
> -				memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN);
> +			if (label_count <= MPLS_LABEL_DEPTH)
> +				memcpy(&key->mpls.lse[label_count - 1], &lse,
> +				       MPLS_HLEN);
>  
> -			skb_set_inner_network_header(skb, skb->mac_len + stack_len);
> +			skb_set_inner_network_header(skb, skb->mac_len +
> +						     label_count * MPLS_HLEN);
>  			if (lse & htonl(MPLS_LS_S_MASK))
>  				break;
>  
> -			stack_len += MPLS_HLEN;
> +			label_count++;
>  		}
> +		if (label_count > MPLS_LABEL_DEPTH)
> +			label_count = MPLS_LABEL_DEPTH;
> +
> +		key->mpls.num_labels_mask = GENMASK(label_count - 1, 0);

>  	} else if (key->eth.type == htons(ETH_P_IPV6)) {
>  		int nh_len;             /* IPv6 Header + Extensions */
>  
> diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
> index 3e2cc22..d9eccbe 100644
> --- a/net/openvswitch/flow.h
> +++ b/net/openvswitch/flow.h
> @@ -30,6 +30,7 @@ enum sw_flow_mac_proto {
>  	MAC_PROTO_ETHERNET,
>  };
>  #define SW_FLOW_KEY_INVALID	0x80
> +#define MPLS_LABEL_DEPTH       3
>  
>  /* Store options at the end of the array if they are less than the
>   * maximum size. This allows us to get the benefits of variable length
> @@ -85,9 +86,6 @@ struct sw_flow_key {
>  					 */
>  	union {
>  		struct {
> -			__be32 top_lse;	/* top label stack entry */
> -		} mpls;
> -		struct {
>  			u8     proto;	/* IP protocol or lower 8 bits of ARP opcode. */
>  			u8     tos;	    /* IP ToS. */
>  			u8     ttl;	    /* IP TTL/hop limit. */
> @@ -135,6 +133,11 @@ struct sw_flow_key {
>  				} nd;
>  			};
>  		} ipv6;
> +		struct {
> +			u32 num_labels_mask;    /* labels present bitmap of effective length MPLS_LABEL_DEPTH */

Why using a bitmap here? why not just num_labels?
I saw that you have to convert it using hweight_long()
to num_labels a couple places below.

Regards,
William

> +			__be32 lse[MPLS_LABEL_DEPTH];     /* label stack entry  */
> +		} mpls;
> +
>  		struct ovs_key_nsh nsh;         /* network service header */
>  	};
>  	struct {
> diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
> index d7559c6..21de061 100644
> --- a/net/openvswitch/flow_netlink.c
> +++ b/net/openvswitch/flow_netlink.c
> @@ -424,7 +424,7 @@ size_t ovs_key_attr_size(void)
>  	[OVS_KEY_ATTR_DP_HASH]	 = { .len = sizeof(u32) },
>  	[OVS_KEY_ATTR_TUNNEL]	 = { .len = OVS_ATTR_NESTED,
>  				     .next = ovs_tunnel_key_lens, },
> -	[OVS_KEY_ATTR_MPLS]	 = { .len = sizeof(struct ovs_key_mpls) },
> +	[OVS_KEY_ATTR_MPLS]	 = { .len = OVS_ATTR_VARIABLE },
>  	[OVS_KEY_ATTR_CT_STATE]	 = { .len = sizeof(u32) },
>  	[OVS_KEY_ATTR_CT_ZONE]	 = { .len = sizeof(u16) },
>  	[OVS_KEY_ATTR_CT_MARK]	 = { .len = sizeof(u32) },
> @@ -1628,10 +1628,25 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
>  
>  	if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
>  		const struct ovs_key_mpls *mpls_key;
> +		u32 hdr_len;
> +		u32 label_count, label_count_mask, i;
>  
>  		mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
> -		SW_FLOW_KEY_PUT(match, mpls.top_lse,
> -				mpls_key->mpls_lse, is_mask);
> +		hdr_len = nla_len(a[OVS_KEY_ATTR_MPLS]);
> +		label_count = hdr_len / sizeof(struct ovs_key_mpls);
> +
> +		if (label_count == 0 || label_count > MPLS_LABEL_DEPTH ||
> +		    hdr_len % sizeof(struct ovs_key_mpls))
> +			return -EINVAL;
> +
> +		label_count_mask =  GENMASK(label_count - 1, 0);
> +
> +		for (i = 0 ; i < label_count; i++)
> +			SW_FLOW_KEY_PUT(match, mpls.lse[i],
> +					mpls_key[i].mpls_lse, is_mask);
> +
> +		SW_FLOW_KEY_PUT(match, mpls.num_labels_mask,
> +				label_count_mask, is_mask);
>  
>  		attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
>  	 }
> @@ -2114,13 +2129,18 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
>  		ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
>  		ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
>  	} else if (eth_p_mpls(swkey->eth.type)) {
> +		u8 i, num_labels;
>  		struct ovs_key_mpls *mpls_key;
>  
> -		nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
> +		num_labels = hweight_long(output->mpls.num_labels_mask);
> +		nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS,
> +				  num_labels * sizeof(*mpls_key));
>  		if (!nla)
>  			goto nla_put_failure;
> +
>  		mpls_key = nla_data(nla);
> -		mpls_key->mpls_lse = output->mpls.top_lse;
> +		for (i = 0; i < num_labels; i++)
> +			mpls_key[i].mpls_lse = output->mpls.lse[i];
>  	}
>  
>  	if ((swkey->eth.type == htons(ETH_P_IP) ||
> @@ -2957,6 +2977,10 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
>  	u8 mac_proto = ovs_key_mac_proto(key);
>  	const struct nlattr *a;
>  	int rem, err;
> +	u32 mpls_label_count = 0;
> +
> +	if (eth_p_mpls(eth_type))
> +		mpls_label_count = hweight_long(key->mpls.num_labels_mask);
>  
>  	nla_for_each_nested(a, attr, rem) {
>  		/* Expected argument lengths, (u32)-1 for variable length. */
> @@ -3065,25 +3089,34 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
>  			     !eth_p_mpls(eth_type)))
>  				return -EINVAL;
>  			eth_type = mpls->mpls_ethertype;
> +			mpls_label_count++;
>  			break;
>  		}
>  
> -		case OVS_ACTION_ATTR_POP_MPLS:
> +		case OVS_ACTION_ATTR_POP_MPLS: {
> +			__be16  proto;
>  			if (vlan_tci & htons(VLAN_CFI_MASK) ||
>  			    !eth_p_mpls(eth_type))
>  				return -EINVAL;
>  
> -			/* Disallow subsequent L2.5+ set and mpls_pop actions
> -			 * as there is no check here to ensure that the new
> -			 * eth_type is valid and thus set actions could
> -			 * write off the end of the packet or otherwise
> -			 * corrupt it.
> +			/* Disallow subsequent L2.5+ set actions as there is
> +			 * no check here to ensure that the new eth type is
> +			 * valid and thus set actions could write off the
> +			 * end of the packet or otherwise corrupt it.
>  			 *
>  			 * Support for these actions is planned using packet
>  			 * recirculation.
>  			 */
> -			eth_type = htons(0);
> +			proto = nla_get_be16(a);
> +			mpls_label_count--;
> +
> +			if (!eth_p_mpls(proto) || !mpls_label_count)
> +				eth_type = htons(0);
> +			else
> +				eth_type =  proto;
> +
>  			break;
> +		}
>  
>  		case OVS_ACTION_ATTR_SET:
>  			err = validate_set(a, key, sfa,
> -- 
> 1.8.3.1
>
Martin Varghese Oct. 25, 2019, 2:34 a.m. UTC | #7
On Thu, Oct 24, 2019 at 01:47:40PM -0700, William Tu wrote:
> On Sun, Oct 20, 2019 at 07:41:42PM +0530, Martin Varghese wrote:
> > From: Martin Varghese <martin.varghese@nokia.com>
> > 
> > The openvswitch was supporting a MPLS label depth of 1 in the ingress
> > direction though the userspace OVS supports a max depth of 3 labels.
> > This change enables openvswitch module to support a max depth of
> > 3 labels in the ingress.
> > 
> 
> Hi Martin,
> Thanks for the patch. I have one comment below.
> 
> > Signed-off-by: Martin Varghese <martin.varghese@nokia.com>
> > ---
> > Changes in v2
> >    - Moved MPLS count validation from datapath to configuration.
> >    - Fixed set mpls function.
> > 
> >  net/openvswitch/actions.c      |  2 +-
> >  net/openvswitch/flow.c         | 20 ++++++++++-----
> >  net/openvswitch/flow.h         |  9 ++++---
> >  net/openvswitch/flow_netlink.c | 57 +++++++++++++++++++++++++++++++++---------
> >  4 files changed, 66 insertions(+), 22 deletions(-)
> > 
> > diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
> > index 3572e11..f3125d7 100644
> > --- a/net/openvswitch/actions.c
> > +++ b/net/openvswitch/actions.c
> > @@ -199,7 +199,7 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
> >  	if (err)
> >  		return err;
> >  
> > -	flow_key->mpls.top_lse = lse;
> > +	flow_key->mpls.lse[0] = lse;
> >  	return 0;
> >  }
> >  
> > diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
> > index dca3b1e..c101355 100644
> > --- a/net/openvswitch/flow.c
> > +++ b/net/openvswitch/flow.c
> > @@ -699,27 +699,35 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
> >  			memset(&key->ipv4, 0, sizeof(key->ipv4));
> >  		}
> >  	} else if (eth_p_mpls(key->eth.type)) {
> > -		size_t stack_len = MPLS_HLEN;
> > +		u8 label_count = 1;
> >  
> > +		memset(&key->mpls, 0, sizeof(key->mpls));
> >  		skb_set_inner_network_header(skb, skb->mac_len);
> >  		while (1) {
> >  			__be32 lse;
> >  
> > -			error = check_header(skb, skb->mac_len + stack_len);
> > +			error = check_header(skb, skb->mac_len +
> > +					     label_count * MPLS_HLEN);
> >  			if (unlikely(error))
> >  				return 0;
> >  
> >  			memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN);
> >  
> > -			if (stack_len == MPLS_HLEN)
> > -				memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN);
> > +			if (label_count <= MPLS_LABEL_DEPTH)
> > +				memcpy(&key->mpls.lse[label_count - 1], &lse,
> > +				       MPLS_HLEN);
> >  
> > -			skb_set_inner_network_header(skb, skb->mac_len + stack_len);
> > +			skb_set_inner_network_header(skb, skb->mac_len +
> > +						     label_count * MPLS_HLEN);
> >  			if (lse & htonl(MPLS_LS_S_MASK))
> >  				break;
> >  
> > -			stack_len += MPLS_HLEN;
> > +			label_count++;
> >  		}
> > +		if (label_count > MPLS_LABEL_DEPTH)
> > +			label_count = MPLS_LABEL_DEPTH;
> > +
> > +		key->mpls.num_labels_mask = GENMASK(label_count - 1, 0);
> 
> >  	} else if (key->eth.type == htons(ETH_P_IPV6)) {
> >  		int nh_len;             /* IPv6 Header + Extensions */
> >  
> > diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
> > index 3e2cc22..d9eccbe 100644
> > --- a/net/openvswitch/flow.h
> > +++ b/net/openvswitch/flow.h
> > @@ -30,6 +30,7 @@ enum sw_flow_mac_proto {
> >  	MAC_PROTO_ETHERNET,
> >  };
> >  #define SW_FLOW_KEY_INVALID	0x80
> > +#define MPLS_LABEL_DEPTH       3
> >  
> >  /* Store options at the end of the array if they are less than the
> >   * maximum size. This allows us to get the benefits of variable length
> > @@ -85,9 +86,6 @@ struct sw_flow_key {
> >  					 */
> >  	union {
> >  		struct {
> > -			__be32 top_lse;	/* top label stack entry */
> > -		} mpls;
> > -		struct {
> >  			u8     proto;	/* IP protocol or lower 8 bits of ARP opcode. */
> >  			u8     tos;	    /* IP ToS. */
> >  			u8     ttl;	    /* IP TTL/hop limit. */
> > @@ -135,6 +133,11 @@ struct sw_flow_key {
> >  				} nd;
> >  			};
> >  		} ipv6;
> > +		struct {
> > +			u32 num_labels_mask;    /* labels present bitmap of effective length MPLS_LABEL_DEPTH */
> 
> Why using a bitmap here? why not just num_labels?
> I saw that you have to convert it using hweight_long()
> to num_labels a couple places below.
>

num_labels will not work when used in flow_key for flow match.
Assume a case where a packet with 3 labels are received and the configured
flow has a match condition for the top most label only.Num_labels cannot be 
used in that case

My original patch was with num_labels.And we found that it will not work for
the above case. 
Jbenc@redhat.com proposed the idea of num_labels_mask.


Thanks for checking the patch.


 
> Regards,
> William
> 
> > +			__be32 lse[MPLS_LABEL_DEPTH];     /* label stack entry  */
> > +		} mpls;
> > +
> >  		struct ovs_key_nsh nsh;         /* network service header */
> >  	};
> >  	struct {
> > diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
> > index d7559c6..21de061 100644
> > --- a/net/openvswitch/flow_netlink.c
> > +++ b/net/openvswitch/flow_netlink.c
> > @@ -424,7 +424,7 @@ size_t ovs_key_attr_size(void)
> >  	[OVS_KEY_ATTR_DP_HASH]	 = { .len = sizeof(u32) },
> >  	[OVS_KEY_ATTR_TUNNEL]	 = { .len = OVS_ATTR_NESTED,
> >  				     .next = ovs_tunnel_key_lens, },
> > -	[OVS_KEY_ATTR_MPLS]	 = { .len = sizeof(struct ovs_key_mpls) },
> > +	[OVS_KEY_ATTR_MPLS]	 = { .len = OVS_ATTR_VARIABLE },
> >  	[OVS_KEY_ATTR_CT_STATE]	 = { .len = sizeof(u32) },
> >  	[OVS_KEY_ATTR_CT_ZONE]	 = { .len = sizeof(u16) },
> >  	[OVS_KEY_ATTR_CT_MARK]	 = { .len = sizeof(u32) },
> > @@ -1628,10 +1628,25 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
> >  
> >  	if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
> >  		const struct ovs_key_mpls *mpls_key;
> > +		u32 hdr_len;
> > +		u32 label_count, label_count_mask, i;
> >  
> >  		mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
> > -		SW_FLOW_KEY_PUT(match, mpls.top_lse,
> > -				mpls_key->mpls_lse, is_mask);
> > +		hdr_len = nla_len(a[OVS_KEY_ATTR_MPLS]);
> > +		label_count = hdr_len / sizeof(struct ovs_key_mpls);
> > +
> > +		if (label_count == 0 || label_count > MPLS_LABEL_DEPTH ||
> > +		    hdr_len % sizeof(struct ovs_key_mpls))
> > +			return -EINVAL;
> > +
> > +		label_count_mask =  GENMASK(label_count - 1, 0);
> > +
> > +		for (i = 0 ; i < label_count; i++)
> > +			SW_FLOW_KEY_PUT(match, mpls.lse[i],
> > +					mpls_key[i].mpls_lse, is_mask);
> > +
> > +		SW_FLOW_KEY_PUT(match, mpls.num_labels_mask,
> > +				label_count_mask, is_mask);
> >  
> >  		attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
> >  	 }
> > @@ -2114,13 +2129,18 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
> >  		ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
> >  		ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
> >  	} else if (eth_p_mpls(swkey->eth.type)) {
> > +		u8 i, num_labels;
> >  		struct ovs_key_mpls *mpls_key;
> >  
> > -		nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
> > +		num_labels = hweight_long(output->mpls.num_labels_mask);
> > +		nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS,
> > +				  num_labels * sizeof(*mpls_key));
> >  		if (!nla)
> >  			goto nla_put_failure;
> > +
> >  		mpls_key = nla_data(nla);
> > -		mpls_key->mpls_lse = output->mpls.top_lse;
> > +		for (i = 0; i < num_labels; i++)
> > +			mpls_key[i].mpls_lse = output->mpls.lse[i];
> >  	}
> >  
> >  	if ((swkey->eth.type == htons(ETH_P_IP) ||
> > @@ -2957,6 +2977,10 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
> >  	u8 mac_proto = ovs_key_mac_proto(key);
> >  	const struct nlattr *a;
> >  	int rem, err;
> > +	u32 mpls_label_count = 0;
> > +
> > +	if (eth_p_mpls(eth_type))
> > +		mpls_label_count = hweight_long(key->mpls.num_labels_mask);
> >  
> >  	nla_for_each_nested(a, attr, rem) {
> >  		/* Expected argument lengths, (u32)-1 for variable length. */
> > @@ -3065,25 +3089,34 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
> >  			     !eth_p_mpls(eth_type)))
> >  				return -EINVAL;
> >  			eth_type = mpls->mpls_ethertype;
> > +			mpls_label_count++;
> >  			break;
> >  		}
> >  
> > -		case OVS_ACTION_ATTR_POP_MPLS:
> > +		case OVS_ACTION_ATTR_POP_MPLS: {
> > +			__be16  proto;
> >  			if (vlan_tci & htons(VLAN_CFI_MASK) ||
> >  			    !eth_p_mpls(eth_type))
> >  				return -EINVAL;
> >  
> > -			/* Disallow subsequent L2.5+ set and mpls_pop actions
> > -			 * as there is no check here to ensure that the new
> > -			 * eth_type is valid and thus set actions could
> > -			 * write off the end of the packet or otherwise
> > -			 * corrupt it.
> > +			/* Disallow subsequent L2.5+ set actions as there is
> > +			 * no check here to ensure that the new eth type is
> > +			 * valid and thus set actions could write off the
> > +			 * end of the packet or otherwise corrupt it.
> >  			 *
> >  			 * Support for these actions is planned using packet
> >  			 * recirculation.
> >  			 */
> > -			eth_type = htons(0);
> > +			proto = nla_get_be16(a);
> > +			mpls_label_count--;
> > +
> > +			if (!eth_p_mpls(proto) || !mpls_label_count)
> > +				eth_type = htons(0);
> > +			else
> > +				eth_type =  proto;
> > +
> >  			break;
> > +		}
> >  
> >  		case OVS_ACTION_ATTR_SET:
> >  			err = validate_set(a, key, sfa,
> > -- 
> > 1.8.3.1
> >
Pravin Shelar Oct. 25, 2019, 3:31 a.m. UTC | #8
On Tue, Oct 22, 2019 at 10:05 PM Martin Varghese
<martinvarghesenokia@gmail.com> wrote:
>
> On Tue, Oct 22, 2019 at 08:59:46PM -0700, Pravin Shelar wrote:
> > On Tue, Oct 22, 2019 at 8:29 AM Martin Varghese
> > <martinvarghesenokia@gmail.com> wrote:
> > >
> > > On Tue, Oct 22, 2019 at 12:03:49AM -0700, Pravin Shelar wrote:
> > > > On Sun, Oct 20, 2019 at 7:12 AM Martin Varghese
> > > > <martinvarghesenokia@gmail.com> wrote:
> > > > >
> > > > > From: Martin Varghese <martin.varghese@nokia.com>
> > > > >
> > > > > The openvswitch was supporting a MPLS label depth of 1 in the ingress
> > > > > direction though the userspace OVS supports a max depth of 3 labels.
> > > > > This change enables openvswitch module to support a max depth of
> > > > > 3 labels in the ingress.
> > > > >
> > > > > Signed-off-by: Martin Varghese <martin.varghese@nokia.com>
> > > > > ---
> > > > > Changes in v2
> > > > >    - Moved MPLS count validation from datapath to configuration.
> > > > >    - Fixed set mpls function.
> > > > >
> > > > This patch looks pretty close now.
> > > >
> > > > >  net/openvswitch/actions.c      |  2 +-
> > > > >  net/openvswitch/flow.c         | 20 ++++++++++-----
> > > > >  net/openvswitch/flow.h         |  9 ++++---
> > > > >  net/openvswitch/flow_netlink.c | 57 +++++++++++++++++++++++++++++++++---------
> > > > >  4 files changed, 66 insertions(+), 22 deletions(-)
> > > > >
> > > > ...
> > > > > diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
> > > > > index d7559c6..21de061 100644
> > > > > --- a/net/openvswitch/flow_netlink.c
> > > > > +++ b/net/openvswitch/flow_netlink.c
> > > > > @@ -424,7 +424,7 @@ size_t ovs_key_attr_size(void)
> > > > >         [OVS_KEY_ATTR_DP_HASH]   = { .len = sizeof(u32) },
> > > > >         [OVS_KEY_ATTR_TUNNEL]    = { .len = OVS_ATTR_NESTED,
> > > > >                                      .next = ovs_tunnel_key_lens, },
> > > > > -       [OVS_KEY_ATTR_MPLS]      = { .len = sizeof(struct ovs_key_mpls) },
> > > > > +       [OVS_KEY_ATTR_MPLS]      = { .len = OVS_ATTR_VARIABLE },
> > > > >         [OVS_KEY_ATTR_CT_STATE]  = { .len = sizeof(u32) },
> > > > >         [OVS_KEY_ATTR_CT_ZONE]   = { .len = sizeof(u16) },
> > > > >         [OVS_KEY_ATTR_CT_MARK]   = { .len = sizeof(u32) },
> > > > > @@ -1628,10 +1628,25 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
> > > > >
> > > > >         if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
> > > > >                 const struct ovs_key_mpls *mpls_key;
> > > > > +               u32 hdr_len;
> > > > > +               u32 label_count, label_count_mask, i;
> > > > >
> > > > >                 mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
> > > > > -               SW_FLOW_KEY_PUT(match, mpls.top_lse,
> > > > > -                               mpls_key->mpls_lse, is_mask);
> > > > > +               hdr_len = nla_len(a[OVS_KEY_ATTR_MPLS]);
> > > > > +               label_count = hdr_len / sizeof(struct ovs_key_mpls);
> > > > > +
> > > > > +               if (label_count == 0 || label_count > MPLS_LABEL_DEPTH ||
> > > > > +                   hdr_len % sizeof(struct ovs_key_mpls))
> > > > > +                       return -EINVAL;
> > > > > +
> > > > > +               label_count_mask =  GENMASK(label_count - 1, 0);
> > > > > +
> > > > > +               for (i = 0 ; i < label_count; i++)
> > > > > +                       SW_FLOW_KEY_PUT(match, mpls.lse[i],
> > > > > +                                       mpls_key[i].mpls_lse, is_mask);
> > > > > +
> > > > > +               SW_FLOW_KEY_PUT(match, mpls.num_labels_mask,
> > > > > +                               label_count_mask, is_mask);
> > > > >
> > > > >                 attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
> > > > >          }
> > > > > @@ -2114,13 +2129,18 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
> > > > >                 ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
> > > > >                 ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
> > > > >         } else if (eth_p_mpls(swkey->eth.type)) {
> > > > > +               u8 i, num_labels;
> > > > >                 struct ovs_key_mpls *mpls_key;
> > > > >
> > > > > -               nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
> > > > > +               num_labels = hweight_long(output->mpls.num_labels_mask);
> > > > > +               nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS,
> > > > > +                                 num_labels * sizeof(*mpls_key));
> > > > >                 if (!nla)
> > > > >                         goto nla_put_failure;
> > > > > +
> > > > >                 mpls_key = nla_data(nla);
> > > > > -               mpls_key->mpls_lse = output->mpls.top_lse;
> > > > > +               for (i = 0; i < num_labels; i++)
> > > > > +                       mpls_key[i].mpls_lse = output->mpls.lse[i];
> > > > >         }
> > > > >
> > > > >         if ((swkey->eth.type == htons(ETH_P_IP) ||
> > > > > @@ -2957,6 +2977,10 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
> > > > >         u8 mac_proto = ovs_key_mac_proto(key);
> > > > >         const struct nlattr *a;
> > > > >         int rem, err;
> > > > > +       u32 mpls_label_count = 0;
> > > > > +
> > > > > +       if (eth_p_mpls(eth_type))
> > > > > +               mpls_label_count = hweight_long(key->mpls.num_labels_mask);
> > > > >
> > > > The MPLS push and pop action could be part of nested actions in
> > > > sample, so the count needs to be global count across such nested
> > > > actions. have a look at validate_and_copy_sample().
> > > >
> > > Embedding mpls_label_count in struct sw_flow_actions will not work for clone
> > >
> > > I guess we need to move the below code to ovs_nla_copy_actions and extend the  arguments of __ovs_nla_copy_actions to take mpls_label_count also
> > > if (eth_p_mpls(eth_type))
> > >                 mpls_label_count = hweight_long(key->mpls.num_labels_mask)
> > >
> > >
> > I am not suggesting changing sw_flow_actions, You can define count
> > variable in ovs_nla_copy_actions() and pass it as a pointer to nested
> > function. That can be used to keep track of MPLS labels at all nested
> > actions.
>
> Actions clone & sample does a clone of SKB correct?

You can pass pointer to a separate (cloned) variable when verifying
those actions in those cases.

> Hence shouldn't they maintain a seperate mpls_label count for each nested action set
> I assume instead of passing as pointer from ovs_nla_copy_actions ,if passed by value it should
> solve the problem.Need to try that though.
William Tu Oct. 25, 2019, 3:23 p.m. UTC | #9
On Thu, Oct 24, 2019 at 7:34 PM Martin Varghese
<martinvarghesenokia@gmail.com> wrote:
>
> On Thu, Oct 24, 2019 at 01:47:40PM -0700, William Tu wrote:
> > On Sun, Oct 20, 2019 at 07:41:42PM +0530, Martin Varghese wrote:
> > > From: Martin Varghese <martin.varghese@nokia.com>
> > >
> > > The openvswitch was supporting a MPLS label depth of 1 in the ingress
> > > direction though the userspace OVS supports a max depth of 3 labels.
> > > This change enables openvswitch module to support a max depth of
> > > 3 labels in the ingress.
> > >
> >
> > Hi Martin,
> > Thanks for the patch. I have one comment below.
> >
> > > Signed-off-by: Martin Varghese <martin.varghese@nokia.com>
> > > ---
> > > Changes in v2
> > >    - Moved MPLS count validation from datapath to configuration.
> > >    - Fixed set mpls function.
> > >
> > >  net/openvswitch/actions.c      |  2 +-
> > >  net/openvswitch/flow.c         | 20 ++++++++++-----
> > >  net/openvswitch/flow.h         |  9 ++++---
> > >  net/openvswitch/flow_netlink.c | 57 +++++++++++++++++++++++++++++++++---------
> > >  4 files changed, 66 insertions(+), 22 deletions(-)
> > >
> > > diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
> > > index 3572e11..f3125d7 100644
> > > --- a/net/openvswitch/actions.c
> > > +++ b/net/openvswitch/actions.c
> > > @@ -199,7 +199,7 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
> > >     if (err)
> > >             return err;
> > >
> > > -   flow_key->mpls.top_lse = lse;
> > > +   flow_key->mpls.lse[0] = lse;
> > >     return 0;
> > >  }
> > >
> > > diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
> > > index dca3b1e..c101355 100644
> > > --- a/net/openvswitch/flow.c
> > > +++ b/net/openvswitch/flow.c
> > > @@ -699,27 +699,35 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
> > >                     memset(&key->ipv4, 0, sizeof(key->ipv4));
> > >             }
> > >     } else if (eth_p_mpls(key->eth.type)) {
> > > -           size_t stack_len = MPLS_HLEN;
> > > +           u8 label_count = 1;
> > >
> > > +           memset(&key->mpls, 0, sizeof(key->mpls));
> > >             skb_set_inner_network_header(skb, skb->mac_len);
> > >             while (1) {
> > >                     __be32 lse;
> > >
> > > -                   error = check_header(skb, skb->mac_len + stack_len);
> > > +                   error = check_header(skb, skb->mac_len +
> > > +                                        label_count * MPLS_HLEN);
> > >                     if (unlikely(error))
> > >                             return 0;
> > >
> > >                     memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN);
> > >
> > > -                   if (stack_len == MPLS_HLEN)
> > > -                           memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN);
> > > +                   if (label_count <= MPLS_LABEL_DEPTH)
> > > +                           memcpy(&key->mpls.lse[label_count - 1], &lse,
> > > +                                  MPLS_HLEN);
> > >
> > > -                   skb_set_inner_network_header(skb, skb->mac_len + stack_len);
> > > +                   skb_set_inner_network_header(skb, skb->mac_len +
> > > +                                                label_count * MPLS_HLEN);
> > >                     if (lse & htonl(MPLS_LS_S_MASK))
> > >                             break;
> > >
> > > -                   stack_len += MPLS_HLEN;
> > > +                   label_count++;
> > >             }
> > > +           if (label_count > MPLS_LABEL_DEPTH)
> > > +                   label_count = MPLS_LABEL_DEPTH;
> > > +
> > > +           key->mpls.num_labels_mask = GENMASK(label_count - 1, 0);
> >
> > >     } else if (key->eth.type == htons(ETH_P_IPV6)) {
> > >             int nh_len;             /* IPv6 Header + Extensions */
> > >
> > > diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
> > > index 3e2cc22..d9eccbe 100644
> > > --- a/net/openvswitch/flow.h
> > > +++ b/net/openvswitch/flow.h
> > > @@ -30,6 +30,7 @@ enum sw_flow_mac_proto {
> > >     MAC_PROTO_ETHERNET,
> > >  };
> > >  #define SW_FLOW_KEY_INVALID        0x80
> > > +#define MPLS_LABEL_DEPTH       3
> > >
> > >  /* Store options at the end of the array if they are less than the
> > >   * maximum size. This allows us to get the benefits of variable length
> > > @@ -85,9 +86,6 @@ struct sw_flow_key {
> > >                                      */
> > >     union {
> > >             struct {
> > > -                   __be32 top_lse; /* top label stack entry */
> > > -           } mpls;
> > > -           struct {
> > >                     u8     proto;   /* IP protocol or lower 8 bits of ARP opcode. */
> > >                     u8     tos;         /* IP ToS. */
> > >                     u8     ttl;         /* IP TTL/hop limit. */
> > > @@ -135,6 +133,11 @@ struct sw_flow_key {
> > >                             } nd;
> > >                     };
> > >             } ipv6;
> > > +           struct {
> > > +                   u32 num_labels_mask;    /* labels present bitmap of effective length MPLS_LABEL_DEPTH */
> >
> > Why using a bitmap here? why not just num_labels?
> > I saw that you have to convert it using hweight_long()
> > to num_labels a couple places below.
> >
>
> num_labels will not work when used in flow_key for flow match.
> Assume a case where a packet with 3 labels are received and the configured
> flow has a match condition for the top most label only.Num_labels cannot be
> used in that case
>
> My original patch was with num_labels.And we found that it will not work for
> the above case.
> Jbenc@redhat.com proposed the idea of num_labels_mask.
>

Thank you. Now I understand.
William

Patch
diff mbox series

diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 3572e11..f3125d7 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -199,7 +199,7 @@  static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
 	if (err)
 		return err;
 
-	flow_key->mpls.top_lse = lse;
+	flow_key->mpls.lse[0] = lse;
 	return 0;
 }
 
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index dca3b1e..c101355 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -699,27 +699,35 @@  static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
 			memset(&key->ipv4, 0, sizeof(key->ipv4));
 		}
 	} else if (eth_p_mpls(key->eth.type)) {
-		size_t stack_len = MPLS_HLEN;
+		u8 label_count = 1;
 
+		memset(&key->mpls, 0, sizeof(key->mpls));
 		skb_set_inner_network_header(skb, skb->mac_len);
 		while (1) {
 			__be32 lse;
 
-			error = check_header(skb, skb->mac_len + stack_len);
+			error = check_header(skb, skb->mac_len +
+					     label_count * MPLS_HLEN);
 			if (unlikely(error))
 				return 0;
 
 			memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN);
 
-			if (stack_len == MPLS_HLEN)
-				memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN);
+			if (label_count <= MPLS_LABEL_DEPTH)
+				memcpy(&key->mpls.lse[label_count - 1], &lse,
+				       MPLS_HLEN);
 
-			skb_set_inner_network_header(skb, skb->mac_len + stack_len);
+			skb_set_inner_network_header(skb, skb->mac_len +
+						     label_count * MPLS_HLEN);
 			if (lse & htonl(MPLS_LS_S_MASK))
 				break;
 
-			stack_len += MPLS_HLEN;
+			label_count++;
 		}
+		if (label_count > MPLS_LABEL_DEPTH)
+			label_count = MPLS_LABEL_DEPTH;
+
+		key->mpls.num_labels_mask = GENMASK(label_count - 1, 0);
 	} else if (key->eth.type == htons(ETH_P_IPV6)) {
 		int nh_len;             /* IPv6 Header + Extensions */
 
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 3e2cc22..d9eccbe 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -30,6 +30,7 @@  enum sw_flow_mac_proto {
 	MAC_PROTO_ETHERNET,
 };
 #define SW_FLOW_KEY_INVALID	0x80
+#define MPLS_LABEL_DEPTH       3
 
 /* Store options at the end of the array if they are less than the
  * maximum size. This allows us to get the benefits of variable length
@@ -85,9 +86,6 @@  struct sw_flow_key {
 					 */
 	union {
 		struct {
-			__be32 top_lse;	/* top label stack entry */
-		} mpls;
-		struct {
 			u8     proto;	/* IP protocol or lower 8 bits of ARP opcode. */
 			u8     tos;	    /* IP ToS. */
 			u8     ttl;	    /* IP TTL/hop limit. */
@@ -135,6 +133,11 @@  struct sw_flow_key {
 				} nd;
 			};
 		} ipv6;
+		struct {
+			u32 num_labels_mask;    /* labels present bitmap of effective length MPLS_LABEL_DEPTH */
+			__be32 lse[MPLS_LABEL_DEPTH];     /* label stack entry  */
+		} mpls;
+
 		struct ovs_key_nsh nsh;         /* network service header */
 	};
 	struct {
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index d7559c6..21de061 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -424,7 +424,7 @@  size_t ovs_key_attr_size(void)
 	[OVS_KEY_ATTR_DP_HASH]	 = { .len = sizeof(u32) },
 	[OVS_KEY_ATTR_TUNNEL]	 = { .len = OVS_ATTR_NESTED,
 				     .next = ovs_tunnel_key_lens, },
-	[OVS_KEY_ATTR_MPLS]	 = { .len = sizeof(struct ovs_key_mpls) },
+	[OVS_KEY_ATTR_MPLS]	 = { .len = OVS_ATTR_VARIABLE },
 	[OVS_KEY_ATTR_CT_STATE]	 = { .len = sizeof(u32) },
 	[OVS_KEY_ATTR_CT_ZONE]	 = { .len = sizeof(u16) },
 	[OVS_KEY_ATTR_CT_MARK]	 = { .len = sizeof(u32) },
@@ -1628,10 +1628,25 @@  static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
 
 	if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
 		const struct ovs_key_mpls *mpls_key;
+		u32 hdr_len;
+		u32 label_count, label_count_mask, i;
 
 		mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
-		SW_FLOW_KEY_PUT(match, mpls.top_lse,
-				mpls_key->mpls_lse, is_mask);
+		hdr_len = nla_len(a[OVS_KEY_ATTR_MPLS]);
+		label_count = hdr_len / sizeof(struct ovs_key_mpls);
+
+		if (label_count == 0 || label_count > MPLS_LABEL_DEPTH ||
+		    hdr_len % sizeof(struct ovs_key_mpls))
+			return -EINVAL;
+
+		label_count_mask =  GENMASK(label_count - 1, 0);
+
+		for (i = 0 ; i < label_count; i++)
+			SW_FLOW_KEY_PUT(match, mpls.lse[i],
+					mpls_key[i].mpls_lse, is_mask);
+
+		SW_FLOW_KEY_PUT(match, mpls.num_labels_mask,
+				label_count_mask, is_mask);
 
 		attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
 	 }
@@ -2114,13 +2129,18 @@  static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
 		ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
 		ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
 	} else if (eth_p_mpls(swkey->eth.type)) {
+		u8 i, num_labels;
 		struct ovs_key_mpls *mpls_key;
 
-		nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
+		num_labels = hweight_long(output->mpls.num_labels_mask);
+		nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS,
+				  num_labels * sizeof(*mpls_key));
 		if (!nla)
 			goto nla_put_failure;
+
 		mpls_key = nla_data(nla);
-		mpls_key->mpls_lse = output->mpls.top_lse;
+		for (i = 0; i < num_labels; i++)
+			mpls_key[i].mpls_lse = output->mpls.lse[i];
 	}
 
 	if ((swkey->eth.type == htons(ETH_P_IP) ||
@@ -2957,6 +2977,10 @@  static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 	u8 mac_proto = ovs_key_mac_proto(key);
 	const struct nlattr *a;
 	int rem, err;
+	u32 mpls_label_count = 0;
+
+	if (eth_p_mpls(eth_type))
+		mpls_label_count = hweight_long(key->mpls.num_labels_mask);
 
 	nla_for_each_nested(a, attr, rem) {
 		/* Expected argument lengths, (u32)-1 for variable length. */
@@ -3065,25 +3089,34 @@  static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 			     !eth_p_mpls(eth_type)))
 				return -EINVAL;
 			eth_type = mpls->mpls_ethertype;
+			mpls_label_count++;
 			break;
 		}
 
-		case OVS_ACTION_ATTR_POP_MPLS:
+		case OVS_ACTION_ATTR_POP_MPLS: {
+			__be16  proto;
 			if (vlan_tci & htons(VLAN_CFI_MASK) ||
 			    !eth_p_mpls(eth_type))
 				return -EINVAL;
 
-			/* Disallow subsequent L2.5+ set and mpls_pop actions
-			 * as there is no check here to ensure that the new
-			 * eth_type is valid and thus set actions could
-			 * write off the end of the packet or otherwise
-			 * corrupt it.
+			/* Disallow subsequent L2.5+ set actions as there is
+			 * no check here to ensure that the new eth type is
+			 * valid and thus set actions could write off the
+			 * end of the packet or otherwise corrupt it.
 			 *
 			 * Support for these actions is planned using packet
 			 * recirculation.
 			 */
-			eth_type = htons(0);
+			proto = nla_get_be16(a);
+			mpls_label_count--;
+
+			if (!eth_p_mpls(proto) || !mpls_label_count)
+				eth_type = htons(0);
+			else
+				eth_type =  proto;
+
 			break;
+		}
 
 		case OVS_ACTION_ATTR_SET:
 			err = validate_set(a, key, sfa,