Message ID | 1574351342-16308-1-git-send-email-martinvarghesenokia@gmail.com |
---|---|
State | Superseded |
Headers | show |
Series | [ovs-dev,v2] Change in openvswitch kernel module to support MPLS label depth of 3 in ingress direction. | expand |
On 11/21/2019 7:49 AM, Martin Varghese wrote: > From: Martin Varghese <martin.varghese@nokia.com> > > The openvswitch kernel module was supporting a MPLS label depth of 1 > in the ingress direction though the userspace OVS supports a max depth > of 3 labels. This change enables openvswitch module to support a max > depth of 3 labels in the ingress. > > Signed-off-by: Martin Varghese <martin.varghese@nokia.com> > --- > Changes in v2 > - support added for nested actions. Thanks Martin, I reviewed the code and it looks fine to me. I also ran it through Travis CI and check-kmod and that's all good too. Tested-by: Greg Rose <gvrose8192@gmail.com> Reviewed-by: Greg Rose <gvrose8192@gmail.com> > > datapath/actions.c | 2 +- > datapath/flow.c | 20 ++++++++---- > datapath/flow.h | 8 +++-- > datapath/flow_netlink.c | 85 ++++++++++++++++++++++++++++++++++++------------- > tests/system-traffic.at | 39 +++++++++++++++++++++++ > 5 files changed, 122 insertions(+), 32 deletions(-) > > diff --git a/datapath/actions.c b/datapath/actions.c > index a44e804..fbf4457 100644 > --- a/datapath/actions.c > +++ b/datapath/actions.c > @@ -276,7 +276,7 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, > } > > stack->label_stack_entry = lse; > - flow_key->mpls.top_lse = lse; > + flow_key->mpls.lse[0] = lse; > return 0; > } > > diff --git a/datapath/flow.c b/datapath/flow.c > index 916f7f4..6dc7402 100644 > --- a/datapath/flow.c > +++ b/datapath/flow.c > @@ -659,27 +659,35 @@ static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key) > memset(&key->ipv4, 0, sizeof(key->ipv4)); > } > } else if (eth_p_mpls(key->eth.type)) { > - size_t stack_len = MPLS_HLEN; > + u8 label_count = 1; > > + memset(&key->mpls, 0, sizeof(key->mpls)); > skb_set_inner_network_header(skb, skb->mac_len); > while (1) { > __be32 lse; > > - error = check_header(skb, skb->mac_len + stack_len); > + error = check_header(skb, skb->mac_len + > + label_count * MPLS_HLEN); > if (unlikely(error)) 
> return 0; > > memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN); > > - if (stack_len == MPLS_HLEN) > - memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN); > + if (label_count <= MPLS_LABEL_DEPTH) > + memcpy(&key->mpls.lse[label_count - 1], &lse, > + MPLS_HLEN); > > - skb_set_inner_network_header(skb, skb->mac_len + stack_len); > + skb_set_inner_network_header(skb, skb->mac_len + > + label_count * MPLS_HLEN); > if (lse & htonl(MPLS_LS_S_MASK)) > break; > > - stack_len += MPLS_HLEN; > + label_count++; > } > + if (label_count > MPLS_LABEL_DEPTH) > + label_count = MPLS_LABEL_DEPTH; > + > + key->mpls.num_labels_mask = GENMASK(label_count - 1, 0); > } else if (key->eth.type == htons(ETH_P_IPV6)) { > int nh_len; /* IPv6 Header + Extensions */ > > diff --git a/datapath/flow.h b/datapath/flow.h > index 5560300..4ad5363 100644 > --- a/datapath/flow.h > +++ b/datapath/flow.h > @@ -43,6 +43,7 @@ enum sw_flow_mac_proto { > MAC_PROTO_ETHERNET, > }; > #define SW_FLOW_KEY_INVALID 0x80 > +#define MPLS_LABEL_DEPTH 3 > > /* Store options at the end of the array if they are less than the > * maximum size. This allows us to get the benefits of variable length > @@ -98,9 +99,6 @@ struct sw_flow_key { > */ > union { > struct { > - __be32 top_lse; /* top label stack entry */ > - } mpls; > - struct { > u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */ > u8 tos; /* IP ToS. */ > u8 ttl; /* IP TTL/hop limit. 
*/ > @@ -148,6 +146,10 @@ struct sw_flow_key { > } nd; > }; > } ipv6; > + struct { > + u32 num_labels_mask; /* labels present bitmap of effective length MPLS_LABEL_DEPTH */ > + __be32 lse[MPLS_LABEL_DEPTH]; /* label stack entry */ > + } mpls; > struct ovs_key_nsh nsh; /* network service header */ > }; > struct { > diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c > index 35f13d7..9fc1a19 100644 > --- a/datapath/flow_netlink.c > +++ b/datapath/flow_netlink.c > @@ -438,7 +438,7 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { > [OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) }, > [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, > .next = ovs_tunnel_key_lens, }, > - [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, > + [OVS_KEY_ATTR_MPLS] = { .len = OVS_ATTR_VARIABLE }, > [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u32) }, > [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) }, > [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) }, > @@ -1619,10 +1619,27 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, > > if (attrs & (1ULL << OVS_KEY_ATTR_MPLS)) { > const struct ovs_key_mpls *mpls_key; > + u32 hdr_len; > + u32 label_count, label_count_mask, i; > + > > mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]); > - SW_FLOW_KEY_PUT(match, mpls.top_lse, > - mpls_key->mpls_lse, is_mask); > + hdr_len = nla_len(a[OVS_KEY_ATTR_MPLS]); > + label_count = hdr_len / sizeof(struct ovs_key_mpls); > + > + if (label_count == 0 || label_count > MPLS_LABEL_DEPTH || > + hdr_len % sizeof(struct ovs_key_mpls)) > + return -EINVAL; > + > + label_count_mask = GENMASK(label_count - 1, 0); > + > + for (i = 0 ; i < label_count; i++) > + SW_FLOW_KEY_PUT(match, mpls.lse[i], > + mpls_key[i].mpls_lse, is_mask); > + > + SW_FLOW_KEY_PUT(match, mpls.num_labels_mask, > + label_count_mask, is_mask); > + > > attrs &= ~(1ULL << OVS_KEY_ATTR_MPLS); > } > @@ -2104,13 +2121,18 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, > 
ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha); > ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha); > } else if (eth_p_mpls(swkey->eth.type)) { > + u8 num_labels, i; > struct ovs_key_mpls *mpls_key; > > - nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key)); > + num_labels = hweight_long(output->mpls.num_labels_mask); > + nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, > + num_labels * sizeof(*mpls_key)); > if (!nla) > goto nla_put_failure; > + > mpls_key = nla_data(nla); > - mpls_key->mpls_lse = output->mpls.top_lse; > + for (i = 0; i < num_labels; i++) > + mpls_key[i].mpls_lse = output->mpls.lse[i]; > } > > if ((swkey->eth.type == htons(ETH_P_IP) || > @@ -2400,13 +2422,14 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa, > static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > const struct sw_flow_key *key, > struct sw_flow_actions **sfa, > - __be16 eth_type, __be16 vlan_tci, bool log); > + __be16 eth_type, __be16 vlan_tci, > + u32 mpls_label_count, bool log); > > static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, > const struct sw_flow_key *key, > struct sw_flow_actions **sfa, > __be16 eth_type, __be16 vlan_tci, > - bool log, bool last) > + u32 mpls_label_count, bool log, bool last) > { > const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; > const struct nlattr *probability, *actions; > @@ -2457,7 +2480,7 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, > return err; > > err = __ovs_nla_copy_actions(net, actions, key, sfa, > - eth_type, vlan_tci, log); > + eth_type, vlan_tci, mpls_label_count, log); > > if (err) > return err; > @@ -2472,7 +2495,7 @@ static int validate_and_copy_clone(struct net *net, > const struct sw_flow_key *key, > struct sw_flow_actions **sfa, > __be16 eth_type, __be16 vlan_tci, > - bool log, bool last) > + u32 mpls_label_count, bool log, bool last) > { > int start, err; > u32 exec; > @@ -2492,7 +2515,7 @@ static 
int validate_and_copy_clone(struct net *net, > return err; > > err = __ovs_nla_copy_actions(net, attr, key, sfa, > - eth_type, vlan_tci, log); > + eth_type, vlan_tci, mpls_label_count, log); > if (err) > return err; > > @@ -2859,6 +2882,7 @@ static int validate_and_copy_check_pkt_len(struct net *net, > const struct sw_flow_key *key, > struct sw_flow_actions **sfa, > __be16 eth_type, __be16 vlan_tci, > + u32 mpls_label_count, > bool log, bool last) > { > const struct nlattr *acts_if_greater, *acts_if_lesser_eq; > @@ -2906,7 +2930,7 @@ static int validate_and_copy_check_pkt_len(struct net *net, > return nested_acts_start; > > err = __ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa, > - eth_type, vlan_tci, log); > + eth_type, vlan_tci, mpls_label_count, log); > > if (err) > return err; > @@ -2919,7 +2943,7 @@ static int validate_and_copy_check_pkt_len(struct net *net, > return nested_acts_start; > > err = __ovs_nla_copy_actions(net, acts_if_greater, key, sfa, > - eth_type, vlan_tci, log); > + eth_type, vlan_tci, mpls_label_count, log); > > if (err) > return err; > @@ -2946,7 +2970,8 @@ static int copy_action(const struct nlattr *from, > static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > const struct sw_flow_key *key, > struct sw_flow_actions **sfa, > - __be16 eth_type, __be16 vlan_tci, bool log) > + __be16 eth_type, __be16 vlan_tci, > + u32 mpls_label_count, bool log) > { > u8 mac_proto = ovs_key_mac_proto(key); > const struct nlattr *a; > @@ -3059,26 +3084,35 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > !eth_p_mpls(eth_type))) > return -EINVAL; > eth_type = mpls->mpls_ethertype; > + mpls_label_count++; > break; > } > > - case OVS_ACTION_ATTR_POP_MPLS: > + case OVS_ACTION_ATTR_POP_MPLS: { > + __be16 proto; > if (vlan_tci & htons(VLAN_CFI_MASK) || > !eth_p_mpls(eth_type)) > return -EINVAL; > > - /* Disallow subsequent L2.5+ set and mpls_pop actions > - * as there is no check here to ensure that 
the new > - * eth_type is valid and thus set actions could > - * write off the end of the packet or otherwise > - * corrupt it. > + /* Disallow subsequent L2.5+ set actions and mpls_pop > + * actions once the last MPLS label in the packet is > + * popped as there is no check here to ensure that > + * the new eth type is valid and thus set actions could > + * write off the end of the packet or otherwise corrupt > + * it. > * > * Support for these actions is planned using packet > * recirculation. > */ > - eth_type = htons(0); > - break; > + proto = nla_get_be16(a); > + mpls_label_count--; > > + if (!eth_p_mpls(proto) || !mpls_label_count) > + eth_type = htons(0); > + else > + eth_type = proto; > + break; > + } > case OVS_ACTION_ATTR_SET: > err = validate_set(a, key, sfa, > &skip_copy, mac_proto, eth_type, > @@ -3100,6 +3134,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > > err = validate_and_copy_sample(net, a, key, sfa, > eth_type, vlan_tci, > + mpls_label_count, > log, last); > if (err) > return err; > @@ -3170,6 +3205,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > > err = validate_and_copy_clone(net, a, key, sfa, > eth_type, vlan_tci, > + mpls_label_count, > log, last); > if (err) > return err; > @@ -3183,6 +3219,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > err = validate_and_copy_check_pkt_len(net, a, key, sfa, > eth_type, > vlan_tci, log, > + mpls_label_count, > last); > if (err) > return err; > @@ -3213,14 +3250,18 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > struct sw_flow_actions **sfa, bool log) > { > int err; > + u32 mpls_label_count = 0; > > *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE)); > if (IS_ERR(*sfa)) > return PTR_ERR(*sfa); > > + if (eth_p_mpls(key->eth.type)) > + mpls_label_count = hweight_long(key->mpls.num_labels_mask); > + > (*sfa)->orig_len = nla_len(attr); > err = 
__ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type, > - key->eth.vlan.tci, log); > + key->eth.vlan.tci, mpls_label_count, log); > if (err) > ovs_nla_free_flow_actions(*sfa); > > diff --git a/tests/system-traffic.at b/tests/system-traffic.at > index 870a05e..cde7429 100644 > --- a/tests/system-traffic.at > +++ b/tests/system-traffic.at > @@ -992,6 +992,45 @@ NS_CHECK_EXEC([at_ns1], [ping -q -c 3 -i 0.3 -w 2 10.1.1.1 | FORMAT_PING], [0], > > OVS_TRAFFIC_VSWITCHD_STOP > AT_CLEANUP > + > +AT_SETUP([datapath - multiple mpls label pop]) > +OVS_TRAFFIC_VSWITCHD_START([_ADD_BR([br1])]) > + > +ADD_NAMESPACES(at_ns0, at_ns1) > + > +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24") > +ADD_VETH(p1, at_ns1, br1, "10.1.1.2/24") > + > +AT_CHECK([ip link add patch0 type veth peer name patch1]) > +on_exit 'ip link del patch0' > + > +AT_CHECK([ip link set dev patch0 up]) > +AT_CHECK([ip link set dev patch1 up]) > +AT_CHECK([ovs-vsctl add-port br0 patch0]) > +AT_CHECK([ovs-vsctl add-port br1 patch1]) > + > +AT_DATA([flows.txt], [dnl > +table=0,priority=100,dl_type=0x0800 actions=push_mpls:0x8847,set_mpls_label:3,push_mpls:0x8847,set_mpls_label:2,push_mpls:0x8847,set_mpls_label:1,resubmit(,3) > +table=0,priority=100,dl_type=0x8847,mpls_label=1 actions=pop_mpls:0x8847,resubmit(,1) > +table=1,priority=100,dl_type=0x8847,mpls_label=2 actions=pop_mpls:0x8847,resubmit(,2) > +table=2,priority=100,dl_type=0x8847,mpls_label=3 actions=pop_mpls:0x0800,resubmit(,3) > +table=0,priority=10 actions=resubmit(,3) > +table=3,priority=10 actions=normal > +]) > + > +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) > +AT_CHECK([ovs-ofctl add-flows br1 flows.txt]) > + > +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl > +3 packets transmitted, 3 received, 0% packet loss, time 0ms > +]) > + > +NS_CHECK_EXEC([at_ns1], [ping -q -c 3 -i 0.3 -w 2 10.1.1.1 | FORMAT_PING], [0], [dnl > +3 packets transmitted, 3 received, 0% packet loss, time 0ms > +]) > +OVS_TRAFFIC_VSWITCHD_STOP > 
+AT_CLEANUP > + > AT_SETUP([datapath - basic truncate action]) > AT_SKIP_IF([test $HAVE_NC = no]) > OVS_TRAFFIC_VSWITCHD_START()
On 11/21/2019 7:49 AM, Martin Varghese wrote: > From: Martin Varghese <martin.varghese@nokia.com> > > The openvswitch kernel module was supporting a MPLS label depth of 1 > in the ingress direction though the userspace OVS supports a max depth > of 3 labels. This change enables openvswitch module to support a max > depth of 3 labels in the ingress. > > Signed-off-by: Martin Varghese <martin.varghese@nokia.com> Did you submit this patch upstream as well? - Greg > --- > Changes in v2 > - support added for nested actions. > > datapath/actions.c | 2 +- > datapath/flow.c | 20 ++++++++---- > datapath/flow.h | 8 +++-- > datapath/flow_netlink.c | 85 ++++++++++++++++++++++++++++++++++++------------- > tests/system-traffic.at | 39 +++++++++++++++++++++++ > 5 files changed, 122 insertions(+), 32 deletions(-) > > diff --git a/datapath/actions.c b/datapath/actions.c > index a44e804..fbf4457 100644 > --- a/datapath/actions.c > +++ b/datapath/actions.c > @@ -276,7 +276,7 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, > } > > stack->label_stack_entry = lse; > - flow_key->mpls.top_lse = lse; > + flow_key->mpls.lse[0] = lse; > return 0; > } > > diff --git a/datapath/flow.c b/datapath/flow.c > index 916f7f4..6dc7402 100644 > --- a/datapath/flow.c > +++ b/datapath/flow.c > @@ -659,27 +659,35 @@ static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key) > memset(&key->ipv4, 0, sizeof(key->ipv4)); > } > } else if (eth_p_mpls(key->eth.type)) { > - size_t stack_len = MPLS_HLEN; > + u8 label_count = 1; > > + memset(&key->mpls, 0, sizeof(key->mpls)); > skb_set_inner_network_header(skb, skb->mac_len); > while (1) { > __be32 lse; > > - error = check_header(skb, skb->mac_len + stack_len); > + error = check_header(skb, skb->mac_len + > + label_count * MPLS_HLEN); > if (unlikely(error)) > return 0; > > memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN); > > - if (stack_len == MPLS_HLEN) > - memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN); > + if 
(label_count <= MPLS_LABEL_DEPTH) > + memcpy(&key->mpls.lse[label_count - 1], &lse, > + MPLS_HLEN); > > - skb_set_inner_network_header(skb, skb->mac_len + stack_len); > + skb_set_inner_network_header(skb, skb->mac_len + > + label_count * MPLS_HLEN); > if (lse & htonl(MPLS_LS_S_MASK)) > break; > > - stack_len += MPLS_HLEN; > + label_count++; > } > + if (label_count > MPLS_LABEL_DEPTH) > + label_count = MPLS_LABEL_DEPTH; > + > + key->mpls.num_labels_mask = GENMASK(label_count - 1, 0); > } else if (key->eth.type == htons(ETH_P_IPV6)) { > int nh_len; /* IPv6 Header + Extensions */ > > diff --git a/datapath/flow.h b/datapath/flow.h > index 5560300..4ad5363 100644 > --- a/datapath/flow.h > +++ b/datapath/flow.h > @@ -43,6 +43,7 @@ enum sw_flow_mac_proto { > MAC_PROTO_ETHERNET, > }; > #define SW_FLOW_KEY_INVALID 0x80 > +#define MPLS_LABEL_DEPTH 3 > > /* Store options at the end of the array if they are less than the > * maximum size. This allows us to get the benefits of variable length > @@ -98,9 +99,6 @@ struct sw_flow_key { > */ > union { > struct { > - __be32 top_lse; /* top label stack entry */ > - } mpls; > - struct { > u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */ > u8 tos; /* IP ToS. */ > u8 ttl; /* IP TTL/hop limit. 
*/ > @@ -148,6 +146,10 @@ struct sw_flow_key { > } nd; > }; > } ipv6; > + struct { > + u32 num_labels_mask; /* labels present bitmap of effective length MPLS_LABEL_DEPTH */ > + __be32 lse[MPLS_LABEL_DEPTH]; /* label stack entry */ > + } mpls; > struct ovs_key_nsh nsh; /* network service header */ > }; > struct { > diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c > index 35f13d7..9fc1a19 100644 > --- a/datapath/flow_netlink.c > +++ b/datapath/flow_netlink.c > @@ -438,7 +438,7 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { > [OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) }, > [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, > .next = ovs_tunnel_key_lens, }, > - [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, > + [OVS_KEY_ATTR_MPLS] = { .len = OVS_ATTR_VARIABLE }, > [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u32) }, > [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) }, > [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) }, > @@ -1619,10 +1619,27 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, > > if (attrs & (1ULL << OVS_KEY_ATTR_MPLS)) { > const struct ovs_key_mpls *mpls_key; > + u32 hdr_len; > + u32 label_count, label_count_mask, i; > + > > mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]); > - SW_FLOW_KEY_PUT(match, mpls.top_lse, > - mpls_key->mpls_lse, is_mask); > + hdr_len = nla_len(a[OVS_KEY_ATTR_MPLS]); > + label_count = hdr_len / sizeof(struct ovs_key_mpls); > + > + if (label_count == 0 || label_count > MPLS_LABEL_DEPTH || > + hdr_len % sizeof(struct ovs_key_mpls)) > + return -EINVAL; > + > + label_count_mask = GENMASK(label_count - 1, 0); > + > + for (i = 0 ; i < label_count; i++) > + SW_FLOW_KEY_PUT(match, mpls.lse[i], > + mpls_key[i].mpls_lse, is_mask); > + > + SW_FLOW_KEY_PUT(match, mpls.num_labels_mask, > + label_count_mask, is_mask); > + > > attrs &= ~(1ULL << OVS_KEY_ATTR_MPLS); > } > @@ -2104,13 +2121,18 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, > 
ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha); > ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha); > } else if (eth_p_mpls(swkey->eth.type)) { > + u8 num_labels, i; > struct ovs_key_mpls *mpls_key; > > - nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key)); > + num_labels = hweight_long(output->mpls.num_labels_mask); > + nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, > + num_labels * sizeof(*mpls_key)); > if (!nla) > goto nla_put_failure; > + > mpls_key = nla_data(nla); > - mpls_key->mpls_lse = output->mpls.top_lse; > + for (i = 0; i < num_labels; i++) > + mpls_key[i].mpls_lse = output->mpls.lse[i]; > } > > if ((swkey->eth.type == htons(ETH_P_IP) || > @@ -2400,13 +2422,14 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa, > static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > const struct sw_flow_key *key, > struct sw_flow_actions **sfa, > - __be16 eth_type, __be16 vlan_tci, bool log); > + __be16 eth_type, __be16 vlan_tci, > + u32 mpls_label_count, bool log); > > static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, > const struct sw_flow_key *key, > struct sw_flow_actions **sfa, > __be16 eth_type, __be16 vlan_tci, > - bool log, bool last) > + u32 mpls_label_count, bool log, bool last) > { > const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; > const struct nlattr *probability, *actions; > @@ -2457,7 +2480,7 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, > return err; > > err = __ovs_nla_copy_actions(net, actions, key, sfa, > - eth_type, vlan_tci, log); > + eth_type, vlan_tci, mpls_label_count, log); > > if (err) > return err; > @@ -2472,7 +2495,7 @@ static int validate_and_copy_clone(struct net *net, > const struct sw_flow_key *key, > struct sw_flow_actions **sfa, > __be16 eth_type, __be16 vlan_tci, > - bool log, bool last) > + u32 mpls_label_count, bool log, bool last) > { > int start, err; > u32 exec; > @@ -2492,7 +2515,7 @@ static 
int validate_and_copy_clone(struct net *net, > return err; > > err = __ovs_nla_copy_actions(net, attr, key, sfa, > - eth_type, vlan_tci, log); > + eth_type, vlan_tci, mpls_label_count, log); > if (err) > return err; > > @@ -2859,6 +2882,7 @@ static int validate_and_copy_check_pkt_len(struct net *net, > const struct sw_flow_key *key, > struct sw_flow_actions **sfa, > __be16 eth_type, __be16 vlan_tci, > + u32 mpls_label_count, > bool log, bool last) > { > const struct nlattr *acts_if_greater, *acts_if_lesser_eq; > @@ -2906,7 +2930,7 @@ static int validate_and_copy_check_pkt_len(struct net *net, > return nested_acts_start; > > err = __ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa, > - eth_type, vlan_tci, log); > + eth_type, vlan_tci, mpls_label_count, log); > > if (err) > return err; > @@ -2919,7 +2943,7 @@ static int validate_and_copy_check_pkt_len(struct net *net, > return nested_acts_start; > > err = __ovs_nla_copy_actions(net, acts_if_greater, key, sfa, > - eth_type, vlan_tci, log); > + eth_type, vlan_tci, mpls_label_count, log); > > if (err) > return err; > @@ -2946,7 +2970,8 @@ static int copy_action(const struct nlattr *from, > static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > const struct sw_flow_key *key, > struct sw_flow_actions **sfa, > - __be16 eth_type, __be16 vlan_tci, bool log) > + __be16 eth_type, __be16 vlan_tci, > + u32 mpls_label_count, bool log) > { > u8 mac_proto = ovs_key_mac_proto(key); > const struct nlattr *a; > @@ -3059,26 +3084,35 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > !eth_p_mpls(eth_type))) > return -EINVAL; > eth_type = mpls->mpls_ethertype; > + mpls_label_count++; > break; > } > > - case OVS_ACTION_ATTR_POP_MPLS: > + case OVS_ACTION_ATTR_POP_MPLS: { > + __be16 proto; > if (vlan_tci & htons(VLAN_CFI_MASK) || > !eth_p_mpls(eth_type)) > return -EINVAL; > > - /* Disallow subsequent L2.5+ set and mpls_pop actions > - * as there is no check here to ensure that 
the new > - * eth_type is valid and thus set actions could > - * write off the end of the packet or otherwise > - * corrupt it. > + /* Disallow subsequent L2.5+ set actions and mpls_pop > + * actions once the last MPLS label in the packet is > + * popped as there is no check here to ensure that > + * the new eth type is valid and thus set actions could > + * write off the end of the packet or otherwise corrupt > + * it. > * > * Support for these actions is planned using packet > * recirculation. > */ > - eth_type = htons(0); > - break; > + proto = nla_get_be16(a); > + mpls_label_count--; > > + if (!eth_p_mpls(proto) || !mpls_label_count) > + eth_type = htons(0); > + else > + eth_type = proto; > + break; > + } > case OVS_ACTION_ATTR_SET: > err = validate_set(a, key, sfa, > &skip_copy, mac_proto, eth_type, > @@ -3100,6 +3134,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > > err = validate_and_copy_sample(net, a, key, sfa, > eth_type, vlan_tci, > + mpls_label_count, > log, last); > if (err) > return err; > @@ -3170,6 +3205,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > > err = validate_and_copy_clone(net, a, key, sfa, > eth_type, vlan_tci, > + mpls_label_count, > log, last); > if (err) > return err; > @@ -3183,6 +3219,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > err = validate_and_copy_check_pkt_len(net, a, key, sfa, > eth_type, > vlan_tci, log, > + mpls_label_count, > last); > if (err) > return err; > @@ -3213,14 +3250,18 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > struct sw_flow_actions **sfa, bool log) > { > int err; > + u32 mpls_label_count = 0; > > *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE)); > if (IS_ERR(*sfa)) > return PTR_ERR(*sfa); > > + if (eth_p_mpls(key->eth.type)) > + mpls_label_count = hweight_long(key->mpls.num_labels_mask); > + > (*sfa)->orig_len = nla_len(attr); > err = 
__ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type, > - key->eth.vlan.tci, log); > + key->eth.vlan.tci, mpls_label_count, log); > if (err) > ovs_nla_free_flow_actions(*sfa); > > diff --git a/tests/system-traffic.at b/tests/system-traffic.at > index 870a05e..cde7429 100644 > --- a/tests/system-traffic.at > +++ b/tests/system-traffic.at > @@ -992,6 +992,45 @@ NS_CHECK_EXEC([at_ns1], [ping -q -c 3 -i 0.3 -w 2 10.1.1.1 | FORMAT_PING], [0], > > OVS_TRAFFIC_VSWITCHD_STOP > AT_CLEANUP > + > +AT_SETUP([datapath - multiple mpls label pop]) > +OVS_TRAFFIC_VSWITCHD_START([_ADD_BR([br1])]) > + > +ADD_NAMESPACES(at_ns0, at_ns1) > + > +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24") > +ADD_VETH(p1, at_ns1, br1, "10.1.1.2/24") > + > +AT_CHECK([ip link add patch0 type veth peer name patch1]) > +on_exit 'ip link del patch0' > + > +AT_CHECK([ip link set dev patch0 up]) > +AT_CHECK([ip link set dev patch1 up]) > +AT_CHECK([ovs-vsctl add-port br0 patch0]) > +AT_CHECK([ovs-vsctl add-port br1 patch1]) > + > +AT_DATA([flows.txt], [dnl > +table=0,priority=100,dl_type=0x0800 actions=push_mpls:0x8847,set_mpls_label:3,push_mpls:0x8847,set_mpls_label:2,push_mpls:0x8847,set_mpls_label:1,resubmit(,3) > +table=0,priority=100,dl_type=0x8847,mpls_label=1 actions=pop_mpls:0x8847,resubmit(,1) > +table=1,priority=100,dl_type=0x8847,mpls_label=2 actions=pop_mpls:0x8847,resubmit(,2) > +table=2,priority=100,dl_type=0x8847,mpls_label=3 actions=pop_mpls:0x0800,resubmit(,3) > +table=0,priority=10 actions=resubmit(,3) > +table=3,priority=10 actions=normal > +]) > + > +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) > +AT_CHECK([ovs-ofctl add-flows br1 flows.txt]) > + > +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl > +3 packets transmitted, 3 received, 0% packet loss, time 0ms > +]) > + > +NS_CHECK_EXEC([at_ns1], [ping -q -c 3 -i 0.3 -w 2 10.1.1.1 | FORMAT_PING], [0], [dnl > +3 packets transmitted, 3 received, 0% packet loss, time 0ms > +]) > +OVS_TRAFFIC_VSWITCHD_STOP > 
+AT_CLEANUP > + > AT_SETUP([datapath - basic truncate action]) > AT_SKIP_IF([test $HAVE_NC = no]) > OVS_TRAFFIC_VSWITCHD_START()
On Fri, Nov 22, 2019 at 2:15 AM Gregory Rose <gvrose8192@gmail.com> wrote: > > > On 11/21/2019 7:49 AM, Martin Varghese wrote: > > From: Martin Varghese <martin.varghese@nokia.com> > > > > The openvswitch kernel module was supporting a MPLS label depth of 1 > > in the ingress direction though the userspace OVS supports a max depth > > of 3 labels. This change enables openvswitch module to support a max > > depth of 3 labels in the ingress. > > > > Signed-off-by: Martin Varghese <martin.varghese@nokia.com> > > Did you submit this patch upstream as well? This patch was applied upstream as commit fbdcdd78da7c95f1b970d371e1b23cbd3aa990f3. Hi Martin, when you backport a patch, the commit message should be formatted as described in: http://docs.openvswitch.org/en/latest/internals/contributing/backporting-patches/ > - Greg > > > --- > > Changes in v2 > > - support added for nested actions. > > > > datapath/actions.c | 2 +- > > datapath/flow.c | 20 ++++++++---- > > datapath/flow.h | 8 +++-- > > datapath/flow_netlink.c | 85 ++++++++++++++++++++++++++++++++++++------------- > > tests/system-traffic.at | 39 +++++++++++++++++++++++ > > 5 files changed, 122 insertions(+), 32 deletions(-) > > > > diff --git a/datapath/actions.c b/datapath/actions.c > > index a44e804..fbf4457 100644 > > --- a/datapath/actions.c > > +++ b/datapath/actions.c > > @@ -276,7 +276,7 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, > > } > > > > stack->label_stack_entry = lse; > > - flow_key->mpls.top_lse = lse; > > + flow_key->mpls.lse[0] = lse; > > return 0; > > } > > > > diff --git a/datapath/flow.c b/datapath/flow.c > > index 916f7f4..6dc7402 100644 > > --- a/datapath/flow.c > > +++ b/datapath/flow.c > > @@ -659,27 +659,35 @@ static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key) > > memset(&key->ipv4, 0, sizeof(key->ipv4)); > > } > > } else if (eth_p_mpls(key->eth.type)) { > > - size_t stack_len = MPLS_HLEN; > > + u8 label_count = 1; > > > > + memset(&key->mpls, 0, 
sizeof(key->mpls)); > > skb_set_inner_network_header(skb, skb->mac_len); > > while (1) { > > __be32 lse; > > > > - error = check_header(skb, skb->mac_len + stack_len); > > + error = check_header(skb, skb->mac_len + > > + label_count * MPLS_HLEN); > > if (unlikely(error)) > > return 0; > > > > memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN); > > > > - if (stack_len == MPLS_HLEN) > > - memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN); > > + if (label_count <= MPLS_LABEL_DEPTH) > > + memcpy(&key->mpls.lse[label_count - 1], &lse, > > + MPLS_HLEN); > > > > - skb_set_inner_network_header(skb, skb->mac_len + stack_len); > > + skb_set_inner_network_header(skb, skb->mac_len + > > + label_count * MPLS_HLEN); > > if (lse & htonl(MPLS_LS_S_MASK)) > > break; > > > > - stack_len += MPLS_HLEN; > > + label_count++; > > } > > + if (label_count > MPLS_LABEL_DEPTH) > > + label_count = MPLS_LABEL_DEPTH; > > + > > + key->mpls.num_labels_mask = GENMASK(label_count - 1, 0); > > } else if (key->eth.type == htons(ETH_P_IPV6)) { > > int nh_len; /* IPv6 Header + Extensions */ > > > > diff --git a/datapath/flow.h b/datapath/flow.h > > index 5560300..4ad5363 100644 > > --- a/datapath/flow.h > > +++ b/datapath/flow.h > > @@ -43,6 +43,7 @@ enum sw_flow_mac_proto { > > MAC_PROTO_ETHERNET, > > }; > > #define SW_FLOW_KEY_INVALID 0x80 > > +#define MPLS_LABEL_DEPTH 3 > > > > /* Store options at the end of the array if they are less than the > > * maximum size. This allows us to get the benefits of variable length > > @@ -98,9 +99,6 @@ struct sw_flow_key { > > */ > > union { > > struct { > > - __be32 top_lse; /* top label stack entry */ > > - } mpls; > > - struct { > > u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */ > > u8 tos; /* IP ToS. */ > > u8 ttl; /* IP TTL/hop limit. 
*/ > > @@ -148,6 +146,10 @@ struct sw_flow_key { > > } nd; > > }; > > } ipv6; > > + struct { > > + u32 num_labels_mask; /* labels present bitmap of effective length MPLS_LABEL_DEPTH */ > > + __be32 lse[MPLS_LABEL_DEPTH]; /* label stack entry */ > > + } mpls; > > struct ovs_key_nsh nsh; /* network service header */ > > }; > > struct { > > diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c > > index 35f13d7..9fc1a19 100644 > > --- a/datapath/flow_netlink.c > > +++ b/datapath/flow_netlink.c > > @@ -438,7 +438,7 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { > > [OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) }, > > [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, > > .next = ovs_tunnel_key_lens, }, > > - [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, > > + [OVS_KEY_ATTR_MPLS] = { .len = OVS_ATTR_VARIABLE }, > > [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u32) }, > > [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) }, > > [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) }, > > @@ -1619,10 +1619,27 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, > > > > if (attrs & (1ULL << OVS_KEY_ATTR_MPLS)) { > > const struct ovs_key_mpls *mpls_key; > > + u32 hdr_len; > > + u32 label_count, label_count_mask, i; > > + > > > > mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]); > > - SW_FLOW_KEY_PUT(match, mpls.top_lse, > > - mpls_key->mpls_lse, is_mask); > > + hdr_len = nla_len(a[OVS_KEY_ATTR_MPLS]); > > + label_count = hdr_len / sizeof(struct ovs_key_mpls); > > + > > + if (label_count == 0 || label_count > MPLS_LABEL_DEPTH || > > + hdr_len % sizeof(struct ovs_key_mpls)) > > + return -EINVAL; > > + > > + label_count_mask = GENMASK(label_count - 1, 0); > > + > > + for (i = 0 ; i < label_count; i++) > > + SW_FLOW_KEY_PUT(match, mpls.lse[i], > > + mpls_key[i].mpls_lse, is_mask); > > + > > + SW_FLOW_KEY_PUT(match, mpls.num_labels_mask, > > + label_count_mask, is_mask); > > + > > > > attrs &= ~(1ULL << OVS_KEY_ATTR_MPLS); 
> > } > > @@ -2104,13 +2121,18 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, > > ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha); > > ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha); > > } else if (eth_p_mpls(swkey->eth.type)) { > > + u8 num_labels, i; > > struct ovs_key_mpls *mpls_key; > > > > - nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key)); > > + num_labels = hweight_long(output->mpls.num_labels_mask); > > + nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, > > + num_labels * sizeof(*mpls_key)); > > if (!nla) > > goto nla_put_failure; > > + > > mpls_key = nla_data(nla); > > - mpls_key->mpls_lse = output->mpls.top_lse; > > + for (i = 0; i < num_labels; i++) > > + mpls_key[i].mpls_lse = output->mpls.lse[i]; > > } > > > > if ((swkey->eth.type == htons(ETH_P_IP) || > > @@ -2400,13 +2422,14 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa, > > static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > > const struct sw_flow_key *key, > > struct sw_flow_actions **sfa, > > - __be16 eth_type, __be16 vlan_tci, bool log); > > + __be16 eth_type, __be16 vlan_tci, > > + u32 mpls_label_count, bool log); > > > > static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, > > const struct sw_flow_key *key, > > struct sw_flow_actions **sfa, > > __be16 eth_type, __be16 vlan_tci, > > - bool log, bool last) > > + u32 mpls_label_count, bool log, bool last) > > { > > const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; > > const struct nlattr *probability, *actions; > > @@ -2457,7 +2480,7 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, > > return err; > > > > err = __ovs_nla_copy_actions(net, actions, key, sfa, > > - eth_type, vlan_tci, log); > > + eth_type, vlan_tci, mpls_label_count, log); > > > > if (err) > > return err; > > @@ -2472,7 +2495,7 @@ static int validate_and_copy_clone(struct net *net, > > const struct sw_flow_key *key, > > struct 
sw_flow_actions **sfa, > > __be16 eth_type, __be16 vlan_tci, > > - bool log, bool last) > > + u32 mpls_label_count, bool log, bool last) > > { > > int start, err; > > u32 exec; > > @@ -2492,7 +2515,7 @@ static int validate_and_copy_clone(struct net *net, > > return err; > > > > err = __ovs_nla_copy_actions(net, attr, key, sfa, > > - eth_type, vlan_tci, log); > > + eth_type, vlan_tci, mpls_label_count, log); > > if (err) > > return err; > > > > @@ -2859,6 +2882,7 @@ static int validate_and_copy_check_pkt_len(struct net *net, > > const struct sw_flow_key *key, > > struct sw_flow_actions **sfa, > > __be16 eth_type, __be16 vlan_tci, > > + u32 mpls_label_count, > > bool log, bool last) > > { > > const struct nlattr *acts_if_greater, *acts_if_lesser_eq; > > @@ -2906,7 +2930,7 @@ static int validate_and_copy_check_pkt_len(struct net *net, > > return nested_acts_start; > > > > err = __ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa, > > - eth_type, vlan_tci, log); > > + eth_type, vlan_tci, mpls_label_count, log); > > > > if (err) > > return err; > > @@ -2919,7 +2943,7 @@ static int validate_and_copy_check_pkt_len(struct net *net, > > return nested_acts_start; > > > > err = __ovs_nla_copy_actions(net, acts_if_greater, key, sfa, > > - eth_type, vlan_tci, log); > > + eth_type, vlan_tci, mpls_label_count, log); > > > > if (err) > > return err; > > @@ -2946,7 +2970,8 @@ static int copy_action(const struct nlattr *from, > > static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > > const struct sw_flow_key *key, > > struct sw_flow_actions **sfa, > > - __be16 eth_type, __be16 vlan_tci, bool log) > > + __be16 eth_type, __be16 vlan_tci, > > + u32 mpls_label_count, bool log) > > { > > u8 mac_proto = ovs_key_mac_proto(key); > > const struct nlattr *a; > > @@ -3059,26 +3084,35 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > > !eth_p_mpls(eth_type))) > > return -EINVAL; > > eth_type = mpls->mpls_ethertype; > > + 
mpls_label_count++; > > break; > > } > > > > - case OVS_ACTION_ATTR_POP_MPLS: > > + case OVS_ACTION_ATTR_POP_MPLS: { > > + __be16 proto; > > if (vlan_tci & htons(VLAN_CFI_MASK) || > > !eth_p_mpls(eth_type)) > > return -EINVAL; > > > > - /* Disallow subsequent L2.5+ set and mpls_pop actions > > - * as there is no check here to ensure that the new > > - * eth_type is valid and thus set actions could > > - * write off the end of the packet or otherwise > > - * corrupt it. > > + /* Disallow subsequent L2.5+ set actions and mpls_pop > > + * actions once the last MPLS label in the packet is > > + * popped as there is no check here to ensure that > > + * the new eth type is valid and thus set actions could > > + * write off the end of the packet or otherwise corrupt > > + * it. > > * > > * Support for these actions is planned using packet > > * recirculation. > > */ > > - eth_type = htons(0); > > - break; > > + proto = nla_get_be16(a); > > + mpls_label_count--; > > > > + if (!eth_p_mpls(proto) || !mpls_label_count) > > + eth_type = htons(0); > > + else > > + eth_type = proto; > > + break; > > + } > > case OVS_ACTION_ATTR_SET: > > err = validate_set(a, key, sfa, > > &skip_copy, mac_proto, eth_type, > > @@ -3100,6 +3134,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > > > > err = validate_and_copy_sample(net, a, key, sfa, > > eth_type, vlan_tci, > > + mpls_label_count, > > log, last); > > if (err) > > return err; > > @@ -3170,6 +3205,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > > > > err = validate_and_copy_clone(net, a, key, sfa, > > eth_type, vlan_tci, > > + mpls_label_count, > > log, last); > > if (err) > > return err; > > @@ -3183,6 +3219,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > > err = validate_and_copy_check_pkt_len(net, a, key, sfa, > > eth_type, > > vlan_tci, log, > > + mpls_label_count, > > last); > > if (err) > > return err; > > @@ -3213,14 
+3250,18 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > > struct sw_flow_actions **sfa, bool log) > > { > > int err; > > + u32 mpls_label_count = 0; > > > > *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE)); > > if (IS_ERR(*sfa)) > > return PTR_ERR(*sfa); > > > > + if (eth_p_mpls(key->eth.type)) > > + mpls_label_count = hweight_long(key->mpls.num_labels_mask); > > + > > (*sfa)->orig_len = nla_len(attr); > > err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type, > > - key->eth.vlan.tci, log); > > + key->eth.vlan.tci, mpls_label_count, log); > > if (err) > > ovs_nla_free_flow_actions(*sfa); > > > > diff --git a/tests/system-traffic.at b/tests/system-traffic.at > > index 870a05e..cde7429 100644 > > --- a/tests/system-traffic.at > > +++ b/tests/system-traffic.at > > @@ -992,6 +992,45 @@ NS_CHECK_EXEC([at_ns1], [ping -q -c 3 -i 0.3 -w 2 10.1.1.1 | FORMAT_PING], [0], > > > > OVS_TRAFFIC_VSWITCHD_STOP > > AT_CLEANUP > > + > > +AT_SETUP([datapath - multiple mpls label pop]) > > +OVS_TRAFFIC_VSWITCHD_START([_ADD_BR([br1])]) > > + > > +ADD_NAMESPACES(at_ns0, at_ns1) > > + > > +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24") > > +ADD_VETH(p1, at_ns1, br1, "10.1.1.2/24") > > + > > +AT_CHECK([ip link add patch0 type veth peer name patch1]) > > +on_exit 'ip link del patch0' > > + > > +AT_CHECK([ip link set dev patch0 up]) > > +AT_CHECK([ip link set dev patch1 up]) > > +AT_CHECK([ovs-vsctl add-port br0 patch0]) > > +AT_CHECK([ovs-vsctl add-port br1 patch1]) > > + > > +AT_DATA([flows.txt], [dnl > > +table=0,priority=100,dl_type=0x0800 actions=push_mpls:0x8847,set_mpls_label:3,push_mpls:0x8847,set_mpls_label:2,push_mpls:0x8847,set_mpls_label:1,resubmit(,3) > > +table=0,priority=100,dl_type=0x8847,mpls_label=1 actions=pop_mpls:0x8847,resubmit(,1) > > +table=1,priority=100,dl_type=0x8847,mpls_label=2 actions=pop_mpls:0x8847,resubmit(,2) > > +table=2,priority=100,dl_type=0x8847,mpls_label=3 actions=pop_mpls:0x0800,resubmit(,3) > > 
+table=0,priority=10 actions=resubmit(,3) > > +table=3,priority=10 actions=normal > > +]) > > + > > +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) > > +AT_CHECK([ovs-ofctl add-flows br1 flows.txt]) > > + > > +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms > > +]) > > + > > +NS_CHECK_EXEC([at_ns1], [ping -q -c 3 -i 0.3 -w 2 10.1.1.1 | FORMAT_PING], [0], [dnl > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms > > +]) > > +OVS_TRAFFIC_VSWITCHD_STOP > > +AT_CLEANUP > > + > > AT_SETUP([datapath - basic truncate action]) > > AT_SKIP_IF([test $HAVE_NC = no]) > > OVS_TRAFFIC_VSWITCHD_START() > > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Hi, I have posted it again. Thanks, Martin -----Original Message----- From: Tonghao Zhang <xiangxia.m.yue@gmail.com> Sent: Friday, November 22, 2019 7:47 AM To: Gregory Rose <gvrose8192@gmail.com>; Varghese, Martin (Nokia - IN/Bangalore) <martin.varghese@nokia.com> Cc: ovs dev <dev@openvswitch.org> Subject: Re: [ovs-dev] [PATCH v2] Change in openvswitch kernel module to support MPLS label depth of 3 in ingress direction. On Fri, Nov 22, 2019 at 2:15 AM Gregory Rose <gvrose8192@gmail.com> wrote: > > > On 11/21/2019 7:49 AM, Martin Varghese wrote: > > From: Martin Varghese <martin.varghese@nokia.com> > > > > The openvswitch kernel module was supporting a MPLS label depth of 1 > > in the ingress direction though the userspace OVS supports a max > > depth of 3 labels. This change enables openvswitch module to support > > a max depth of 3 labels in the ingress. > > > > Signed-off-by: Martin Varghese <martin.varghese@nokia.com> > > Did you submit this patch upstream as well? This patch was applied upstream, commit id fbdcdd78da7c95f1b970d371e1b23cbd3aa990f3. Hi Martin, when you backport a patch, the commit message should be formatted as described in: http://docs.openvswitch.org/en/latest/internals/contributing/backporting-patches/ > - Greg > > > --- > > Changes in v2 > > - support added for nested actions. 
> > > > datapath/actions.c | 2 +- > > datapath/flow.c | 20 ++++++++---- > > datapath/flow.h | 8 +++-- > > datapath/flow_netlink.c | 85 ++++++++++++++++++++++++++++++++++++------------- > > tests/system-traffic.at | 39 +++++++++++++++++++++++ > > 5 files changed, 122 insertions(+), 32 deletions(-) > > > > diff --git a/datapath/actions.c b/datapath/actions.c index > > a44e804..fbf4457 100644 > > --- a/datapath/actions.c > > +++ b/datapath/actions.c > > @@ -276,7 +276,7 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, > > } > > > > stack->label_stack_entry = lse; > > - flow_key->mpls.top_lse = lse; > > + flow_key->mpls.lse[0] = lse; > > return 0; > > } > > > > diff --git a/datapath/flow.c b/datapath/flow.c index > > 916f7f4..6dc7402 100644 > > --- a/datapath/flow.c > > +++ b/datapath/flow.c > > @@ -659,27 +659,35 @@ static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key) > > memset(&key->ipv4, 0, sizeof(key->ipv4)); > > } > > } else if (eth_p_mpls(key->eth.type)) { > > - size_t stack_len = MPLS_HLEN; > > + u8 label_count = 1; > > > > + memset(&key->mpls, 0, sizeof(key->mpls)); > > skb_set_inner_network_header(skb, skb->mac_len); > > while (1) { > > __be32 lse; > > > > - error = check_header(skb, skb->mac_len + stack_len); > > + error = check_header(skb, skb->mac_len + > > + label_count * MPLS_HLEN); > > if (unlikely(error)) > > return 0; > > > > memcpy(&lse, skb_inner_network_header(skb), > > MPLS_HLEN); > > > > - if (stack_len == MPLS_HLEN) > > - memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN); > > + if (label_count <= MPLS_LABEL_DEPTH) > > + memcpy(&key->mpls.lse[label_count - 1], &lse, > > + MPLS_HLEN); > > > > - skb_set_inner_network_header(skb, skb->mac_len + stack_len); > > + skb_set_inner_network_header(skb, skb->mac_len + > > + label_count * > > + MPLS_HLEN); > > if (lse & htonl(MPLS_LS_S_MASK)) > > break; > > > > - stack_len += MPLS_HLEN; > > + label_count++; > > } > > + if (label_count > MPLS_LABEL_DEPTH) > > + 
label_count = MPLS_LABEL_DEPTH; > > + > > + key->mpls.num_labels_mask = GENMASK(label_count - 1, > > + 0); > > } else if (key->eth.type == htons(ETH_P_IPV6)) { > > int nh_len; /* IPv6 Header + Extensions */ > > > > diff --git a/datapath/flow.h b/datapath/flow.h index > > 5560300..4ad5363 100644 > > --- a/datapath/flow.h > > +++ b/datapath/flow.h > > @@ -43,6 +43,7 @@ enum sw_flow_mac_proto { > > MAC_PROTO_ETHERNET, > > }; > > #define SW_FLOW_KEY_INVALID 0x80 > > +#define MPLS_LABEL_DEPTH 3 > > > > /* Store options at the end of the array if they are less than the > > * maximum size. This allows us to get the benefits of variable > > length @@ -98,9 +99,6 @@ struct sw_flow_key { > > */ > > union { > > struct { > > - __be32 top_lse; /* top label stack entry */ > > - } mpls; > > - struct { > > u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */ > > u8 tos; /* IP ToS. */ > > u8 ttl; /* IP TTL/hop limit. */ > > @@ -148,6 +146,10 @@ struct sw_flow_key { > > } nd; > > }; > > } ipv6; > > + struct { > > + u32 num_labels_mask; /* labels present bitmap of effective length MPLS_LABEL_DEPTH */ > > + __be32 lse[MPLS_LABEL_DEPTH]; /* label stack entry */ > > + } mpls; > > struct ovs_key_nsh nsh; /* network service header */ > > }; > > struct { > > diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c index > > 35f13d7..9fc1a19 100644 > > --- a/datapath/flow_netlink.c > > +++ b/datapath/flow_netlink.c > > @@ -438,7 +438,7 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { > > [OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) }, > > [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, > > .next = ovs_tunnel_key_lens, }, > > - [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, > > + [OVS_KEY_ATTR_MPLS] = { .len = OVS_ATTR_VARIABLE }, > > [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u32) }, > > [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) }, > > [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) }, > > @@ -1619,10 +1619,27 @@ static int 
ovs_key_from_nlattrs(struct net > > *net, struct sw_flow_match *match, > > > > if (attrs & (1ULL << OVS_KEY_ATTR_MPLS)) { > > const struct ovs_key_mpls *mpls_key; > > + u32 hdr_len; > > + u32 label_count, label_count_mask, i; > > + > > > > mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]); > > - SW_FLOW_KEY_PUT(match, mpls.top_lse, > > - mpls_key->mpls_lse, is_mask); > > + hdr_len = nla_len(a[OVS_KEY_ATTR_MPLS]); > > + label_count = hdr_len / sizeof(struct ovs_key_mpls); > > + > > + if (label_count == 0 || label_count > MPLS_LABEL_DEPTH || > > + hdr_len % sizeof(struct ovs_key_mpls)) > > + return -EINVAL; > > + > > + label_count_mask = GENMASK(label_count - 1, 0); > > + > > + for (i = 0 ; i < label_count; i++) > > + SW_FLOW_KEY_PUT(match, mpls.lse[i], > > + mpls_key[i].mpls_lse, > > + is_mask); > > + > > + SW_FLOW_KEY_PUT(match, mpls.num_labels_mask, > > + label_count_mask, is_mask); > > + > > > > attrs &= ~(1ULL << OVS_KEY_ATTR_MPLS); > > } > > @@ -2104,13 +2121,18 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, > > ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha); > > ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha); > > } else if (eth_p_mpls(swkey->eth.type)) { > > + u8 num_labels, i; > > struct ovs_key_mpls *mpls_key; > > > > - nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key)); > > + num_labels = hweight_long(output->mpls.num_labels_mask); > > + nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, > > + num_labels * sizeof(*mpls_key)); > > if (!nla) > > goto nla_put_failure; > > + > > mpls_key = nla_data(nla); > > - mpls_key->mpls_lse = output->mpls.top_lse; > > + for (i = 0; i < num_labels; i++) > > + mpls_key[i].mpls_lse = output->mpls.lse[i]; > > } > > > > if ((swkey->eth.type == htons(ETH_P_IP) || @@ -2400,13 > > +2422,14 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa, > > static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > > const struct sw_flow_key *key, > > struct sw_flow_actions 
**sfa, > > - __be16 eth_type, __be16 vlan_tci, bool log); > > + __be16 eth_type, __be16 vlan_tci, > > + u32 mpls_label_count, bool log); > > > > static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, > > const struct sw_flow_key *key, > > struct sw_flow_actions **sfa, > > __be16 eth_type, __be16 vlan_tci, > > - bool log, bool last) > > + u32 mpls_label_count, bool log, > > + bool last) > > { > > const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; > > const struct nlattr *probability, *actions; @@ -2457,7 +2480,7 > > @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, > > return err; > > > > err = __ovs_nla_copy_actions(net, actions, key, sfa, > > - eth_type, vlan_tci, log); > > + eth_type, vlan_tci, > > + mpls_label_count, log); > > > > if (err) > > return err; > > @@ -2472,7 +2495,7 @@ static int validate_and_copy_clone(struct net *net, > > const struct sw_flow_key *key, > > struct sw_flow_actions **sfa, > > __be16 eth_type, __be16 vlan_tci, > > - bool log, bool last) > > + u32 mpls_label_count, bool log, > > + bool last) > > { > > int start, err; > > u32 exec; > > @@ -2492,7 +2515,7 @@ static int validate_and_copy_clone(struct net *net, > > return err; > > > > err = __ovs_nla_copy_actions(net, attr, key, sfa, > > - eth_type, vlan_tci, log); > > + eth_type, vlan_tci, > > + mpls_label_count, log); > > if (err) > > return err; > > > > @@ -2859,6 +2882,7 @@ static int validate_and_copy_check_pkt_len(struct net *net, > > const struct sw_flow_key *key, > > struct sw_flow_actions **sfa, > > __be16 eth_type, __be16 > > vlan_tci, > > + u32 mpls_label_count, > > bool log, bool last) > > { > > const struct nlattr *acts_if_greater, *acts_if_lesser_eq; @@ > > -2906,7 +2930,7 @@ static int validate_and_copy_check_pkt_len(struct net *net, > > return nested_acts_start; > > > > err = __ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa, > > - eth_type, vlan_tci, log); > > + eth_type, vlan_tci, > > + mpls_label_count, 
log); > > > > if (err) > > return err; > > @@ -2919,7 +2943,7 @@ static int validate_and_copy_check_pkt_len(struct net *net, > > return nested_acts_start; > > > > err = __ovs_nla_copy_actions(net, acts_if_greater, key, sfa, > > - eth_type, vlan_tci, log); > > + eth_type, vlan_tci, > > + mpls_label_count, log); > > > > if (err) > > return err; > > @@ -2946,7 +2970,8 @@ static int copy_action(const struct nlattr *from, > > static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > > const struct sw_flow_key *key, > > struct sw_flow_actions **sfa, > > - __be16 eth_type, __be16 vlan_tci, bool log) > > + __be16 eth_type, __be16 vlan_tci, > > + u32 mpls_label_count, bool log) > > { > > u8 mac_proto = ovs_key_mac_proto(key); > > const struct nlattr *a; > > @@ -3059,26 +3084,35 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > > !eth_p_mpls(eth_type))) > > return -EINVAL; > > eth_type = mpls->mpls_ethertype; > > + mpls_label_count++; > > break; > > } > > > > - case OVS_ACTION_ATTR_POP_MPLS: > > + case OVS_ACTION_ATTR_POP_MPLS: { > > + __be16 proto; > > if (vlan_tci & htons(VLAN_CFI_MASK) || > > !eth_p_mpls(eth_type)) > > return -EINVAL; > > > > - /* Disallow subsequent L2.5+ set and mpls_pop actions > > - * as there is no check here to ensure that the new > > - * eth_type is valid and thus set actions could > > - * write off the end of the packet or otherwise > > - * corrupt it. > > + /* Disallow subsequent L2.5+ set actions and mpls_pop > > + * actions once the last MPLS label in the packet is > > + * popped as there is no check here to ensure that > > + * the new eth type is valid and thus set actions could > > + * write off the end of the packet or otherwise corrupt > > + * it. > > * > > * Support for these actions is planned using packet > > * recirculation. 
> > */ > > - eth_type = htons(0); > > - break; > > + proto = nla_get_be16(a); > > + mpls_label_count--; > > > > + if (!eth_p_mpls(proto) || !mpls_label_count) > > + eth_type = htons(0); > > + else > > + eth_type = proto; > > + break; > > + } > > case OVS_ACTION_ATTR_SET: > > err = validate_set(a, key, sfa, > > &skip_copy, mac_proto, > > eth_type, @@ -3100,6 +3134,7 @@ static int > > __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > > > > err = validate_and_copy_sample(net, a, key, sfa, > > eth_type, > > vlan_tci, > > + > > + mpls_label_count, > > log, last); > > if (err) > > return err; @@ -3170,6 +3205,7 @@ > > static int __ovs_nla_copy_actions(struct net *net, const struct > > nlattr *attr, > > > > err = validate_and_copy_clone(net, a, key, sfa, > > eth_type, > > vlan_tci, > > + > > + mpls_label_count, > > log, last); > > if (err) > > return err; @@ -3183,6 +3219,7 @@ > > static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > > err = validate_and_copy_check_pkt_len(net, a, key, sfa, > > eth_type, > > > > vlan_tci, log, > > + > > + mpls_label_count, > > last); > > if (err) > > return err; @@ -3213,14 +3250,18 > > @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > > struct sw_flow_actions **sfa, bool log) > > { > > int err; > > + u32 mpls_label_count = 0; > > > > *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE)); > > if (IS_ERR(*sfa)) > > return PTR_ERR(*sfa); > > > > + if (eth_p_mpls(key->eth.type)) > > + mpls_label_count = > > + hweight_long(key->mpls.num_labels_mask); > > + > > (*sfa)->orig_len = nla_len(attr); > > err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type, > > - key->eth.vlan.tci, log); > > + key->eth.vlan.tci, > > + mpls_label_count, log); > > if (err) > > ovs_nla_free_flow_actions(*sfa); > > > > diff --git a/tests/system-traffic.at b/tests/system-traffic.at index > > 870a05e..cde7429 100644 > > --- a/tests/system-traffic.at > > +++ 
b/tests/system-traffic.at > > @@ -992,6 +992,45 @@ NS_CHECK_EXEC([at_ns1], [ping -q -c 3 -i 0.3 -w > > 2 10.1.1.1 | FORMAT_PING], [0], > > > > OVS_TRAFFIC_VSWITCHD_STOP > > AT_CLEANUP > > + > > +AT_SETUP([datapath - multiple mpls label pop]) > > +OVS_TRAFFIC_VSWITCHD_START([_ADD_BR([br1])]) > > + > > +ADD_NAMESPACES(at_ns0, at_ns1) > > + > > +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24") ADD_VETH(p1, at_ns1, br1, > > +"10.1.1.2/24") > > + > > +AT_CHECK([ip link add patch0 type veth peer name patch1]) on_exit > > +'ip link del patch0' > > + > > +AT_CHECK([ip link set dev patch0 up]) AT_CHECK([ip link set dev > > +patch1 up]) AT_CHECK([ovs-vsctl add-port br0 patch0]) > > +AT_CHECK([ovs-vsctl add-port br1 patch1]) > > + > > +AT_DATA([flows.txt], [dnl > > +table=0,priority=100,dl_type=0x0800 > > +actions=push_mpls:0x8847,set_mpls_label:3,push_mpls:0x8847,set_mpls > > +_label:2,push_mpls:0x8847,set_mpls_label:1,resubmit(,3) > > +table=0,priority=100,dl_type=0x8847,mpls_label=1 > > +actions=pop_mpls:0x8847,resubmit(,1) > > +table=1,priority=100,dl_type=0x8847,mpls_label=2 > > +actions=pop_mpls:0x8847,resubmit(,2) > > +table=2,priority=100,dl_type=0x8847,mpls_label=3 > > +actions=pop_mpls:0x0800,resubmit(,3) > > +table=0,priority=10 actions=resubmit(,3) > > +table=3,priority=10 actions=normal > > +]) > > + > > +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-ofctl > > +add-flows br1 flows.txt]) > > + > > +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | > > +FORMAT_PING], [0], [dnl > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms > > +]) > > + > > +NS_CHECK_EXEC([at_ns1], [ping -q -c 3 -i 0.3 -w 2 10.1.1.1 | > > +FORMAT_PING], [0], [dnl > > +3 packets transmitted, 3 received, 0% packet loss, time 0ms > > +]) > > +OVS_TRAFFIC_VSWITCHD_STOP > > +AT_CLEANUP > > + > > AT_SETUP([datapath - basic truncate action]) > > AT_SKIP_IF([test $HAVE_NC = no]) > > OVS_TRAFFIC_VSWITCHD_START() > > _______________________________________________ > dev 
mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
diff --git a/datapath/actions.c b/datapath/actions.c index a44e804..fbf4457 100644 --- a/datapath/actions.c +++ b/datapath/actions.c @@ -276,7 +276,7 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, } stack->label_stack_entry = lse; - flow_key->mpls.top_lse = lse; + flow_key->mpls.lse[0] = lse; return 0; } diff --git a/datapath/flow.c b/datapath/flow.c index 916f7f4..6dc7402 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -659,27 +659,35 @@ static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key) memset(&key->ipv4, 0, sizeof(key->ipv4)); } } else if (eth_p_mpls(key->eth.type)) { - size_t stack_len = MPLS_HLEN; + u8 label_count = 1; + memset(&key->mpls, 0, sizeof(key->mpls)); skb_set_inner_network_header(skb, skb->mac_len); while (1) { __be32 lse; - error = check_header(skb, skb->mac_len + stack_len); + error = check_header(skb, skb->mac_len + + label_count * MPLS_HLEN); if (unlikely(error)) return 0; memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN); - if (stack_len == MPLS_HLEN) - memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN); + if (label_count <= MPLS_LABEL_DEPTH) + memcpy(&key->mpls.lse[label_count - 1], &lse, + MPLS_HLEN); - skb_set_inner_network_header(skb, skb->mac_len + stack_len); + skb_set_inner_network_header(skb, skb->mac_len + + label_count * MPLS_HLEN); if (lse & htonl(MPLS_LS_S_MASK)) break; - stack_len += MPLS_HLEN; + label_count++; } + if (label_count > MPLS_LABEL_DEPTH) + label_count = MPLS_LABEL_DEPTH; + + key->mpls.num_labels_mask = GENMASK(label_count - 1, 0); } else if (key->eth.type == htons(ETH_P_IPV6)) { int nh_len; /* IPv6 Header + Extensions */ diff --git a/datapath/flow.h b/datapath/flow.h index 5560300..4ad5363 100644 --- a/datapath/flow.h +++ b/datapath/flow.h @@ -43,6 +43,7 @@ enum sw_flow_mac_proto { MAC_PROTO_ETHERNET, }; #define SW_FLOW_KEY_INVALID 0x80 +#define MPLS_LABEL_DEPTH 3 /* Store options at the end of the array if they are less than the * maximum size. 
This allows us to get the benefits of variable length @@ -98,9 +99,6 @@ struct sw_flow_key { */ union { struct { - __be32 top_lse; /* top label stack entry */ - } mpls; - struct { u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */ u8 tos; /* IP ToS. */ u8 ttl; /* IP TTL/hop limit. */ @@ -148,6 +146,10 @@ struct sw_flow_key { } nd; }; } ipv6; + struct { + u32 num_labels_mask; /* labels present bitmap of effective length MPLS_LABEL_DEPTH */ + __be32 lse[MPLS_LABEL_DEPTH]; /* label stack entry */ + } mpls; struct ovs_key_nsh nsh; /* network service header */ }; struct { diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c index 35f13d7..9fc1a19 100644 --- a/datapath/flow_netlink.c +++ b/datapath/flow_netlink.c @@ -438,7 +438,7 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) }, [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, .next = ovs_tunnel_key_lens, }, - [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, + [OVS_KEY_ATTR_MPLS] = { .len = OVS_ATTR_VARIABLE }, [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u32) }, [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) }, [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) }, @@ -1619,10 +1619,27 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, if (attrs & (1ULL << OVS_KEY_ATTR_MPLS)) { const struct ovs_key_mpls *mpls_key; + u32 hdr_len; + u32 label_count, label_count_mask, i; + mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]); - SW_FLOW_KEY_PUT(match, mpls.top_lse, - mpls_key->mpls_lse, is_mask); + hdr_len = nla_len(a[OVS_KEY_ATTR_MPLS]); + label_count = hdr_len / sizeof(struct ovs_key_mpls); + + if (label_count == 0 || label_count > MPLS_LABEL_DEPTH || + hdr_len % sizeof(struct ovs_key_mpls)) + return -EINVAL; + + label_count_mask = GENMASK(label_count - 1, 0); + + for (i = 0 ; i < label_count; i++) + SW_FLOW_KEY_PUT(match, mpls.lse[i], + mpls_key[i].mpls_lse, is_mask); + + SW_FLOW_KEY_PUT(match, 
mpls.num_labels_mask, + label_count_mask, is_mask); + attrs &= ~(1ULL << OVS_KEY_ATTR_MPLS); } @@ -2104,13 +2121,18 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha); ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha); } else if (eth_p_mpls(swkey->eth.type)) { + u8 num_labels, i; struct ovs_key_mpls *mpls_key; - nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key)); + num_labels = hweight_long(output->mpls.num_labels_mask); + nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, + num_labels * sizeof(*mpls_key)); if (!nla) goto nla_put_failure; + mpls_key = nla_data(nla); - mpls_key->mpls_lse = output->mpls.top_lse; + for (i = 0; i < num_labels; i++) + mpls_key[i].mpls_lse = output->mpls.lse[i]; } if ((swkey->eth.type == htons(ETH_P_IP) || @@ -2400,13 +2422,14 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa, static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, - __be16 eth_type, __be16 vlan_tci, bool log); + __be16 eth_type, __be16 vlan_tci, + u32 mpls_label_count, bool log); static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - bool log, bool last) + u32 mpls_label_count, bool log, bool last) { const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; const struct nlattr *probability, *actions; @@ -2457,7 +2480,7 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, return err; err = __ovs_nla_copy_actions(net, actions, key, sfa, - eth_type, vlan_tci, log); + eth_type, vlan_tci, mpls_label_count, log); if (err) return err; @@ -2472,7 +2495,7 @@ static int validate_and_copy_clone(struct net *net, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - bool log, bool last) + u32 mpls_label_count, bool 
log, bool last) { int start, err; u32 exec; @@ -2492,7 +2515,7 @@ static int validate_and_copy_clone(struct net *net, return err; err = __ovs_nla_copy_actions(net, attr, key, sfa, - eth_type, vlan_tci, log); + eth_type, vlan_tci, mpls_label_count, log); if (err) return err; @@ -2859,6 +2882,7 @@ static int validate_and_copy_check_pkt_len(struct net *net, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, + u32 mpls_label_count, bool log, bool last) { const struct nlattr *acts_if_greater, *acts_if_lesser_eq; @@ -2906,7 +2930,7 @@ static int validate_and_copy_check_pkt_len(struct net *net, return nested_acts_start; err = __ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa, - eth_type, vlan_tci, log); + eth_type, vlan_tci, mpls_label_count, log); if (err) return err; @@ -2919,7 +2943,7 @@ static int validate_and_copy_check_pkt_len(struct net *net, return nested_acts_start; err = __ovs_nla_copy_actions(net, acts_if_greater, key, sfa, - eth_type, vlan_tci, log); + eth_type, vlan_tci, mpls_label_count, log); if (err) return err; @@ -2946,7 +2970,8 @@ static int copy_action(const struct nlattr *from, static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, - __be16 eth_type, __be16 vlan_tci, bool log) + __be16 eth_type, __be16 vlan_tci, + u32 mpls_label_count, bool log) { u8 mac_proto = ovs_key_mac_proto(key); const struct nlattr *a; @@ -3059,26 +3084,35 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, !eth_p_mpls(eth_type))) return -EINVAL; eth_type = mpls->mpls_ethertype; + mpls_label_count++; break; } - case OVS_ACTION_ATTR_POP_MPLS: + case OVS_ACTION_ATTR_POP_MPLS: { + __be16 proto; if (vlan_tci & htons(VLAN_CFI_MASK) || !eth_p_mpls(eth_type)) return -EINVAL; - /* Disallow subsequent L2.5+ set and mpls_pop actions - * as there is no check here to ensure that the new - * eth_type is valid and thus set actions 
could - * write off the end of the packet or otherwise - * corrupt it. + /* Disallow subsequent L2.5+ set actions and mpls_pop + * actions once the last MPLS label in the packet is + * popped as there is no check here to ensure that + * the new eth type is valid and thus set actions could + * write off the end of the packet or otherwise corrupt + * it. * * Support for these actions is planned using packet * recirculation. */ - eth_type = htons(0); - break; + proto = nla_get_be16(a); + mpls_label_count--; + if (!eth_p_mpls(proto) || !mpls_label_count) + eth_type = htons(0); + else + eth_type = proto; + break; + } case OVS_ACTION_ATTR_SET: err = validate_set(a, key, sfa, &skip_copy, mac_proto, eth_type, @@ -3100,6 +3134,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, err = validate_and_copy_sample(net, a, key, sfa, eth_type, vlan_tci, + mpls_label_count, log, last); if (err) return err; @@ -3170,6 +3205,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, err = validate_and_copy_clone(net, a, key, sfa, eth_type, vlan_tci, + mpls_label_count, log, last); if (err) return err; @@ -3183,6 +3219,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, err = validate_and_copy_check_pkt_len(net, a, key, sfa, eth_type, vlan_tci, log, + mpls_label_count, last); if (err) return err; @@ -3213,14 +3250,18 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, struct sw_flow_actions **sfa, bool log) { int err; + u32 mpls_label_count = 0; *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE)); if (IS_ERR(*sfa)) return PTR_ERR(*sfa); + if (eth_p_mpls(key->eth.type)) + mpls_label_count = hweight_long(key->mpls.num_labels_mask); + (*sfa)->orig_len = nla_len(attr); err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type, - key->eth.vlan.tci, log); + key->eth.vlan.tci, mpls_label_count, log); if (err) ovs_nla_free_flow_actions(*sfa); diff --git 
a/tests/system-traffic.at b/tests/system-traffic.at index 870a05e..cde7429 100644 --- a/tests/system-traffic.at +++ b/tests/system-traffic.at @@ -992,6 +992,45 @@ NS_CHECK_EXEC([at_ns1], [ping -q -c 3 -i 0.3 -w 2 10.1.1.1 | FORMAT_PING], [0], OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP + +AT_SETUP([datapath - multiple mpls label pop]) +OVS_TRAFFIC_VSWITCHD_START([_ADD_BR([br1])]) + +ADD_NAMESPACES(at_ns0, at_ns1) + +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24") +ADD_VETH(p1, at_ns1, br1, "10.1.1.2/24") + +AT_CHECK([ip link add patch0 type veth peer name patch1]) +on_exit 'ip link del patch0' + +AT_CHECK([ip link set dev patch0 up]) +AT_CHECK([ip link set dev patch1 up]) +AT_CHECK([ovs-vsctl add-port br0 patch0]) +AT_CHECK([ovs-vsctl add-port br1 patch1]) + +AT_DATA([flows.txt], [dnl +table=0,priority=100,dl_type=0x0800 actions=push_mpls:0x8847,set_mpls_label:3,push_mpls:0x8847,set_mpls_label:2,push_mpls:0x8847,set_mpls_label:1,resubmit(,3) +table=0,priority=100,dl_type=0x8847,mpls_label=1 actions=pop_mpls:0x8847,resubmit(,1) +table=1,priority=100,dl_type=0x8847,mpls_label=2 actions=pop_mpls:0x8847,resubmit(,2) +table=2,priority=100,dl_type=0x8847,mpls_label=3 actions=pop_mpls:0x0800,resubmit(,3) +table=0,priority=10 actions=resubmit(,3) +table=3,priority=10 actions=normal +]) + +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) +AT_CHECK([ovs-ofctl add-flows br1 flows.txt]) + +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +NS_CHECK_EXEC([at_ns1], [ping -q -c 3 -i 0.3 -w 2 10.1.1.1 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([datapath - basic truncate action]) AT_SKIP_IF([test $HAVE_NC = no]) OVS_TRAFFIC_VSWITCHD_START()