[ovs-dev,v2] tunnel: ToS and TTL inheritance for MPLS tunneled traffic

Message ID DB5PR07MB1496349CA3E010994AFA075F9F5F0@DB5PR07MB1496.eurprd07.prod.outlook.com
State New
Headers show
Series
  • [ovs-dev,v2] tunnel: ToS and TTL inheritance for MPLS tunneled traffic
Related show

Commit Message

Balazs Nemeth Nov. 1, 2017, 3:38 p.m.
From: Miklos Pelyva <miklos.pelyva@ericsson.com>

When a new outermost MPLS label is added to 'flow' the 'flow''s
Ethernet type is changed to 'mpls_eth_type'. After the new label is
set, the 'flow''s MPLS stack is updated, and the L3/4 fields are
cleared to mark them invalid. This results in loosing the values of
the 'nw_tos' and the 'nw_ttl' fields from the struct 'flow'.

Hence, it is impossible to use the ToS and TTL 'inherit' feature in
case of MPLS tunneled traffic, because currently the values to be
inherited are coming from the cleared (invalidated) variables of
struct 'flow'.

To support inheriting the ToS field to the outer tunnel header even
in the presence of an MPLS shim header, this patch introduces a new
variable in the context structure, called 'latest_nw_tos', which is
used for storing the up-to-date 'nw_tos' value during the flow
translation, and is referred in the 'tnl_port_send()' function.

Besides, for every MPLS packet the MPLS TTL field is copied to the
TTL field of the outer tunnel header, if inheritance is configured.

Two new unit tests are created for checking the ToS and TTL
inheritance for IP, and non-IP packets sent over MPLS over IP
tunnels. The ECN inheritance is not applied in that case, because
the related RFC 5129 does not describe an individual way, just
options for ECN in MPLS.

Signed-off-by: Miklos Pelyva <miklos.pelyva@ericsson.com>
Signed-off-by: Balazs Nemeth <balazs.nemeth@ericsson.com>
Signed-off-by: Jan Scheurich <jan.scheurich@ericsson.com>
Co-authored-by: Jan Scheurich <jan.scheurich@ericsson.com>
---
 lib/flow.h                   |  5 +++++
 ofproto/ofproto-dpif-xlate.c | 24 ++++++++++++++++++++----
 ofproto/tunnel.c             | 10 +++++++++-
 ofproto/tunnel.h             |  2 +-
 tests/tunnel.at              | 36 +++++++++++++++++++++++++++++++++++-
 5 files changed, 70 insertions(+), 7 deletions(-)

--
1.9.1

Patch

diff --git a/lib/flow.h b/lib/flow.h
index b3128da..3b38298 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -1000,6 +1000,11 @@  static inline bool is_ip_any(const struct flow *flow)
     return dl_type_is_ip_any(get_dl_type(flow));
 }

+static inline bool is_mpls_any(const struct flow *flow)
+{
+    return eth_type_mpls(flow->dl_type);
+}
+
 static inline bool is_ip_proto(const struct flow *flow, uint8_t ip_proto,
                                struct flow_wildcards *wc)
 {
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index d0b45d2..429bf99 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -372,6 +372,11 @@  struct xlate_ctx {
      * the MPLS label stack that was originally present. */
     bool was_mpls;

+    /* Latest IP ToS, an always up to date representation of nw_tos during the
+     * lifetime of the ctx. Every time nw_tos is modified, it should be updated
+     * for possible inheritance configurations. UINT16_MAX means undefined. */
+    uint16_t latest_nw_tos;
+
     /* True if conntrack has been performed on this packet during processing
      * on the current bridge. This is used to determine whether conntrack
      * state from the datapath should be honored after thawing. */
@@ -3716,6 +3721,7 @@  compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
             wc->masks.nw_tos |= IP_DSCP_MASK;
             flow->nw_tos &= ~IP_DSCP_MASK;
             flow->nw_tos |= dscp;
+            ctx->latest_nw_tos = flow->nw_tos;
         }
     }

@@ -3726,7 +3732,8 @@  compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
           * matches, while explicit set actions on tunnel metadata are.
           */
         flow_tnl = flow->tunnel;
-        odp_port = tnl_port_send(xport->ofport, flow, ctx->wc);
+        odp_port = tnl_port_send(xport->ofport, flow, ctx->latest_nw_tos,
+                                 ctx->wc);
         if (odp_port == ODPP_NONE) {
             xlate_report(ctx, OFT_WARN, "Tunneling decided against output");
             goto out; /* restore flow_nw_tos */
@@ -3823,10 +3830,10 @@  compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
                                          xport->xbundle));
     }

- out:
+out:
     /* Restore flow */
     memcpy(flow->vlans, flow_vlans, sizeof flow->vlans);
-    flow->nw_tos = flow_nw_tos;
+    ctx->latest_nw_tos = flow->nw_tos = flow_nw_tos;
     flow->dl_dst = flow_dl_dst;
     flow->dl_src = flow_dl_src;
     flow->packet_type = flow_packet_type;
@@ -5230,7 +5237,7 @@  xlate_sample_action(struct xlate_ctx *ctx,

         if (xport && xport->is_tunnel) {
             struct flow *flow = &ctx->xin->flow;
-            tnl_port_send(xport->ofport, flow, ctx->wc);
+            tnl_port_send(xport->ofport, flow, ctx->latest_nw_tos, ctx->wc);
             if (!ovs_native_tunneling_is_on(ctx->xbridge->ofproto)) {
                 struct flow_tnl flow_tnl = flow->tunnel;

@@ -6305,6 +6312,7 @@  do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
                 wc->masks.nw_tos |= IP_DSCP_MASK;
                 flow->nw_tos &= ~IP_DSCP_MASK;
                 flow->nw_tos |= ofpact_get_SET_IP_DSCP(a)->dscp;
+                ctx->latest_nw_tos = flow->nw_tos;
             }
             break;

@@ -6313,6 +6321,7 @@  do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
                 wc->masks.nw_tos |= IP_ECN_MASK;
                 flow->nw_tos &= ~IP_ECN_MASK;
                 flow->nw_tos |= ofpact_get_SET_IP_ECN(a)->ecn;
+                ctx->latest_nw_tos = flow->nw_tos;
             }
             break;

@@ -6384,6 +6393,12 @@  do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
                 mf_set_flow_value_masked(mf, set_field->value,
                                          ofpact_set_field_mask(set_field),
                                          flow);
+                if (!strcmp(mf->name, "nw_tos") ||
+                    !strcmp(mf->name, "ip_dscp") ||
+                    !strcmp(mf->name, "ip_ecn") ||
+                    !strcmp(mf->name, "nw_ecn")) {
+                    ctx->latest_nw_tos = flow->nw_tos;
+                }
             } else {
                 xlate_report(ctx, OFT_WARN,
                              "unmet prerequisites for %s, set_field ignored",
@@ -6890,6 +6905,7 @@  xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
         .paused_flow = &paused_flow,

         .was_mpls = false,
+        .latest_nw_tos = is_ip_any(flow) ? flow->nw_tos : UINT16_MAX,
         .conntracked = false,

         .ct_nat_action = NULL,
diff --git a/ofproto/tunnel.c b/ofproto/tunnel.c
index c6856a0..220ecd1 100644
--- a/ofproto/tunnel.c
+++ b/ofproto/tunnel.c
@@ -395,7 +395,8 @@  tnl_wc_init(struct flow *flow, struct flow_wildcards *wc)
  * shouldn't occur. */
 odp_port_t
 tnl_port_send(const struct ofport_dpif *ofport, struct flow *flow,
-              struct flow_wildcards *wc) OVS_EXCLUDED(rwlock)
+              uint16_t latest_nw_tos, struct flow_wildcards *wc)
+    OVS_EXCLUDED(rwlock)
 {
     const struct netdev_tunnel_config *cfg;
     struct tnl_port *tnl_port;
@@ -440,6 +441,10 @@  tnl_port_send(const struct ofport_dpif *ofport, struct flow *flow,
     if (cfg->ttl_inherit && is_ip_any(flow)) {
         wc->masks.nw_ttl = 0xff;
         flow->tunnel.ip_ttl = flow->nw_ttl;
+    } else if (cfg->ttl_inherit && is_mpls_any(flow)) {
+        /* The outer TTL must be determined from the MPLS TTL field, when
+         * inheritance is configured and MPLS tunneled traffic is present. */
+        flow->tunnel.ip_ttl = mpls_lse_to_ttl(flow->mpls_lse[0]);
     } else {
         flow->tunnel.ip_ttl = cfg->ttl;
     }
@@ -447,6 +452,9 @@  tnl_port_send(const struct ofport_dpif *ofport, struct flow *flow,
     if (cfg->tos_inherit && is_ip_any(flow)) {
         wc->masks.nw_tos |= IP_DSCP_MASK;
         flow->tunnel.ip_tos = flow->nw_tos & IP_DSCP_MASK;
+    } else if (cfg->tos_inherit && latest_nw_tos != UINT16_MAX) {
+        wc->masks.nw_tos |= IP_DSCP_MASK;
+        flow->tunnel.ip_tos = latest_nw_tos & IP_DSCP_MASK;
     } else {
         flow->tunnel.ip_tos = cfg->tos;
     }
diff --git a/ofproto/tunnel.h b/ofproto/tunnel.h
index b0ec67c..b6265f4 100644
--- a/ofproto/tunnel.h
+++ b/ofproto/tunnel.h
@@ -42,7 +42,7 @@  const struct ofport_dpif *tnl_port_receive(const struct flow *);
 void tnl_wc_init(struct flow *, struct flow_wildcards *);
 bool tnl_process_ecn(struct flow *);
 odp_port_t tnl_port_send(const struct ofport_dpif *, struct flow *,
-                         struct flow_wildcards *wc);
+                         uint16_t, struct flow_wildcards *wc);

 /* Returns true if 'flow' should be submitted to tnl_port_receive(). */
 static inline bool
diff --git a/tests/tunnel.at b/tests/tunnel.at
index 767af99..c3d5e9f 100644
--- a/tests/tunnel.at
+++ b/tests/tunnel.at
@@ -201,7 +201,7 @@  OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=gre \
                     options:remote_ip=1.1.1.1 options:tos=inherit \
                     options:ttl=inherit ofport_request=1 \
                     -- add-port br0 p2 -- set Interface p2 type=dummy \
-                    ofport_request=2 ofport_request=2])
+                    ofport_request=2])
 AT_DATA([flows.txt], [dnl
 actions=output:1
 ])
@@ -235,6 +235,40 @@  AT_CHECK([tail -1 stdout], [0],
 OVS_VSWITCHD_STOP
 AT_CLEANUP

+AT_SETUP([tunnel - MPLS over L3 tunnel - ToS and TTL inheritance])
+OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=gre \
+                    options:packet_type=legacy_l3 options:remote_ip=1.1.1.1 \
+                    options:tos=inherit options:ttl=inherit \
+                    ofport_request=1 \
+                    -- add-port br0 p2 -- set Interface p2 type=dummy \
+                    ofport_request=2])
+AT_DATA([flows.txt], [dnl
+action=push_mpls:0x8847,1
+])
+
+OVS_VSWITCHD_DISABLE_TUNNEL_PUSH_POP
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+AT_CHECK([ovs-appctl dpif/show | tail -n +3], [0], [dnl
+               br0 65534/100: (dummy-internal)
+               p1 1/1: (gre: packet_type=legacy_l3, remote_ip=1.1.1.1, tos=inherit, ttl=inherit)
+               p2 2/2: (dummy)
+])
+
+dnl Basic
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=4,ttl=144,frag=no),tcp(src=8,dst=9)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+  [Datapath actions: set(tunnel(dst=1.1.1.1,tos=0x4,ttl=144,flags(df))),pop_eth,push_mpls(label=0,tc=1,ttl=144,bos=1,eth_type=0x8847),1
+])
+
+dnl non-IP
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0806),arp(sip=1.2.3.4,tip=5.6.7.8,op=1,sha=00:0f:10:11:12:13,tha=00:14:15:16:17:18)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+  [Datapath actions: set(tunnel(dst=1.1.1.1,ttl=64,flags(df))),pop_eth,push_mpls(label=0,tc=0,ttl=64,bos=1,eth_type=0x8847),1
+])
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
 AT_SETUP([tunnel - set_tunnel])
 OVS_VSWITCHD_START([dnl
     add-port br0 p1 -- set Interface p1 type=gre options:key=flow \