[ovs-dev,2/2] xlate: fix xport lookup for recirc

Message ID 20180112133411.27721-3-zoltan.balogh@ericsson.com
State Accepted
Headers show
Series
  • avoid using xport_lookup() in case of recirculation
Related show

Commit Message

Zoltan Balogh Jan. 12, 2018, 1:34 p.m.
Xlate_lookup and xlate_lookup_ofproto_() provides in_port and ofproto
based on xport determined using flow, which is extracted from packet.
The lookup can happen due to recirculation as well. It can happen, that
packet_type has been modified during xlate before recirculation is
triggered, so the lookup fails or delivers wrong xport.
This can be worked around by propagating xport to ctx->xin after the very
first lookup and store it in frozen state of the recirculation.
So, when lookup is performed due to recirculation, the xport can be
retrieved from the frozen state.

The packet-type-aware unit tests are updated with a new one to verify
this behavior.

Signed-off-by: Zoltan Balogh <zoltan.balogh@ericsson.com>
CC: Jan Scheurich <jan.scheurich@ericsson.com>
Fixes: beb75a40fdc2 ("userspace: Switching of L3 packets in L2
pipeline")
---
 ofproto/ofproto-dpif-rid.c   |   5 +-
 ofproto/ofproto-dpif-rid.h   |   1 +
 ofproto/ofproto-dpif-xlate.c |  52 ++++++++++++++++++
 ofproto/ofproto-dpif-xlate.h |   4 ++
 tests/packet-type-aware.at   | 122 +++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 183 insertions(+), 1 deletion(-)

Patch

diff --git a/ofproto/ofproto-dpif-rid.c b/ofproto/ofproto-dpif-rid.c
index 83278d82b..79412c28b 100644
--- a/ofproto/ofproto-dpif-rid.c
+++ b/ofproto/ofproto-dpif-rid.c
@@ -162,7 +162,8 @@  frozen_state_equal(const struct frozen_state *a, const struct frozen_state *b)
                              b->ofpacts, b->ofpacts_len)
             && ofpacts_equal(a->action_set, a->action_set_len,
                              b->action_set, b->action_set_len)
-            && !memcmp(a->userdata, b->userdata, a->userdata_len));
+            && !memcmp(a->userdata, b->userdata, a->userdata_len)
+            && uuid_equals(&a->xport_uuid, &b->xport_uuid));
 }
 
 /* Lockless RCU protected lookup.  If node is needed accross RCU quiescent
@@ -294,6 +295,8 @@  recirc_alloc_id(struct ofproto_dpif *ofproto)
             },
             .in_port = OFPP_NONE },
     };
+    /* In order to make sparse happy, xport_uuid needs to be set separately. */
+    state.xport_uuid = UUID_ZERO;
     return recirc_alloc_id__(&state, frozen_state_hash(&state))->id;
 }
 
diff --git a/ofproto/ofproto-dpif-rid.h b/ofproto/ofproto-dpif-rid.h
index 858b02e39..441584af8 100644
--- a/ofproto/ofproto-dpif-rid.h
+++ b/ofproto/ofproto-dpif-rid.h
@@ -143,6 +143,7 @@  struct frozen_state {
     size_t stack_size;
     mirror_mask_t mirrors;        /* Mirrors already output. */
     bool conntracked;             /* Conntrack occurred prior to freeze. */
+    struct uuid xport_uuid;       /* UUID of 1st port packet received on. */
 
     /* Actions to be translated when thawing. */
     struct ofpact *ofpacts;
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 47b4f7378..e2a175a4c 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -561,6 +561,8 @@  static struct xbundle *xbundle_lookup(struct xlate_cfg *,
                                       const struct ofbundle *);
 static struct xport *xport_lookup(struct xlate_cfg *,
                                   const struct ofport_dpif *);
+static struct xport *xport_lookup_by_uuid(struct xlate_cfg *,
+                                          const struct uuid *);
 static struct xport *get_ofp_port(const struct xbridge *, ofp_port_t ofp_port);
 static struct skb_priority_to_dscp *get_skb_priority(const struct xport *,
                                                      uint32_t skb_priority);
@@ -1374,12 +1376,36 @@  xlate_lookup_ofproto_(const struct dpif_backer *backer, const struct flow *flow,
     struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
     const struct xport *xport;
 
+    /* If packet is recirculated, xport can be retrieved from frozen state. */
+    if (flow->recirc_id) {
+        const struct recirc_id_node *recirc_id_node;
+
+        recirc_id_node = recirc_id_node_find(flow->recirc_id);
+
+        if (OVS_UNLIKELY(!recirc_id_node)) {
+            return NULL;
+        }
+
+        /* If recirculation was initiated due to bond (in_port = OFPP_NONE)
+         * then frozen state is static and xport_uuid is not defined, so xport
+         * cannot be restored from frozen state. */
+        if (recirc_id_node->state.metadata.in_port != OFPP_NONE) {
+            struct uuid xport_uuid = recirc_id_node->state.xport_uuid;
+            xport = xport_lookup_by_uuid(xcfg, &xport_uuid);
+            if (xport && xport->xbridge && xport->xbridge->ofproto) {
+                goto out;
+            }
+        }
+    }
+
     xport = xport_lookup(xcfg, tnl_port_should_receive(flow)
                          ? tnl_port_receive(flow)
                          : odp_port_to_ofport(backer, flow->in_port.odp_port));
     if (OVS_UNLIKELY(!xport)) {
         return NULL;
     }
+
+out:
     *xportp = xport;
     if (ofp_in_port) {
         *ofp_in_port = xport->ofp_port;
@@ -1517,6 +1543,26 @@  xport_lookup(struct xlate_cfg *xcfg, const struct ofport_dpif *ofport)
     return NULL;
 }
 
+static struct xport *
+xport_lookup_by_uuid(struct xlate_cfg *xcfg, const struct uuid *uuid)
+{
+    struct hmap *xports;
+    struct xport *xport;
+
+    if (uuid_is_zero(uuid) || !xcfg) {
+        return NULL;
+    }
+
+    xports = &xcfg->xports_uuid;
+
+    HMAP_FOR_EACH_IN_BUCKET (xport, uuid_node, uuid_hash(uuid), xports) {
+        if (uuid_equals(&xport->uuid, uuid)) {
+            return xport;
+        }
+    }
+    return NULL;
+}
+
 static struct stp_port *
 xport_get_stp_port(const struct xport *xport)
 {
@@ -4480,6 +4526,7 @@  finish_freezing__(struct xlate_ctx *ctx, uint8_t table)
         .stack_size = ctx->stack.size,
         .mirrors = ctx->mirrors,
         .conntracked = ctx->conntracked,
+        .xport_uuid = ctx->xin->xport_uuid,
         .ofpacts = ctx->frozen_actions.data,
         .ofpacts_len = ctx->frozen_actions.size,
         .action_set = ctx->action_set.data,
@@ -6509,6 +6556,7 @@  xlate_in_init(struct xlate_in *xin, struct ofproto_dpif *ofproto,
     xin->odp_actions = odp_actions;
     xin->in_packet_out = false;
     xin->recirc_queue = NULL;
+    xin->xport_uuid = UUID_ZERO;
 
     /* Do recirc lookup. */
     xin->frozen_state = NULL;
@@ -6934,6 +6982,9 @@  xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
      * flow->in_port is the ultimate input port of the packet.) */
     struct xport *in_port = get_ofp_port(xbridge,
                                          ctx.base_flow.in_port.ofp_port);
+    if (in_port && !in_port->peer) {
+        ctx.xin->xport_uuid = in_port->uuid;
+    }
 
     if (flow->packet_type != htonl(PT_ETH) && in_port &&
         in_port->pt_mode == NETDEV_PT_LEGACY_L3 && ctx.table_id == 0) {
@@ -7173,6 +7224,7 @@  xlate_resume(struct ofproto_dpif *ofproto,
         .stack_size = pin->stack_size,
         .mirrors = pin->mirrors,
         .conntracked = pin->conntracked,
+        .xport_uuid = UUID_ZERO,
 
         /* When there are no actions, xlate_actions() will search the flow
          * table.  We don't want it to do that (we want it to resume), so
diff --git a/ofproto/ofproto-dpif-xlate.h b/ofproto/ofproto-dpif-xlate.h
index 39542de2b..2cbb3c909 100644
--- a/ofproto/ofproto-dpif-xlate.h
+++ b/ofproto/ofproto-dpif-xlate.h
@@ -26,6 +26,7 @@ 
 #include "ofproto.h"
 #include "stp.h"
 #include "ovs-lldp.h"
+#include "uuid.h"
 
 struct bfd;
 struct bond;
@@ -162,6 +163,9 @@  struct xlate_in {
     /* ofproto/trace maintains this queue to trace flows that require
      * recirculation. */
     struct ovs_list *recirc_queue;
+
+    /* UUID of first non-patch port packet was received on.*/
+    struct uuid xport_uuid;
 };
 
 void xlate_ofproto_set(struct ofproto_dpif *, const char *name, struct dpif *,
diff --git a/tests/packet-type-aware.at b/tests/packet-type-aware.at
index 33332f16a..f43095c60 100644
--- a/tests/packet-type-aware.at
+++ b/tests/packet-type-aware.at
@@ -915,3 +915,125 @@  deafbeefbabeaa550000000208004500006c00004000402f26610a0000010a000002000008004500
 
 OVS_VSWITCHD_STOP
 AT_CLEANUP
+
+
+AT_SETUP([ptap - recirculate after packet_type change])
+
+########################
+#      --<--+
+#     LOCAL |
+#        +--o------+
+#        |  int-br |
+#        +------o--+
+#               L3 tunnel (remote : 20.0.0.2/24)
+#        +--o------+
+#        |   br0   |  <- 20.0.0.1/24
+#        +------o--+
+#            p0 |
+#               +--<--
+
+OVS_VSWITCHD_START([dnl
+    -- set bridge br0 other-config:hwaddr=aa:bb:cc:00:00:01 \
+    -- add-port br0 p0 \
+    -- set interface p0 type=dummy other_config:hwaddr=aa:bb:cc:00:00:01 ofport_request=1 \
+    -- add-br int-br -- set bridge int-br datapath_type=dummy \
+    -- add-port int-br tunnel \
+    -- set interface tunnel type=gre options:packet_type=legacy_l3 options:remote_ip=20.0.0.2 ofport_request=2
+])
+
+### Verify datapath configuration
+AT_CHECK([
+    ovs-appctl dpif/show | grep -v hit | sed 's/\t/    /g' | sed 's./[[0-9]]\{1,\}..'
+], [0], [dnl
+    br0:
+        br0 65534: (dummy-internal)
+        p0 1: (dummy)
+    int-br:
+        int-br 65534: (dummy-internal)
+        tunnel 2: (gre: packet_type=legacy_l3, remote_ip=20.0.0.2)
+])
+
+AT_CHECK([
+    ovs-appctl netdev-dummy/ip4addr br0 20.0.0.1/24 &&
+    ovs-appctl ovs/route/add 20.0.0.2/24 br0 &&
+    ovs-appctl tnl/neigh/set br0 20.0.0.1 aa:bb:cc:00:00:01 &&
+    ovs-appctl tnl/neigh/set br0 20.0.0.2 aa:bb:cc:00:00:02
+], [0], [ignore])
+
+ovs-appctl time/warp 1000
+
+AT_CHECK([
+    ovs-appctl ovs/route/show | grep User
+],[0], [dnl
+User: 20.0.0.0/24 dev br0 SRC 20.0.0.1
+])
+
+AT_CHECK([
+    ovs-appctl tnl/neigh/show | grep br | tr -s ' ' | sort
+],[0], [dnl
+20.0.0.1 aa:bb:cc:00:00:01 br0
+20.0.0.2 aa:bb:cc:00:00:02 br0
+])
+
+# Goto_table after pop_mpls triggers recirculation.
+AT_CHECK([
+    ovs-ofctl del-flows br0 &&
+    ovs-ofctl del-flows int-br &&
+    ovs-ofctl add-flow br0 "actions=normal"
+    ovs-ofctl add-flow int-br "table=0,in_port=tunnel,actions=pop_mpls:0x800,goto_table:20" &&
+    ovs-ofctl add-flow int-br "table=20,actions=dec_ttl,output:LOCAL"
+], [0], [ignore])
+
+ovs-appctl time/warp 1000
+
+AT_CHECK([
+    ovs-ofctl dump-flows br0 -OOpenFlow13 | ofctl_strip | grep actions | sed 's/reset_counts //'
+], [0], [dnl
+ actions=NORMAL
+])
+
+AT_CHECK([
+    ovs-ofctl dump-flows int-br -OOpenFlow13 | ofctl_strip | grep actions | sed 's/reset_counts //'
+], [0], [dnl
+ in_port=2 actions=pop_mpls:0x0800,resubmit(,20)
+ table=20, actions=dec_ttl,LOCAL
+])
+
+# Receive MPLS over L3 GRE packet in underlay bridge.
+AT_CHECK([
+    ovs-appctl netdev-dummy/receive p0 aabbcc000001aabbcc00000208004500007000004000402f125d140000021400000100008847003e71404500005470f5400040013445c0a80a14c0a80a0a08004e1adbfc0001566b575a00000000604f010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637
+    ovs-appctl netdev-dummy/receive p0 aabbcc000001aabbcc00000208004500007000004000402f125d140000021400000100008847003e71404500005470f5400040013445c0a80a14c0a80a0a08004e1adbfc0001566b575a00000000604f010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637
+], [0], [ignore])
+
+ovs-appctl time/warp 1000
+
+AT_CHECK([
+    ovs-appctl netdev-dummy/receive p0 aabbcc000001aabbcc00000208004500007000004000402f125d140000021400000100008847003e71404500005470f5400040013445c0a80a14c0a80a0a08004e1adbfc0001566b575a00000000604f010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637
+    ovs-appctl netdev-dummy/receive p0 aabbcc000001aabbcc00000208004500007000004000402f125d140000021400000100008847003e71404500005470f5400040013445c0a80a14c0a80a0a08004e1adbfc0001566b575a00000000604f010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637
+], [0], [ignore])
+
+ovs-appctl time/warp 1000
+
+# After packet has been received on L3 port in overlay bridge, its packet_type changes to Ethernet.
+# Resubmit after pop_mpls recirculates the L2 packet.
+AT_CHECK([
+    ovs-appctl dpctl/dump-flows --names dummy@ovs-dummy | strip_used | grep -v ipv6 | sort
+], [0], [flow-dump from non-dpdk interfaces:
+recirc_id(0),in_port(p0),packet_type(ns=0,id=0),eth(src=aa:bb:cc:00:00:02,dst=aa:bb:cc:00:00:01),eth_type(0x0800),ipv4(dst=20.0.0.1,proto=47,frag=no), packets:3, bytes:378, used:0.0s, actions:tnl_pop(gre_sys)
+tunnel(src=20.0.0.2,dst=20.0.0.1,flags(-df-csum)),recirc_id(0),in_port(gre_sys),packet_type(ns=1,id=0x8847),mpls(label=999/0x0,tc=0/0,ttl=64/0x0,bos=1/1), packets:3, bytes:264, used:0.0s, actions:push_eth(src=00:00:00:00:00:00,dst=00:00:00:00:00:00),pop_mpls(eth_type=0x800),recirc(0x1)
+tunnel(src=20.0.0.2,dst=20.0.0.1,flags(-df-csum)),recirc_id(0x1),in_port(gre_sys),packet_type(ns=0,id=0),eth(dst=00:00:00:00:00:00),eth_type(0x0800),ipv4(ttl=64,frag=no), packets:3, bytes:294, used:0.0s, actions:set(ipv4(ttl=63)),int-br
+])
+
+ovs-appctl time/warp 1000
+
+AT_CHECK([
+    ovs-ofctl dump-ports int-br
+], [0], [OFPST_PORT reply (xid=0x2): 2 ports
+  port LOCAL: rx pkts=0, bytes=0, drop=?, errs=?, frame=?, over=?, crc=?
+           tx pkts=4, bytes=392, drop=?, errs=?, coll=?
+  port  2: rx pkts=4, bytes=352, drop=?, errs=?, frame=?, over=?, crc=?
+           tx pkts=0, bytes=0, drop=?, errs=?, coll=?
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP