diff mbox series

[ovs-dev] ofproto-dpif-xlate: Fix tun_metadata match after recirc

Message ID 1597963522-30983-1-git-send-email-yihung.wei@gmail.com
State New
Headers show
Series [ovs-dev] ofproto-dpif-xlate: Fix tun_metadata match after recirc | expand

Commit Message

Yi-Hung Wei Aug. 20, 2020, 10:45 p.m. UTC
Consider the following OpenFlow rules that match on tun_metadata0 after
recirculation.  If we start ICMP flow with tun_metadata0=0x1 follow by
a flow with tun_metadata0=0x3, OVS will incorrectly match the second
flow with the tun_metadata0=0x1 rule.

table=0, in_port=gnv1, icmp, action=ct(table=1)
table=0, in_port=gnv0, icmp  action=ct(table=1)
table=1, in_port=gnv1, icmp, action=output:gnv0
table=1, in_port=gnv0, icmp, ct_state=+trk+new, action=ct(commit),output:gnv1
table=1, in_port=gnv0, icmp, ct_state=+trk+est, tun_metadata0=0x1 action=output:gnv1
table=1, in_port=gnv0, icmp, ct_state=+trk+est, tun_metadata0=0x3 action=output:gnv1

The root cause of this issue is that during recirculation OVS will overwrite
the tun_metadata0 with the one stored in the frozen state.  This will result in
erroneous behavior if tun_metadata0 is wildcarded in the megaflow as shown
below.  Notice that both the second and the third megaflow both carry 0x1
in tun_metadata0, where the expected behavior in the thrid megaflow one
should be 0x3.

recirc_id(0),tunnel(tun_id=0x2,src=172.31.1.1,dst=172.31.1.100,geneve({}{}{}),
flags(-df-csum+key)),in_port(5),eth(),eth_type(0x0800),ipv4(proto=1,frag=no),
packets:213, bytes:20774, used:0.672s, actions:ct,recirc(0x4)

recirc_id(0x4),tunnel(tun_id=0x2,src=172.31.1.1,dst=172.31.1.100,
geneve({class=0x102,type=0x7,len=4,0x1}{class=0,type=0,len=0}),
flags(-df-csum+key)),in_port(5),ct_state(-new+est+trk),eth(),eth_type(0x0800),
ipv4(proto=1,tos=0/0x3,frag=no), packets:112, bytes:10976, used:0.672s,
actions:set(tunnel(tun_id=0x2,dst=172.31.1.2,ttl=64,tp_src=64725,tp_dst=6081,
geneve({class=0x102,type=0x7,len=4,0x1}),flags(df|key))),5

recirc_id(0x4),tunnel(tun_id=0x2,src=172.31.1.1,dst=172.31.1.100,
geneve({class=0x102,type=0x7,len=4,0x3}{class=0,type=0,len=0}),
flags(-df-csum+key)),in_port(5),ct_state(-new+est+trk),eth(),eth_type(0x0800),
ipv4(proto=1,tos=0/0x3,frag=no), packets:98, bytes:9506, used:0.992s,
actions:set(tunnel(tun_id=0x2,dst=172.31.1.2,ttl=64,tp_src=64725,tp_dst=6081,
geneve({class=0x102,type=0x7,len=4,0x1}),flags(df|key))),5

There seems to be two ways to fix this issue, one is to unwildcarded all the
tunnel metadata when OVS generates megaflow with recirculation.  The other
one is to add a boolean flag in the frozen state, set the flag if the flow
is from a tunnel port, and ask OVS to honor the tunnel metadata from the flow
instead of from the frozen state.  Since the first approach will increase the
number of megaflows and may incur performance impact, this patch takes the
second approach.  With this patch, the megaflows become as below:

recirc_id(0),tunnel(tun_id=0x2,src=172.31.1.1,dst=172.31.1.100,
geneve({}{}{}),flags(-df-csum+key)),in_port(5),eth(),eth_type(0x0800),
ipv4(proto=1,frag=no), packets:60, bytes:5850, used:0.004s,
actions:ct,recirc(0x8)

recirc_id(0x8),tunnel(tun_id=0x2,src=172.31.1.1,dst=172.31.1.100,
geneve({class=0x102,type=0x7,len=4,0x1}{class=0,type=0,len=0}),
flags(-df-csum+key)),in_port(5),ct_state(-new+est+trk),eth(),eth_type(0x0800),
ipv4(proto=1,tos=0/0x3,frag=no), packets:29, bytes:2842, used:0.356s,
actions:set(tunnel(tun_id=0x2,dst=172.31.1.2,ttl=64,tp_src=64725,tp_dst=6081,
geneve({class=0x102,type=0x7,len=4,0x1}),flags(df|key))),5

recirc_id(0x8),tunnel(tun_id=0x2,src=172.31.1.1,dst=172.31.1.100,
geneve({class=0x102,type=0x7,len=4,0x3}{class=0,type=0,len=0}),
flags(-df-csum+key)),in_port(5),ct_state(-new+est+trk), eth(),eth_type(0x0800),
ipv4(proto=1,tos=0/0x3,frag=no), packets:28, bytes:2716, used:0.004s,
actions:set(tunnel(tun_id=0x2,dst=172.31.1.2,ttl=64,tp_src=64725,tp_dst=6081,
geneve({class=0x102,type=0x7,len=4,0x3}),flags(df|key))),5

VMWare-BZ: 2617696
Reported-by: Ran Gu <gran@vmware.com>
Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com>
---
https://travis-ci.org/github/YiHungWei/ovs/builds/719445339
---
 ofproto/ofproto-dpif-rid.h    |  8 ++++++--
 ofproto/ofproto-dpif-upcall.c | 16 +++++++++++++++-
 ofproto/ofproto-dpif-xlate.c  | 16 +++++++++++++++-
 tests/tunnel.at               | 33 +++++++++++++++++++++++++++++++++
 4 files changed, 69 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/ofproto/ofproto-dpif-rid.h b/ofproto/ofproto-dpif-rid.h
index 30cd5275f24c..d6df1212cc0f 100644
--- a/ofproto/ofproto-dpif-rid.h
+++ b/ofproto/ofproto-dpif-rid.h
@@ -131,9 +131,12 @@  frozen_metadata_from_flow(struct frozen_metadata *md,
 static inline void
 frozen_metadata_to_flow(struct ofproto *ofproto,
                         const struct frozen_metadata *md,
-                        struct flow *flow)
+                        struct flow *flow,
+                        bool from_tunnel)
 {
-    flow->tunnel = md->tunnel;
+    if (!from_tunnel) {
+        flow->tunnel = md->tunnel;
+    }
     flow->tunnel.metadata.tab = ofproto_get_tun_tab(ofproto);
     flow->metadata = md->metadata;
     memcpy(flow->regs, md->regs, sizeof flow->regs);
@@ -153,6 +156,7 @@  struct frozen_state {
     size_t stack_size;
     mirror_mask_t mirrors;        /* Mirrors already output. */
     bool conntracked;             /* Conntrack occurred prior to freeze. */
+    bool from_tunnel;             /* Flow is from a tunnel port. */
     bool was_mpls;                /* MPLS packet */
     struct uuid xport_uuid;       /* UUID of 1st port packet received on. */
 
diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
index 72a5b4d734f0..da56e2afeb3c 100644
--- a/ofproto/ofproto-dpif-upcall.c
+++ b/ofproto/ofproto-dpif-upcall.c
@@ -1536,8 +1536,22 @@  process_upcall(struct udpif *udpif, struct upcall *upcall,
                 flow_clear_conntrack(&frozen_flow);
             }
 
+            /* Packet-in message expects tunnel metadata in non-udpif
+             * format. */
+            if (flow->tunnel.flags & FLOW_TNL_F_UDPIF) {
+                const struct tun_table *tun_tab =
+                    ofproto_get_tun_tab(&upcall->ofproto->up);
+                int err =
+                    tun_metadata_from_geneve_udpif(tun_tab, &flow->tunnel,
+                                                   &flow->tunnel,
+                                                   &frozen_flow.tunnel);
+                if (err) {
+                    return err;
+                }
+            }
+
             frozen_metadata_to_flow(&upcall->ofproto->up, &state->metadata,
-                                    &frozen_flow);
+                                    &frozen_flow, state->from_tunnel);
             flow_get_metadata(&frozen_flow, &am->pin.up.base.flow_metadata);
 
             ofproto_dpif_send_async_msg(upcall->ofproto, am);
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index e0ede2cab3ad..97c17d4f95c0 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -396,6 +396,11 @@  struct xlate_ctx {
      * state from the datapath should be honored after thawing. */
     bool conntracked;
 
+    /* True if a packet is form a tunnel port.  This is used to determine
+     * whether tunnel information from the datapath should be honored after
+     * thawing. */
+    bool from_tunnel;
+
     /* Pointer to an embedded NAT action in a conntrack action, or NULL. */
     struct ofpact_nat *ct_nat_action;
 
@@ -4858,6 +4863,7 @@  xlate_controller_action(struct xlate_ctx *ctx, int len,
         .stack_size = ctx->stack.size,
         .mirrors = ctx->mirrors,
         .conntracked = ctx->conntracked,
+        .from_tunnel = ctx->from_tunnel,
         .was_mpls = ctx->was_mpls,
         .ofpacts = NULL,
         .ofpacts_len = 0,
@@ -4933,6 +4939,7 @@  finish_freezing__(struct xlate_ctx *ctx, uint8_t table)
         .stack_size = ctx->stack.size,
         .mirrors = ctx->mirrors,
         .conntracked = ctx->conntracked,
+        .from_tunnel = ctx->from_tunnel,
         .was_mpls = ctx->was_mpls,
         .xport_uuid = ctx->xin->xport_uuid,
         .ofpacts = ctx->frozen_actions.data,
@@ -7513,6 +7520,7 @@  xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
 
         .was_mpls = false,
         .conntracked = false,
+        .from_tunnel = false,
 
         .ct_nat_action = NULL,
 
@@ -7580,7 +7588,8 @@  xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
         /* Restore pipeline metadata. May change flow's in_port and other
          * metadata to the values that existed when freezing was triggered. */
         frozen_metadata_to_flow(&ctx.xbridge->ofproto->up,
-                                &state->metadata, flow);
+                                &state->metadata, flow,
+                                state->from_tunnel);
 
         /* Restore stack, if any. */
         if (state->stack) {
@@ -7649,6 +7658,10 @@  xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
         ctx.xin->xport_uuid = in_port->uuid;
     }
 
+    if (in_port && in_port->is_tunnel) {
+        ctx.from_tunnel = true;
+    }
+
     if (flow->packet_type != htonl(PT_ETH) && in_port &&
         in_port->pt_mode == NETDEV_PT_LEGACY_L3 && ctx.table_id == 0) {
         /* Add dummy Ethernet header to non-L2 packet if it's coming from a
@@ -7917,6 +7930,7 @@  xlate_resume(struct ofproto_dpif *ofproto,
         .mirrors = pin->mirrors,
         .conntracked = pin->conntracked,
         .xport_uuid = UUID_ZERO,
+        .from_tunnel = false,
 
         /* When there are no actions, xlate_actions() will search the flow
          * table.  We don't want it to do that (we want it to resume), so
diff --git a/tests/tunnel.at b/tests/tunnel.at
index e08fd1e048f8..e15ceaf61c56 100644
--- a/tests/tunnel.at
+++ b/tests/tunnel.at
@@ -949,6 +949,39 @@  Datapath actions: set(tunnel(dst=1.1.1.1,ttl=64,tp_dst=6081,geneve({class=0xffff
 OVS_VSWITCHD_STOP
 AT_CLEANUP
 
+AT_SETUP([tunnel - Match tun_metadata after recirc])
+OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=geneve \
+                    options:remote_ip=1.1.1.1 ofport_request=1 \
+                    -- add-port br0 p2 -- set Interface p2 type=dummy \
+                    ofport_request=2 ofport_request=2])
+OVS_VSWITCHD_DISABLE_TUNNEL_PUSH_POP
+
+AT_CHECK([ovs-ofctl add-tlv-map br0 "{class=0xffff,type=0,len=4}->tun_metadata0"])
+
+AT_DATA([flows.txt], [dnl
+table=0,udp,actions=ct(table=1)
+table=1,udp,tun_metadata0=0x11,actions=IN_PORT
+table=1,udp,tun_metadata0=0x22,actions=drop
+])
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+dnl check flow with tun_metadata0=0x11
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'recirc_id(0),tunnel(tun_id=0x0,src=1.1.1.1,dst=1.1.1.2,ttl=64,geneve({class=0xffff,type=0,len=4,0x11}),flags(df|key)),in_port(6081),skb_mark(0),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.1.1,dst=192.168.1.2,proto=17,tos=0,ttl=64,frag=no),udp(src=1234,dst=5678)' -generate], [0], [stdout])
+AT_CHECK([tail -2 stdout], [0],
+  [Megaflow: recirc_id=0x1,eth,udp,tun_id=0,tun_src=1.1.1.1,tun_dst=1.1.1.2,tun_tos=0,tun_flags=+df-csum+key,tun_metadata0=0x11,in_port=1,nw_ecn=0,nw_frag=no
+Datapath actions: set(tunnel(dst=1.1.1.1,ttl=64,tp_dst=6081,geneve({class=0xffff,type=0,len=4,0x11}),flags(df))),6081
+])
+
+dnl check flow with tun_metadata0=0x22 and recirc_id(0x1)
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'recirc_id(0x1),tunnel(tun_id=0x0,src=1.1.1.1,dst=1.1.1.2,ttl=64,geneve({class=0xffff,type=0,len=4,0x22}),flags(df|key)),in_port(6081),skb_mark(0),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.1.1,dst=192.168.1.2,proto=17,tos=0,ttl=64,frag=no),udp(src=1234,dst=5678)'], [0], [stdout])
+AT_CHECK([tail -2 stdout], [0],
+  [Megaflow: recirc_id=0x1,eth,udp,tun_id=0,tun_src=1.1.1.1,tun_dst=1.1.1.2,tun_tos=0,tun_flags=+df-csum+key,tun_metadata0=0x22,in_port=1,nw_frag=no
+Datapath actions: drop
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
 AT_SETUP([tunnel - concomitant IPv6 and IPv4 tunnels])
 OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=vxlan \
                     options:remote_ip=1.1.1.1 ofport_request=1 \