[ovs-dev,v2,1/2] xlate: rollback to valid known state in case of ctx->error on translation

Message ID 1512557728-31050-2-git-send-email-zoltan.balogh@ericsson.com
State New
Headers show
Series
  • xlate: optimize dp flow action in case of error in multi-bridge setup
Related show

Commit Message

Zoltan Balogh Dec. 6, 2017, 10:55 a.m.
If several bridges are connected via patch ports or the packet is sent
via tunnel and an error occurs during translation in a subsequent bridge,
e.g. Ethernet header cannot be pushed to the packet due to lack of
prerequisites in the second bridge and there were flow actions already
translated in first bridge, it can happen that the installed datapath
flow contains actions which are going to be performed for all received
packets, however the packets are going to be dropped in the end. This
is waste of processing resource.

Let's have a config like this and inject L2 Ethernet packets into br0
via LOCAL port:

   ->-+
      | LOCAL                  LOCAL                               LOCAL
    +-o-------+       +-------o-+       +---------+       +-------o-+
    |   br0   |       |   br1   |       |   br2   |       |   br3   |
    +-------o-+       +-o-----o-+       +-o-----o-+       +-o-------+
        p01 |       p10 |     | p12   p21 |     | p23   p32 |
            +-----------+     +-----------+     +-----------+

netdev@ovs-netdev: hit:17754528 missed:57
        br0:
                br0 65534/1: (tap)
                p01 10/none: (patch: peer=p10)
        br1:
                br1 65534/2: (tap)
                p10 20/none: (patch: peer=p01)
                p12 30/none: (patch: peer=p21)
        br2:
                br2 65534/3: (tap)
                p21 40/none: (patch: peer=p12)
                p23 50/none: (patch: peer=p32)
        br3:
                br3 65534/4: (tap)
                p32 60/none: (patch: peer=p23)

- OpenFlow rules:

"br0"
OFPST_FLOW reply (OF1.3) (xid=0x2):
cookie=0x0, duration=15.601s, table=0, n_packets=20739, n_bytes=2032422,
reset_counts in_port=LOCAL actions=dec_ttl,output:10

"br1"
OFPST_FLOW reply (OF1.3) (xid=0x2):
cookie=0x0, duration=15.568s, table=0, n_packets=20739, n_bytes=2032422,
reset_counts ip,in_port=20
actions=dec_ttl,dec_ttl,dec_ttl,dec_ttl,LOCAL,
set_field:ee:ee:ee:ff:ff:01->eth_dst,output:30

"br2"
OFPST_FLOW reply (OF1.3) (xid=0x2):
cookie=0x0, duration=15.536s, table=0, n_packets=20739, n_bytes=2032422,
ip,in_port=40
actions=dec_ttl,dec_ttl,dec_ttl,dec_ttl,dec_ttl,
set_field:aa:aa:aa:bb:bb:ff->eth_src,encap(ethernet),dec_ttl,output:50

"br3"
OFPST_FLOW reply (OF1.3) (xid=0x2):
cookie=0x0, duration=15.502s, table=0, n_packets=0, n_bytes=0,
reset_counts in_port=60 actions=LOCAL

- Installed datapath flow:

flow-dump from non-dpdk interfaces:
recirc_id(0),in_port(1),packet_type(ns=0,id=0),
eth(src=aa:dd:dd:ee:ee:f9,dst=aa:aa:aa:bb:bb:00),eth_type(0x0800),
ipv4(ttl=255,frag=no), packets:20738, bytes:2032324, used:5.176s,
actions:set(ipv4(ttl=250)),2,
set(eth(src=aa:aa:aa:bb:bb:ff,dst=ee:ee:ee:ff:ff:01)),set(ipv4(ttl=245))

In this case, encap(ethernet) action cannot be performed, because of the
packet is L2 Ethernet packet. By looking at the datapath flow, you can
see, that setting MAC addresses and TTL at the end is actually not needed,
since the packet is not transmitted anywhere after these actions.

There are two points in the code where we can fix such datapath flows.
1) During translation of OF actions on a bridge, we can store the last
valid state of translated actions while iterating over the OF actions
and revert to it in case of error. This can be performed in the
do_xlate_actions() funtion.
2) When all OF actions have been translated, there could be actions at
the end of list of odp actions which are not needed to be executed.
So, the list can be normalized at the end of xlate_actions().

This commit implements 1). An upcomming commit is going to implement 2).

Signed-off-by: Zoltan Balogh <zoltan.balogh@ericsson.com>
Signed-off-by: Sugesh Chandran <sugesh.chandran@intel.com>
Co-authored-by: Sugesh Chandran <sugesh.chandran@intel.com>
CC: Ben Pfaff <blp@ovn.org>
CC: Jan Scheurich <jan.scheurich@ericsson.com>
---
 ofproto/ofproto-dpif-xlate.c | 137 +++++++++++++++++++++++++++++++++
 tests/ofproto-dpif.at        | 179 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 316 insertions(+)

Patch

diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index fcced344e..36d0a0e1f 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -173,6 +173,8 @@  struct xport {
     struct lldp *lldp;               /* LLDP handle or null. */
 };
 
+struct xlate_backup_data;
+
 struct xlate_ctx {
     struct xlate_in *xin;
     struct xlate_out *xout;
@@ -390,6 +392,10 @@  struct xlate_ctx {
     struct ofpbuf action_set;   /* Action set. */
 
     enum xlate_error error;     /* Translation failed. */
+
+    struct xlate_backup_data *xlate_backup_data;   /* To restore flow data if
+                                                      something goes wrong
+                                                      during xlate. */
 };
 
 /* Structure to track VLAN manipulation */
@@ -6150,6 +6156,118 @@  xlate_ofpact_unroll_xlate(struct xlate_ctx *ctx,
                  "cookie=%#"PRIx64, a->rule_table_id, a->rule_cookie);
 }
 
+struct xlate_backup_data {
+    size_t odp_actions_size;
+    struct flow wc_masks;
+};
+
+static void
+store_ctx_xlate_data(struct xlate_ctx *ctx)
+{
+    uint64_t *wc = (uint64_t *) &ctx->wc->masks;
+    uint64_t *backup_wc = (uint64_t *) &ctx->xlate_backup_data->wc_masks;
+    int i;
+
+    ctx->xlate_backup_data->odp_actions_size = ctx->odp_actions->size;
+    for (i = 0; i < FLOW_U64S; i++) {
+        if (wc[i]) {
+            backup_wc[i] |= wc[i];
+        }
+    }
+}
+
+static void
+restore_ctx_xlate_data(struct xlate_ctx *ctx)
+{
+    uint64_t *flow = (uint64_t *) &ctx->xin->flow;
+    uint64_t *base_flow = (uint64_t *) &ctx->base_flow;
+    uint64_t *wc = (uint64_t *) &ctx->wc->masks;
+    uint64_t *backup_wc = (uint64_t *) &ctx->xlate_backup_data->wc_masks;
+    int i;
+
+    ctx->odp_actions->size = ctx->xlate_backup_data->odp_actions_size;
+    for (i = 0; i < FLOW_U64S; i++) {
+        if (wc[i]) {
+            wc[i] &= backup_wc[i];
+            flow[i] = base_flow[i];
+        }
+    }
+}
+
+static bool
+ofpact_validates_dp_actions(const struct ofpact *a)
+{
+    switch (a->type) {
+    /* Output. */
+    case OFPACT_OUTPUT:
+    case OFPACT_GROUP:
+    case OFPACT_CONTROLLER:
+    case OFPACT_ENQUEUE:
+    case OFPACT_OUTPUT_REG:
+    case OFPACT_BUNDLE:
+    /* Metadata. */
+    case OFPACT_SET_TUNNEL:
+    case OFPACT_SET_QUEUE:
+    case OFPACT_POP_QUEUE:
+    case OFPACT_FIN_TIMEOUT:
+    /* Flow table interaction. */
+    case OFPACT_RESUBMIT:
+    case OFPACT_LEARN:
+    case OFPACT_CONJUNCTION:
+    /* Arithmetic. */
+    case OFPACT_MULTIPATH:
+    /* Other. */
+    case OFPACT_NOTE:
+    case OFPACT_EXIT:
+    case OFPACT_SAMPLE:
+    case OFPACT_UNROLL_XLATE:
+    case OFPACT_CT:
+    case OFPACT_CT_CLEAR:
+    case OFPACT_NAT:
+    case OFPACT_OUTPUT_TRUNC:
+    case OFPACT_CLONE:
+    /* Instructions. */
+    case OFPACT_METER:
+    case OFPACT_CLEAR_ACTIONS:
+    case OFPACT_WRITE_ACTIONS:
+    case OFPACT_WRITE_METADATA:
+    case OFPACT_GOTO_TABLE:
+        return true;
+    /* Header changes. */
+    case OFPACT_SET_FIELD:
+    case OFPACT_SET_VLAN_VID:
+    case OFPACT_SET_VLAN_PCP:
+    case OFPACT_STRIP_VLAN:
+    case OFPACT_PUSH_VLAN:
+    case OFPACT_SET_ETH_SRC:
+    case OFPACT_SET_ETH_DST:
+    case OFPACT_SET_IPV4_SRC:
+    case OFPACT_SET_IPV4_DST:
+    case OFPACT_SET_IP_DSCP:
+    case OFPACT_SET_IP_ECN:
+    case OFPACT_SET_IP_TTL:
+    case OFPACT_SET_L4_SRC_PORT:
+    case OFPACT_SET_L4_DST_PORT:
+    case OFPACT_REG_MOVE:
+    case OFPACT_STACK_PUSH:
+    case OFPACT_STACK_POP:
+    case OFPACT_DEC_TTL:
+    case OFPACT_SET_MPLS_LABEL:
+    case OFPACT_SET_MPLS_TC:
+    case OFPACT_SET_MPLS_TTL:
+    case OFPACT_DEC_MPLS_TTL:
+    case OFPACT_PUSH_MPLS:
+    case OFPACT_POP_MPLS:
+    /* Generic encap & decap */
+    case OFPACT_ENCAP:
+    case OFPACT_DECAP:
+    /* Debugging actions. */
+    case OFPACT_DEBUG_RECIRC:
+    default:
+        return false;
+    }
+}
+
 static void
 do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
                  struct xlate_ctx *ctx, bool is_last_action)
@@ -6168,6 +6286,9 @@  do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
         return;
     }
 
+    /* Initialize xlate_data. */
+    store_ctx_xlate_data(ctx);
+
     OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) {
         struct ofpact_controller *controller;
         const struct ofpact_metadata *metadata;
@@ -6175,8 +6296,11 @@  do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
         const struct mf_field *mf;
         bool last = is_last_action && ofpact_last(a, ofpacts, ofpacts_len)
                     && ctx->action_set.size;
+        size_t old_size;
 
         if (ctx->error) {
+            /* Restore xlate data. */
+            restore_ctx_xlate_data(ctx);
             break;
         }
 
@@ -6199,6 +6323,8 @@  do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
             ds_destroy(&s);
         }
 
+        old_size = ctx->odp_actions->size;
+
         switch (a->type) {
         case OFPACT_OUTPUT:
             xlate_output_action(ctx, ofpact_get_OUTPUT(a)->port,
@@ -6558,6 +6684,12 @@  do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
             break;
         }
 
+        /* Store xlate data. */
+        if (ofpact_validates_dp_actions(a) && !ctx->error
+            && old_size != ctx->odp_actions->size) {
+            store_ctx_xlate_data(ctx);
+        }
+
         /* Check if need to store this and the remaining actions for later
          * execution. */
         if (!ctx->error && ctx->exit && ctx_first_frozen_action(ctx)) {
@@ -6898,6 +7030,9 @@  xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
         .action_set = OFPBUF_STUB_INITIALIZER(action_set_stub),
     };
 
+    /* Allocate memory for xlate backup data. */
+    ctx.xlate_backup_data = xzalloc(sizeof(struct xlate_backup_data));
+
     /* 'base_flow' reflects the packet as it came in, but we need it to reflect
      * the packet as the datapath will treat it for output actions. Our
      * datapath doesn't retain tunneling information without us re-setting
@@ -7221,6 +7356,8 @@  exit:
     ofpbuf_uninit(&scratch_actions);
     ofpbuf_delete(ctx.encap_data);
 
+    free(ctx.xlate_backup_data);
+
     /* Make sure we return a "drop flow" in case of an error. */
     if (ctx.error) {
         xout->slow = 0;
diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
index 9a51a1364..e7df1504a 100644
--- a/tests/ofproto-dpif.at
+++ b/tests/ofproto-dpif.at
@@ -9954,3 +9954,182 @@  AT_CHECK([grep flow_del ovs-vswitchd.log], [1])
 
 OVS_VSWITCHD_STOP
 AT_CLEANUP
+
+AT_SETUP([ofproto-dpif - xlate error - rollback])
+
+#   ->-+
+#      | p1                                                         LOCAL
+#    +-o-------+       +---------+       +---------+       +-------o-+
+#    |   br0   |       |   br1   |       |   br2   |       |   br3   |
+#    +-------o-+       +-o-----o-+       +-o-----o-+       +-o-------+
+#        p01 |       p10 |     | p12   p21 |     | p23   p32 |
+#            +-----------+     +-----------+     +-----------+
+
+OVS_VSWITCHD_START([dnl
+    -- add-br br1 -- set bridge br1 datapath_type=dummy \
+    -- add-br br2 -- set bridge br2 datapath_type=dummy \
+    -- add-br br3 -- set bridge br3 datapath_type=dummy \
+    -- add-port br0 p01 \
+    -- set interface p01 type=patch options:peer=p10 ofport_request=10 \
+    -- add-port br1 p10 \
+    -- set interface p10 type=patch options:peer=p01 ofport_request=20 \
+    -- add-port br1 p12 \
+    -- set interface p12 type=patch options:peer=p21 ofport_request=30 \
+    -- add-port br2 p21 \
+    -- set interface p21 type=patch options:peer=p12 ofport_request=40 \
+    -- add-port br2 p23 \
+    -- set interface p23 type=patch options:peer=p32 ofport_request=50 \
+    -- add-port br3 p32 \
+    -- set interface p32 type=patch options:peer=p23 ofport_request=60
+])
+
+AT_CHECK([
+    ovs-vsctl add-port br0 p1 -- set interface p1 type=dummy ofport_request=1
+])
+
+AT_CHECK([
+    ovs-appctl dpif/show | grep -v hit | sed "s/$(printf \\t)/    /g" | sed 's./[[0-9]]\{1,\}..'
+], [0], [dnl
+    br0:
+        br0 65534: (dummy-internal)
+        p01 10/none: (patch: peer=p10)
+        p1 1: (dummy)
+    br1:
+        br1 65534: (dummy-internal)
+        p10 20/none: (patch: peer=p01)
+        p12 30/none: (patch: peer=p21)
+    br2:
+        br2 65534: (dummy-internal)
+        p21 40/none: (patch: peer=p12)
+        p23 50/none: (patch: peer=p32)
+    br3:
+        br3 65534: (dummy-internal)
+        p32 60/none: (patch: peer=p23)
+])
+
+AT_CHECK([
+    ovs-ofctl del-flows br0
+    ovs-ofctl del-flows br1
+    ovs-ofctl del-flows br2
+    ovs-ofctl del-flows br3
+], [0])
+
+# Error due to pushing too many MPLS labels.
+AT_CHECK([
+    ovs-ofctl add-flow -OOpenFlow13 br0 "in_port=1,ip,actions=dec_ttl,push_mpls:0x8847,output:p01"
+    ovs-ofctl add-flow -OOpenFlow13 br1 "in_port=p10,mpls,actions=mod_dl_src:aa:aa:aa:bb:bb:f0,push_mpls:0x8847,output:p12"
+    ovs-ofctl add-flow -OOpenFlow13 br2 "in_port=p21,mpls,actions=mod_dl_dst:aa:aa:aa:00:00:01,push_mpls:0x8847,output:p23"
+    ovs-ofctl add-flow -OOpenFlow13 br3 "in_port=p32,mpls,actions=push_mpls:0x8847,output:LOCAL"
+], [0])
+
+AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,ip,nw_ttl=255'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+  [Datapath actions: drop
+])
+
+AT_CHECK([
+    ovs-ofctl del-flows br0
+    ovs-ofctl del-flows br1
+    ovs-ofctl del-flows br2
+    ovs-ofctl del-flows br3
+], [0])
+
+# Error due to applying encap(Ethernet) to an Ethernet packet.
+AT_CHECK([
+ovs-ofctl add-flow br0 "in_port=1,actions=dec_ttl,output:p01"
+ovs-ofctl add-flow br1 in_port=p10,actions=p12
+ovs-ofctl add-flow br2 -OOpenFlow13 "in_port=p21,ip,actions=dec_ttl,dec_ttl,dec_ttl,dec_ttl,output:LOCAL,dec_ttl,mod_dl_src:aa:aa:aa:bb:bb:ff,encap(ethernet),dec_ttl,output:p23"
+ovs-ofctl add-flow br3 "in_port=p32,actions=LOCAL"
+], [0])
+
+AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,ip,nw_ttl=255'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+  [Datapath actions: 102
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([ofproto-dpif - xlate error - rollback2])
+
+#   ->-+
+#      | p1                     LOCAL                               LOCAL
+#    +-o-------+       +-------o-+       +---------+       +-------o-+
+#    |   br0   |       |   br1   |       |   br2   |       |   br3   |
+#    +-------o-+       +-o----o-o+       +-o-----o-+       +-o-o-----+
+#        p01 |       p10 | p13| |p12   p21 |     | p23   p32 | | p31
+#            +-----------+    | +----------+     +-----------+ |
+#                             +--------------------------------+
+
+OVS_VSWITCHD_START([dnl
+    -- add-br br1 -- set bridge br1 datapath_type=dummy \
+    -- add-br br2 -- set bridge br2 datapath_type=dummy \
+    -- add-br br3 -- set bridge br3 datapath_type=dummy \
+    -- add-port br0 p01 \
+    -- set interface p01 type=patch options:peer=p10 ofport_request=10 \
+    -- add-port br1 p10 \
+    -- set interface p10 type=patch options:peer=p01 ofport_request=20 \
+    -- add-port br1 p12 \
+    -- set interface p12 type=patch options:peer=p21 ofport_request=30 \
+    -- add-port br2 p21 \
+    -- set interface p21 type=patch options:peer=p12 ofport_request=40 \
+    -- add-port br2 p23 \
+    -- set interface p23 type=patch options:peer=p32 ofport_request=50 \
+    -- add-port br3 p32 \
+    -- set interface p32 type=patch options:peer=p23 ofport_request=60 \
+    -- add-port br1 p13 \
+    -- set interface p13 type=patch options:peer=p31 ofport_request=70 \
+    -- add-port br3 p31 \
+    -- set interface p31 type=patch options:peer=p13 ofport_request=80
+])
+
+AT_CHECK([
+    ovs-vsctl add-port br0 p1 -- set interface p1 type=dummy ofport_request=1
+])
+
+AT_CHECK([
+    ovs-appctl dpif/show | grep -v hit | sed "s/$(printf \\t)/    /g" | sed 's./[[0-9]]\{1,\}..'
+], [0], [dnl
+    br0:
+        br0 65534: (dummy-internal)
+        p01 10/none: (patch: peer=p10)
+        p1 1: (dummy)
+    br1:
+        br1 65534: (dummy-internal)
+        p10 20/none: (patch: peer=p01)
+        p12 30/none: (patch: peer=p21)
+        p13 70/none: (patch: peer=p31)
+    br2:
+        br2 65534: (dummy-internal)
+        p21 40/none: (patch: peer=p12)
+        p23 50/none: (patch: peer=p32)
+    br3:
+        br3 65534: (dummy-internal)
+        p31 80/none: (patch: peer=p13)
+        p32 60/none: (patch: peer=p23)
+])
+
+
+AT_CHECK([
+    ovs-ofctl del-flows br0
+    ovs-ofctl del-flows br1
+    ovs-ofctl del-flows br2
+    ovs-ofctl del-flows br3
+], [0])
+
+# Drop packet due to xlate error on one path (br0-br1-br2-b3) but forward it via another one (br0-br1-br3).
+AT_CHECK([
+    ovs-ofctl add-flow br0 "in_port=1,actions=dec_ttl,output:p01"
+    ovs-ofctl add-flow br1 in_port=p10,ip,actions=dec_ttl,dec_ttl,dec_ttl,dec_ttl,output:LOCAL,mod_dl_dst:ee:ee:ee:ff:ff:01,output:p12,p13
+    ovs-ofctl add-flow br2 -OOpenFlow13 "in_port=p21,ip,actions=dec_ttl,dec_ttl,dec_ttl,dec_ttl,dec_ttl,mod_dl_src:aa:aa:aa:bb:bb:ff,encap(ethernet),dec_ttl,output:p23"
+    ovs-ofctl add-flow br3 "in_port=p32,actions=LOCAL"
+    ovs-ofctl add-flow br3 "in_port=p31,actions=LOCAL"
+], [0])
+
+AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,ip,nw_ttl=255'], [0], [stdout])
+AT_CHECK([grep "Datapath actions" stdout], [0],
+  [Datapath actions: 101,set(eth(dst=ee:ee:ee:ff:ff:01)),103
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP