[ovs-dev,v3,2/2] dpctl: Support flush conntrack by conntrack 5-tuple

Message ID 1512672004-58666-3-git-send-email-yihung.wei@gmail.com
State Accepted
Headers show
Series
  • Support conntrack flush by ct 5-tuple
Related show

Commit Message

Yi-Hung Wei Dec. 7, 2017, 6:40 p.m.
With this patch, "flush-conntrack" in ovs-dpctl and ovs-appctl accepts
a conntrack 5-tuple to delete the conntrack entry specified by the 5-tuple.
For example, a user can use the following commands to flush a conntrack entry
in zone 5.

$ ovs-dpctl flush-conntrack zone=5 \
    'ct_nw_src=10.1.1.2,ct_nw_dst=10.1.1.1,ct_nw_proto=17,ct_tp_src=2,ct_tp_dst=1'
$ ovs-appctl dpctl/flush-conntrack zone=5 \
    'ct_nw_src=10.1.1.2,ct_nw_dst=10.1.1.1,ct_nw_proto=17,ct_tp_src=2,ct_tp_dst=1'

VMWare-BZ: #1983178
Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com>
---
 NEWS                             |   2 +
 lib/ct-dpif.c                    | 108 +++++++++++++++++++++++++++++++++++++++
 lib/ct-dpif.h                    |   1 +
 lib/dpctl.c                      |  76 +++++++++++++++++++++------
 lib/dpctl.man                    |  20 ++++++--
 tests/system-kmod-macros.at      |   8 +++
 tests/system-traffic.at          |  65 +++++++++++++++++++++++
 tests/system-userspace-macros.at |  10 ++++
 utilities/ovs-dpctl.c            |   4 +-
 9 files changed, 272 insertions(+), 22 deletions(-)

Patch

diff --git a/NEWS b/NEWS
index 427c8f83d8b2..188a0757a797 100644
--- a/NEWS
+++ b/NEWS
@@ -13,6 +13,8 @@  Post-v2.8.0
      * ovn-ctl: New commands run_nb_ovsdb and run_sb_ovsdb.
    - Linux kernel 4.13
      * Add support for compiling OVS with the latest Linux 4.13 kernel
+   - "flush-conntrack" in ovs-dpctl and ovs-appctl now accepts a 5-tuple to
+     delete a specific connection tracking entry.
 
 v2.8.0 - 31 Aug 2017
 --------------------
diff --git a/lib/ct-dpif.c b/lib/ct-dpif.c
index cee4791565fb..239c848d3735 100644
--- a/lib/ct-dpif.c
+++ b/lib/ct-dpif.c
@@ -20,6 +20,7 @@ 
 #include <errno.h>
 
 #include "ct-dpif.h"
+#include "openvswitch/ofp-parse.h"
 #include "openvswitch/vlog.h"
 
 VLOG_DEFINE_THIS_MODULE(ct_dpif);
@@ -435,3 +436,110 @@  ct_dpif_format_tcp_stat(struct ds * ds, int tcp_state, int conn_per_state)
     ds_put_cstr(ds, "]");
     ds_put_format(ds, "=%u", conn_per_state);
 }
+
+/* Parses 's', a conntrack 5-tuple given as comma-separated key=value pairs
+ * (e.g. "ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=17,..."), into
+ * 'tuple'.  ICMP keys are accepted only after an ICMP 'ct_nw_proto'.
+ * Returns true on success.  Otherwise, returns false and puts the error
+ * message in 'ds'. */
+bool
+ct_dpif_parse_tuple(struct ct_dpif_tuple *tuple, const char *s, struct ds *ds)
+{
+    char *pos, *key, *value, *copy;
+    memset(tuple, 0, sizeof *tuple);
+
+    pos = copy = xstrdup(s);
+    while (ofputil_parse_key_value(&pos, &key, &value)) {
+        if (!*value) {
+            ds_put_format(ds, "field %s missing value", key);
+            goto error;
+        }
+
+        if (!strcmp(key, "ct_nw_src") || !strcmp(key, "ct_nw_dst")) {
+            if (tuple->l3_type && tuple->l3_type != AF_INET) {
+                ds_put_cstr(ds, "L3 type set multiple times");
+                goto error;
+            }
+            tuple->l3_type = AF_INET;
+            if (!ip_parse(value, key[6] == 's' ? &tuple->src.ip :
+                                                 &tuple->dst.ip)) {
+                goto error_with_msg;
+            }
+        } else if (!strcmp(key, "ct_ipv6_src") ||
+                   !strcmp(key, "ct_ipv6_dst")) {
+            if (tuple->l3_type && tuple->l3_type != AF_INET6) {
+                ds_put_cstr(ds, "L3 type set multiple times");
+                goto error;
+            }
+            tuple->l3_type = AF_INET6;
+            if (!ipv6_parse(value, key[8] == 's' ? &tuple->src.in6 :
+                                                   &tuple->dst.in6)) {
+                goto error_with_msg;
+            }
+        } else if (!strcmp(key, "ct_nw_proto")) {
+            char *err = str_to_u8(value, key, &tuple->ip_proto);
+            if (err) {
+                free(err);
+                goto error_with_msg;
+            }
+        } else if (!strcmp(key, "ct_tp_src") || !strcmp(key, "ct_tp_dst")) {
+            uint16_t port;
+            char *err = str_to_u16(value, key, &port);
+            if (err) {
+                free(err);
+                goto error_with_msg;
+            }
+            if (key[6] == 's') {
+                tuple->src_port = htons(port);
+            } else {
+                tuple->dst_port = htons(port);
+            }
+        } else if (!strcmp(key, "icmp_type") || !strcmp(key, "icmp_code") ||
+                   !strcmp(key, "icmp_id")) {
+            if (tuple->ip_proto != IPPROTO_ICMP &&
+                tuple->ip_proto != IPPROTO_ICMPV6) {
+                ds_put_cstr(ds, "invalid L4 fields");
+                goto error;
+            }
+            uint16_t icmp_id = 0; /* Defined even if parsing below fails. */
+            char *err;
+            if (key[5] == 't') {
+                err = str_to_u8(value, key, &tuple->icmp_type);
+            } else if (key[5] == 'c') {
+                err = str_to_u8(value, key, &tuple->icmp_code);
+            } else {
+                err = str_to_u16(value, key, &icmp_id);
+                tuple->icmp_id = htons(icmp_id);
+            }
+            if (err) {
+                free(err);
+                goto error_with_msg;
+            }
+        } else {
+            ds_put_format(ds, "invalid conntrack tuple field: %s", key);
+            goto error;
+        }
+    }
+
+    /* Both addresses and the protocol are mandatory.  Ports are mandatory as
+     * well, except for ICMP: icmp_type, icmp_code, and icmp_id can be 0, so
+     * their presence cannot be checked. */
+    if (ipv6_is_zero(&tuple->src.in6) || ipv6_is_zero(&tuple->dst.in6) ||
+        !tuple->ip_proto ||
+        (tuple->ip_proto != IPPROTO_ICMP &&
+         tuple->ip_proto != IPPROTO_ICMPV6 &&
+         (!tuple->src_port || !tuple->dst_port))) {
+        ds_put_cstr(ds, "at least one of the conntrack 5-tuple fields "
+                        "is missing.");
+        goto error;
+    }
+
+    free(copy);
+    return true;
+
+error_with_msg:
+    ds_put_format(ds, "failed to parse field %s", key);
+error:
+    free(copy);
+    return false;
+}
diff --git a/lib/ct-dpif.h b/lib/ct-dpif.h
index ef019050c78e..5e2de53834e8 100644
--- a/lib/ct-dpif.h
+++ b/lib/ct-dpif.h
@@ -203,5 +203,6 @@  void ct_dpif_format_entry(const struct ct_dpif_entry *, struct ds *,
 void ct_dpif_format_tuple(struct ds *, const struct ct_dpif_tuple *);
 uint8_t ct_dpif_coalesce_tcp_state(uint8_t state);
 void ct_dpif_format_tcp_stat(struct ds *, int, int);
+bool ct_dpif_parse_tuple(struct ct_dpif_tuple *, const char *s, struct ds *);
 
 #endif /* CT_DPIF_H */
diff --git a/lib/dpctl.c b/lib/dpctl.c
index 7fc0e3afab37..867b42105130 100644
--- a/lib/dpctl.c
+++ b/lib/dpctl.c
@@ -1331,30 +1331,73 @@  dpctl_flush_conntrack(int argc, const char *argv[],
                       struct dpctl_params *dpctl_p)
 {
     struct dpif *dpif;
+    struct ct_dpif_tuple tuple, *ptuple = NULL;
+    struct ds ds = DS_EMPTY_INITIALIZER;
     uint16_t zone, *pzone = NULL;
     char *name;
-    int error;
+    int error, i = 1;
+    bool got_dpif = false;
+
+    /* Parse datapath name. It is not a mandatory parameter for this command.
+     * If it is not specified, we retrieve it from the current setup,
+     * assuming only one exists. */
+    if (argc >= 2) {
+        error = parsed_dpif_open(argv[i], false, &dpif);
+        if (!error) {
+            got_dpif = true;
+            i++;
+        } else if (argc == 4) {
+            dpctl_error(dpctl_p, error, "invalid datapath");
+            return error;
+        }
+    }
+    if (!got_dpif) {
+        name = get_one_dp(dpctl_p);
+        if (!name) {
+            return EINVAL;
+        }
+        error = parsed_dpif_open(name, false, &dpif);
+        free(name);
+        if (error) {
+            dpctl_error(dpctl_p, error, "opening datapath");
+            return error;
+        }
+    }
 
-    if (argc > 1 && ovs_scan(argv[argc - 1], "zone=%"SCNu16, &zone)) {
+    /* Parse zone */
+    if (argc > i && ovs_scan(argv[i], "zone=%"SCNu16, &zone)) {
         pzone = &zone;
-        argc--;
+        i++;
     }
-    /* The datapath name is not a mandatory parameter for this command.
-     * If it is not specified - so argc < 2 - we retrieve it from the
-     * current setup, assuming only one exists. */
-    name = (argc == 2) ? xstrdup(argv[1]) : get_one_dp(dpctl_p);
-    if (!name) {
-        return EINVAL;
+    /* Report error if there is more than one unparsed argument. */
+    if (argc - i > 1) {
+        ds_put_cstr(&ds, "invalid zone");
+        error = EINVAL;
+        goto error;
     }
-    error = parsed_dpif_open(name, false, &dpif);
-    free(name);
-    if (error) {
-        dpctl_error(dpctl_p, error, "opening datapath");
-        return error;
+
+    /* Parse ct tuple */
+    if (argc > i && ct_dpif_parse_tuple(&tuple, argv[i], &ds)) {
+        ptuple = &tuple;
+        i++;
+    }
+    /* Report error if there is an unparsed argument. */
+    if (argc - i) {
+        error = EINVAL;
+        goto error;
     }
 
-    error = ct_dpif_flush(dpif, pzone, NULL);
+    error = ct_dpif_flush(dpif, pzone, ptuple);
+    if (!error) {
+        dpif_close(dpif);
+        return 0;
+    } else {
+        ds_put_cstr(&ds, "failed to flush conntrack");
+    }
 
+error:
+    dpctl_error(dpctl_p, error, "%s", ds_cstr(&ds));
+    ds_destroy(&ds);
     dpif_close(dpif);
     return error;
 }
@@ -1902,7 +1945,8 @@  static const struct dpctl_command all_commands[] = {
     { "del-flow", "[dp] flow", 1, 2, dpctl_del_flow, DP_RW },
     { "del-flows", "[dp]", 0, 1, dpctl_del_flows, DP_RW },
     { "dump-conntrack", "[dp] [zone=N]", 0, 2, dpctl_dump_conntrack, DP_RO },
-    { "flush-conntrack", "[dp] [zone=N]", 0, 2, dpctl_flush_conntrack, DP_RW },
+    { "flush-conntrack", "[dp] [zone=N] [ct-tuple]", 0, 3,
+      dpctl_flush_conntrack, DP_RW },
     { "ct-stats-show", "[dp] [zone=N] [verbose]",
       0, 3, dpctl_ct_stats_show, DP_RO },
     { "ct-bkts", "[dp] [gt=N]", 0, 2, dpctl_ct_bkts, DP_RO },
diff --git a/lib/dpctl.man b/lib/dpctl.man
index 675fe5af4914..d7c95ff18317 100644
--- a/lib/dpctl.man
+++ b/lib/dpctl.man
@@ -217,10 +217,22 @@  are included. With \fB\-\-statistics\fR timeouts and timestamps are
 added to the output.
 .
 .TP
-\*(DX\fBflush\-conntrack\fR [\fIdp\fR] [\fBzone=\fIzone\fR]
-Flushes all the connection entries in the tracker used by \fIdp\fR.
-If \fBzone=\fIzone\fR is specified, only flushes the connections in
-\fBzone\fR.
+\*(DX\fBflush\-conntrack\fR [\fIdp\fR] [\fBzone=\fIzone\fR] [\fIct-tuple\fR]
+Flushes the connection entries in the tracker used by \fIdp\fR based on
+\fIzone\fR and connection tracking tuple \fIct-tuple\fR.
+If \fIct-tuple\fR is not provided, flushes all the connection entries.
+If \fBzone\fR=\fIzone\fR is specified, only flushes the connections in
+\fIzone\fR.
+.IP
+If \fIct-tuple\fR is provided, flushes the connection entry specified by
+\fIct-tuple\fR in \fIzone\fR. The zone defaults to 0 if it is not provided.
+An example of an IPv4 ICMP \fIct-tuple\fR:
+.IP
+"ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=1,icmp_type=8,icmp_code=0,icmp_id=10"
+.IP
+An example of an IPv6 TCP \fIct-tuple\fR:
+.IP
+"ct_ipv6_src=fc00::1,ct_ipv6_dst=fc00::2,ct_nw_proto=6,ct_tp_src=1,ct_tp_dst=2"
 .
 .TP
 \*(DX\fBct\-stats\-show\fR [\fIdp\fR] [\fBzone=\fIzone\fR] [\fBverbose\fR]
diff --git a/tests/system-kmod-macros.at b/tests/system-kmod-macros.at
index a7c6808ad1b4..34db21a564ae 100644
--- a/tests/system-kmod-macros.at
+++ b/tests/system-kmod-macros.at
@@ -96,3 +96,11 @@  m4_define([CHECK_CONNTRACK_LOCAL_STACK])
 # always supports NAT, so no check is needed.
 #
 m4_define([CHECK_CONNTRACK_NAT])
+
+# CHECK_CT_DPIF_FLUSH_BY_CT_TUPLE()
+#
+# Perform requirements checks for running the ovs-dpctl flush-conntrack by
+# conntrack 5-tuple test.  The kernel datapath supports this feature, so
+# no check is needed.  This macro can be removed once both the kernel and
+# userspace datapaths support it.
+m4_define([CHECK_CT_DPIF_FLUSH_BY_CT_TUPLE])
diff --git a/tests/system-traffic.at b/tests/system-traffic.at
index fd7b6121b04f..56aae69538cf 100644
--- a/tests/system-traffic.at
+++ b/tests/system-traffic.at
@@ -832,6 +832,71 @@  udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.
 OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
+AT_SETUP([conntrack - ct flush by 5-tuple])
+CHECK_CONNTRACK()
+CHECK_CT_DPIF_FLUSH_BY_CT_TUPLE()
+OVS_TRAFFIC_VSWITCHD_START()
+
+ADD_NAMESPACES(at_ns0, at_ns1)
+
+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
+ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
+
+AT_DATA([flows.txt], [dnl
+priority=1,action=drop
+priority=10,arp,action=normal
+priority=100,in_port=1,udp,action=ct(commit),2
+priority=100,in_port=2,udp,action=ct(zone=5,commit),1
+priority=100,in_port=1,icmp,action=ct(commit),2
+priority=100,in_port=2,icmp,action=ct(zone=5,commit),1
+])
+
+AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])
+
+dnl Test UDP from port 1
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"])
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "orig=.src=10\.1\.1\.1,"], [], [dnl
+udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1)
+])
+
+AT_CHECK([ovs-appctl dpctl/flush-conntrack 'ct_nw_src=10.1.1.2,ct_nw_dst=10.1.1.1,ct_nw_proto=17,ct_tp_src=2,ct_tp_dst=1'])
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "orig=.src=10\.1\.1\.1,"], [1], [dnl
+])
+
+dnl Test UDP from port 2
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000 actions=resubmit(,0)"])
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "orig=.src=10\.1\.1\.2,"], [0], [dnl
+udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5
+])
+
+AT_CHECK([ovs-appctl dpctl/flush-conntrack zone=5 'ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=17,ct_tp_src=1,ct_tp_dst=2'])
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2)], [0], [dnl
+])
+
+dnl Test ICMP traffic
+NS_CHECK_EXEC([at_ns1], [ping -q -c 3 -i 0.3 -w 2 10.1.1.1 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "orig=.src=10\.1\.1\.2,"], [0], [stdout])
+AT_CHECK([cat stdout | FORMAT_CT(10.1.1.1)], [0],[dnl
+icmp,orig=(src=10.1.1.2,dst=10.1.1.1,id=<cleared>,type=8,code=0),reply=(src=10.1.1.1,dst=10.1.1.2,id=<cleared>,type=0,code=0),zone=5
+])
+
+ICMP_ID=`cat stdout | cut -d ',' -f4 | cut -d '=' -f2`
+ICMP_TUPLE=ct_nw_src=10.1.1.2,ct_nw_dst=10.1.1.1,ct_nw_proto=1,icmp_id=$ICMP_ID,icmp_type=8,icmp_code=0
+AT_CHECK([ovs-appctl dpctl/flush-conntrack zone=5 $ICMP_TUPLE])
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "orig=.src=10\.1\.1\.2,"], [1], [dnl
+])
+
+OVS_TRAFFIC_VSWITCHD_STOP(["/could not create datapath/d"])
+AT_CLEANUP
+
 AT_SETUP([conntrack - IPv4 ping])
 CHECK_CONNTRACK()
 OVS_TRAFFIC_VSWITCHD_START()
diff --git a/tests/system-userspace-macros.at b/tests/system-userspace-macros.at
index d3d27bb2b8f2..f22061298985 100644
--- a/tests/system-userspace-macros.at
+++ b/tests/system-userspace-macros.at
@@ -99,3 +99,13 @@  m4_define([CHECK_CONNTRACK_LOCAL_STACK],
 # datapath supports NAT.
 #
 m4_define([CHECK_CONNTRACK_NAT])
+
+# CHECK_CT_DPIF_FLUSH_BY_CT_TUPLE()
+#
+# Perform requirements checks for running ovs-dpctl flush-conntrack by
+# conntrack 5-tuple test. The userspace datapath does not support
+# this feature yet.
+m4_define([CHECK_CT_DPIF_FLUSH_BY_CT_TUPLE],
+[
+    AT_SKIP_IF([:])
+])
diff --git a/utilities/ovs-dpctl.c b/utilities/ovs-dpctl.c
index 7b005ace3f4e..ef2daf6fb192 100644
--- a/utilities/ovs-dpctl.c
+++ b/utilities/ovs-dpctl.c
@@ -198,8 +198,8 @@  usage(void *userdata OVS_UNUSED)
            "  del-flows [DP]             delete all flows from DP\n"
            "  dump-conntrack [DP] [zone=ZONE]  " \
                "display conntrack entries for ZONE\n"
-           "  flush-conntrack [DP] [zone=ZONE] " \
-               "delete all conntrack entries in ZONE\n"
+           "  flush-conntrack [DP] [zone=ZONE] [ct-tuple] " \
+               "delete matched conntrack entries in ZONE\n"
            "  ct-stats-show [DP] [zone=ZONE] [verbose] " \
                "CT connections grouped by protocol\n"
            "  ct-bkts [DP] [gt=N] display connections per CT bucket\n"