[ovs-dev,v3,1/2] ct-dpif, dpif-netlink: Support conntrack flush by ct 5-tuple

Message ID 1512672004-58666-2-git-send-email-yihung.wei@gmail.com
State Accepted
Headers show
Series
  • Support conntrack flush by ct 5-tuple
Related show

Commit Message

Yi-Hung Wei Dec. 7, 2017, 6:40 p.m.
This patch adds support of flushing a conntrack entry specified by the
conntrack 5-tuple, and provides the implementation in dpif-netlink.
The implementation of dpif-netlink in the linux datapath utilizes the
NFNL_SUBSYS_CTNETLINK netlink subsystem to delete a conntrack entry in
nf_conntrack.  Future patches will add support for the userspace and
Windows datapaths.

VMWare-BZ: #1983178
Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com>
---
 lib/ct-dpif.c           | 24 +++++++++---
 lib/ct-dpif.h           |  3 +-
 lib/dpctl.c             |  2 +-
 lib/dpif-netdev.c       |  6 ++-
 lib/dpif-netlink.c      |  7 +++-
 lib/dpif-provider.h     | 16 ++++++--
 lib/netlink-conntrack.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++
 lib/netlink-conntrack.h |  1 +
 ofproto/ofproto-dpif.c  |  2 +-
 tests/ovs-ofctl.at      |  2 +-
 10 files changed, 144 insertions(+), 16 deletions(-)

Patch

diff --git a/lib/ct-dpif.c b/lib/ct-dpif.c
index c79e69e23970..cee4791565fb 100644
--- a/lib/ct-dpif.c
+++ b/lib/ct-dpif.c
@@ -110,20 +110,32 @@  ct_dpif_dump_done(struct ct_dpif_dump_state *dump)
             : EOPNOTSUPP);
 }
 
-/* Flush the entries in the connection tracker used by 'dpif'.
+/* Flush the entries in the connection tracker used by 'dpif'.  The
+ * arguments have the following behavior:
  *
- * If 'zone' is not NULL, flush only the entries in '*zone'. */
+ *   - If both 'zone' and 'tuple' are NULL, flush all the conntrack entries.
+ *   - If 'zone' is not NULL, and 'tuple' is NULL, flush all the conntrack
+ *     entries in '*zone'.
+ *   - If 'tuple' is not NULL, flush the conntrack entry specified by 'tuple'
+ *     in '*zone'. If 'zone' is NULL, use the default zone (zone 0). */
 int
-ct_dpif_flush(struct dpif *dpif, const uint16_t *zone)
+ct_dpif_flush(struct dpif *dpif, const uint16_t *zone,
+              const struct ct_dpif_tuple *tuple)
 {
-    if (zone) {
-        VLOG_DBG("%s: ct_flush: %"PRIu16, dpif_name(dpif), *zone);
+    if (tuple) {
+        struct ds ds = DS_EMPTY_INITIALIZER;
+        ct_dpif_format_tuple(&ds, tuple);
+        VLOG_DBG("%s: ct_flush: %s in zone %d", dpif_name(dpif), ds_cstr(&ds),
+                                                zone ? *zone : 0);
+        ds_destroy(&ds);
+    } else if (zone) {
+        VLOG_DBG("%s: ct_flush: zone %"PRIu16, dpif_name(dpif), *zone);
     } else {
         VLOG_DBG("%s: ct_flush: <all>", dpif_name(dpif));
     }
 
     return (dpif->dpif_class->ct_flush
-            ? dpif->dpif_class->ct_flush(dpif, zone)
+            ? dpif->dpif_class->ct_flush(dpif, zone, tuple)
             : EOPNOTSUPP);
 }
 
diff --git a/lib/ct-dpif.h b/lib/ct-dpif.h
index d5f966175bc0..ef019050c78e 100644
--- a/lib/ct-dpif.h
+++ b/lib/ct-dpif.h
@@ -195,7 +195,8 @@  int ct_dpif_dump_start(struct dpif *, struct ct_dpif_dump_state **,
                        const uint16_t *zone, int *);
 int ct_dpif_dump_next(struct ct_dpif_dump_state *, struct ct_dpif_entry *);
 int ct_dpif_dump_done(struct ct_dpif_dump_state *);
-int ct_dpif_flush(struct dpif *, const uint16_t *zone);
+int ct_dpif_flush(struct dpif *, const uint16_t *zone,
+                  const struct ct_dpif_tuple *);
 void ct_dpif_entry_uninit(struct ct_dpif_entry *);
 void ct_dpif_format_entry(const struct ct_dpif_entry *, struct ds *,
                           bool verbose, bool print_stats);
diff --git a/lib/dpctl.c b/lib/dpctl.c
index 6520788aa428..7fc0e3afab37 100644
--- a/lib/dpctl.c
+++ b/lib/dpctl.c
@@ -1353,7 +1353,7 @@  dpctl_flush_conntrack(int argc, const char *argv[],
         return error;
     }
 
-    error = ct_dpif_flush(dpif, pzone);
+    error = ct_dpif_flush(dpif, pzone, NULL);
 
     dpif_close(dpif);
     return error;
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 0a62630c2712..b1ef9a6a5992 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -5734,10 +5734,14 @@  dpif_netdev_ct_dump_done(struct dpif *dpif OVS_UNUSED,
 }
 
 static int
-dpif_netdev_ct_flush(struct dpif *dpif, const uint16_t *zone)
+dpif_netdev_ct_flush(struct dpif *dpif, const uint16_t *zone,
+                     const struct ct_dpif_tuple *tuple)
 {
     struct dp_netdev *dp = get_dp_netdev(dpif);
 
+    if (tuple) {
+        return EOPNOTSUPP;
+    }
     return conntrack_flush(&dp->conntrack, zone);
 }
 
diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index c265909f8587..3f76128999d1 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -2892,9 +2892,12 @@  dpif_netlink_ct_dump_done(struct dpif *dpif OVS_UNUSED,
 }
 
 static int
-dpif_netlink_ct_flush(struct dpif *dpif OVS_UNUSED, const uint16_t *zone)
+dpif_netlink_ct_flush(struct dpif *dpif OVS_UNUSED, const uint16_t *zone,
+                      const struct ct_dpif_tuple *tuple)
 {
-    if (zone) {
+    if (tuple) {
+        return nl_ct_flush_tuple(tuple, zone ? *zone : 0);
+    } else if (zone) {
         return nl_ct_flush_zone(*zone);
     } else {
         return nl_ct_flush();
diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h
index 1d82a0939aa1..947bf5e31362 100644
--- a/lib/dpif-provider.h
+++ b/lib/dpif-provider.h
@@ -75,6 +75,7 @@  dpif_flow_dump_thread_init(struct dpif_flow_dump_thread *thread,
 
 struct ct_dpif_dump_state;
 struct ct_dpif_entry;
+struct ct_dpif_tuple;
 
 /* Datapath interface class structure, to be defined by each implementation of
  * a datapath interface.
@@ -424,9 +425,18 @@  struct dpif_class {
                         struct ct_dpif_entry *entry);
     int (*ct_dump_done)(struct dpif *, struct ct_dpif_dump_state *state);
 
-    /* Flushes the connection tracking tables. If 'zone' is not NULL,
-     * only deletes connections in '*zone'. */
-    int (*ct_flush)(struct dpif *, const uint16_t *zone);
+    /* Flushes the connection tracking tables.  The arguments have the
+     * following behavior:
+     *
+     *   - If both 'zone' and 'tuple' are NULL, flush all the conntrack
+     *     entries.
+     *   - If 'zone' is not NULL, and 'tuple' is NULL, flush all the
+     *     conntrack entries in '*zone'.
+     *   - If 'tuple' is not NULL, flush the conntrack entry specified by
+     *     'tuple' in '*zone'. If 'zone' is NULL, use the default zone
+     *     (zone 0). */
+    int (*ct_flush)(struct dpif *, const uint16_t *zone,
+                    const struct ct_dpif_tuple *tuple);
 
     /* Meters */
 
diff --git a/lib/netlink-conntrack.c b/lib/netlink-conntrack.c
index 1e1bb2f79d1d..1f0b9121036d 100644
--- a/lib/netlink-conntrack.c
+++ b/lib/netlink-conntrack.c
@@ -18,6 +18,7 @@ 
 
 #include "netlink-conntrack.h"
 
+#include <errno.h>
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/nfnetlink_conntrack.h>
 #include <linux/netfilter/nf_conntrack_common.h>
@@ -111,6 +112,8 @@  static bool nl_ct_parse_header_policy(struct ofpbuf *buf,
 static bool nl_ct_attrs_to_ct_dpif_entry(struct ct_dpif_entry *entry,
         struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)],
         uint8_t nfgen_family);
+static bool nl_ct_put_ct_tuple(struct ofpbuf *buf,
+        const struct ct_dpif_tuple *tuple, enum ctattr_type type);
 
 struct nl_ct_dump_state {
     struct nl_dump dump;
@@ -239,6 +242,27 @@  nl_ct_flush(void)
     return err;
 }
 
+int
+nl_ct_flush_tuple(const struct ct_dpif_tuple *tuple, uint16_t zone)
+{
+    int err;
+    struct ofpbuf buf;
+
+    ofpbuf_init(&buf, NL_DUMP_BUFSIZE);
+    nl_msg_put_nfgenmsg(&buf, 0, tuple->l3_type, NFNL_SUBSYS_CTNETLINK,
+                        IPCTNL_MSG_CT_DELETE, NLM_F_REQUEST);
+
+    nl_msg_put_be16(&buf, CTA_ZONE, htons(zone));
+    if (!nl_ct_put_ct_tuple(&buf, tuple, CTA_TUPLE_ORIG)) {
+        err = EOPNOTSUPP;
+        goto out;
+    }
+    err = nl_transact(NETLINK_NETFILTER, &buf, NULL);
+out:
+    ofpbuf_uninit(&buf);
+    return err;
+}
+
 #ifdef _WIN32
 int
 nl_ct_flush_zone(uint16_t flush_zone)
@@ -517,6 +541,79 @@  nl_ct_parse_tuple(struct nlattr *nla, struct ct_dpif_tuple *tuple,
     return parsed;
 }
 
+static bool
+nl_ct_put_tuple_ip(struct ofpbuf *buf, const struct ct_dpif_tuple *tuple)
+{
+    size_t offset = nl_msg_start_nested(buf, CTA_TUPLE_IP);
+
+    if (tuple->l3_type == AF_INET) {
+        nl_msg_put_be32(buf, CTA_IP_V4_SRC, tuple->src.ip);
+        nl_msg_put_be32(buf, CTA_IP_V4_DST, tuple->dst.ip);
+    } else if (tuple->l3_type == AF_INET6) {
+        nl_msg_put_in6_addr(buf, CTA_IP_V6_SRC, &tuple->src.in6);
+        nl_msg_put_in6_addr(buf, CTA_IP_V6_DST, &tuple->dst.in6);
+    } else {
+        VLOG_WARN_RL(&rl, "Unsupported IP protocol: %"PRIu16".",
+                     tuple->l3_type);
+        return false;
+    }
+
+    nl_msg_end_nested(buf, offset);
+    return true;
+}
+
+static bool
+nl_ct_put_tuple_proto(struct ofpbuf *buf, const struct ct_dpif_tuple *tuple)
+{
+    size_t offset = nl_msg_start_nested(buf, CTA_TUPLE_PROTO);
+
+    nl_msg_put_u8(buf, CTA_PROTO_NUM, tuple->ip_proto);
+
+    if (tuple->l3_type == AF_INET && tuple->ip_proto == IPPROTO_ICMP) {
+        nl_msg_put_be16(buf, CTA_PROTO_ICMP_ID, tuple->icmp_id);
+        nl_msg_put_u8(buf, CTA_PROTO_ICMP_TYPE, tuple->icmp_type);
+        nl_msg_put_u8(buf, CTA_PROTO_ICMP_CODE, tuple->icmp_code);
+    } else if (tuple->l3_type == AF_INET6 &&
+               tuple->ip_proto == IPPROTO_ICMPV6) {
+        nl_msg_put_be16(buf, CTA_PROTO_ICMPV6_ID, tuple->icmp_id);
+        nl_msg_put_u8(buf, CTA_PROTO_ICMPV6_TYPE, tuple->icmp_type);
+        nl_msg_put_u8(buf, CTA_PROTO_ICMPV6_CODE, tuple->icmp_code);
+    } else if (tuple->ip_proto == IPPROTO_TCP ||
+               tuple->ip_proto == IPPROTO_UDP) {
+        nl_msg_put_be16(buf, CTA_PROTO_SRC_PORT, tuple->src_port);
+        nl_msg_put_be16(buf, CTA_PROTO_DST_PORT, tuple->dst_port);
+    } else {
+        VLOG_WARN_RL(&rl, "Unsupported L4 protocol: %"PRIu8".",
+                     tuple->ip_proto);
+        return false;
+    }
+
+    nl_msg_end_nested(buf, offset);
+    return true;
+}
+
+static bool
+nl_ct_put_ct_tuple(struct ofpbuf *buf, const struct ct_dpif_tuple *tuple,
+                   enum ctattr_type type)
+{
+    if (type != CTA_TUPLE_ORIG && type != CTA_TUPLE_REPLY &&
+        type != CTA_TUPLE_MASTER) {
+        return false;
+    }
+
+    size_t offset = nl_msg_start_nested(buf, type);
+
+    if (!nl_ct_put_tuple_ip(buf, tuple)) {
+        return false;
+    }
+    if (!nl_ct_put_tuple_proto(buf, tuple)) {
+        return false;
+    }
+
+    nl_msg_end_nested(buf, offset);
+    return true;
+}
+
 /* Translate netlink TCP state to CT_DPIF_TCP state. */
 static uint8_t
 nl_ct_tcp_state_to_dpif(uint8_t state)
diff --git a/lib/netlink-conntrack.h b/lib/netlink-conntrack.h
index a95aa69a4cde..8b536fd65ba8 100644
--- a/lib/netlink-conntrack.h
+++ b/lib/netlink-conntrack.h
@@ -42,6 +42,7 @@  int nl_ct_dump_done(struct nl_ct_dump_state *);
 
 int nl_ct_flush(void);
 int nl_ct_flush_zone(uint16_t zone);
+int nl_ct_flush_tuple(const struct ct_dpif_tuple *, uint16_t zone);
 
 bool nl_ct_parse_entry(struct ofpbuf *, struct ct_dpif_entry *,
                        enum nl_ct_event_type *);
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index b99f04fad300..43b7b89f26e4 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -4850,7 +4850,7 @@  ct_flush(const struct ofproto *ofproto_, const uint16_t *zone)
 {
     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
 
-    ct_dpif_flush(ofproto->backer->dpif, zone);
+    ct_dpif_flush(ofproto->backer->dpif, zone, NULL);
 }
 
 static bool
diff --git a/tests/ovs-ofctl.at b/tests/ovs-ofctl.at
index 28bca8b9b781..f8d43adccd33 100644
--- a/tests/ovs-ofctl.at
+++ b/tests/ovs-ofctl.at
@@ -3167,7 +3167,7 @@  AT_CHECK([ovs-appctl vlog/set ct_dpif:dbg])
 AT_CHECK([ovs-ofctl ct-flush-zone br0 123])
 
 OVS_WAIT_UNTIL([grep -q "|ct_dpif|DBG|.*ct_flush:" ovs-vswitchd.log])
-AT_CHECK([grep -q "ct_dpif|DBG|.*ct_flush: 123" ovs-vswitchd.log])
+AT_CHECK([grep -q "ct_dpif|DBG|.*ct_flush: zone 123" ovs-vswitchd.log])
 
 OVS_VSWITCHD_STOP
 AT_CLEANUP