[ovs-dev,v2,01/11] ct-dpif: New module.
diff mbox

Message ID 1446779562-3837-2-git-send-email-diproiettod@vmware.com
State Deferred
Headers show

Commit Message

Daniele Di Proietto Nov. 6, 2015, 3:12 a.m. UTC
This defines some structures (and their related formatting functions) to
manipulate entries in connection tracking tables.

It will be used by subsequent commits.

Based on original work by Jarno Rajahalme.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>
---
 lib/automake.mk |   2 +
 lib/ct-dpif.c   | 343 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/ct-dpif.h   | 174 ++++++++++++++++++++++++++++
 3 files changed, 519 insertions(+)
 create mode 100644 lib/ct-dpif.c
 create mode 100644 lib/ct-dpif.h

Patch
diff mbox

diff --git a/lib/automake.mk b/lib/automake.mk
index d8c00da..1986a31 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -51,6 +51,8 @@  lib_libopenvswitch_la_SOURCES = \
 	lib/crc32c.h \
 	lib/csum.c \
 	lib/csum.h \
+	lib/ct-dpif.c \
+	lib/ct-dpif.h \
 	lib/daemon.c \
 	lib/daemon.h \
 	lib/daemon-private.h \
diff --git a/lib/ct-dpif.c b/lib/ct-dpif.c
new file mode 100644
index 0000000..db1f831
--- /dev/null
+++ b/lib/ct-dpif.c
@@ -0,0 +1,343 @@ 
+/*
+ * Copyright (c) 2015 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include <errno.h>
+
+#include "ct-dpif.h"
+
+/* Declarations for conntrack entry formatting. */
+struct flags {
+    uint32_t flag;      /* Bit(s) tested against the flags word. */
+    const char *name;   /* Printed when set; a NULL name ends a table. */
+};
+
+static void ct_dpif_format_ipproto(struct ds *, uint16_t ipproto);
+static void ct_dpif_format_counters(struct ds *,
+                                    const struct ct_dpif_counters *);
+static void ct_dpif_format_timestamp(struct ds *,
+                                     const struct ct_dpif_timestamp *);
+static void ct_dpif_format_flags(struct ds *, const char *title,
+                                 uint32_t flags, const struct flags *);
+static void ct_dpif_format_protoinfo(struct ds *, const char *title,
+                                     const struct ct_dpif_protoinfo *,
+                                     bool verbose);
+static void ct_dpif_format_helper(struct ds *, const char *title,
+                                  const struct ct_dpif_helper *);
+
+static const struct flags ct_dpif_status_flags[] = { /* Status bit -> name. */
+#define CT_DPIF_STATUS_FLAG(FLAG) { CT_DPIF_STATUS_##FLAG, #FLAG },
+    CT_DPIF_STATUS_FLAGS
+#undef CT_DPIF_STATUS_FLAG
+    { 0, NULL } /* End marker: ct_dpif_format_flags() stops at a NULL name. */
+};
+
+/* Frees the memory owned by 'entry', which currently is only the helper name.
+ * 'entry' itself is not freed.  A null 'entry' is accepted and ignored. */
+void
+ct_dpif_entry_uninit(struct ct_dpif_entry *entry)
+{
+    if (entry) {
+        free(entry->helper.name);   /* free(NULL) is a no-op. */
+        entry->helper.name = NULL;  /* Makes a repeated call harmless. */
+    }
+}
+
+/* Conntrack entry formatting. */
+
+/* Appends a text form of conntrack 'entry' to 'ds'; see the notes inside. */
+void
+ct_dpif_format_entry(const struct ct_dpif_entry *entry, struct ds *ds,
+                     bool verbose, bool print_stats)
+{
+    ct_dpif_format_ipproto(ds, entry->tuple_orig.ip_proto);
+    /* Both directions follow; counters are added only with 'print_stats'. */
+    ds_put_cstr(ds, " orig=(");
+    ct_dpif_format_tuple(ds, &entry->tuple_orig, verbose);
+    if (print_stats) {
+        ct_dpif_format_counters(ds, &entry->counters_orig);
+    }
+    ds_put_cstr(ds, ")");
+
+    ds_put_cstr(ds, " reply=(");
+    ct_dpif_format_tuple(ds, &entry->tuple_reply, verbose);
+    if (print_stats) {
+        ct_dpif_format_counters(ds, &entry->counters_reply);
+    }
+    ds_put_cstr(ds, ")");
+    /* 'print_stats': timestamp, timeout.  'verbose': id, status, master. */
+    if (print_stats) {
+        ct_dpif_format_timestamp(ds, &entry->timestamp);
+    }
+    if (verbose) {
+        ds_put_format(ds, " id=%"PRIu32, entry->id);
+    }
+    if (entry->zone) {                  /* A zero zone is not printed. */
+        ds_put_format(ds, " zone=%"PRIu16, entry->zone);
+    }
+    if (verbose) {
+        ct_dpif_format_flags(ds, " status=", entry->status,
+                             ct_dpif_status_flags);
+    }
+    if (print_stats) {
+        ds_put_format(ds, " timeout=%"PRIu32, entry->timeout);
+    }
+    if (entry->mark) {                  /* A zero mark is not printed. */
+        ds_put_format(ds, " mark=%"PRIu32, entry->mark);
+    }
+    if (!ovs_u128_is_zero(&entry->labels)) {
+        ovs_be128 value;
+
+        ds_put_cstr(ds, " labels=");
+        hton128(&entry->labels, &value);    /* Converted before hex print. */
+        ds_put_hex(ds, &value, sizeof value);
+    }
+    ct_dpif_format_protoinfo(ds, " protoinfo=", &entry->protoinfo, verbose);
+    ct_dpif_format_helper(ds, " helper=", &entry->helper);
+    if (verbose && entry->tuple_master.l3_type != 0) {
+        ds_put_cstr(ds, " master=(");
+        ct_dpif_format_tuple(ds, &entry->tuple_master, verbose);
+        ds_put_cstr(ds, ")");
+    }
+}
+
+/* Formatters for the parts of the conntrack entries. */
+
+static void
+ct_dpif_format_ipproto(struct ds *ds, uint16_t ipproto)
+{
+    static const struct { uint16_t proto; const char *name; } known[] = {
+        { IPPROTO_ICMP, "icmp" }, { IPPROTO_ICMPV6, "icmpv6" },
+        { IPPROTO_TCP, "tcp" }, { IPPROTO_UDP, "udp" },
+        { IPPROTO_SCTP, "sctp" },
+    };
+    size_t i;
+
+    for (i = 0; i < sizeof known / sizeof known[0]; i++) {
+        if (known[i].proto == ipproto) {
+            ds_put_cstr(ds, known[i].name);
+            return;
+        }
+    }
+    ds_put_format(ds, "%u", ipproto);   /* Not a known name: print number. */
+}
+
+static void
+ct_dpif_format_counters(struct ds *ds, const struct ct_dpif_counters *counters)
+{
+    const uint64_t pkts = counters->packets, bytes = counters->bytes;
+    if (pkts != 0 || bytes != 0) {  /* All-zero counters print nothing. */
+        ds_put_format(ds, " packets=%"PRIu64" bytes=%"PRIu64, pkts, bytes);
+    }
+}
+
+static void
+ct_dpif_format_timestamp(struct ds *ds,
+                         const struct ct_dpif_timestamp *timestamp)
+{
+    if (timestamp->start || timestamp->stop) {  /* Either one set. */
+        ds_put_strftime_msec(ds, " start=%Y-%m-%d,%H:%M:%S.###",
+                             timestamp->start / UINT64_C(1000000), false);
+    }
+    if (timestamp->stop) {      /* Nonzero stop implies start was printed. */
+        ds_put_strftime_msec(ds, " stop=%Y-%m-%d,%H:%M:%S.###",
+                             timestamp->stop / UINT64_C(1000000), false);
+    }
+}
+
+static void
+ct_dpif_format_tuple_icmp(struct ds *ds, const struct ct_dpif_tuple *tuple,
+                          bool verbose)
+{
+    const unsigned int id = ntohs(tuple->icmp_id);  /* Host byte order. */
+
+    if (!verbose) {
+        ds_put_format(ds, " id=%u", id);
+        return;
+    }
+    ds_put_format(ds, " id=%u type=%u code=%u",
+                  id, tuple->icmp_type, tuple->icmp_code);
+}
+
+static void
+ct_dpif_format_tuple_tp(struct ds *ds, const struct ct_dpif_tuple *tuple)
+{
+    ds_put_format(ds, " sport=%u dport=%u",  /* Ports in host byte order. */
+                  ntohs(tuple->src_port), ntohs(tuple->dst_port));
+}
+
+void
+ct_dpif_format_tuple(struct ds *ds, const struct ct_dpif_tuple *tuple,
+                     bool verbose)
+{
+    if (tuple->l3_type != AF_INET && tuple->l3_type != AF_INET6) {
+        ds_put_format(ds, "Unsupported address family: %u. HEX:\n",
+                      tuple->l3_type);
+        ds_put_hex_dump(ds, tuple, sizeof *tuple, 0, false);
+        return;                 /* Nothing else is printed for the tuple. */
+    }
+    if (tuple->l3_type == AF_INET) {
+        ds_put_format(ds, "src="IP_FMT" dst="IP_FMT,
+                      IP_ARGS(tuple->src.ip), IP_ARGS(tuple->dst.ip));
+    } else {
+        ds_put_cstr(ds, "src=");
+        print_ipv6_addr(ds, &tuple->src.in6);
+        ds_put_cstr(ds, " dst=");
+        print_ipv6_addr(ds, &tuple->dst.in6);
+    }
+    /* ICMP and ICMPv6 print id (type/code if verbose); others print ports. */
+    if (tuple->ip_proto == IPPROTO_ICMP || tuple->ip_proto == IPPROTO_ICMPV6) {
+        ct_dpif_format_tuple_icmp(ds, tuple, verbose);
+    } else {
+        ct_dpif_format_tuple_tp(ds, tuple);
+    }
+}
+
+static void
+ct_dpif_format_flags(struct ds *ds, const char *title, uint32_t flags,
+                     const struct flags *table)
+{
+    bool need_comma = false;    /* True once a name has been printed. */
+
+    if (title != NULL) {
+        ds_put_cstr(ds, title);
+    }
+    for (; table->name != NULL; table++) {
+        if ((flags & table->flag) != 0) {
+            ds_put_format(ds, need_comma ? ",%s" : "%s", table->name);
+            need_comma = true;
+        }
+    }
+}
+
+static const struct flags tcp_flags[] = {   /* TCP flag bit -> name. */
+#define CT_DPIF_TCP_FLAG(FLAG)  { CT_DPIF_TCPF_##FLAG, #FLAG },
+    CT_DPIF_TCP_FLAGS
+#undef CT_DPIF_TCP_FLAG
+    { 0, NULL } /* End marker. */
+};
+
+const char *ct_dpif_tcp_state_string[] = {  /* Indexed by CT_DPIF_TCPS_*. */
+#define CT_DPIF_TCP_STATE(STATE) [CT_DPIF_TCPS_##STATE] = #STATE,
+    CT_DPIF_TCP_STATES
+#undef CT_DPIF_TCP_STATE
+};
+
+static void
+ct_dpif_format_enum__(struct ds *ds, const char *title, unsigned int state,
+                      const char *names[], unsigned int max)
+{
+    if (title != NULL) {
+        ds_put_cstr(ds, title);
+    }
+    if (state >= max) {         /* No name for this value: print "[N]". */
+        ds_put_format(ds, "[%u]", state);
+    } else {
+        ds_put_cstr(ds, names[state]);
+    }
+}
+
+#define ct_dpif_format_enum(DS, TITLE, STATE, NAMES) \
+    ct_dpif_format_enum__((DS), (TITLE), (STATE), (NAMES), ARRAY_SIZE(NAMES))
+
+static uint8_t
+coalesce_tcp_state(uint8_t state)
+{
+    /* The Linux kernel connection tracker and the userspace one view the
+     * TCP states differently in some situations.  When an entry is formatted
+     * without being verbose, folding those states together hides the
+     * differences, which makes test cases easier to write. */
+    if (state == CT_DPIF_TCPS_FIN_WAIT_2) {
+        return CT_DPIF_TCPS_TIME_WAIT;
+    }
+    if (state == CT_DPIF_TCPS_SYN_RECV) {
+        return CT_DPIF_TCPS_ESTABLISHED;
+    }
+
+    return state;
+}
+
+static void
+ct_dpif_format_protoinfo_tcp(struct ds *ds,
+                             const struct ct_dpif_protoinfo *protoinfo)
+{
+    const uint8_t orig = protoinfo->tcp.state_orig;
+    const uint8_t reply = protoinfo->tcp.state_reply;
+    uint8_t shown;
+
+    /* Two states are stored but only one is printed: the larger of the two.
+     * The Linux kernel connection tracker keeps a single state internally,
+     * so for its entries 'state_orig' and 'state_reply' are the same. */
+    shown = coalesce_tcp_state(orig > reply ? orig : reply);
+    ct_dpif_format_enum(ds, "state=", shown, ct_dpif_tcp_state_string);
+}
+
+static void
+ct_dpif_format_protoinfo_tcp_verbose(struct ds *ds,
+                                     const struct ct_dpif_protoinfo *protoinfo)
+{
+    ct_dpif_format_enum(ds, "state_orig=", protoinfo->tcp.state_orig,
+                        ct_dpif_tcp_state_string);
+    ct_dpif_format_enum(ds, " state_reply=", protoinfo->tcp.state_reply,
+                        ct_dpif_tcp_state_string);
+    /* Window scales are printed only when at least one of them is nonzero. */
+    if (protoinfo->tcp.wscale_orig || protoinfo->tcp.wscale_reply) {
+        ds_put_format(ds, " wscale_orig=%u wscale_reply=%u",
+                      protoinfo->tcp.wscale_orig,
+                      protoinfo->tcp.wscale_reply);
+    }
+    ct_dpif_format_flags(ds, " flags_orig=", protoinfo->tcp.flags_orig,
+                         tcp_flags);    /* Title is printed even if empty. */
+    ct_dpif_format_flags(ds, " flags_reply=", protoinfo->tcp.flags_reply,
+                         tcp_flags);
+}
+
+static void
+ct_dpif_format_protoinfo(struct ds *ds, const char *title,
+                         const struct ct_dpif_protoinfo *protoinfo,
+                         bool verbose)
+{
+    if (protoinfo->proto == 0) {
+        return;                 /* No protocol info: print nothing at all. */
+    }
+    if (title) {
+        ds_put_format(ds, "%s(", title);
+    }
+    /* Only TCP has details to print; other protocols leave the body empty. */
+    if (protoinfo->proto == IPPROTO_TCP) {
+        if (verbose) {
+            ct_dpif_format_protoinfo_tcp_verbose(ds, protoinfo);
+        } else {
+            ct_dpif_format_protoinfo_tcp(ds, protoinfo);
+        }
+    }
+    if (title) {
+        ds_put_cstr(ds, ")");
+    }
+}
+
+static void
+ct_dpif_format_helper(struct ds *ds, const char *title,
+                      const struct ct_dpif_helper *helper)
+{
+    if (helper->name && title) {    /* No helper name: not even the title. */
+        ds_put_cstr(ds, title);
+    }
+    if (helper->name) {
+        ds_put_cstr(ds, helper->name);
+    }
+}
diff --git a/lib/ct-dpif.h b/lib/ct-dpif.h
new file mode 100644
index 0000000..5127ec2
--- /dev/null
+++ b/lib/ct-dpif.h
@@ -0,0 +1,174 @@ 
+/*
+ * Copyright (c) 2015 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CT_DPIF_H
+#define CT_DPIF_H
+
+#include "openvswitch/types.h"
+#include "packets.h"
+
+union ct_dpif_inet_addr {
+    ovs_be32 ip;            /* Read when the tuple's l3_type is AF_INET. */
+    ovs_be32 ip6[4];        /* Same storage as four 32-bit words. */
+    struct in_addr in;      /* Same storage as a 'struct in_addr'. */
+    struct in6_addr in6;    /* Read when the tuple's l3_type is AF_INET6. */
+};
+
+struct ct_dpif_tuple {
+    uint16_t l3_type; /* Address family. */
+    uint8_t  ip_proto;                  /* Compared against IPPROTO_*. */
+    union ct_dpif_inet_addr src;
+    union ct_dpif_inet_addr dst;
+    union {                             /* Which member: see 'ip_proto'. */
+        ovs_be16 src_port;
+        ovs_be16 icmp_id;               /* Read for ICMP and ICMPv6. */
+    };
+    union {
+        ovs_be16 dst_port;
+        struct {                        /* Read for ICMP and ICMPv6. */
+            uint8_t icmp_type;
+            uint8_t icmp_code;
+        };
+    };
+};
+BUILD_ASSERT_DECL(sizeof(struct ct_dpif_tuple) % 8 == 0);
+
+struct ct_dpif_counters {
+    uint64_t packets;   /* Formatted as "packets=". */
+    uint64_t bytes;     /* Formatted as "bytes=". */
+};
+
+struct ct_dpif_timestamp {
+    uint64_t start;     /* Divided by 10^6 to get msec when formatted. */
+    uint64_t stop;      /* Same unit as 'start'; 0 is not printed. */
+};
+
+#define CT_DPIF_TCP_STATES \
+    CT_DPIF_TCP_STATE(CLOSED) \
+    CT_DPIF_TCP_STATE(LISTEN) \
+    CT_DPIF_TCP_STATE(SYN_SENT) \
+    CT_DPIF_TCP_STATE(SYN_RECV) \
+    CT_DPIF_TCP_STATE(ESTABLISHED) \
+    CT_DPIF_TCP_STATE(CLOSE_WAIT) \
+    CT_DPIF_TCP_STATE(FIN_WAIT_1) \
+    CT_DPIF_TCP_STATE(CLOSING) \
+    CT_DPIF_TCP_STATE(LAST_ACK) \
+    CT_DPIF_TCP_STATE(FIN_WAIT_2) \
+    CT_DPIF_TCP_STATE(TIME_WAIT)
+/* Enumerators take the list order above, so CT_DPIF_TCPS_CLOSED is 0. */
+enum ct_dpif_tcp_state {
+#define CT_DPIF_TCP_STATE(STATE) CT_DPIF_TCPS_##STATE,
+    CT_DPIF_TCP_STATES
+#undef CT_DPIF_TCP_STATE
+};
+
+extern const char *ct_dpif_tcp_state_string[];  /* Indexed by state. */
+
+#define CT_DPIF_TCP_FLAGS \
+    CT_DPIF_TCP_FLAG(WINDOW_SCALE) \
+    CT_DPIF_TCP_FLAG(SACK_PERM) \
+    CT_DPIF_TCP_FLAG(CLOSE_INIT) \
+    CT_DPIF_TCP_FLAG(BE_LIBERAL) \
+    CT_DPIF_TCP_FLAG(DATA_UNACKNOWLEDGED) \
+    CT_DPIF_TCP_FLAG(MAXACK_SET)
+
+enum ct_dpif_tcp_flags_count_ {     /* Bit position of each flag, from 0. */
+#define CT_DPIF_TCP_FLAG(FLAG) FLAG##_COUNT_,
+    CT_DPIF_TCP_FLAGS
+#undef CT_DPIF_TCP_FLAG
+};
+
+enum ct_dpif_tcp_flags {            /* Flag masks: 1 << bit position. */
+#define CT_DPIF_TCP_FLAG(FLAG) CT_DPIF_TCPF_##FLAG = (1 << FLAG##_COUNT_),
+    CT_DPIF_TCP_FLAGS
+#undef CT_DPIF_TCP_FLAG
+};
+
+struct ct_dpif_protoinfo {
+    uint16_t proto; /* IPPROTO_* */
+    union {
+        struct {
+            uint8_t state_orig;     /* Index into ct_dpif_tcp_state_string. */
+            uint8_t state_reply;
+            uint8_t wscale_orig;
+            uint8_t wscale_reply;
+            uint8_t flags_orig;     /* Formatted with CT_DPIF_TCPF_* names. */
+            uint8_t flags_reply;
+        } tcp;                      /* Read when 'proto' is IPPROTO_TCP. */
+    };
+};
+
+struct ct_dpif_helper {
+    char *name;     /* Freed by ct_dpif_entry_uninit(); may be NULL. */
+};
+
+#define CT_DPIF_STATUS_FLAGS \
+    CT_DPIF_STATUS_FLAG(EXPECTED) \
+    CT_DPIF_STATUS_FLAG(SEEN_REPLY) \
+    CT_DPIF_STATUS_FLAG(ASSURED) \
+    CT_DPIF_STATUS_FLAG(CONFIRMED) \
+    CT_DPIF_STATUS_FLAG(SRC_NAT) \
+    CT_DPIF_STATUS_FLAG(DST_NAT) \
+    CT_DPIF_STATUS_FLAG(SEQ_ADJUST) \
+    CT_DPIF_STATUS_FLAG(SRC_NAT_DONE) \
+    CT_DPIF_STATUS_FLAG(DST_NAT_DONE) \
+    CT_DPIF_STATUS_FLAG(DYING) \
+    CT_DPIF_STATUS_FLAG(FIXED_TIMEOUT) \
+    CT_DPIF_STATUS_FLAG(TEMPLATE) \
+    CT_DPIF_STATUS_FLAG(UNTRACKED)
+
+enum ct_dpif_status_flags_count_ {  /* Bit position of each flag, from 0. */
+#define CT_DPIF_STATUS_FLAG(FLAG) FLAG##_COUNT_,
+    CT_DPIF_STATUS_FLAGS
+#undef CT_DPIF_STATUS_FLAG
+};
+
+enum ct_dpif_status_flags {         /* Flag masks: 1 << bit position. */
+#define CT_DPIF_STATUS_FLAG(FLAG) CT_DPIF_STATUS_##FLAG = (1 << FLAG##_COUNT_),
+    CT_DPIF_STATUS_FLAGS
+#undef CT_DPIF_STATUS_FLAG
+};
+
+struct ct_dpif_entry {
+    /* Const members. */
+    struct ct_dpif_tuple tuple_orig;
+    struct ct_dpif_tuple tuple_reply;
+    struct ct_dpif_tuple tuple_master;  /* Formatted only if l3_type != 0. */
+    struct ct_dpif_helper helper;   /* Name freed by ct_dpif_entry_uninit(). */
+    uint32_t id;
+    uint16_t zone;                  /* 0 is omitted when formatting. */
+
+    /* Modifiable members. */
+
+    struct ct_dpif_counters counters_orig;
+    struct ct_dpif_counters counters_reply;
+
+    struct ct_dpif_timestamp timestamp;
+    struct ct_dpif_protoinfo protoinfo;
+
+    ovs_u128 labels;                /* All-zero is omitted when formatting. */
+    uint32_t status;                /* Formatted with CT_DPIF_STATUS_* names. */
+    uint32_t timeout;
+    uint32_t mark;                  /* 0 is omitted when formatting. */
+};
+
+void ct_dpif_entry_uninit(struct ct_dpif_entry *);  /* Frees helper name. */
+void ct_dpif_format_entry(const struct ct_dpif_entry *, struct ds *,
+                          bool verbose, bool print_stats);
+void ct_dpif_format_tuple(struct ds *, const struct ct_dpif_tuple *,
+                          bool verbose);  /* 'verbose' adds ICMP type/code. */
+
+#endif /* CT_DPIF_H */