@@ -51,6 +51,8 @@ lib_libopenvswitch_la_SOURCES = \
lib/crc32c.h \
lib/csum.c \
lib/csum.h \
+ lib/ct-dpif.c \
+ lib/ct-dpif.h \
lib/daemon.c \
lib/daemon.h \
lib/daemon-private.h \
new file mode 100644
@@ -0,0 +1,343 @@
+/*
+ * Copyright (c) 2015 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include <errno.h>
+
+#include "ct-dpif.h"
+
+/* Declarations for conntrack entry formatting. */
+/* One row of a flag-name table: pairs a bit mask with the name that
+ * ct_dpif_format_flags() prints when that mask is set.  Tables are terminated
+ * by a row whose 'name' is NULL. */
+struct flags {
+ uint32_t flag; /* Mask tested against the flags word. */
+ const char *name; /* Name to print; NULL in the end marker. */
+};
+
+/* Forward declarations of the static formatting helpers defined later in this
+ * file.  Each one takes the 'struct ds' to append to as its first argument. */
+static void ct_dpif_format_ipproto(struct ds *, uint16_t ipproto);
+static void ct_dpif_format_counters(struct ds *,
+ const struct ct_dpif_counters *);
+static void ct_dpif_format_timestamp(struct ds *,
+ const struct ct_dpif_timestamp *);
+static void ct_dpif_format_flags(struct ds *, const char *title,
+ uint32_t flags, const struct flags *);
+static void ct_dpif_format_protoinfo(struct ds *, const char *title,
+ const struct ct_dpif_protoinfo *,
+ bool verbose);
+static void ct_dpif_format_helper(struct ds *, const char *title,
+ const struct ct_dpif_helper *);
+
+/* Name table for the bits of a conntrack entry's 'status' word, generated
+ * from the CT_DPIF_STATUS_FLAGS list: each row pairs CT_DPIF_STATUS_<FLAG>
+ * with the string "<FLAG>". */
+static const struct flags ct_dpif_status_flags[] = {
+#define CT_DPIF_STATUS_FLAG(FLAG) { CT_DPIF_STATUS_##FLAG, #FLAG },
+ CT_DPIF_STATUS_FLAGS
+#undef CT_DPIF_STATUS_FLAG
+ { 0, NULL } /* End marker. */
+};
+
+/* Frees the memory held by 'entry', which is currently only the helper name.
+ * 'entry' itself is not freed.  Passing a null 'entry' is a no-op.
+ *
+ * The helper name pointer is reset to NULL afterwards, so calling this
+ * function again on the same entry is harmless. */
+void
+ct_dpif_entry_uninit(struct ct_dpif_entry *entry)
+{
+    if (entry) {
+        /* free(NULL) is defined to do nothing, so no extra check is needed. */
+        free(entry->helper.name);
+        entry->helper.name = NULL;
+    }
+}
+
+/* Conntrack entry formatting. */
+
+/* Appends to 'ds' one direction of a connection: 'title', then 'tuple', then
+ * (only if 'print_stats') 'counters', then a closing parenthesis. */
+static void
+ct_dpif_format_direction(struct ds *ds, const char *title,
+                         const struct ct_dpif_tuple *tuple,
+                         const struct ct_dpif_counters *counters,
+                         bool verbose, bool print_stats)
+{
+    ds_put_cstr(ds, title);
+    ct_dpif_format_tuple(ds, tuple, verbose);
+    if (print_stats) {
+        ct_dpif_format_counters(ds, counters);
+    }
+    ds_put_cstr(ds, ")");
+}
+
+/* Appends a text representation of conntrack 'entry' to 'ds'.
+ *
+ * If 'verbose' is true the id, the status flags and (when present) the master
+ * tuple are included, and 'verbose' is passed on to the tuple and protoinfo
+ * formatters.  If 'print_stats' is true the per-direction counters, the
+ * timestamps and the timeout are included.  Zone, mark and labels are
+ * printed only when they are nonzero. */
+void
+ct_dpif_format_entry(const struct ct_dpif_entry *entry, struct ds *ds,
+                     bool verbose, bool print_stats)
+{
+    ct_dpif_format_ipproto(ds, entry->tuple_orig.ip_proto);
+    ct_dpif_format_direction(ds, " orig=(", &entry->tuple_orig,
+                             &entry->counters_orig, verbose, print_stats);
+    ct_dpif_format_direction(ds, " reply=(", &entry->tuple_reply,
+                             &entry->counters_reply, verbose, print_stats);
+
+    if (print_stats) {
+        ct_dpif_format_timestamp(ds, &entry->timestamp);
+    }
+    if (verbose) {
+        ds_put_format(ds, " id=%"PRIu32, entry->id);
+    }
+    if (entry->zone != 0) {
+        ds_put_format(ds, " zone=%"PRIu16, entry->zone);
+    }
+    if (verbose) {
+        ct_dpif_format_flags(ds, " status=", entry->status,
+                             ct_dpif_status_flags);
+    }
+    if (print_stats) {
+        ds_put_format(ds, " timeout=%"PRIu32, entry->timeout);
+    }
+    if (entry->mark != 0) {
+        ds_put_format(ds, " mark=%"PRIu32, entry->mark);
+    }
+    if (!ovs_u128_is_zero(&entry->labels)) {
+        ovs_be128 labels_be;
+
+        /* The labels are converted with hton128() before being dumped. */
+        hton128(&entry->labels, &labels_be);
+        ds_put_cstr(ds, " labels=");
+        ds_put_hex(ds, &labels_be, sizeof labels_be);
+    }
+    ct_dpif_format_protoinfo(ds, " protoinfo=", &entry->protoinfo, verbose);
+    ct_dpif_format_helper(ds, " helper=", &entry->helper);
+
+    /* A zero 'l3_type' in the master tuple is treated as "no master". */
+    if (verbose && entry->tuple_master.l3_type != 0) {
+        ds_put_cstr(ds, " master=(");
+        ct_dpif_format_tuple(ds, &entry->tuple_master, verbose);
+        ds_put_cstr(ds, ")");
+    }
+}
+
+/* Formatters for the parts of the conntrack entries. */
+
+/* Appends the name of IP protocol 'ipproto' to 'ds', or its decimal number
+ * when it is not one of the protocols named here. */
+static void
+ct_dpif_format_ipproto(struct ds *ds, uint16_t ipproto)
+{
+    const char *proto_name;
+
+    switch (ipproto) {
+    case IPPROTO_ICMP:   proto_name = "icmp";   break;
+    case IPPROTO_ICMPV6: proto_name = "icmpv6"; break;
+    case IPPROTO_TCP:    proto_name = "tcp";    break;
+    case IPPROTO_UDP:    proto_name = "udp";    break;
+    case IPPROTO_SCTP:   proto_name = "sctp";   break;
+    default:
+        ds_put_format(ds, "%u", ipproto);
+        return;
+    }
+    ds_put_cstr(ds, proto_name);
+}
+
+/* Appends " packets=<n> bytes=<n>" to 'ds'.  Appends nothing when both
+ * counters are zero. */
+static void
+ct_dpif_format_counters(struct ds *ds, const struct ct_dpif_counters *counters)
+{
+    if (!counters->packets && !counters->bytes) {
+        return;
+    }
+    ds_put_format(ds, " packets=%"PRIu64" bytes=%"PRIu64,
+                  counters->packets, counters->bytes);
+}
+
+/* Appends " start=<time>" and, if a stop time is set, " stop=<time>" to 'ds'.
+ * Appends nothing when both 'start' and 'stop' are zero.
+ *
+ * Each value is divided by 1,000,000 before being handed to
+ * ds_put_strftime_msec(); presumably the fields are in nanoseconds, which the
+ * division turns into milliseconds -- TODO confirm against the producer. */
+static void
+ct_dpif_format_timestamp(struct ds *ds,
+                         const struct ct_dpif_timestamp *timestamp)
+{
+    const uint64_t start = timestamp->start;
+    const uint64_t stop = timestamp->stop;
+
+    if (!start && !stop) {
+        return;
+    }
+
+    ds_put_strftime_msec(ds, " start=%Y-%m-%d,%H:%M:%S.###",
+                         start / UINT64_C(1000000), false);
+    if (stop) {
+        ds_put_strftime_msec(ds, " stop=%Y-%m-%d,%H:%M:%S.###",
+                             stop / UINT64_C(1000000), false);
+    }
+}
+
+/* Appends the ICMP part of 'tuple' to 'ds': always the id (converted with
+ * ntohs()), plus the type and code when 'verbose' is true. */
+static void
+ct_dpif_format_tuple_icmp(struct ds *ds, const struct ct_dpif_tuple *tuple,
+                          bool verbose)
+{
+    const unsigned int id = ntohs(tuple->icmp_id);
+
+    if (!verbose) {
+        ds_put_format(ds, " id=%u", id);
+        return;
+    }
+    ds_put_format(ds, " id=%u type=%u code=%u",
+                  id, tuple->icmp_type, tuple->icmp_code);
+}
+
+/* Appends the source and destination ports of 'tuple' to 'ds', converting
+ * each with ntohs() first. */
+static void
+ct_dpif_format_tuple_tp(struct ds *ds, const struct ct_dpif_tuple *tuple)
+{
+    const unsigned int sport = ntohs(tuple->src_port);
+    const unsigned int dport = ntohs(tuple->dst_port);
+
+    ds_put_format(ds, " sport=%u dport=%u", sport, dport);
+}
+
+/* Appends 'tuple' to 'ds': the source and destination addresses, followed by
+ * the ICMP fields (for ICMP and ICMPv6) or the ports (for every other
+ * protocol).  'verbose' only affects the ICMP fields.
+ *
+ * If the address family is neither AF_INET nor AF_INET6, an error note and a
+ * hex dump of the whole tuple are appended instead. */
+void
+ct_dpif_format_tuple(struct ds *ds, const struct ct_dpif_tuple *tuple,
+                     bool verbose)
+{
+    bool is_icmp;
+
+    switch (tuple->l3_type) {
+    case AF_INET:
+        ds_put_format(ds, "src="IP_FMT" dst="IP_FMT,
+                      IP_ARGS(tuple->src.ip), IP_ARGS(tuple->dst.ip));
+        break;
+
+    case AF_INET6:
+        ds_put_cstr(ds, "src=");
+        print_ipv6_addr(ds, &tuple->src.in6);
+        ds_put_cstr(ds, " dst=");
+        print_ipv6_addr(ds, &tuple->dst.in6);
+        break;
+
+    default:
+        ds_put_format(ds, "Unsupported address family: %u. HEX:\n",
+                      tuple->l3_type);
+        ds_put_hex_dump(ds, tuple, sizeof *tuple, 0, false);
+        return;
+    }
+
+    is_icmp = (tuple->ip_proto == IPPROTO_ICMP
+               || tuple->ip_proto == IPPROTO_ICMPV6);
+    if (is_icmp) {
+        ct_dpif_format_tuple_icmp(ds, tuple, verbose);
+    } else {
+        ct_dpif_format_tuple_tp(ds, tuple);
+    }
+}
+
+/* Appends 'title' (when nonnull) to 'ds', followed by a comma-separated list
+ * of the names of every row in 'table' whose mask has a bit in common with
+ * 'flags'.  'table' must end with a row whose name is NULL. */
+static void
+ct_dpif_format_flags(struct ds *ds, const char *title, uint32_t flags,
+                     const struct flags *table)
+{
+    const struct flags *row;
+    bool need_comma = false;
+
+    if (title) {
+        ds_put_cstr(ds, title);
+    }
+
+    for (row = table; row->name; row++) {
+        if (!(flags & row->flag)) {
+            continue;
+        }
+        ds_put_format(ds, need_comma ? ",%s" : "%s", row->name);
+        need_comma = true;
+    }
+}
+
+/* Name table for the TCP flag bits ('flags_orig' and 'flags_reply' of the TCP
+ * protoinfo), generated from the CT_DPIF_TCP_FLAGS list: each row pairs
+ * CT_DPIF_TCPF_<FLAG> with the string "<FLAG>". */
+static const struct flags tcp_flags[] = {
+#define CT_DPIF_TCP_FLAG(FLAG) { CT_DPIF_TCPF_##FLAG, #FLAG },
+ CT_DPIF_TCP_FLAGS
+#undef CT_DPIF_TCP_FLAG
+ { 0, NULL } /* End marker. */
+};
+
+/* TCP state names, indexed by CT_DPIF_TCPS_<STATE>.  Generated from the
+ * CT_DPIF_TCP_STATES list, so element CT_DPIF_TCPS_<STATE> is the string
+ * "<STATE>". */
+const char *ct_dpif_tcp_state_string[] = {
+#define CT_DPIF_TCP_STATE(STATE) [CT_DPIF_TCPS_##STATE] = #STATE,
+ CT_DPIF_TCP_STATES
+#undef CT_DPIF_TCP_STATE
+};
+
+/* Appends 'title' (when nonnull) to 'ds', followed by 'names[state]' if
+ * 'state' is less than 'max', or by "[<state>]" in decimal otherwise.
+ * 'max' is the number of elements in 'names'. */
+static void
+ct_dpif_format_enum__(struct ds *ds, const char *title, unsigned int state,
+                      const char *names[], unsigned int max)
+{
+    if (title) {
+        ds_put_cstr(ds, title);
+    }
+
+    if (state >= max) {
+        /* No name for this value: print the raw number. */
+        ds_put_format(ds, "[%u]", state);
+        return;
+    }
+    ds_put_cstr(ds, names[state]);
+}
+
+/* Wrapper around ct_dpif_format_enum__() that supplies the element count of
+ * NAMES itself.  NAMES must therefore be an actual array whose size is known
+ * at the point of use, not a pointer, because ARRAY_SIZE() is applied to it. */
+#define ct_dpif_format_enum(DS, TITLE, STATE, NAMES) \
+ ct_dpif_format_enum__((DS), (TITLE), (STATE), (NAMES), ARRAY_SIZE(NAMES))
+
+/* Maps TCP 'state' to the state that is shown in non-verbose output.
+ *
+ * The Linux kernel connection tracker and userspace view the TCP states
+ * differently in some situations.  For non-verbose output those differences
+ * are folded away, which makes test cases easier to write: FIN_WAIT_2 is
+ * reported as TIME_WAIT and SYN_RECV as ESTABLISHED.  Every other state is
+ * returned unchanged. */
+static uint8_t
+coalesce_tcp_state(uint8_t state)
+{
+    if (state == CT_DPIF_TCPS_FIN_WAIT_2) {
+        return CT_DPIF_TCPS_TIME_WAIT;
+    }
+    if (state == CT_DPIF_TCPS_SYN_RECV) {
+        return CT_DPIF_TCPS_ESTABLISHED;
+    }
+    return state;
+}
+
+/* Appends the non-verbose TCP protoinfo, "state=<name>", to 'ds'.
+ *
+ * Two TCP states are stored but only one is printed: the larger of the two,
+ * after it has been passed through coalesce_tcp_state().  The Linux kernel
+ * connection tracker internally keeps only one state, so in that case
+ * 'state_orig' and 'state_reply' are the same. */
+static void
+ct_dpif_format_protoinfo_tcp(struct ds *ds,
+                             const struct ct_dpif_protoinfo *protoinfo)
+{
+    const uint8_t state_orig = protoinfo->tcp.state_orig;
+    const uint8_t state_reply = protoinfo->tcp.state_reply;
+    uint8_t shown_state;
+
+    shown_state = coalesce_tcp_state(MAX(state_orig, state_reply));
+    ct_dpif_format_enum(ds, "state=", shown_state, ct_dpif_tcp_state_string);
+}
+
+/* Appends the verbose TCP protoinfo to 'ds': both states, the window scale
+ * factors (only when at least one of them is nonzero) and both flag sets. */
+static void
+ct_dpif_format_protoinfo_tcp_verbose(struct ds *ds,
+                                     const struct ct_dpif_protoinfo *protoinfo)
+{
+    const uint8_t wscale_orig = protoinfo->tcp.wscale_orig;
+    const uint8_t wscale_reply = protoinfo->tcp.wscale_reply;
+
+    ct_dpif_format_enum(ds, "state_orig=", protoinfo->tcp.state_orig,
+                        ct_dpif_tcp_state_string);
+    ct_dpif_format_enum(ds, " state_reply=", protoinfo->tcp.state_reply,
+                        ct_dpif_tcp_state_string);
+
+    if (wscale_orig != 0 || wscale_reply != 0) {
+        ds_put_format(ds, " wscale_orig=%u wscale_reply=%u",
+                      wscale_orig, wscale_reply);
+    }
+
+    ct_dpif_format_flags(ds, " flags_orig=", protoinfo->tcp.flags_orig,
+                         tcp_flags);
+    ct_dpif_format_flags(ds, " flags_reply=", protoinfo->tcp.flags_reply,
+                         tcp_flags);
+}
+
+/* Appends the protocol-specific information in 'protoinfo' to 'ds'.
+ *
+ * Nothing is appended when 'protoinfo->proto' is zero.  Otherwise, when
+ * 'title' is nonnull the output is wrapped as "<title>(...)".  Only TCP has
+ * any content, short or detailed depending on 'verbose'; for any other
+ * nonzero protocol the part between the parentheses is empty. */
+static void
+ct_dpif_format_protoinfo(struct ds *ds, const char *title,
+                         const struct ct_dpif_protoinfo *protoinfo,
+                         bool verbose)
+{
+    if (protoinfo->proto == 0) {
+        return;
+    }
+
+    if (title) {
+        ds_put_format(ds, "%s(", title);
+    }
+    if (protoinfo->proto == IPPROTO_TCP) {
+        if (verbose) {
+            ct_dpif_format_protoinfo_tcp_verbose(ds, protoinfo);
+        } else {
+            ct_dpif_format_protoinfo_tcp(ds, protoinfo);
+        }
+    }
+    if (title) {
+        ds_put_cstr(ds, ")");
+    }
+}
+
+/* Appends 'title' (when nonnull) followed by the helper's name to 'ds'.
+ * Appends nothing at all when 'helper' has no name. */
+static void
+ct_dpif_format_helper(struct ds *ds, const char *title,
+                      const struct ct_dpif_helper *helper)
+{
+    const char *helper_name = helper->name;
+
+    if (!helper_name) {
+        return;
+    }
+    if (title) {
+        ds_put_cstr(ds, title);
+    }
+    ds_put_cstr(ds, helper_name);
+}
new file mode 100644
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2015 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CT_DPIF_H
+#define CT_DPIF_H
+
+#include "openvswitch/types.h"
+#include "packets.h"
+
+/* Storage for one IP address, with several overlapping views of the same
+ * bytes.  Which view is meaningful depends on the address family recorded
+ * next to it ('l3_type' in struct ct_dpif_tuple). */
+union ct_dpif_inet_addr {
+ ovs_be32 ip; /* IPv4 address as a big-endian 32-bit value. */
+ ovs_be32 ip6[4]; /* Four big-endian 32-bit words. */
+ struct in_addr in; /* Same storage viewed as a struct in_addr. */
+ struct in6_addr in6; /* Same storage viewed as a struct in6_addr. */
+};
+
+/* The addresses, IP protocol and ports (or ICMP fields) of one direction of a
+ * connection.  The port fields and the ICMP fields share storage through the
+ * two anonymous unions; ct_dpif_format_tuple() reads the ICMP view when
+ * 'ip_proto' is IPPROTO_ICMP or IPPROTO_ICMPV6 and the port view otherwise. */
+struct ct_dpif_tuple {
+ uint16_t l3_type; /* Address family. */
+ uint8_t ip_proto; /* Compared against IPPROTO_* when formatting. */
+ union ct_dpif_inet_addr src;
+ union ct_dpif_inet_addr dst;
+ union {
+ ovs_be16 src_port;
+ ovs_be16 icmp_id;
+ };
+ union {
+ ovs_be16 dst_port;
+ struct {
+ uint8_t icmp_type;
+ uint8_t icmp_code;
+ };
+ };
+};
+/* The size must stay a multiple of 8 bytes.  NOTE(review): the reason for
+ * this requirement is not visible in this file -- confirm before changing the
+ * layout. */
+BUILD_ASSERT_DECL(sizeof(struct ct_dpif_tuple) % 8 == 0);
+
+/* Packet and byte counters.  A conntrack entry carries one of these for each
+ * direction ('counters_orig' and 'counters_reply'). */
+struct ct_dpif_counters {
+ uint64_t packets;
+ uint64_t bytes;
+};
+
+/* Start and stop times of a connection.  The formatter treats a zero value as
+ * "not set".  NOTE(review): the formatter divides both values by 1,000,000
+ * before printing them with millisecond precision, so they are presumably in
+ * nanoseconds -- confirm against the code that fills them in. */
+struct ct_dpif_timestamp {
+ uint64_t start;
+ uint64_t stop;
+};
+
+/* List of TCP states, expanded several times with different definitions of
+ * CT_DPIF_TCP_STATE() to generate the enum below and the matching name table.
+ * The position in this list determines the numeric value of each state, so
+ * reordering it changes those values. */
+#define CT_DPIF_TCP_STATES \
+ CT_DPIF_TCP_STATE(CLOSED) \
+ CT_DPIF_TCP_STATE(LISTEN) \
+ CT_DPIF_TCP_STATE(SYN_SENT) \
+ CT_DPIF_TCP_STATE(SYN_RECV) \
+ CT_DPIF_TCP_STATE(ESTABLISHED) \
+ CT_DPIF_TCP_STATE(CLOSE_WAIT) \
+ CT_DPIF_TCP_STATE(FIN_WAIT_1) \
+ CT_DPIF_TCP_STATE(CLOSING) \
+ CT_DPIF_TCP_STATE(LAST_ACK) \
+ CT_DPIF_TCP_STATE(FIN_WAIT_2) \
+ CT_DPIF_TCP_STATE(TIME_WAIT)
+
+/* CT_DPIF_TCPS_<STATE> for every state in CT_DPIF_TCP_STATES, numbered from 0
+ * in list order. */
+enum ct_dpif_tcp_state {
+#define CT_DPIF_TCP_STATE(STATE) CT_DPIF_TCPS_##STATE,
+ CT_DPIF_TCP_STATES
+#undef CT_DPIF_TCP_STATE
+};
+
+/* State names, indexed by enum ct_dpif_tcp_state. */
+extern const char *ct_dpif_tcp_state_string[];
+
+/* List of TCP flags, expanded with different definitions of
+ * CT_DPIF_TCP_FLAG() to generate the two enums below and the name table.
+ *
+ * NOTE(review): the last entry ends with a backslash, so the macro definition
+ * continues onto the following line.  That line must stay blank, otherwise
+ * whatever is on it becomes part of the macro. */
+#define CT_DPIF_TCP_FLAGS \
+ CT_DPIF_TCP_FLAG(WINDOW_SCALE) \
+ CT_DPIF_TCP_FLAG(SACK_PERM) \
+ CT_DPIF_TCP_FLAG(CLOSE_INIT) \
+ CT_DPIF_TCP_FLAG(BE_LIBERAL) \
+ CT_DPIF_TCP_FLAG(DATA_UNACKNOWLEDGED) \
+ CT_DPIF_TCP_FLAG(MAXACK_SET) \
+
+/* Helper enum: <FLAG>_COUNT_ is the position of FLAG in the list above,
+ * starting at 0.  It is used only as the shift amount in the next enum. */
+enum ct_dpif_tcp_flags_count_ {
+#define CT_DPIF_TCP_FLAG(FLAG) FLAG##_COUNT_,
+ CT_DPIF_TCP_FLAGS
+#undef CT_DPIF_TCP_FLAG
+};
+
+/* CT_DPIF_TCPF_<FLAG> is a single-bit mask: 1 shifted left by the flag's
+ * position in CT_DPIF_TCP_FLAGS. */
+enum ct_dpif_tcp_flags {
+#define CT_DPIF_TCP_FLAG(FLAG) CT_DPIF_TCPF_##FLAG = (1 << FLAG##_COUNT_),
+ CT_DPIF_TCP_FLAGS
+#undef CT_DPIF_TCP_FLAG
+};
+
+/* Protocol-specific connection state.  'proto' selects which member of the
+ * union is meaningful; zero means there is nothing to format.  Only TCP has a
+ * member at present. */
+struct ct_dpif_protoinfo {
+ uint16_t proto; /* IPPROTO_* */
+ union {
+ struct {
+ uint8_t state_orig; /* Printed as an enum ct_dpif_tcp_state. */
+ uint8_t state_reply; /* Printed as an enum ct_dpif_tcp_state. */
+ uint8_t wscale_orig;
+ uint8_t wscale_reply;
+ uint8_t flags_orig; /* Bitwise OR of CT_DPIF_TCPF_*. */
+ uint8_t flags_reply; /* Bitwise OR of CT_DPIF_TCPF_*. */
+ } tcp;
+ };
+};
+
+/* Connection tracking helper associated with an entry.  'name' may be NULL
+ * when there is no helper; otherwise it is released with free() by
+ * ct_dpif_entry_uninit(). */
+struct ct_dpif_helper {
+ char *name;
+};
+
+/* List of connection status flags, expanded with different definitions of
+ * CT_DPIF_STATUS_FLAG() to generate the two enums below and the name table.
+ *
+ * NOTE(review): the last entry ends with a backslash, so the macro definition
+ * continues onto the following line.  That line must stay blank, otherwise
+ * whatever is on it becomes part of the macro. */
+#define CT_DPIF_STATUS_FLAGS \
+ CT_DPIF_STATUS_FLAG(EXPECTED) \
+ CT_DPIF_STATUS_FLAG(SEEN_REPLY) \
+ CT_DPIF_STATUS_FLAG(ASSURED) \
+ CT_DPIF_STATUS_FLAG(CONFIRMED) \
+ CT_DPIF_STATUS_FLAG(SRC_NAT) \
+ CT_DPIF_STATUS_FLAG(DST_NAT) \
+ CT_DPIF_STATUS_FLAG(SEQ_ADJUST) \
+ CT_DPIF_STATUS_FLAG(SRC_NAT_DONE) \
+ CT_DPIF_STATUS_FLAG(DST_NAT_DONE) \
+ CT_DPIF_STATUS_FLAG(DYING) \
+ CT_DPIF_STATUS_FLAG(FIXED_TIMEOUT) \
+ CT_DPIF_STATUS_FLAG(TEMPLATE) \
+ CT_DPIF_STATUS_FLAG(UNTRACKED) \
+
+/* Helper enum: <FLAG>_COUNT_ is the position of FLAG in the list above,
+ * starting at 0.  It is used only as the shift amount in the next enum. */
+enum ct_dpif_status_flags_count_ {
+#define CT_DPIF_STATUS_FLAG(FLAG) FLAG##_COUNT_,
+ CT_DPIF_STATUS_FLAGS
+#undef CT_DPIF_STATUS_FLAG
+};
+
+/* CT_DPIF_STATUS_<FLAG> is a single-bit mask: 1 shifted left by the flag's
+ * position in CT_DPIF_STATUS_FLAGS. */
+enum ct_dpif_status_flags {
+#define CT_DPIF_STATUS_FLAG(FLAG) CT_DPIF_STATUS_##FLAG = (1 << FLAG##_COUNT_),
+ CT_DPIF_STATUS_FLAGS
+#undef CT_DPIF_STATUS_FLAG
+};
+
+/* One connection tracking entry, as printed by ct_dpif_format_entry().
+ *
+ * 'helper.name' is the only member that owns memory; release it with
+ * ct_dpif_entry_uninit(). */
+struct ct_dpif_entry {
+ /* Const members. */
+ struct ct_dpif_tuple tuple_orig; /* Printed as "orig=(...)". */
+ struct ct_dpif_tuple tuple_reply; /* Printed as "reply=(...)". */
+ struct ct_dpif_tuple tuple_master; /* Zero 'l3_type' means no master. */
+ struct ct_dpif_helper helper;
+ uint32_t id;
+ uint16_t zone; /* Printed only when nonzero. */
+
+ /* Modifiable members. */
+
+ struct ct_dpif_counters counters_orig;
+ struct ct_dpif_counters counters_reply;
+
+ struct ct_dpif_timestamp timestamp;
+ struct ct_dpif_protoinfo protoinfo;
+
+ ovs_u128 labels; /* Printed only when nonzero. */
+ uint32_t status; /* Bitwise OR of CT_DPIF_STATUS_*. */
+ uint32_t timeout;
+ uint32_t mark; /* Printed only when nonzero. */
+};
+
+/* Frees the memory owned by an entry (the helper name), not the entry
+ * itself. */
+void ct_dpif_entry_uninit(struct ct_dpif_entry *);
+/* Appends a text representation of an entry to a 'struct ds'.  'verbose' adds
+ * more detail; 'print_stats' adds counters, timestamps and the timeout. */
+void ct_dpif_format_entry(const struct ct_dpif_entry *, struct ds *,
+ bool verbose, bool print_stats);
+/* Appends a text representation of a single tuple to a 'struct ds'. */
+void ct_dpif_format_tuple(struct ds *, const struct ct_dpif_tuple *,
+ bool verbose);
+
+#endif /* CT_DPIF_H */