@@ -110,20 +110,32 @@ ct_dpif_dump_done(struct ct_dpif_dump_state *dump)
: EOPNOTSUPP);
}
-/* Flush the entries in the connection tracker used by 'dpif'.
+/* Flush the entries in the connection tracker used by 'dpif'. The
+ * arguments have the following behavior:
*
- * If 'zone' is not NULL, flush only the entries in '*zone'. */
+ * - If both 'zone' and 'tuple' are NULL, flush all the conntrack entries.
+ * - If 'zone' is not NULL, and 'tuple' is NULL, flush all the conntrack
+ * entries in '*zone'.
+ * - If 'tuple' is not NULL, flush the conntrack entry specified by 'tuple'
+ * in '*zone'. If 'zone' is NULL, use the default zone (zone 0). */
int
-ct_dpif_flush(struct dpif *dpif, const uint16_t *zone)
+ct_dpif_flush(struct dpif *dpif, const uint16_t *zone,
+ const struct ct_dpif_tuple *tuple)
{
- if (zone) {
- VLOG_DBG("%s: ct_flush: %"PRIu16, dpif_name(dpif), *zone);
+ if (tuple) {
+ struct ds ds = DS_EMPTY_INITIALIZER;
+ ct_dpif_format_tuple(&ds, tuple);
+ VLOG_DBG("%s: ct_flush: %s in zone %d", dpif_name(dpif), ds_cstr(&ds),
+ zone ? *zone : 0);
+ ds_destroy(&ds);
+ } else if (zone) {
+ VLOG_DBG("%s: ct_flush: zone %"PRIu16, dpif_name(dpif), *zone);
} else {
VLOG_DBG("%s: ct_flush: <all>", dpif_name(dpif));
}
return (dpif->dpif_class->ct_flush
- ? dpif->dpif_class->ct_flush(dpif, zone)
+ ? dpif->dpif_class->ct_flush(dpif, zone, tuple)
: EOPNOTSUPP);
}
@@ -195,7 +195,8 @@ int ct_dpif_dump_start(struct dpif *, struct ct_dpif_dump_state **,
const uint16_t *zone, int *);
int ct_dpif_dump_next(struct ct_dpif_dump_state *, struct ct_dpif_entry *);
int ct_dpif_dump_done(struct ct_dpif_dump_state *);
-int ct_dpif_flush(struct dpif *, const uint16_t *zone);
+int ct_dpif_flush(struct dpif *, const uint16_t *zone,
+ const struct ct_dpif_tuple *);
void ct_dpif_entry_uninit(struct ct_dpif_entry *);
void ct_dpif_format_entry(const struct ct_dpif_entry *, struct ds *,
bool verbose, bool print_stats);
@@ -1353,7 +1353,7 @@ dpctl_flush_conntrack(int argc, const char *argv[],
return error;
}
- error = ct_dpif_flush(dpif, pzone);
+ error = ct_dpif_flush(dpif, pzone, NULL);
dpif_close(dpif);
return error;
@@ -5734,10 +5734,14 @@ dpif_netdev_ct_dump_done(struct dpif *dpif OVS_UNUSED,
}
static int
-dpif_netdev_ct_flush(struct dpif *dpif, const uint16_t *zone)
+dpif_netdev_ct_flush(struct dpif *dpif, const uint16_t *zone,
+ const struct ct_dpif_tuple *tuple)
{
struct dp_netdev *dp = get_dp_netdev(dpif);
+ if (tuple) {
+ return EOPNOTSUPP;
+ }
return conntrack_flush(&dp->conntrack, zone);
}
@@ -2892,9 +2892,12 @@ dpif_netlink_ct_dump_done(struct dpif *dpif OVS_UNUSED,
}
static int
-dpif_netlink_ct_flush(struct dpif *dpif OVS_UNUSED, const uint16_t *zone)
+dpif_netlink_ct_flush(struct dpif *dpif OVS_UNUSED, const uint16_t *zone,
+ const struct ct_dpif_tuple *tuple)
{
- if (zone) {
+ if (tuple) {
+ return nl_ct_flush_tuple(tuple, zone ? *zone : 0);
+ } else if (zone) {
return nl_ct_flush_zone(*zone);
} else {
return nl_ct_flush();
@@ -75,6 +75,7 @@ dpif_flow_dump_thread_init(struct dpif_flow_dump_thread *thread,
struct ct_dpif_dump_state;
struct ct_dpif_entry;
+struct ct_dpif_tuple;
/* Datapath interface class structure, to be defined by each implementation of
* a datapath interface.
@@ -424,9 +425,18 @@ struct dpif_class {
struct ct_dpif_entry *entry);
int (*ct_dump_done)(struct dpif *, struct ct_dpif_dump_state *state);
- /* Flushes the connection tracking tables. If 'zone' is not NULL,
- * only deletes connections in '*zone'. */
- int (*ct_flush)(struct dpif *, const uint16_t *zone);
+ /* Flushes the connection tracking tables. The arguments have the
+ * following behavior:
+ *
+ * - If both 'zone' and 'tuple' are NULL, flush all the conntrack
+ * entries.
+ * - If 'zone' is not NULL, and 'tuple' is NULL, flush all the
+ * conntrack entries in '*zone'.
+ * - If 'tuple' is not NULL, flush the conntrack entry specified by
+ * 'tuple' in '*zone'. If 'zone' is NULL, use the default zone
+ * (zone 0). */
+ int (*ct_flush)(struct dpif *, const uint16_t *zone,
+ const struct ct_dpif_tuple *tuple);
/* Meters */
@@ -18,6 +18,7 @@
#include "netlink-conntrack.h"
+#include <errno.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
#include <linux/netfilter/nf_conntrack_common.h>
@@ -111,6 +112,8 @@ static bool nl_ct_parse_header_policy(struct ofpbuf *buf,
static bool nl_ct_attrs_to_ct_dpif_entry(struct ct_dpif_entry *entry,
struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)],
uint8_t nfgen_family);
+static bool nl_ct_put_ct_tuple(struct ofpbuf *buf,
+ const struct ct_dpif_tuple *tuple, enum ctattr_type type);
struct nl_ct_dump_state {
struct nl_dump dump;
@@ -239,6 +242,27 @@ nl_ct_flush(void)
return err;
}
+int
+nl_ct_flush_tuple(const struct ct_dpif_tuple *tuple, uint16_t zone)
+{
+ int err;
+ struct ofpbuf buf;
+
+ ofpbuf_init(&buf, NL_DUMP_BUFSIZE);
+ nl_msg_put_nfgenmsg(&buf, 0, tuple->l3_type, NFNL_SUBSYS_CTNETLINK,
+ IPCTNL_MSG_CT_DELETE, NLM_F_REQUEST);
+
+ nl_msg_put_be16(&buf, CTA_ZONE, htons(zone));
+ if (!nl_ct_put_ct_tuple(&buf, tuple, CTA_TUPLE_ORIG)) {
+ err = EOPNOTSUPP;
+ goto out;
+ }
+ err = nl_transact(NETLINK_NETFILTER, &buf, NULL);
+out:
+ ofpbuf_uninit(&buf);
+ return err;
+}
+
#ifdef _WIN32
int
nl_ct_flush_zone(uint16_t flush_zone)
@@ -517,6 +541,79 @@ nl_ct_parse_tuple(struct nlattr *nla, struct ct_dpif_tuple *tuple,
return parsed;
}
+static bool
+nl_ct_put_tuple_ip(struct ofpbuf *buf, const struct ct_dpif_tuple *tuple)
+{
+ size_t offset = nl_msg_start_nested(buf, CTA_TUPLE_IP);
+
+ if (tuple->l3_type == AF_INET) {
+ nl_msg_put_be32(buf, CTA_IP_V4_SRC, tuple->src.ip);
+ nl_msg_put_be32(buf, CTA_IP_V4_DST, tuple->dst.ip);
+ } else if (tuple->l3_type == AF_INET6) {
+ nl_msg_put_in6_addr(buf, CTA_IP_V6_SRC, &tuple->src.in6);
+ nl_msg_put_in6_addr(buf, CTA_IP_V6_DST, &tuple->dst.in6);
+ } else {
+ VLOG_WARN_RL(&rl, "Unsupported IP protocol: %"PRIu16".",
+ tuple->l3_type);
+ return false;
+ }
+
+ nl_msg_end_nested(buf, offset);
+ return true;
+}
+
+static bool
+nl_ct_put_tuple_proto(struct ofpbuf *buf, const struct ct_dpif_tuple *tuple)
+{
+ size_t offset = nl_msg_start_nested(buf, CTA_TUPLE_PROTO);
+
+ nl_msg_put_u8(buf, CTA_PROTO_NUM, tuple->ip_proto);
+
+ if (tuple->l3_type == AF_INET && tuple->ip_proto == IPPROTO_ICMP) {
+ nl_msg_put_be16(buf, CTA_PROTO_ICMP_ID, tuple->icmp_id);
+ nl_msg_put_u8(buf, CTA_PROTO_ICMP_TYPE, tuple->icmp_type);
+ nl_msg_put_u8(buf, CTA_PROTO_ICMP_CODE, tuple->icmp_code);
+ } else if (tuple->l3_type == AF_INET6 &&
+ tuple->ip_proto == IPPROTO_ICMPV6) {
+ nl_msg_put_be16(buf, CTA_PROTO_ICMPV6_ID, tuple->icmp_id);
+ nl_msg_put_u8(buf, CTA_PROTO_ICMPV6_TYPE, tuple->icmp_type);
+ nl_msg_put_u8(buf, CTA_PROTO_ICMPV6_CODE, tuple->icmp_code);
+ } else if (tuple->ip_proto == IPPROTO_TCP ||
+ tuple->ip_proto == IPPROTO_UDP) {
+ nl_msg_put_be16(buf, CTA_PROTO_SRC_PORT, tuple->src_port);
+ nl_msg_put_be16(buf, CTA_PROTO_DST_PORT, tuple->dst_port);
+ } else {
+ VLOG_WARN_RL(&rl, "Unsupported L4 protocol: %"PRIu8".",
+ tuple->ip_proto);
+ return false;
+ }
+
+ nl_msg_end_nested(buf, offset);
+ return true;
+}
+
+static bool
+nl_ct_put_ct_tuple(struct ofpbuf *buf, const struct ct_dpif_tuple *tuple,
+ enum ctattr_type type)
+{
+ if (type != CTA_TUPLE_ORIG && type != CTA_TUPLE_REPLY &&
+ type != CTA_TUPLE_MASTER) {
+ return false;
+ }
+
+ size_t offset = nl_msg_start_nested(buf, type);
+
+ if (!nl_ct_put_tuple_ip(buf, tuple)) {
+ return false;
+ }
+ if (!nl_ct_put_tuple_proto(buf, tuple)) {
+ return false;
+ }
+
+ nl_msg_end_nested(buf, offset);
+ return true;
+}
+
/* Translate netlink TCP state to CT_DPIF_TCP state. */
static uint8_t
nl_ct_tcp_state_to_dpif(uint8_t state)
@@ -42,6 +42,7 @@ int nl_ct_dump_done(struct nl_ct_dump_state *);
int nl_ct_flush(void);
int nl_ct_flush_zone(uint16_t zone);
+int nl_ct_flush_tuple(const struct ct_dpif_tuple *, uint16_t zone);
bool nl_ct_parse_entry(struct ofpbuf *, struct ct_dpif_entry *,
enum nl_ct_event_type *);
@@ -4850,7 +4850,7 @@ ct_flush(const struct ofproto *ofproto_, const uint16_t *zone)
{
struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
- ct_dpif_flush(ofproto->backer->dpif, zone);
+ ct_dpif_flush(ofproto->backer->dpif, zone, NULL);
}
static bool
@@ -3167,7 +3167,7 @@ AT_CHECK([ovs-appctl vlog/set ct_dpif:dbg])
AT_CHECK([ovs-ofctl ct-flush-zone br0 123])
OVS_WAIT_UNTIL([grep -q "|ct_dpif|DBG|.*ct_flush:" ovs-vswitchd.log])
-AT_CHECK([grep -q "ct_dpif|DBG|.*ct_flush: 123" ovs-vswitchd.log])
+AT_CHECK([grep -q "ct_dpif|DBG|.*ct_flush: zone 123" ovs-vswitchd.log])
OVS_VSWITCHD_STOP
AT_CLEANUP
This patch adds support of flushing a conntrack entry specified by the conntrack 5-tuple, and provides the implementation in dpif-netlink. The implementation of dpif-netlink in the linux datapath utilizes the NFNL_SUBSYS_CTNETLINK netlink subsystem to delete a conntrack entry in nf_conntrack. Future patches will add support for the userspace and Windows datapaths. VMWare-BZ: #1983178 Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com> --- lib/ct-dpif.c | 24 +++++++++--- lib/ct-dpif.h | 3 +- lib/dpctl.c | 2 +- lib/dpif-netdev.c | 6 ++- lib/dpif-netlink.c | 7 +++- lib/dpif-provider.h | 16 ++++++-- lib/netlink-conntrack.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++ lib/netlink-conntrack.h | 1 + ofproto/ofproto-dpif.c | 2 +- tests/ovs-ofctl.at | 2 +- 10 files changed, 144 insertions(+), 16 deletions(-)