@@ -49,6 +49,7 @@ lib_libopenvswitch_la_SOURCES = \
lib/compiler.h \
lib/connectivity.c \
lib/connectivity.h \
+ lib/conntrack-icmp.c \
lib/conntrack-private.h \
lib/conntrack-tcp.c \
lib/conntrack-other.c \
new file mode 100644
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2015, 2016 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include <errno.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <netinet/icmp6.h>
+
+#include "conntrack-private.h"
+#include "dp-packet.h"
+
+enum icmp_state {
+ ICMPS_FIRST,
+ ICMPS_REPLY,
+};
+
+struct conn_icmp {
+ struct conn up;
+ enum icmp_state state;
+};
+
+static const enum ct_timeout icmp_timeouts[] = {
+ [ICMPS_FIRST] = CT_TM_ICMP_FIRST,
+ [ICMPS_REPLY] = CT_TM_ICMP_REPLY,
+};
+
+static struct conn_icmp *
+conn_icmp_cast(const struct conn *conn)
+{
+ return CONTAINER_OF(conn, struct conn_icmp, up);
+}
+
+static enum ct_update_res
+icmp_conn_update(struct conn *conn_, struct conntrack_bucket *ctb,
+ struct dp_packet *pkt OVS_UNUSED, bool reply, long long now)
+{
+ struct conn_icmp *conn = conn_icmp_cast(conn_);
+
+ if (reply && conn->state != ICMPS_REPLY) {
+ conn->state = ICMPS_REPLY;
+ }
+
+ conn_update_expiration(ctb, &conn->up, icmp_timeouts[conn->state], now);
+
+ return CT_UPDATE_VALID;
+}
+
+static bool
+icmp4_valid_new(struct dp_packet *pkt)
+{
+ struct icmp_header *icmp = dp_packet_l4(pkt);
+
+ return icmp->icmp_type == ICMP4_ECHO_REQUEST
+ || icmp->icmp_type == ICMP4_INFOREQUEST
+ || icmp->icmp_type == ICMP4_TIMESTAMP;
+}
+
+static bool
+icmp6_valid_new(struct dp_packet *pkt)
+{
+ struct icmp6_header *icmp6 = dp_packet_l4(pkt);
+
+ return icmp6->icmp6_type == ICMP6_ECHO_REQUEST;
+}
+
+static struct conn *
+icmp_new_conn(struct conntrack_bucket *ctb, struct dp_packet *pkt OVS_UNUSED,
+ long long now)
+{
+ struct conn_icmp *conn;
+
+ conn = xzalloc(sizeof *conn);
+ conn->state = ICMPS_FIRST;
+
+ conn_init_expiration(ctb, &conn->up, icmp_timeouts[conn->state], now);
+
+ return &conn->up;
+}
+
+struct ct_l4_proto ct_proto_icmp4 = {
+ .new_conn = icmp_new_conn,
+ .valid_new = icmp4_valid_new,
+ .conn_update = icmp_conn_update,
+};
+
+struct ct_l4_proto ct_proto_icmp6 = {
+ .new_conn = icmp_new_conn,
+ .valid_new = icmp6_valid_new,
+ .conn_update = icmp_conn_update,
+};
@@ -40,7 +40,14 @@ struct ct_addr {
struct ct_endpoint {
struct ct_addr addr;
- ovs_be16 port;
+ union {
+ ovs_be16 port;
+ struct {
+ ovs_be16 icmp_id;
+ uint8_t icmp_type;
+ uint8_t icmp_code;
+ };
+ };
};
/* Changes to this structure need to be reflected in conn_key_hash() */
@@ -83,6 +90,8 @@ struct ct_l4_proto {
extern struct ct_l4_proto ct_proto_tcp;
extern struct ct_l4_proto ct_proto_other;
+extern struct ct_l4_proto ct_proto_icmp4;
+extern struct ct_l4_proto ct_proto_icmp6;
extern long long ct_timeout_val[];
@@ -78,8 +78,8 @@ static void *clean_thread_main(void *f_);
static struct ct_l4_proto *l4_protos[] = {
[IPPROTO_TCP] = &ct_proto_tcp,
[IPPROTO_UDP] = &ct_proto_other,
- [IPPROTO_ICMP] = &ct_proto_other,
- [IPPROTO_ICMPV6] = &ct_proto_other,
+ [IPPROTO_ICMP] = &ct_proto_icmp4,
+ [IPPROTO_ICMPV6] = &ct_proto_icmp6,
};
long long ct_timeout_val[] = {
@@ -693,6 +693,29 @@ extract_l4_udp(struct conn_key *key, const void *data, size_t size)
static inline bool extract_l4(struct conn_key *key, const void *data,
size_t size, bool *related, const void *l3);
+static uint8_t
+reverse_icmp_type(uint8_t type)
+{
+ switch (type) {
+ case ICMP4_ECHO_REQUEST:
+ return ICMP4_ECHO_REPLY;
+ case ICMP4_ECHO_REPLY:
+ return ICMP4_ECHO_REQUEST;
+
+ case ICMP4_TIMESTAMP:
+ return ICMP4_TIMESTAMPREPLY;
+ case ICMP4_TIMESTAMPREPLY:
+ return ICMP4_TIMESTAMP;
+
+ case ICMP4_INFOREQUEST:
+ return ICMP4_INFOREPLY;
+ case ICMP4_INFOREPLY:
+ return ICMP4_INFOREQUEST;
+ default:
+ OVS_NOT_REACHED();
+ }
+}
+
/* If 'related' is not NULL and the function is processing an ICMP
* error packet, extract the l3 and l4 fields from the nested header
* instead and set *related to true. If 'related' is NULL we're
@@ -715,8 +738,13 @@ extract_l4_icmp(struct conn_key *key, const void *data, size_t size,
case ICMP4_TIMESTAMPREPLY:
case ICMP4_INFOREQUEST:
case ICMP4_INFOREPLY:
+ if (icmp->icmp_code != 0) {
+ return false;
+ }
/* Separate ICMP connection: identified using id */
- key->src.port = key->dst.port = icmp->icmp_fields.echo.id;
+ key->src.icmp_id = key->dst.icmp_id = icmp->icmp_fields.echo.id;
+ key->src.icmp_type = icmp->icmp_type;
+ key->dst.icmp_type = reverse_icmp_type(icmp->icmp_type);
break;
case ICMP4_DST_UNREACH:
case ICMP4_TIME_EXCEEDED:
@@ -766,6 +794,19 @@ extract_l4_icmp(struct conn_key *key, const void *data, size_t size,
return true;
}
+static uint8_t
+reverse_icmp6_type(uint8_t type)
+{
+ switch (type) {
+ case ICMP6_ECHO_REQUEST:
+ return ICMP6_ECHO_REPLY;
+ case ICMP6_ECHO_REPLY:
+ return ICMP6_ECHO_REQUEST;
+ default:
+ OVS_NOT_REACHED();
+ }
+}
+
/* If 'related' is not NULL and the function is processing an ICMP
* error packet, extract the l3 and l4 fields from the nested header
* instead and set *related to true. If 'related' is NULL we're
@@ -786,8 +827,13 @@ extract_l4_icmp6(struct conn_key *key, const void *data, size_t size,
switch (icmp6->icmp6_type) {
case ICMP6_ECHO_REQUEST:
case ICMP6_ECHO_REPLY:
+ if (icmp6->icmp6_code != 0) {
+ return false;
+ }
/* Separate ICMP connection: identified using id */
- key->src.port = key->dst.port = *(ovs_be16 *) (icmp6 + 1);
+ key->src.icmp_id = key->dst.icmp_id = *(ovs_be16 *) (icmp6 + 1);
+ key->src.icmp_type = icmp6->icmp6_type;
+ key->dst.icmp_type = reverse_icmp6_type(icmp6->icmp6_type);
break;
case ICMP6_DST_UNREACH:
case ICMP6_PACKET_TOO_BIG:
@@ -980,6 +1026,7 @@ static void
conn_key_reverse(struct conn_key *key)
{
struct ct_endpoint tmp;
+
tmp = key->src;
key->src = key->dst;
key->dst = tmp;
@@ -1079,10 +1126,9 @@ conn_key_to_tuple(const struct conn_key *key, struct ct_dpif_tuple *tuple)
key->dl_type);
if (key->nw_proto == IPPROTO_ICMP || key->nw_proto == IPPROTO_ICMPV6) {
- tuple->icmp_id = key->src.port;
- /* ICMP type and code are not tracked */
- tuple->icmp_type = 0;
- tuple->icmp_code = 0;
+ tuple->icmp_id = key->src.icmp_id;
+ tuple->icmp_type = key->src.icmp_type;
+ tuple->icmp_code = key->src.icmp_code;
} else {
tuple->src_port = key->src.port;
tuple->dst_port = key->dst.port;
@@ -130,6 +130,8 @@ static inline void ct_lock_destroy(struct ct_lock *lock)
CT_TIMEOUT(OTHER_FIRST, 60 * 1000) \
CT_TIMEOUT(OTHER_MULTIPLE, 60 * 1000) \
CT_TIMEOUT(OTHER_BIDIR, 30 * 1000) \
+ CT_TIMEOUT(ICMP_FIRST, 60 * 1000) \
+ CT_TIMEOUT(ICMP_REPLY, 30 * 1000)
/* The smallest of the above values: it is used as an upper bound for the
* interval between two rounds of cleanup of expired entries */
@@ -136,14 +136,14 @@ ct_dpif_format_entry(const struct ct_dpif_entry *entry, struct ds *ds,
ct_dpif_format_ipproto(ds, entry->tuple_orig.ip_proto);
ds_put_cstr(ds, ",orig=(");
- ct_dpif_format_tuple(ds, &entry->tuple_orig, verbose);
+ ct_dpif_format_tuple(ds, &entry->tuple_orig);
if (print_stats) {
ct_dpif_format_counters(ds, &entry->counters_orig);
}
ds_put_cstr(ds, ")");
ds_put_cstr(ds, ",reply=(");
- ct_dpif_format_tuple(ds, &entry->tuple_reply, verbose);
+ ct_dpif_format_tuple(ds, &entry->tuple_reply);
if (print_stats) {
ct_dpif_format_counters(ds, &entry->counters_reply);
}
@@ -179,7 +179,7 @@ ct_dpif_format_entry(const struct ct_dpif_entry *entry, struct ds *ds,
ct_dpif_format_helper(ds, ",helper=", &entry->helper);
if (verbose && entry->tuple_master.l3_type != 0) {
ds_put_cstr(ds, ",master=(");
- ct_dpif_format_tuple(ds, &entry->tuple_master, verbose);
+ ct_dpif_format_tuple(ds, &entry->tuple_master);
ds_put_cstr(ds, ")");
}
}
@@ -227,17 +227,10 @@ ct_dpif_format_timestamp(struct ds *ds,
}
static void
-ct_dpif_format_tuple_icmp(struct ds *ds, const struct ct_dpif_tuple *tuple,
- bool verbose)
+ct_dpif_format_tuple_icmp(struct ds *ds, const struct ct_dpif_tuple *tuple)
{
- if (verbose) {
- ds_put_format(ds, ",id=%u,type=%u,code=%u",
- ntohs(tuple->icmp_id),
- tuple->icmp_type,
- tuple->icmp_code);
- } else {
- ds_put_format(ds, ",id=%u", ntohs(tuple->icmp_id));
- }
+ ds_put_format(ds, ",id=%u,type=%u,code=%u", ntohs(tuple->icmp_id),
+ tuple->icmp_type, tuple->icmp_code);
}
static void
@@ -248,8 +241,7 @@ ct_dpif_format_tuple_tp(struct ds *ds, const struct ct_dpif_tuple *tuple)
}
void
-ct_dpif_format_tuple(struct ds *ds, const struct ct_dpif_tuple *tuple,
- bool verbose)
+ct_dpif_format_tuple(struct ds *ds, const struct ct_dpif_tuple *tuple)
{
if (tuple->l3_type == AF_INET) {
ds_put_format(ds, "src="IP_FMT",dst="IP_FMT,
@@ -268,7 +260,7 @@ ct_dpif_format_tuple(struct ds *ds, const struct ct_dpif_tuple *tuple,
if (tuple->ip_proto == IPPROTO_ICMP
|| tuple->ip_proto == IPPROTO_ICMPV6) {
- ct_dpif_format_tuple_icmp(ds, tuple, verbose);
+ ct_dpif_format_tuple_icmp(ds, tuple);
} else {
ct_dpif_format_tuple_tp(ds, tuple);
}
@@ -183,7 +183,6 @@ int ct_dpif_flush(struct dpif *, const uint16_t *zone);
void ct_dpif_entry_uninit(struct ct_dpif_entry *);
void ct_dpif_format_entry(const struct ct_dpif_entry *, struct ds *,
bool verbose, bool print_stats);
-void ct_dpif_format_tuple(struct ds *, const struct ct_dpif_tuple *,
- bool verbose);
+void ct_dpif_format_tuple(struct ds *, const struct ct_dpif_tuple *);
#endif /* CT_DPIF_H */
@@ -497,7 +497,7 @@ nl_ct_parse_tuple(struct nlattr *nla, struct ct_dpif_tuple *tuple,
struct ds ds;
ds_init(&ds);
- ct_dpif_format_tuple(&ds, tuple, true);
+ ct_dpif_format_tuple(&ds, tuple);
VLOG_ERR_RL(&rl, "Failed to parse tuple: %s", ds_cstr(&ds));
ds_destroy(&ds);
@@ -634,7 +634,7 @@ NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0],
])
AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2)], [0], [dnl
-icmp,orig=(src=10.1.1.1,dst=10.1.1.2,id=<cleared>),reply=(src=10.1.1.2,dst=10.1.1.1,id=<cleared>)
+icmp,orig=(src=10.1.1.1,dst=10.1.1.2,id=<cleared>,type=8,code=0),reply=(src=10.1.1.2,dst=10.1.1.1,id=<cleared>,type=0,code=0)
])
AT_CHECK([ovs-appctl dpctl/flush-conntrack])
@@ -686,7 +686,7 @@ NS_CHECK_EXEC([at_ns0], [ping6 -q -c 3 -i 0.3 -w 2 fc00::2 | FORMAT_PING], [0],
])
AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(fc00::2)], [0], [dnl
-icmpv6,orig=(src=fc00::1,dst=fc00::2,id=<cleared>),reply=(src=fc00::2,dst=fc00::1,id=<cleared>)
+icmpv6,orig=(src=fc00::1,dst=fc00::2,id=<cleared>,type=128,code=0),reply=(src=fc00::2,dst=fc00::1,id=<cleared>,type=129,code=0)
])
OVS_TRAFFIC_VSWITCHD_STOP
@@ -1027,8 +1027,8 @@ dnl (again) HTTP requests from root namespace to p0 should work fine.
AT_CHECK([wget 10.1.1.2 -t 3 -T 1 --retry-connrefused -v -o wget0.log])
AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2) | grep "zone"], [0], [dnl
-icmp,orig=(src=10.1.1.1,dst=10.1.1.2,id=<cleared>),reply=(src=10.1.1.2,dst=10.1.1.1,id=<cleared>),zone=1
-icmp,orig=(src=10.1.1.1,dst=10.1.1.2,id=<cleared>),reply=(src=10.1.1.2,dst=10.1.1.1,id=<cleared>),zone=2
+icmp,orig=(src=10.1.1.1,dst=10.1.1.2,id=<cleared>,type=8,code=0),reply=(src=10.1.1.2,dst=10.1.1.1,id=<cleared>,type=0,code=0),zone=1
+icmp,orig=(src=10.1.1.1,dst=10.1.1.2,id=<cleared>,type=8,code=0),reply=(src=10.1.1.2,dst=10.1.1.1,id=<cleared>,type=0,code=0),zone=2
tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.2,dst=10.1.1.1,sport=<cleared>,dport=<cleared>),zone=1,protoinfo=(state=<cleared>)
tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.2,dst=10.1.1.1,sport=<cleared>,dport=<cleared>),zone=2,protoinfo=(state=<cleared>)
])
@@ -1138,8 +1138,8 @@ dnl (again) HTTP requests from root namespace to p0 should work fine.
AT_CHECK([wget 10.1.1.2 -t 3 -T 1 --retry-connrefused -v -o wget0.log])
AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2) | grep "zone"], [0], [dnl
-icmp,orig=(src=10.1.1.1,dst=10.1.1.2,id=<cleared>),reply=(src=10.1.1.2,dst=10.1.1.1,id=<cleared>),zone=1
-icmp,orig=(src=10.1.1.1,dst=10.1.1.2,id=<cleared>),reply=(src=10.1.1.2,dst=10.1.1.1,id=<cleared>),zone=65534
+icmp,orig=(src=10.1.1.1,dst=10.1.1.2,id=<cleared>,type=8,code=0),reply=(src=10.1.1.2,dst=10.1.1.1,id=<cleared>,type=0,code=0),zone=1
+icmp,orig=(src=10.1.1.1,dst=10.1.1.2,id=<cleared>,type=8,code=0),reply=(src=10.1.1.2,dst=10.1.1.1,id=<cleared>,type=0,code=0),zone=65534
tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.2,dst=10.1.1.1,sport=<cleared>,dport=<cleared>),zone=1,protoinfo=(state=<cleared>)
tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=10.1.1.2,dst=10.1.1.1,sport=<cleared>,dport=<cleared>),zone=65534,protoinfo=(state=<cleared>)
])