Message ID | 20161220202507.112241-2-diproiettod@vmware.com |
---|---|
State | Superseded |
Headers | show |
On 12/20/16, 12:25 PM, "ovs-dev-bounces@openvswitch.org on behalf of Daniele Di Proietto" <ovs-dev-bounces@openvswitch.org on behalf of diproiettod@vmware.com> wrote:
The userspace connection tracker treats Neighbor Discovery packets
as invalid, because they're not checked against any connection.
This in inconsistent with the kernel connection tracker which always
returns 'CS_NEW'.
Therefore, this commit makes the userspace connection tracker conforming
with the kernel. ND packets still do not create or read any state, but
they're treated as NEW.
The kernel opens a security hole by default for ND packets – presumably for
“user friendliness”.
This patch follows that approach for consistency, so it seems like a tradeoff.
To support this, the key extraction functions can now return
KEY_NO_TRACK, meaning that the packet is ok,
It does not seem the code below does much checking for “ok”.
but it should be treated
statelessly.
We also have to remove a test that explicitly checked that neighbor
discovery was treated as invalid.
Reported-by: Sridhar Gaddam <sgaddam@redhat.com>
Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>
---
lib/conntrack.c | 134 ++++++++++++++++++++++++++++++++----------------
tests/ofproto-dpif.at | 32 ------------
tests/system-traffic.at | 35 +++++++++++++
3 files changed, 125 insertions(+), 76 deletions(-)
diff --git a/lib/conntrack.c b/lib/conntrack.c
index d459321..30f1ed0 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -52,9 +52,17 @@ struct conn_lookup_ctx {
bool related;
};
-static bool conn_key_extract(struct conntrack *, struct dp_packet *,
- ovs_be16 dl_type, struct conn_lookup_ctx *,
- uint16_t zone);
+enum key_status {
+ KEY_INVALID, /* Could not extract the connection key: invalid. */
+ KEY_OK, /* Connection key is ok. */
+ KEY_NO_TRACK, /* Connection key is ok, but it should not be tracked. */
+};
+
+static enum key_status conn_key_extract(struct conntrack *,
+ struct dp_packet *,
+ ovs_be16 dl_type,
+ struct conn_lookup_ctx *,
+ uint16_t zone);
static uint32_t conn_key_hash(const struct conn_key *, uint32_t basis);
static void conn_key_reverse(struct conn_key *);
static void conn_key_lookup(struct conntrack_bucket *ctb,
@@ -157,6 +165,20 @@ static unsigned hash_to_bucket(uint32_t hash)
return (hash >> (32 - CONNTRACK_BUCKETS_SHIFT)) % CONNTRACK_BUCKETS;
}
+static uint16_t
+key_status_to_cs(enum key_status s)
+{
+ switch (s) {
+ case KEY_INVALID:
+ return CS_INVALID;
+ case KEY_OK:
+ case KEY_NO_TRACK:
+ return CS_NEW;
+ default:
+ OVS_NOT_REACHED();
+ }
+}
+
static void
write_ct_md(struct dp_packet *pkt, uint16_t state, uint16_t zone,
uint32_t mark, ovs_u128 label)
@@ -301,10 +323,13 @@ conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
memset(bucket_list, INT8_C(-1), sizeof bucket_list);
for (i = 0; i < cnt; i++) {
+ enum key_status extract_res;
unsigned bucket;
- if (!conn_key_extract(ct, pkts[i], dl_type, &ctxs[i], zone)) {
- write_ct_md(pkts[i], CS_INVALID, zone, 0, OVS_U128_ZERO);
+ extract_res = conn_key_extract(ct, pkts[i], dl_type, &ctxs[i], zone);
+ if (extract_res != KEY_OK) {
+ write_ct_md(pkts[i], key_status_to_cs(extract_res), zone, 0,
+ OVS_U128_ZERO);
continue;
}
@@ -691,8 +716,11 @@ extract_l4_udp(struct conn_key *key, const void *data, size_t size)
return key->src.port && key->dst.port;
}
-static inline bool extract_l4(struct conn_key *key, const void *data,
- size_t size, bool *related, const void *l3);
+static inline enum key_status extract_l4(struct conn_key *key,
+ const void *data,
+ size_t size,
+ bool *related,
+ const void *l3);
static uint8_t
reverse_icmp_type(uint8_t type)
@@ -722,14 +750,14 @@ reverse_icmp_type(uint8_t type)
* instead and set *related to true. If 'related' is NULL we're
* already processing a nested header and no such recursion is
* possible */
-static inline int
+static inline enum key_status
extract_l4_icmp(struct conn_key *key, const void *data, size_t size,
bool *related)
{
const struct icmp_header *icmp = data;
if (OVS_UNLIKELY(size < ICMP_HEADER_LEN)) {
- return false;
+ return KEY_INVALID;
}
switch (icmp->icmp_type) {
@@ -740,13 +768,15 @@ extract_l4_icmp(struct conn_key *key, const void *data, size_t size,
case ICMP4_INFOREQUEST:
case ICMP4_INFOREPLY:
if (icmp->icmp_code != 0) {
- return false;
+ return KEY_INVALID;
}
/* Separate ICMP connection: identified using id */
key->src.icmp_id = key->dst.icmp_id = icmp->icmp_fields.echo.id;
key->src.icmp_type = icmp->icmp_type;
key->dst.icmp_type = reverse_icmp_type(icmp->icmp_type);
- break;
+
+ return KEY_OK;
+
case ICMP4_DST_UNREACH:
case ICMP4_TIME_EXCEEDED:
case ICMP4_PARAM_PROB:
@@ -758,41 +788,40 @@ extract_l4_icmp(struct conn_key *key, const void *data, size_t size,
const char *l3 = (const char *) (icmp + 1);
const char *tail = (const char *) data + size;
const char *l4;
+ enum key_status res;
bool ok;
if (!related) {
- return false;
+ return KEY_INVALID;
}
memset(&inner_key, 0, sizeof inner_key);
inner_key.dl_type = htons(ETH_TYPE_IP);
ok = extract_l3_ipv4(&inner_key, l3, tail - l3, &l4, false);
if (!ok) {
- return false;
+ return KEY_INVALID;
}
/* pf doesn't do this, but it seems a good idea */
if (inner_key.src.addr.ipv4_aligned != key->dst.addr.ipv4_aligned
|| inner_key.dst.addr.ipv4_aligned != key->src.addr.ipv4_aligned) {
- return false;
+ return KEY_INVALID;
}
key->src = inner_key.src;
key->dst = inner_key.dst;
key->nw_proto = inner_key.nw_proto;
- ok = extract_l4(key, l4, tail - l4, NULL, l3);
- if (ok) {
+ res = extract_l4(key, l4, tail - l4, NULL, l3);
+ if (res != KEY_INVALID) {
conn_key_reverse(key);
*related = true;
}
- return ok;
+ return res;
}
default:
- return false;
+ return KEY_INVALID;
}
-
- return true;
}
static uint8_t
@@ -813,7 +842,7 @@ reverse_icmp6_type(uint8_t type)
* instead and set *related to true. If 'related' is NULL we're
* already processing a nested header and no such recursion is
* possible */
-static inline bool
+static inline enum key_status
extract_l4_icmp6(struct conn_key *key, const void *data, size_t size,
bool *related)
{
@@ -822,20 +851,22 @@ extract_l4_icmp6(struct conn_key *key, const void *data, size_t size,
/* All the messages that we support need at least 4 bytes after
* the header */
if (size < sizeof *icmp6 + 4) {
- return false;
+ return KEY_INVALID;
}
switch (icmp6->icmp6_type) {
case ICMP6_ECHO_REQUEST:
case ICMP6_ECHO_REPLY:
if (icmp6->icmp6_code != 0) {
- return false;
+ return KEY_INVALID;
}
/* Separate ICMP connection: identified using id */
key->src.icmp_id = key->dst.icmp_id = *(ovs_be16 *) (icmp6 + 1);
key->src.icmp_type = icmp6->icmp6_type;
key->dst.icmp_type = reverse_icmp6_type(icmp6->icmp6_type);
- break;
+
+ return KEY_OK;
+
case ICMP6_DST_UNREACH:
case ICMP6_PACKET_TOO_BIG:
case ICMP6_TIME_EXCEEDED:
@@ -846,17 +877,18 @@ extract_l4_icmp6(struct conn_key *key, const void *data, size_t size,
const char *l3 = (const char *) icmp6 + 8;
const char *tail = (const char *) data + size;
const char *l4 = NULL;
+ enum key_status res;
bool ok;
if (!related) {
- return false;
+ return KEY_INVALID;
}
memset(&inner_key, 0, sizeof inner_key);
inner_key.dl_type = htons(ETH_TYPE_IPV6);
ok = extract_l3_ipv6(&inner_key, l3, tail - l3, &l4);
if (!ok) {
- return false;
+ return KEY_INVALID;
}
/* pf doesn't do this, but it seems a good idea */
@@ -864,25 +896,32 @@ extract_l4_icmp6(struct conn_key *key, const void *data, size_t size,
&key->dst.addr.ipv6_aligned)
|| !ipv6_addr_equals(&inner_key.dst.addr.ipv6_aligned,
&key->src.addr.ipv6_aligned)) {
- return false;
+ return KEY_INVALID;
}
key->src = inner_key.src;
key->dst = inner_key.dst;
key->nw_proto = inner_key.nw_proto;
- ok = extract_l4(key, l4, tail - l4, NULL, l3);
- if (ok) {
+ res = extract_l4(key, l4, tail - l4, NULL, l3);
+ if (res != KEY_INVALID) {
conn_key_reverse(key);
*related = true;
}
- return ok;
+ return res;
}
+ case ND_NEIGHBOR_SOLICIT:
+ case ND_NEIGHBOR_ADVERT:
+ case ND_ROUTER_SOLICIT:
+ case ND_ROUTER_ADVERT:
+ if (icmp6->icmp6_code != 0) {
+ return KEY_INVALID;
+ }
+ /* The packet is valid, but it's not going to be tracked. */
Do we want to say “The packet is valid” ? based on a verification being half-baked/partial.
or simply we are not going to track something that looks like a ND packet ?
Maybe, just check the type and say KEY_NO_TRACK, meaning “We certify nothing.”.
+ return KEY_NO_TRACK;
default:
- return false;
+ return KEY_INVALID;
}
-
- return true;
}
/* Extract l4 fields into 'key', which must already contain valid l3
@@ -894,30 +933,34 @@ extract_l4_icmp6(struct conn_key *key, const void *data, size_t size,
*
* If 'related' is NULL, it means that we're already parsing a header nested
* in an ICMP error. In this case, we skip checksum and length validation. */
-static inline bool
+static inline enum key_status
extract_l4(struct conn_key *key, const void *data, size_t size, bool *related,
const void *l3)
{
if (key->nw_proto == IPPROTO_TCP) {
return (!related || check_l4_tcp(key, data, size, l3))
- && extract_l4_tcp(key, data, size);
+ ? extract_l4_tcp(key, data, size)
+ : KEY_INVALID;
} else if (key->nw_proto == IPPROTO_UDP) {
return (!related || check_l4_udp(key, data, size, l3))
- && extract_l4_udp(key, data, size);
+ ? extract_l4_udp(key, data, size)
+ : KEY_INVALID;
} else if (key->dl_type == htons(ETH_TYPE_IP)
&& key->nw_proto == IPPROTO_ICMP) {
return (!related || check_l4_icmp(data, size))
- && extract_l4_icmp(key, data, size, related);
+ ? extract_l4_icmp(key, data, size, related)
+ : KEY_INVALID;
} else if (key->dl_type == htons(ETH_TYPE_IPV6)
&& key->nw_proto == IPPROTO_ICMPV6) {
return (!related || check_l4_icmp6(key, data, size, l3))
- && extract_l4_icmp6(key, data, size, related);
+ ? extract_l4_icmp6(key, data, size, related)
+ : KEY_INVALID;
} else {
- return false;
+ return KEY_INVALID;
}
}
-static bool
+static enum key_status
conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type,
struct conn_lookup_ctx *ctx, uint16_t zone)
{
@@ -930,7 +973,7 @@ conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type,
memset(ctx, 0, sizeof *ctx);
if (!l2 || !l3 || !l4) {
- return false;
+ return KEY_INVALID;
}
ctx->key.zone = zone;
@@ -977,13 +1020,16 @@ conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type,
}
if (ok) {
- if (extract_l4(&ctx->key, l4, tail - l4, &ctx->related, l3)) {
+ enum key_status res;
+
+ res = extract_l4(&ctx->key, l4, tail - l4, &ctx->related, l3);
+ if (res == KEY_OK) {
ctx->hash = conn_key_hash(&ctx->key, ct->hash_basis);
- return true;
}
+ return res;
}
- return false;
+ return KEY_INVALID;
}
?
/* Symmetric */
diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
index 07a1675..a63f031 100644
--- a/tests/ofproto-dpif.at
+++ b/tests/ofproto-dpif.at
@@ -8608,38 +8608,6 @@ OFPT_ECHO_REQUEST (xid=0x0): 0 bytes of payload
OVS_VSWITCHD_STOP
AT_CLEANUP
-AT_SETUP([ofproto-dpif - conntrack - untrackable traffic])
-OVS_VSWITCHD_START
-
-add_of_ports br0 1 2
-
-AT_CHECK([ovs-appctl vlog/set dpif_netdev:dbg vconn:info ofproto_dpif:info])
-
-AT_DATA([flows.txt], [dnl
-ipv6,ct_state=-trk,action=ct(table=0,zone=0)
-ct_state=+trk,action=controller
-])
-
-AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
-
-AT_CAPTURE_FILE([ofctl_monitor.log])
-AT_CHECK([ovs-ofctl monitor br0 65534 invalid_ttl -P nxt_packet_in --detach --no-chdir --pidfile 2> ofctl_monitor.log])
-
-AT_CHECK([ovs-appctl time/stop])
-
-AT_CHECK([ovs-appctl netdev-dummy/receive p2 '0060970769ea0000860580da86dd6000000000203afffe80000000000000020086fffe0580dafe80000000000000026097fffe0769ea870068bd00000000fe80000000000000026097fffe0769ea01010000860580da'])
-
-OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 1])
-OVS_WAIT_UNTIL([ovs-appctl -t ovs-ofctl exit])
-
-AT_CHECK([cat ofctl_monitor.log], [0], [dnl
-NXT_PACKET_IN (xid=0x0): cookie=0x0 total_len=86 ct_state=inv|trk,in_port=2 (via action) data_len=86 (unbuffered)
-icmp6,vlan_tci=0x0000,dl_src=00:00:86:05:80:da,dl_dst=00:60:97:07:69:ea,ipv6_src=fe80::200:86ff:fe05:80da,ipv6_dst=fe80::260:97ff:fe07:69ea,ipv6_label=0x00000,nw_tos=0,nw_ecn=0,nw_ttl=255,icmp_type=135,icmp_code=0,nd_target=fe80::260:97ff:fe07:69ea,nd_sll=00:00:86:05:80:da,nd_tll=00:00:00:00:00:00 icmp6_csum:68bd
-])
-
-OVS_VSWITCHD_STOP
-AT_CLEANUP
-
AT_SETUP([ofproto-dpif - conntrack - zones])
OVS_VSWITCHD_START
diff --git a/tests/system-traffic.at b/tests/system-traffic.at
index 2a7575c..fba5f1f 100644
--- a/tests/system-traffic.at
+++ b/tests/system-traffic.at
@@ -785,6 +785,41 @@ icmpv6,orig=(src=fc00::1,dst=fc00::2,id=<cleared>,type=128,code=0),reply=(src=fc
OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
+AT_SETUP([conntrack - neighbor discovery])
+CHECK_CONNTRACK()
+OVS_TRAFFIC_VSWITCHD_START()
+
+dnl Allow any traffic from ns0->ns1. Only allow nd, return traffic from ns1->ns0.
+AT_DATA([flows.txt], [dnl
+table=0,priority=1,action=drop
+table=0,priority=100,ip6,action=ct(table=1)
+dnl
+table=1,priority=100,ip6,action=controller
+])
+
+AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])
+
+AT_CAPTURE_FILE([ofctl_monitor.log])
+AT_CHECK([ovs-ofctl monitor br0 65534 invalid_ttl --detach --no-chdir --pidfile 2> ofctl_monitor.log])
+
+dnl e6:6f:38:26:34:c4 > 33:33:ff:00:00:02, ethertype IPv6 (0x86dd), length 86: (hlim 255, next-header ICMPv6 (58) payload length: 32) fc00::1 > ff02::1:ff00:2: [icmp6 sum ok] ICMP6, neighbor solicitation, length 32, who has fc00::2
+dnl source link-address option (1), length 8 (1): e6:6f:38:26:34:c4
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 LOCAL 'resubmit(,0)' '3333ff000002e66f382634c486dd6000000000203afffc000000000000000000000000000001ff0200000000000000000001ff00000287002e3e00000000fc0000000000000000000000000000020101e66f382634c4'])
+
+dnl 56:5b:b3:68:8f:09 > e6:6f:38:26:34:c4, ethertype IPv6 (0x86dd), length 86: (hlim 255, next-header ICMPv6 (58) payload length: 32) fc00::2 > fc00::1: [icmp6 sum ok] ICMP6, neighbor advertisement, length 32, tgt is fc00::2, Flags [solicited, override]
+dnl destination link-address option (2), length 8 (1): 56:5b:b3:68:8f:09
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 LOCAL 'resubmit(,0)' 'e66f382634c4565bb3688f0986dd6000000000203afffc000000000000000000000000000002fc000000000000000000000000000001880088ce60000000fc0000000000000000000000000000020201565bb3688f09'])
+
+AT_CHECK([cat ofctl_monitor.log], [0], [dnl
+NXT_PACKET_IN2 (xid=0x0): table_id=1 cookie=0x0 total_len=86 ct_state=new|trk,in_port=LOCAL (via action) data_len=86 (unbuffered)
+icmp6,vlan_tci=0x0000,dl_src=e6:6f:38:26:34:c4,dl_dst=33:33:ff:00:00:02,ipv6_src=fc00::1,ipv6_dst=ff02::1:ff00:2,ipv6_label=0x00000,nw_tos=0,nw_ecn=0,nw_ttl=255,icmp_type=135,icmp_code=0,nd_target=fc00::2,nd_sll=e6:6f:38:26:34:c4,nd_tll=00:00:00:00:00:00 icmp6_csum:2e3e
+NXT_PACKET_IN2 (xid=0x0): table_id=1 cookie=0x0 total_len=86 ct_state=new|trk,in_port=LOCAL (via action) data_len=86 (unbuffered)
+icmp6,vlan_tci=0x0000,dl_src=56:5b:b3:68:8f:09,dl_dst=e6:6f:38:26:34:c4,ipv6_src=fc00::2,ipv6_dst=fc00::1,ipv6_label=0x00000,nw_tos=0,nw_ecn=0,nw_ttl=255,icmp_type=136,icmp_code=0,nd_target=fc00::2,nd_sll=00:00:00:00:00:00,nd_tll=56:5b:b3:68:8f:09 icmp6_csum:88ce
+])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
AT_SETUP([conntrack - commit, recirc])
CHECK_CONNTRACK()
OVS_TRAFFIC_VSWITCHD_START()
--
2.10.2
_______________________________________________
dev mailing list
dev@openvswitch.org
https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DgICAg&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-uZnsw&m=jT8R998Qlcz08PzMWXHlccbmTb60CDwaOsnwVQxg7EU&s=kyG0bhmEnHFmHbIAyoVIaaoWFw9QsRnaYwOG15MchPI&e=
On 20 December 2016 at 14:46, Darrell Ball <dball@vmware.com> wrote: > > > On 12/20/16, 12:25 PM, "ovs-dev-bounces@openvswitch.org on behalf of Daniele Di Proietto" <ovs-dev-bounces@openvswitch.org on behalf of diproiettod@vmware.com> wrote: > @@ -864,25 +896,32 @@ extract_l4_icmp6(struct conn_key *key, const void *data, size_t size, > &key->dst.addr.ipv6_aligned) > || !ipv6_addr_equals(&inner_key.dst.addr.ipv6_aligned, > &key->src.addr.ipv6_aligned)) { > - return false; > + return KEY_INVALID; > } > > key->src = inner_key.src; > key->dst = inner_key.dst; > key->nw_proto = inner_key.nw_proto; > > - ok = extract_l4(key, l4, tail - l4, NULL, l3); > - if (ok) { > + res = extract_l4(key, l4, tail - l4, NULL, l3); > + if (res != KEY_INVALID) { > conn_key_reverse(key); > *related = true; > } > - return ok; > + return res; > } > + case ND_NEIGHBOR_SOLICIT: > + case ND_NEIGHBOR_ADVERT: > + case ND_ROUTER_SOLICIT: > + case ND_ROUTER_ADVERT: > + if (icmp6->icmp6_code != 0) { > + return KEY_INVALID; > + } > + /* The packet is valid, but it's not going to be tracked. */ > > Do we want to say “The packet is valid” ? based on a verification being half-baked/partial. > or simply we are not going to track something that looks like a ND packet ? > > Maybe, just check the type and say KEY_NO_TRACK, meaning “We certify nothing.”. I agree at this layer, I think it would be a bit clearer to return something like KEY_NO_TRACK. When you get closer to the OpenFlow layer though, I think it's reasonable to end up converting that into something consistent with the Linux kernel implementation - that is, mark it as NEW and let the OpenFlow rule writer decide what to do with it. It's a subtle difference in semantics, but what we have in the OpenFlow API is an "INVALID" bit, which says that we've detected something that's pretty clearly invalid. We don't have a corresponding "valid" bit that certifies everything is guaranteed to be rosy. If someone is implementing a firewall with this feature, then they should consider whether this CT action is providing enough guarantees around the traffic identification for filtering, and therefore whether they should perform further validation on the packets. IMO that's a bit out of scope of CT action at this stage.
On 20/12/2016 14:46, "Darrell Ball" <dball@vmware.com> wrote: > > >On 12/20/16, 12:25 PM, "ovs-dev-bounces@openvswitch.org on behalf of Daniele Di Proietto" <ovs-dev-bounces@openvswitch.org on behalf of diproiettod@vmware.com> wrote: > > The userspace connection tracker treats Neighbor Discovery packets > as invalid, because they're not checked against any connection. > > This in inconsistent with the kernel connection tracker which always > returns 'CS_NEW'. > > Therefore, this commit makes the userspace connection tracker conforming > with the kernel. ND packets still do not create or read any state, but > they're treated as NEW. > >The kernel opens a security hole by default for ND packets – presumably for >“user friendliness”. >This patch follows that approach for consistency, so it seems like a tradeoff. > > To support this, the key extraction functions can now return > KEY_NO_TRACK, meaning that the packet is ok, > >It does not seem the code below does much checking for “ok”. I don't think sending ND to the connection tracker, both in kernel and userspace makes much sense. The connection tracker should only handle stateful decisions, everything else can be handled much better with stateless OpenFlow. Therefore, from my perspective, it doesn't make a big difference if we return NEW or INVALID, because I think that the pipeline shouldn't use ct action on neighbor discovery. I choose NEW here, as you point out, to be consistent with the kernel. I don't think the kernel connection tracker does any checking on ND (please correct me if I'm wrong, I haven't checked the code because it's GPL), other than checksum validation, which we do as well in userspace. Alternatively, what do you think we should check that could be useful? > but it should be treated > statelessly. > > We also have to remove a test that explicitly checked that neighbor > discovery was treated as invalid. > > Reported-by: Sridhar Gaddam <sgaddam@redhat.com> > Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com> > --- > lib/conntrack.c | 134 ++++++++++++++++++++++++++++++++---------------- > tests/ofproto-dpif.at | 32 ------------ > tests/system-traffic.at | 35 +++++++++++++ > 3 files changed, 125 insertions(+), 76 deletions(-) > > diff --git a/lib/conntrack.c b/lib/conntrack.c > index d459321..30f1ed0 100644 > --- a/lib/conntrack.c > +++ b/lib/conntrack.c > @@ -52,9 +52,17 @@ struct conn_lookup_ctx { > bool related; > }; > > -static bool conn_key_extract(struct conntrack *, struct dp_packet *, > - ovs_be16 dl_type, struct conn_lookup_ctx *, > - uint16_t zone); > +enum key_status { > + KEY_INVALID, /* Could not extract the connection key: invalid. */ > + KEY_OK, /* Connection key is ok. */ > + KEY_NO_TRACK, /* Connection key is ok, but it should not be tracked. */ > +}; > + > +static enum key_status conn_key_extract(struct conntrack *, > + struct dp_packet *, > + ovs_be16 dl_type, > + struct conn_lookup_ctx *, > + uint16_t zone); > static uint32_t conn_key_hash(const struct conn_key *, uint32_t basis); > static void conn_key_reverse(struct conn_key *); > static void conn_key_lookup(struct conntrack_bucket *ctb, > @@ -157,6 +165,20 @@ static unsigned hash_to_bucket(uint32_t hash) > return (hash >> (32 - CONNTRACK_BUCKETS_SHIFT)) % CONNTRACK_BUCKETS; > } > > +static uint16_t > +key_status_to_cs(enum key_status s) > +{ > + switch (s) { > + case KEY_INVALID: > + return CS_INVALID; > + case KEY_OK: > + case KEY_NO_TRACK: > + return CS_NEW; > + default: > + OVS_NOT_REACHED(); > + } > +} > + > static void > write_ct_md(struct dp_packet *pkt, uint16_t state, uint16_t zone, > uint32_t mark, ovs_u128 label) > @@ -301,10 +323,13 @@ conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch, > > memset(bucket_list, INT8_C(-1), sizeof bucket_list); > for (i = 0; i < cnt; i++) { > + enum key_status extract_res; > unsigned bucket; > > - if (!conn_key_extract(ct, pkts[i], dl_type, &ctxs[i], zone)) { > - write_ct_md(pkts[i], CS_INVALID, zone, 0, OVS_U128_ZERO); > + extract_res = conn_key_extract(ct, pkts[i], dl_type, &ctxs[i], zone); > + if (extract_res != KEY_OK) { > + write_ct_md(pkts[i], key_status_to_cs(extract_res), zone, 0, > + OVS_U128_ZERO); > continue; > } > > @@ -691,8 +716,11 @@ extract_l4_udp(struct conn_key *key, const void *data, size_t size) > return key->src.port && key->dst.port; > } > > -static inline bool extract_l4(struct conn_key *key, const void *data, > - size_t size, bool *related, const void *l3); > +static inline enum key_status extract_l4(struct conn_key *key, > + const void *data, > + size_t size, > + bool *related, > + const void *l3); > > static uint8_t > reverse_icmp_type(uint8_t type) > @@ -722,14 +750,14 @@ reverse_icmp_type(uint8_t type) > * instead and set *related to true. If 'related' is NULL we're > * already processing a nested header and no such recursion is > * possible */ > -static inline int > +static inline enum key_status > extract_l4_icmp(struct conn_key *key, const void *data, size_t size, > bool *related) > { > const struct icmp_header *icmp = data; > > if (OVS_UNLIKELY(size < ICMP_HEADER_LEN)) { > - return false; > + return KEY_INVALID; > } > > switch (icmp->icmp_type) { > @@ -740,13 +768,15 @@ extract_l4_icmp(struct conn_key *key, const void *data, size_t size, > case ICMP4_INFOREQUEST: > case ICMP4_INFOREPLY: > if (icmp->icmp_code != 0) { > - return false; > + return KEY_INVALID; > } > /* Separate ICMP connection: identified using id */ > key->src.icmp_id = key->dst.icmp_id = icmp->icmp_fields.echo.id; > key->src.icmp_type = icmp->icmp_type; > key->dst.icmp_type = reverse_icmp_type(icmp->icmp_type); > - break; > + > + return KEY_OK; > + > case ICMP4_DST_UNREACH: > case ICMP4_TIME_EXCEEDED: > case ICMP4_PARAM_PROB: > @@ -758,41 +788,40 @@ extract_l4_icmp(struct conn_key *key, const void *data, size_t size, > const char *l3 = (const char *) (icmp + 1); > const char *tail = (const char *) data + size; > const char *l4; > + enum key_status res; > bool ok; > > if (!related) { > - return false; > + return KEY_INVALID; > } > > memset(&inner_key, 0, sizeof inner_key); > inner_key.dl_type = htons(ETH_TYPE_IP); > ok = extract_l3_ipv4(&inner_key, l3, tail - l3, &l4, false); > if (!ok) { > - return false; > + return KEY_INVALID; > } > > /* pf doesn't do this, but it seems a good idea */ > if (inner_key.src.addr.ipv4_aligned != key->dst.addr.ipv4_aligned > || inner_key.dst.addr.ipv4_aligned != key->src.addr.ipv4_aligned) { > - return false; > + return KEY_INVALID; > } > > key->src = inner_key.src; > key->dst = inner_key.dst; > key->nw_proto = inner_key.nw_proto; > > - ok = extract_l4(key, l4, tail - l4, NULL, l3); > - if (ok) { > + res = extract_l4(key, l4, tail - l4, NULL, l3); > + if (res != KEY_INVALID) { > conn_key_reverse(key); > *related = true; > } > - return ok; > + return res; > } > default: > - return false; > + return KEY_INVALID; > } > - > - return true; > } > > static uint8_t > @@ -813,7 +842,7 @@ reverse_icmp6_type(uint8_t type) > * instead and set *related to true. If 'related' is NULL we're > * already processing a nested header and no such recursion is > * possible */ > -static inline bool > +static inline enum key_status > extract_l4_icmp6(struct conn_key *key, const void *data, size_t size, > bool *related) > { > @@ -822,20 +851,22 @@ extract_l4_icmp6(struct conn_key *key, const void *data, size_t size, > /* All the messages that we support need at least 4 bytes after > * the header */ > if (size < sizeof *icmp6 + 4) { > - return false; > + return KEY_INVALID; > } > > switch (icmp6->icmp6_type) { > case ICMP6_ECHO_REQUEST: > case ICMP6_ECHO_REPLY: > if (icmp6->icmp6_code != 0) { > - return false; > + return KEY_INVALID; > } > /* Separate ICMP connection: identified using id */ > key->src.icmp_id = key->dst.icmp_id = *(ovs_be16 *) (icmp6 + 1); > key->src.icmp_type = icmp6->icmp6_type; > key->dst.icmp_type = reverse_icmp6_type(icmp6->icmp6_type); > - break; > + > + return KEY_OK; > + > case ICMP6_DST_UNREACH: > case ICMP6_PACKET_TOO_BIG: > case ICMP6_TIME_EXCEEDED: > @@ -846,17 +877,18 @@ extract_l4_icmp6(struct conn_key *key, const void *data, size_t size, > const char *l3 = (const char *) icmp6 + 8; > const char *tail = (const char *) data + size; > const char *l4 = NULL; > + enum key_status res; > bool ok; > > if (!related) { > - return false; > + return KEY_INVALID; > } > > memset(&inner_key, 0, sizeof inner_key); > inner_key.dl_type = htons(ETH_TYPE_IPV6); > ok = extract_l3_ipv6(&inner_key, l3, tail - l3, &l4); > if (!ok) { > - return false; > + return KEY_INVALID; > } > > /* pf doesn't do this, but it seems a good idea */ > @@ -864,25 +896,32 @@ extract_l4_icmp6(struct conn_key *key, const void *data, size_t size, > &key->dst.addr.ipv6_aligned) > || !ipv6_addr_equals(&inner_key.dst.addr.ipv6_aligned, > &key->src.addr.ipv6_aligned)) { > - return false; > + return KEY_INVALID; > } > > key->src = inner_key.src; > key->dst = inner_key.dst; > key->nw_proto = inner_key.nw_proto; > > - ok = extract_l4(key, l4, tail - l4, NULL, l3); > - if (ok) { > + res = extract_l4(key, l4, tail - l4, NULL, l3); > + if (res != KEY_INVALID) { > conn_key_reverse(key); > *related = true; > } > - return ok; > + return res; > } > + case ND_NEIGHBOR_SOLICIT: > + case ND_NEIGHBOR_ADVERT: > + case ND_ROUTER_SOLICIT: > + case ND_ROUTER_ADVERT: > + if (icmp6->icmp6_code != 0) { > + return KEY_INVALID; > + } > + /* The packet is valid, but it's not going to be tracked. */ > >Do we want to say “The packet is valid” ? based on a verification being half-baked/partial. >or simply we are not going to track something that looks like a ND packet ? > >Maybe, just check the type and say KEY_NO_TRACK, meaning “We certify nothing.”. Ok, I can change the comment, if that's what you mean. > > > + return KEY_NO_TRACK; > default: > - return false; > + return KEY_INVALID; > } > - > - return true; > } > > /* Extract l4 fields into 'key', which must already contain valid l3 > @@ -894,30 +933,34 @@ extract_l4_icmp6(struct conn_key *key, const void *data, size_t size, > * > * If 'related' is NULL, it means that we're already parsing a header nested > * in an ICMP error. In this case, we skip checksum and length validation. */ > -static inline bool > +static inline enum key_status > extract_l4(struct conn_key *key, const void *data, size_t size, bool *related, > const void *l3) > { > if (key->nw_proto == IPPROTO_TCP) { > return (!related || check_l4_tcp(key, data, size, l3)) > - && extract_l4_tcp(key, data, size); > + ? extract_l4_tcp(key, data, size) > + : KEY_INVALID; > } else if (key->nw_proto == IPPROTO_UDP) { > return (!related || check_l4_udp(key, data, size, l3)) > - && extract_l4_udp(key, data, size); > + ? extract_l4_udp(key, data, size) > + : KEY_INVALID; > } else if (key->dl_type == htons(ETH_TYPE_IP) > && key->nw_proto == IPPROTO_ICMP) { > return (!related || check_l4_icmp(data, size)) > - && extract_l4_icmp(key, data, size, related); > + ? extract_l4_icmp(key, data, size, related) > + : KEY_INVALID; > } else if (key->dl_type == htons(ETH_TYPE_IPV6) > && key->nw_proto == IPPROTO_ICMPV6) { > return (!related || check_l4_icmp6(key, data, size, l3)) > - && extract_l4_icmp6(key, data, size, related); > + ? extract_l4_icmp6(key, data, size, related) > + : KEY_INVALID; > } else { > - return false; > + return KEY_INVALID; > } > } > > -static bool > +static enum key_status > conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type, > struct conn_lookup_ctx *ctx, uint16_t zone) > { > @@ -930,7 +973,7 @@ conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type, > memset(ctx, 0, sizeof *ctx); > > if (!l2 || !l3 || !l4) { > - return false; > + return KEY_INVALID; > } > > ctx->key.zone = zone; > @@ -977,13 +1020,16 @@ conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type, > } > > if (ok) { > - if (extract_l4(&ctx->key, l4, tail - l4, &ctx->related, l3)) { > + enum key_status res; > + > + res = extract_l4(&ctx->key, l4, tail - l4, &ctx->related, l3); > + if (res == KEY_OK) { > ctx->hash = conn_key_hash(&ctx->key, ct->hash_basis); > - return true; > } > + return res; > } > > - return false; > + return KEY_INVALID; > } > ? > /* Symmetric */ > diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at > index 07a1675..a63f031 100644 > --- a/tests/ofproto-dpif.at > +++ b/tests/ofproto-dpif.at > @@ -8608,38 +8608,6 @@ OFPT_ECHO_REQUEST (xid=0x0): 0 bytes of payload > OVS_VSWITCHD_STOP > AT_CLEANUP > > -AT_SETUP([ofproto-dpif - conntrack - untrackable traffic]) > -OVS_VSWITCHD_START > - > -add_of_ports br0 1 2 > - > -AT_CHECK([ovs-appctl vlog/set dpif_netdev:dbg vconn:info ofproto_dpif:info]) > - > -AT_DATA([flows.txt], [dnl > -ipv6,ct_state=-trk,action=ct(table=0,zone=0) > -ct_state=+trk,action=controller > -]) > - > -AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) > - > -AT_CAPTURE_FILE([ofctl_monitor.log]) > -AT_CHECK([ovs-ofctl monitor br0 65534 invalid_ttl -P nxt_packet_in --detach --no-chdir --pidfile 2> ofctl_monitor.log]) > - > -AT_CHECK([ovs-appctl time/stop]) > - > -AT_CHECK([ovs-appctl netdev-dummy/receive p2 '0060970769ea0000860580da86dd6000000000203afffe80000000000000020086fffe0580dafe80000000000000026097fffe0769ea870068bd00000000fe80000000000000026097fffe0769ea01010000860580da']) > - > -OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 1]) > -OVS_WAIT_UNTIL([ovs-appctl -t ovs-ofctl exit]) > - > -AT_CHECK([cat ofctl_monitor.log], [0], [dnl > -NXT_PACKET_IN (xid=0x0): cookie=0x0 total_len=86 ct_state=inv|trk,in_port=2 (via action) data_len=86 (unbuffered) > -icmp6,vlan_tci=0x0000,dl_src=00:00:86:05:80:da,dl_dst=00:60:97:07:69:ea,ipv6_src=fe80::200:86ff:fe05:80da,ipv6_dst=fe80::260:97ff:fe07:69ea,ipv6_label=0x00000,nw_tos=0,nw_ecn=0,nw_ttl=255,icmp_type=135,icmp_code=0,nd_target=fe80::260:97ff:fe07:69ea,nd_sll=00:00:86:05:80:da,nd_tll=00:00:00:00:00:00 icmp6_csum:68bd > -]) > - > -OVS_VSWITCHD_STOP > -AT_CLEANUP > - > AT_SETUP([ofproto-dpif - conntrack - zones]) > OVS_VSWITCHD_START > > diff --git a/tests/system-traffic.at b/tests/system-traffic.at > index 2a7575c..fba5f1f 100644 > --- a/tests/system-traffic.at > +++ b/tests/system-traffic.at > @@ -785,6 +785,41 @@ icmpv6,orig=(src=fc00::1,dst=fc00::2,id=<cleared>,type=128,code=0),reply=(src=fc > OVS_TRAFFIC_VSWITCHD_STOP > AT_CLEANUP > > +AT_SETUP([conntrack - neighbor discovery]) > +CHECK_CONNTRACK() > +OVS_TRAFFIC_VSWITCHD_START() > + > +dnl Allow any traffic from ns0->ns1. Only allow nd, return traffic from ns1->ns0. > +AT_DATA([flows.txt], [dnl > +table=0,priority=1,action=drop > +table=0,priority=100,ip6,action=ct(table=1) > +dnl > +table=1,priority=100,ip6,action=controller > +]) > + > +AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt]) > + > +AT_CAPTURE_FILE([ofctl_monitor.log]) > +AT_CHECK([ovs-ofctl monitor br0 65534 invalid_ttl --detach --no-chdir --pidfile 2> ofctl_monitor.log]) > + > +dnl e6:6f:38:26:34:c4 > 33:33:ff:00:00:02, ethertype IPv6 (0x86dd), length 86: (hlim 255, next-header ICMPv6 (58) payload length: 32) fc00::1 > ff02::1:ff00:2: [icmp6 sum ok] ICMP6, neighbor solicitation, length 32, who has fc00::2 > +dnl source link-address option (1), length 8 (1): e6:6f:38:26:34:c4 > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 LOCAL 'resubmit(,0)' '3333ff000002e66f382634c486dd6000000000203afffc000000000000000000000000000001ff0200000000000000000001ff00000287002e3e00000000fc0000000000000000000000000000020101e66f382634c4']) > + > +dnl 56:5b:b3:68:8f:09 > e6:6f:38:26:34:c4, ethertype IPv6 (0x86dd), length 86: (hlim 255, next-header ICMPv6 (58) payload length: 32) fc00::2 > fc00::1: [icmp6 sum ok] ICMP6, neighbor advertisement, length 32, tgt is fc00::2, Flags [solicited, override] > +dnl destination link-address option (2), length 8 (1): 56:5b:b3:68:8f:09 > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 LOCAL 'resubmit(,0)' 'e66f382634c4565bb3688f0986dd6000000000203afffc000000000000000000000000000002fc000000000000000000000000000001880088ce60000000fc0000000000000000000000000000020201565bb3688f09']) > + > +AT_CHECK([cat ofctl_monitor.log], [0], [dnl > +NXT_PACKET_IN2 (xid=0x0): table_id=1 cookie=0x0 total_len=86 ct_state=new|trk,in_port=LOCAL (via action) data_len=86 (unbuffered) > +icmp6,vlan_tci=0x0000,dl_src=e6:6f:38:26:34:c4,dl_dst=33:33:ff:00:00:02,ipv6_src=fc00::1,ipv6_dst=ff02::1:ff00:2,ipv6_label=0x00000,nw_tos=0,nw_ecn=0,nw_ttl=255,icmp_type=135,icmp_code=0,nd_target=fc00::2,nd_sll=e6:6f:38:26:34:c4,nd_tll=00:00:00:00:00:00 icmp6_csum:2e3e > +NXT_PACKET_IN2 (xid=0x0): table_id=1 cookie=0x0 total_len=86 ct_state=new|trk,in_port=LOCAL (via action) data_len=86 (unbuffered) > +icmp6,vlan_tci=0x0000,dl_src=56:5b:b3:68:8f:09,dl_dst=e6:6f:38:26:34:c4,ipv6_src=fc00::2,ipv6_dst=fc00::1,ipv6_label=0x00000,nw_tos=0,nw_ecn=0,nw_ttl=255,icmp_type=136,icmp_code=0,nd_target=fc00::2,nd_sll=00:00:00:00:00:00,nd_tll=56:5b:b3:68:8f:09 icmp6_csum:88ce > +]) > + > +OVS_TRAFFIC_VSWITCHD_STOP > +AT_CLEANUP > + > AT_SETUP([conntrack - commit, recirc]) > CHECK_CONNTRACK() > OVS_TRAFFIC_VSWITCHD_START() > -- > 2.10.2 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DgICAg&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-uZnsw&m=jT8R998Qlcz08PzMWXHlccbmTb60CDwaOsnwVQxg7EU&s=kyG0bhmEnHFmHbIAyoVIaaoWFw9QsRnaYwOG15MchPI&e= > > > > > >
On 20/12/2016 17:14, "Joe Stringer" <joe@ovn.org> wrote: >On 20 December 2016 at 14:46, Darrell Ball <dball@vmware.com> wrote: >> >> >> On 12/20/16, 12:25 PM, "ovs-dev-bounces@openvswitch.org on behalf of Daniele Di Proietto" <ovs-dev-bounces@openvswitch.org on behalf of diproiettod@vmware.com> wrote: >> @@ -864,25 +896,32 @@ extract_l4_icmp6(struct conn_key *key, const void *data, size_t size, >> &key->dst.addr.ipv6_aligned) >> || !ipv6_addr_equals(&inner_key.dst.addr.ipv6_aligned, >> &key->src.addr.ipv6_aligned)) { >> - return false; >> + return KEY_INVALID; >> } >> >> key->src = inner_key.src; >> key->dst = inner_key.dst; >> key->nw_proto = inner_key.nw_proto; >> >> - ok = extract_l4(key, l4, tail - l4, NULL, l3); >> - if (ok) { >> + res = extract_l4(key, l4, tail - l4, NULL, l3); >> + if (res != KEY_INVALID) { >> conn_key_reverse(key); >> *related = true; >> } >> - return ok; >> + return res; >> } >> + case ND_NEIGHBOR_SOLICIT: >> + case ND_NEIGHBOR_ADVERT: >> + case ND_ROUTER_SOLICIT: >> + case ND_ROUTER_ADVERT: >> + if (icmp6->icmp6_code != 0) { >> + return KEY_INVALID; >> + } >> + /* The packet is valid, but it's not going to be tracked. */ >> >> Do we want to say “The packet is valid” ? based on a verification being half-baked/partial. >> or simply we are not going to track something that looks like a ND packet ? >> >> Maybe, just check the type and say KEY_NO_TRACK, meaning “We certify nothing.”. > >I agree at this layer, I think it would be a bit clearer to return >something like KEY_NO_TRACK. The patch is already returning KEY_NO_TRACK (see below). It returns KEY_INVALID above if the ICMP code is wrong. Did I misinterpret your comment? > >When you get closer to the OpenFlow layer though, I think it's >reasonable to end up converting that into something consistent with >the Linux kernel implementation - that is, mark it as NEW and let the >OpenFlow rule writer decide what to do with it. It's a subtle >difference in semantics, but what we have in the OpenFlow API is an >"INVALID" bit, which says that we've detected something that's pretty >clearly invalid. We don't have a corresponding "valid" bit that >certifies everything is guaranteed to be rosy. If someone is >implementing a firewall with this feature, then they should consider >whether this CT action is providing enough guarantees around the >traffic identification for filtering, and therefore whether they >should perform further validation on the packets. IMO that's a bit >out of scope of CT action at this stage. I agree with you Thanks, Daniele
On 12/20/16, 5:37 PM, "Daniele Di Proietto" <diproiettod@vmware.com> wrote: On 20/12/2016 14:46, "Darrell Ball" <dball@vmware.com> wrote: > > >On 12/20/16, 12:25 PM, "ovs-dev-bounces@openvswitch.org on behalf of Daniele Di Proietto" <ovs-dev-bounces@openvswitch.org on behalf of diproiettod@vmware.com> wrote: > > The userspace connection tracker treats Neighbor Discovery packets > as invalid, because they're not checked against any connection. > > This in inconsistent with the kernel connection tracker which always > returns 'CS_NEW'. > > Therefore, this commit makes the userspace connection tracker conforming > with the kernel. ND packets still do not create or read any state, but > they're treated as NEW. > >The kernel opens a security hole by default for ND packets – presumably for >“user friendliness”. >This patch follows that approach for consistency, so it seems like a tradeoff. > > To support this, the key extraction functions can now return > KEY_NO_TRACK, meaning that the packet is ok, > >It does not seem the code below does much checking for “ok”. I don't think sending ND to the connection tracker, both in kernel and userspace makes much sense. The connection tracker should only handle stateful decisions, everything else can be handled much better with stateless OpenFlow. Therefore, from my perspective, it doesn't make a big difference if we return NEW or INVALID, because I think that the pipeline shouldn't use ct action on neighbor discovery. I choose NEW here, as you point out, to be consistent with the kernel. Originally, I was thinking INVALID is safer default behavior. But it a tradeoff with requiring the user to bypass conntrack for ND, which requires additional non-trivial knowledge. I don't think the kernel connection tracker does any checking on ND (please correct me if I'm wrong, I haven't checked the code because it's GPL), other than checksum validation, which we do as well in userspace. Alternatively, what do you think we should check that could be useful? I was not suggesting we check additional content if we are doing no_track option. See below. > but it should be treated > statelessly. > > We also have to remove a test that explicitly checked that neighbor > discovery was treated as invalid. > > Reported-by: Sridhar Gaddam <sgaddam@redhat.com> > Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com> > --- > lib/conntrack.c | 134 ++++++++++++++++++++++++++++++++---------------- > tests/ofproto-dpif.at | 32 ------------ > tests/system-traffic.at | 35 +++++++++++++ > 3 files changed, 125 insertions(+), 76 deletions(-) > > diff --git a/lib/conntrack.c b/lib/conntrack.c > index d459321..30f1ed0 100644 > --- a/lib/conntrack.c > +++ b/lib/conntrack.c > @@ -52,9 +52,17 @@ struct conn_lookup_ctx { > bool related; > }; > > -static bool conn_key_extract(struct conntrack *, struct dp_packet *, > - ovs_be16 dl_type, struct conn_lookup_ctx *, > - uint16_t zone); > +enum key_status { > + KEY_INVALID, /* Could not extract the connection key: invalid. */ > + KEY_OK, /* Connection key is ok. */ > + KEY_NO_TRACK, /* Connection key is ok, but it should not be tracked. */ > +}; > + > +static enum key_status conn_key_extract(struct conntrack *, > + struct dp_packet *, > + ovs_be16 dl_type, > + struct conn_lookup_ctx *, > + uint16_t zone); > static uint32_t conn_key_hash(const struct conn_key *, uint32_t basis); > static void conn_key_reverse(struct conn_key *); > static void conn_key_lookup(struct conntrack_bucket *ctb, > @@ -157,6 +165,20 @@ static unsigned hash_to_bucket(uint32_t hash) > return (hash >> (32 - CONNTRACK_BUCKETS_SHIFT)) % CONNTRACK_BUCKETS; > } > > +static uint16_t > +key_status_to_cs(enum key_status s) > +{ > + switch (s) { > + case KEY_INVALID: > + return CS_INVALID; > + case KEY_OK: > + case KEY_NO_TRACK: > + return CS_NEW; > + default: > + OVS_NOT_REACHED(); > + } > +} > + > static void > write_ct_md(struct dp_packet *pkt, uint16_t state, uint16_t zone, > uint32_t mark, ovs_u128 label) > @@ -301,10 +323,13 @@ conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch, > > memset(bucket_list, INT8_C(-1), sizeof bucket_list); > for (i = 0; i < cnt; i++) { > + enum key_status extract_res; > unsigned bucket; > > - if (!conn_key_extract(ct, pkts[i], dl_type, &ctxs[i], zone)) { > - write_ct_md(pkts[i], CS_INVALID, zone, 0, OVS_U128_ZERO); > + extract_res = conn_key_extract(ct, pkts[i], dl_type, &ctxs[i], zone); > + if (extract_res != KEY_OK) { > + write_ct_md(pkts[i], key_status_to_cs(extract_res), zone, 0, > + OVS_U128_ZERO); > continue; > } > > @@ -691,8 +716,11 @@ extract_l4_udp(struct conn_key *key, const void *data, size_t size) > return key->src.port && key->dst.port; > } > > -static inline bool extract_l4(struct conn_key *key, const void *data, > - size_t size, bool *related, const void *l3); > +static inline enum key_status extract_l4(struct conn_key *key, > + const void *data, > + size_t size, > + bool *related, > + const void *l3); > > static uint8_t > reverse_icmp_type(uint8_t type) > @@ -722,14 +750,14 @@ reverse_icmp_type(uint8_t type) > * instead and set *related to true. If 'related' is NULL we're > * already processing a nested header and no such recursion is > * possible */ > -static inline int > +static inline enum key_status > extract_l4_icmp(struct conn_key *key, const void *data, size_t size, > bool *related) > { > const struct icmp_header *icmp = data; > > if (OVS_UNLIKELY(size < ICMP_HEADER_LEN)) { > - return false; > + return KEY_INVALID; > } > > switch (icmp->icmp_type) { > @@ -740,13 +768,15 @@ extract_l4_icmp(struct conn_key *key, const void *data, size_t size, > case ICMP4_INFOREQUEST: > case ICMP4_INFOREPLY: > if (icmp->icmp_code != 0) { > - return false; > + return KEY_INVALID; > } > /* Separate ICMP connection: identified using id */ > key->src.icmp_id = key->dst.icmp_id = icmp->icmp_fields.echo.id; > key->src.icmp_type = icmp->icmp_type; > key->dst.icmp_type = reverse_icmp_type(icmp->icmp_type); > - break; > + > + return KEY_OK; > + > case ICMP4_DST_UNREACH: > case ICMP4_TIME_EXCEEDED: > case ICMP4_PARAM_PROB: > @@ -758,41 +788,40 @@ extract_l4_icmp(struct conn_key *key, const void *data, size_t size, > const char *l3 = (const char *) (icmp + 1); > const char *tail = (const char *) data + size; > const char *l4; > + enum key_status res; > bool ok; > > if (!related) { > - return false; > + return KEY_INVALID; > } > > memset(&inner_key, 0, sizeof inner_key); > inner_key.dl_type = htons(ETH_TYPE_IP); > ok = extract_l3_ipv4(&inner_key, l3, tail - l3, &l4, false); > if (!ok) { > - return false; > + return KEY_INVALID; > } > > /* pf doesn't do this, but it seems a good idea */ > if (inner_key.src.addr.ipv4_aligned != key->dst.addr.ipv4_aligned > || inner_key.dst.addr.ipv4_aligned != key->src.addr.ipv4_aligned) { > - return false; > + return KEY_INVALID; > } > > key->src = inner_key.src; > key->dst = inner_key.dst; > key->nw_proto = inner_key.nw_proto; > > - ok = extract_l4(key, l4, tail - l4, NULL, l3); > - if (ok) { > + res = extract_l4(key, l4, tail - l4, NULL, l3); > + if (res != KEY_INVALID) { > conn_key_reverse(key); > *related = true; > } > - return ok; > + return res; > } > default: > - return false; > + return KEY_INVALID; > } > - > - return true; > } > > static uint8_t > @@ -813,7 +842,7 @@ reverse_icmp6_type(uint8_t type) > * instead and set *related to true. If 'related' is NULL we're > * already processing a nested header and no such recursion is > * possible */ > -static inline bool > +static inline enum key_status > extract_l4_icmp6(struct conn_key *key, const void *data, size_t size, > bool *related) > { > @@ -822,20 +851,22 @@ extract_l4_icmp6(struct conn_key *key, const void *data, size_t size, > /* All the messages that we support need at least 4 bytes after > * the header */ > if (size < sizeof *icmp6 + 4) { > - return false; > + return KEY_INVALID; > } > > switch (icmp6->icmp6_type) { > case ICMP6_ECHO_REQUEST: > case ICMP6_ECHO_REPLY: > if (icmp6->icmp6_code != 0) { > - return false; > + return KEY_INVALID; > } > /* Separate ICMP connection: identified using id */ > key->src.icmp_id = key->dst.icmp_id = *(ovs_be16 *) (icmp6 + 1); > key->src.icmp_type = icmp6->icmp6_type; > key->dst.icmp_type = reverse_icmp6_type(icmp6->icmp6_type); > - break; > + > + return KEY_OK; > + > case ICMP6_DST_UNREACH: > case ICMP6_PACKET_TOO_BIG: > case ICMP6_TIME_EXCEEDED: > @@ -846,17 +877,18 @@ extract_l4_icmp6(struct conn_key *key, const void *data, size_t size, > const char *l3 = (const char *) icmp6 + 8; > const char *tail = (const char *) data + size; > const char *l4 = NULL; > + enum key_status res; > bool ok; > > if (!related) { > - return false; > + return KEY_INVALID; > } > > memset(&inner_key, 0, sizeof inner_key); > inner_key.dl_type = htons(ETH_TYPE_IPV6); > ok = extract_l3_ipv6(&inner_key, l3, tail - l3, &l4); > if (!ok) { > - return false; > + return KEY_INVALID; > } > > /* pf doesn't do this, but it seems a good idea */ > @@ -864,25 +896,32 @@ extract_l4_icmp6(struct conn_key *key, const void *data, size_t size, > &key->dst.addr.ipv6_aligned) > || !ipv6_addr_equals(&inner_key.dst.addr.ipv6_aligned, > &key->src.addr.ipv6_aligned)) { > - return false; > + return KEY_INVALID; > } > > key->src = inner_key.src; > key->dst = inner_key.dst; > key->nw_proto = inner_key.nw_proto; > > - ok = extract_l4(key, l4, tail - l4, NULL, l3); > - if (ok) { > + res = extract_l4(key, l4, tail - l4, NULL, l3); > + if (res != KEY_INVALID) { > conn_key_reverse(key); > *related = true; > } > - return ok; > + return res; > } > + case ND_NEIGHBOR_SOLICIT: > + case ND_NEIGHBOR_ADVERT: > + case ND_ROUTER_SOLICIT: > + case ND_ROUTER_ADVERT: > + if (icmp6->icmp6_code != 0) { > + return KEY_INVALID; > + } > + /* The packet is valid, but it's not going to be tracked. */ > >Do we want to say “The packet is valid” ? based on a verification being half-baked/partial. >or simply we are not going to track something that looks like a ND packet ? > >Maybe, just check the type and say KEY_NO_TRACK, meaning “We certify nothing.”. Ok, I can change the comment, if that's what you mean. Yes, that is one part. If we are doing no_track, we are telling the user to do stateless validation elsewhere. So why are we doing L4 checksum and the trivial check for code == 0 for ND. It has some cost and is trivially partial validation. If the user does the stateless validation, then it is redundant. If the user does not do the stateless validation, then they don’t gain anything, and it seems we offer a false security check. > > > + return KEY_NO_TRACK; > default: > - return false; > + return KEY_INVALID; > } > - > - return true; > } > > /* Extract l4 fields into 'key', which must already contain valid l3 > @@ -894,30 +933,34 @@ extract_l4_icmp6(struct conn_key *key, const void *data, size_t size, > * > * If 'related' is NULL, it means that we're already parsing a header nested > * in an ICMP error. In this case, we skip checksum and length validation. */ > -static inline bool > +static inline enum key_status > extract_l4(struct conn_key *key, const void *data, size_t size, bool *related, > const void *l3) > { > if (key->nw_proto == IPPROTO_TCP) { > return (!related || check_l4_tcp(key, data, size, l3)) > - && extract_l4_tcp(key, data, size); > + ? extract_l4_tcp(key, data, size) > + : KEY_INVALID; > } else if (key->nw_proto == IPPROTO_UDP) { > return (!related || check_l4_udp(key, data, size, l3)) > - && extract_l4_udp(key, data, size); > + ? extract_l4_udp(key, data, size) > + : KEY_INVALID; > } else if (key->dl_type == htons(ETH_TYPE_IP) > && key->nw_proto == IPPROTO_ICMP) { > return (!related || check_l4_icmp(data, size)) > - && extract_l4_icmp(key, data, size, related); > + ? extract_l4_icmp(key, data, size, related) > + : KEY_INVALID; > } else if (key->dl_type == htons(ETH_TYPE_IPV6) > && key->nw_proto == IPPROTO_ICMPV6) { > return (!related || check_l4_icmp6(key, data, size, l3)) > - && extract_l4_icmp6(key, data, size, related); > + ? extract_l4_icmp6(key, data, size, related) > + : KEY_INVALID; > } else { > - return false; > + return KEY_INVALID; > } > } > > -static bool > +static enum key_status > conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type, > struct conn_lookup_ctx *ctx, uint16_t zone) > { > @@ -930,7 +973,7 @@ conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type, > memset(ctx, 0, sizeof *ctx); > > if (!l2 || !l3 || !l4) { > - return false; > + return KEY_INVALID; > } > > ctx->key.zone = zone; > @@ -977,13 +1020,16 @@ conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type, > } > > if (ok) { > - if (extract_l4(&ctx->key, l4, tail - l4, &ctx->related, l3)) { > + enum key_status res; > + > + res = extract_l4(&ctx->key, l4, tail - l4, &ctx->related, l3); > + if (res == KEY_OK) { > ctx->hash = conn_key_hash(&ctx->key, ct->hash_basis); > - return true; > } > + return res; > } > > - return false; > + return KEY_INVALID; > } > ? > /* Symmetric */ > diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at > index 07a1675..a63f031 100644 > --- a/tests/ofproto-dpif.at > +++ b/tests/ofproto-dpif.at > @@ -8608,38 +8608,6 @@ OFPT_ECHO_REQUEST (xid=0x0): 0 bytes of payload > OVS_VSWITCHD_STOP > AT_CLEANUP > > -AT_SETUP([ofproto-dpif - conntrack - untrackable traffic]) > -OVS_VSWITCHD_START > - > -add_of_ports br0 1 2 > - > -AT_CHECK([ovs-appctl vlog/set dpif_netdev:dbg vconn:info ofproto_dpif:info]) > - > -AT_DATA([flows.txt], [dnl > -ipv6,ct_state=-trk,action=ct(table=0,zone=0) > -ct_state=+trk,action=controller > -]) > - > -AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) > - > -AT_CAPTURE_FILE([ofctl_monitor.log]) > -AT_CHECK([ovs-ofctl monitor br0 65534 invalid_ttl -P nxt_packet_in --detach --no-chdir --pidfile 2> ofctl_monitor.log]) > - > -AT_CHECK([ovs-appctl time/stop]) > - > -AT_CHECK([ovs-appctl netdev-dummy/receive p2 '0060970769ea0000860580da86dd6000000000203afffe80000000000000020086fffe0580dafe80000000000000026097fffe0769ea870068bd00000000fe80000000000000026097fffe0769ea01010000860580da']) > - > -OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 1]) > -OVS_WAIT_UNTIL([ovs-appctl -t ovs-ofctl exit]) > - > -AT_CHECK([cat ofctl_monitor.log], [0], [dnl > -NXT_PACKET_IN (xid=0x0): cookie=0x0 total_len=86 ct_state=inv|trk,in_port=2 (via action) data_len=86 (unbuffered) > -icmp6,vlan_tci=0x0000,dl_src=00:00:86:05:80:da,dl_dst=00:60:97:07:69:ea,ipv6_src=fe80::200:86ff:fe05:80da,ipv6_dst=fe80::260:97ff:fe07:69ea,ipv6_label=0x00000,nw_tos=0,nw_ecn=0,nw_ttl=255,icmp_type=135,icmp_code=0,nd_target=fe80::260:97ff:fe07:69ea,nd_sll=00:00:86:05:80:da,nd_tll=00:00:00:00:00:00 icmp6_csum:68bd > -]) > - > -OVS_VSWITCHD_STOP > -AT_CLEANUP > - > AT_SETUP([ofproto-dpif - conntrack - zones]) > OVS_VSWITCHD_START > > diff --git a/tests/system-traffic.at b/tests/system-traffic.at > index 2a7575c..fba5f1f 100644 > --- a/tests/system-traffic.at > +++ b/tests/system-traffic.at > @@ -785,6 +785,41 @@ icmpv6,orig=(src=fc00::1,dst=fc00::2,id=<cleared>,type=128,code=0),reply=(src=fc > OVS_TRAFFIC_VSWITCHD_STOP > AT_CLEANUP > > +AT_SETUP([conntrack - neighbor discovery]) > +CHECK_CONNTRACK() > +OVS_TRAFFIC_VSWITCHD_START() > + > +dnl Allow any traffic from ns0->ns1. Only allow nd, return traffic from ns1->ns0. > +AT_DATA([flows.txt], [dnl > +table=0,priority=1,action=drop > +table=0,priority=100,ip6,action=ct(table=1) > +dnl > +table=1,priority=100,ip6,action=controller > +]) > + > +AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt]) > + > +AT_CAPTURE_FILE([ofctl_monitor.log]) > +AT_CHECK([ovs-ofctl monitor br0 65534 invalid_ttl --detach --no-chdir --pidfile 2> ofctl_monitor.log]) > + > +dnl e6:6f:38:26:34:c4 > 33:33:ff:00:00:02, ethertype IPv6 (0x86dd), length 86: (hlim 255, next-header ICMPv6 (58) payload length: 32) fc00::1 > ff02::1:ff00:2: [icmp6 sum ok] ICMP6, neighbor solicitation, length 32, who has fc00::2 > +dnl source link-address option (1), length 8 (1): e6:6f:38:26:34:c4 > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 LOCAL 'resubmit(,0)' '3333ff000002e66f382634c486dd6000000000203afffc000000000000000000000000000001ff0200000000000000000001ff00000287002e3e00000000fc0000000000000000000000000000020101e66f382634c4']) > + > +dnl 56:5b:b3:68:8f:09 > e6:6f:38:26:34:c4, ethertype IPv6 (0x86dd), length 86: (hlim 255, next-header ICMPv6 (58) payload length: 32) fc00::2 > fc00::1: [icmp6 sum ok] ICMP6, neighbor advertisement, length 32, tgt is fc00::2, Flags [solicited, override] > +dnl destination link-address option (2), length 8 (1): 56:5b:b3:68:8f:09 > +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 LOCAL 'resubmit(,0)' 'e66f382634c4565bb3688f0986dd6000000000203afffc000000000000000000000000000002fc000000000000000000000000000001880088ce60000000fc0000000000000000000000000000020201565bb3688f09']) > + > +AT_CHECK([cat ofctl_monitor.log], [0], [dnl > +NXT_PACKET_IN2 (xid=0x0): table_id=1 cookie=0x0 total_len=86 ct_state=new|trk,in_port=LOCAL (via action) data_len=86 (unbuffered) > +icmp6,vlan_tci=0x0000,dl_src=e6:6f:38:26:34:c4,dl_dst=33:33:ff:00:00:02,ipv6_src=fc00::1,ipv6_dst=ff02::1:ff00:2,ipv6_label=0x00000,nw_tos=0,nw_ecn=0,nw_ttl=255,icmp_type=135,icmp_code=0,nd_target=fc00::2,nd_sll=e6:6f:38:26:34:c4,nd_tll=00:00:00:00:00:00 icmp6_csum:2e3e > +NXT_PACKET_IN2 (xid=0x0): table_id=1 cookie=0x0 total_len=86 ct_state=new|trk,in_port=LOCAL (via action) data_len=86 (unbuffered) > +icmp6,vlan_tci=0x0000,dl_src=56:5b:b3:68:8f:09,dl_dst=e6:6f:38:26:34:c4,ipv6_src=fc00::2,ipv6_dst=fc00::1,ipv6_label=0x00000,nw_tos=0,nw_ecn=0,nw_ttl=255,icmp_type=136,icmp_code=0,nd_target=fc00::2,nd_sll=00:00:00:00:00:00,nd_tll=56:5b:b3:68:8f:09 icmp6_csum:88ce > +]) > + > +OVS_TRAFFIC_VSWITCHD_STOP > +AT_CLEANUP > + > AT_SETUP([conntrack - commit, recirc]) > CHECK_CONNTRACK() > OVS_TRAFFIC_VSWITCHD_START() > -- > 2.10.2 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DgICAg&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-uZnsw&m=jT8R998Qlcz08PzMWXHlccbmTb60CDwaOsnwVQxg7EU&s=kyG0bhmEnHFmHbIAyoVIaaoWFw9QsRnaYwOG15MchPI&e= > > > > > >
diff --git a/lib/conntrack.c b/lib/conntrack.c index d459321..30f1ed0 100644 --- a/lib/conntrack.c +++ b/lib/conntrack.c @@ -52,9 +52,17 @@ struct conn_lookup_ctx { bool related; }; -static bool conn_key_extract(struct conntrack *, struct dp_packet *, - ovs_be16 dl_type, struct conn_lookup_ctx *, - uint16_t zone); +enum key_status { + KEY_INVALID, /* Could not extract the connection key: invalid. */ + KEY_OK, /* Connection key is ok. */ + KEY_NO_TRACK, /* Connection key is ok, but it should not be tracked. */ +}; + +static enum key_status conn_key_extract(struct conntrack *, + struct dp_packet *, + ovs_be16 dl_type, + struct conn_lookup_ctx *, + uint16_t zone); static uint32_t conn_key_hash(const struct conn_key *, uint32_t basis); static void conn_key_reverse(struct conn_key *); static void conn_key_lookup(struct conntrack_bucket *ctb, @@ -157,6 +165,20 @@ static unsigned hash_to_bucket(uint32_t hash) return (hash >> (32 - CONNTRACK_BUCKETS_SHIFT)) % CONNTRACK_BUCKETS; } +static uint16_t +key_status_to_cs(enum key_status s) +{ + switch (s) { + case KEY_INVALID: + return CS_INVALID; + case KEY_OK: + case KEY_NO_TRACK: + return CS_NEW; + default: + OVS_NOT_REACHED(); + } +} + static void write_ct_md(struct dp_packet *pkt, uint16_t state, uint16_t zone, uint32_t mark, ovs_u128 label) @@ -301,10 +323,13 @@ conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch, memset(bucket_list, INT8_C(-1), sizeof bucket_list); for (i = 0; i < cnt; i++) { + enum key_status extract_res; unsigned bucket; - if (!conn_key_extract(ct, pkts[i], dl_type, &ctxs[i], zone)) { - write_ct_md(pkts[i], CS_INVALID, zone, 0, OVS_U128_ZERO); + extract_res = conn_key_extract(ct, pkts[i], dl_type, &ctxs[i], zone); + if (extract_res != KEY_OK) { + write_ct_md(pkts[i], key_status_to_cs(extract_res), zone, 0, + OVS_U128_ZERO); continue; } @@ -691,8 +716,11 @@ extract_l4_udp(struct conn_key *key, const void *data, size_t size) return key->src.port && key->dst.port; } -static inline bool extract_l4(struct conn_key *key, const void *data, - size_t size, bool *related, const void *l3); +static inline enum key_status extract_l4(struct conn_key *key, + const void *data, + size_t size, + bool *related, + const void *l3); static uint8_t reverse_icmp_type(uint8_t type) @@ -722,14 +750,14 @@ reverse_icmp_type(uint8_t type) * instead and set *related to true. If 'related' is NULL we're * already processing a nested header and no such recursion is * possible */ -static inline int +static inline enum key_status extract_l4_icmp(struct conn_key *key, const void *data, size_t size, bool *related) { const struct icmp_header *icmp = data; if (OVS_UNLIKELY(size < ICMP_HEADER_LEN)) { - return false; + return KEY_INVALID; } switch (icmp->icmp_type) { @@ -740,13 +768,15 @@ extract_l4_icmp(struct conn_key *key, const void *data, size_t size, case ICMP4_INFOREQUEST: case ICMP4_INFOREPLY: if (icmp->icmp_code != 0) { - return false; + return KEY_INVALID; } /* Separate ICMP connection: identified using id */ key->src.icmp_id = key->dst.icmp_id = icmp->icmp_fields.echo.id; key->src.icmp_type = icmp->icmp_type; key->dst.icmp_type = reverse_icmp_type(icmp->icmp_type); - break; + + return KEY_OK; + case ICMP4_DST_UNREACH: case ICMP4_TIME_EXCEEDED: case ICMP4_PARAM_PROB: @@ -758,41 +788,40 @@ extract_l4_icmp(struct conn_key *key, const void *data, size_t size, const char *l3 = (const char *) (icmp + 1); const char *tail = (const char *) data + size; const char *l4; + enum key_status res; bool ok; if (!related) { - return false; + return KEY_INVALID; } memset(&inner_key, 0, sizeof inner_key); inner_key.dl_type = htons(ETH_TYPE_IP); ok = extract_l3_ipv4(&inner_key, l3, tail - l3, &l4, false); if (!ok) { - return false; + return KEY_INVALID; } /* pf doesn't do this, but it seems a good idea */ if (inner_key.src.addr.ipv4_aligned != key->dst.addr.ipv4_aligned || inner_key.dst.addr.ipv4_aligned != key->src.addr.ipv4_aligned) { - return false; + return KEY_INVALID; } key->src = inner_key.src; key->dst = inner_key.dst; key->nw_proto = inner_key.nw_proto; - ok = extract_l4(key, l4, tail - l4, NULL, l3); - if (ok) { + res = extract_l4(key, l4, tail - l4, NULL, l3); + if (res != KEY_INVALID) { conn_key_reverse(key); *related = true; } - return ok; + return res; } default: - return false; + return KEY_INVALID; } - - return true; } static uint8_t @@ -813,7 +842,7 @@ reverse_icmp6_type(uint8_t type) * instead and set *related to true. If 'related' is NULL we're * already processing a nested header and no such recursion is * possible */ -static inline bool +static inline enum key_status extract_l4_icmp6(struct conn_key *key, const void *data, size_t size, bool *related) { @@ -822,20 +851,22 @@ extract_l4_icmp6(struct conn_key *key, const void *data, size_t size, /* All the messages that we support need at least 4 bytes after * the header */ if (size < sizeof *icmp6 + 4) { - return false; + return KEY_INVALID; } switch (icmp6->icmp6_type) { case ICMP6_ECHO_REQUEST: case ICMP6_ECHO_REPLY: if (icmp6->icmp6_code != 0) { - return false; + return KEY_INVALID; } /* Separate ICMP connection: identified using id */ key->src.icmp_id = key->dst.icmp_id = *(ovs_be16 *) (icmp6 + 1); key->src.icmp_type = icmp6->icmp6_type; key->dst.icmp_type = reverse_icmp6_type(icmp6->icmp6_type); - break; + + return KEY_OK; + case ICMP6_DST_UNREACH: case ICMP6_PACKET_TOO_BIG: case ICMP6_TIME_EXCEEDED: @@ -846,17 +877,18 @@ extract_l4_icmp6(struct conn_key *key, const void *data, size_t size, const char *l3 = (const char *) icmp6 + 8; const char *tail = (const char *) data + size; const char *l4 = NULL; + enum key_status res; bool ok; if (!related) { - return false; + return KEY_INVALID; } memset(&inner_key, 0, sizeof inner_key); inner_key.dl_type = htons(ETH_TYPE_IPV6); ok = extract_l3_ipv6(&inner_key, l3, tail - l3, &l4); if (!ok) { - return false; + return KEY_INVALID; } /* pf doesn't do this, but it seems a good idea */ @@ -864,25 +896,32 @@ extract_l4_icmp6(struct conn_key *key, const void *data, size_t size, &key->dst.addr.ipv6_aligned) || !ipv6_addr_equals(&inner_key.dst.addr.ipv6_aligned, &key->src.addr.ipv6_aligned)) { - return false; + return KEY_INVALID; } key->src = inner_key.src; key->dst = inner_key.dst; key->nw_proto = inner_key.nw_proto; - ok = extract_l4(key, l4, tail - l4, NULL, l3); - if (ok) { + res = extract_l4(key, l4, tail - l4, NULL, l3); + if (res != KEY_INVALID) { conn_key_reverse(key); *related = true; } - return ok; + return res; } + case ND_NEIGHBOR_SOLICIT: + case ND_NEIGHBOR_ADVERT: + case ND_ROUTER_SOLICIT: + case ND_ROUTER_ADVERT: + if (icmp6->icmp6_code != 0) { + return KEY_INVALID; + } + /* The packet is valid, but it's not going to be tracked. */ + return KEY_NO_TRACK; default: - return false; + return KEY_INVALID; } - - return true; } /* Extract l4 fields into 'key', which must already contain valid l3 @@ -894,30 +933,34 @@ extract_l4_icmp6(struct conn_key *key, const void *data, size_t size, * * If 'related' is NULL, it means that we're already parsing a header nested * in an ICMP error. In this case, we skip checksum and length validation. */ -static inline bool +static inline enum key_status extract_l4(struct conn_key *key, const void *data, size_t size, bool *related, const void *l3) { if (key->nw_proto == IPPROTO_TCP) { return (!related || check_l4_tcp(key, data, size, l3)) - && extract_l4_tcp(key, data, size); + ? extract_l4_tcp(key, data, size) + : KEY_INVALID; } else if (key->nw_proto == IPPROTO_UDP) { return (!related || check_l4_udp(key, data, size, l3)) - && extract_l4_udp(key, data, size); + ? extract_l4_udp(key, data, size) + : KEY_INVALID; } else if (key->dl_type == htons(ETH_TYPE_IP) && key->nw_proto == IPPROTO_ICMP) { return (!related || check_l4_icmp(data, size)) - && extract_l4_icmp(key, data, size, related); + ? extract_l4_icmp(key, data, size, related) + : KEY_INVALID; } else if (key->dl_type == htons(ETH_TYPE_IPV6) && key->nw_proto == IPPROTO_ICMPV6) { return (!related || check_l4_icmp6(key, data, size, l3)) - && extract_l4_icmp6(key, data, size, related); + ? extract_l4_icmp6(key, data, size, related) + : KEY_INVALID; } else { - return false; + return KEY_INVALID; } } -static bool +static enum key_status conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type, struct conn_lookup_ctx *ctx, uint16_t zone) { @@ -930,7 +973,7 @@ conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type, memset(ctx, 0, sizeof *ctx); if (!l2 || !l3 || !l4) { - return false; + return KEY_INVALID; } ctx->key.zone = zone; @@ -977,13 +1020,16 @@ conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type, } if (ok) { - if (extract_l4(&ctx->key, l4, tail - l4, &ctx->related, l3)) { + enum key_status res; + + res = extract_l4(&ctx->key, l4, tail - l4, &ctx->related, l3); + if (res == KEY_OK) { ctx->hash = conn_key_hash(&ctx->key, ct->hash_basis); - return true; } + return res; } - return false; + return KEY_INVALID; } /* Symmetric */ diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index 07a1675..a63f031 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -8608,38 +8608,6 @@ OFPT_ECHO_REQUEST (xid=0x0): 0 bytes of payload OVS_VSWITCHD_STOP AT_CLEANUP -AT_SETUP([ofproto-dpif - conntrack - untrackable traffic]) -OVS_VSWITCHD_START - -add_of_ports br0 1 2 - -AT_CHECK([ovs-appctl vlog/set dpif_netdev:dbg vconn:info ofproto_dpif:info]) - -AT_DATA([flows.txt], [dnl -ipv6,ct_state=-trk,action=ct(table=0,zone=0) -ct_state=+trk,action=controller -]) - -AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) - -AT_CAPTURE_FILE([ofctl_monitor.log]) -AT_CHECK([ovs-ofctl monitor br0 65534 invalid_ttl -P nxt_packet_in --detach --no-chdir --pidfile 2> ofctl_monitor.log]) - -AT_CHECK([ovs-appctl time/stop]) - -AT_CHECK([ovs-appctl netdev-dummy/receive p2 '0060970769ea0000860580da86dd6000000000203afffe80000000000000020086fffe0580dafe80000000000000026097fffe0769ea870068bd00000000fe80000000000000026097fffe0769ea01010000860580da']) - -OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 1]) -OVS_WAIT_UNTIL([ovs-appctl -t ovs-ofctl exit]) - -AT_CHECK([cat ofctl_monitor.log], [0], [dnl -NXT_PACKET_IN (xid=0x0): cookie=0x0 total_len=86 ct_state=inv|trk,in_port=2 (via action) data_len=86 (unbuffered) -icmp6,vlan_tci=0x0000,dl_src=00:00:86:05:80:da,dl_dst=00:60:97:07:69:ea,ipv6_src=fe80::200:86ff:fe05:80da,ipv6_dst=fe80::260:97ff:fe07:69ea,ipv6_label=0x00000,nw_tos=0,nw_ecn=0,nw_ttl=255,icmp_type=135,icmp_code=0,nd_target=fe80::260:97ff:fe07:69ea,nd_sll=00:00:86:05:80:da,nd_tll=00:00:00:00:00:00 icmp6_csum:68bd -]) - -OVS_VSWITCHD_STOP -AT_CLEANUP - AT_SETUP([ofproto-dpif - conntrack - zones]) OVS_VSWITCHD_START diff --git a/tests/system-traffic.at b/tests/system-traffic.at index 2a7575c..fba5f1f 100644 --- a/tests/system-traffic.at +++ b/tests/system-traffic.at @@ -785,6 +785,41 @@ icmpv6,orig=(src=fc00::1,dst=fc00::2,id=<cleared>,type=128,code=0),reply=(src=fc OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([conntrack - neighbor discovery]) +CHECK_CONNTRACK() +OVS_TRAFFIC_VSWITCHD_START() + +dnl Allow any traffic from ns0->ns1. Only allow nd, return traffic from ns1->ns0. +AT_DATA([flows.txt], [dnl +table=0,priority=1,action=drop +table=0,priority=100,ip6,action=ct(table=1) +dnl +table=1,priority=100,ip6,action=controller +]) + +AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt]) + +AT_CAPTURE_FILE([ofctl_monitor.log]) +AT_CHECK([ovs-ofctl monitor br0 65534 invalid_ttl --detach --no-chdir --pidfile 2> ofctl_monitor.log]) + +dnl e6:6f:38:26:34:c4 > 33:33:ff:00:00:02, ethertype IPv6 (0x86dd), length 86: (hlim 255, next-header ICMPv6 (58) payload length: 32) fc00::1 > ff02::1:ff00:2: [icmp6 sum ok] ICMP6, neighbor solicitation, length 32, who has fc00::2 +dnl source link-address option (1), length 8 (1): e6:6f:38:26:34:c4 +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 LOCAL 'resubmit(,0)' '3333ff000002e66f382634c486dd6000000000203afffc000000000000000000000000000001ff0200000000000000000001ff00000287002e3e00000000fc0000000000000000000000000000020101e66f382634c4']) + +dnl 56:5b:b3:68:8f:09 > e6:6f:38:26:34:c4, ethertype IPv6 (0x86dd), length 86: (hlim 255, next-header ICMPv6 (58) payload length: 32) fc00::2 > fc00::1: [icmp6 sum ok] ICMP6, neighbor advertisement, length 32, tgt is fc00::2, Flags [solicited, override] +dnl destination link-address option (2), length 8 (1): 56:5b:b3:68:8f:09 +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 LOCAL 'resubmit(,0)' 'e66f382634c4565bb3688f0986dd6000000000203afffc000000000000000000000000000002fc000000000000000000000000000001880088ce60000000fc0000000000000000000000000000020201565bb3688f09']) + +AT_CHECK([cat ofctl_monitor.log], [0], [dnl +NXT_PACKET_IN2 (xid=0x0): table_id=1 cookie=0x0 total_len=86 ct_state=new|trk,in_port=LOCAL (via action) data_len=86 (unbuffered) +icmp6,vlan_tci=0x0000,dl_src=e6:6f:38:26:34:c4,dl_dst=33:33:ff:00:00:02,ipv6_src=fc00::1,ipv6_dst=ff02::1:ff00:2,ipv6_label=0x00000,nw_tos=0,nw_ecn=0,nw_ttl=255,icmp_type=135,icmp_code=0,nd_target=fc00::2,nd_sll=e6:6f:38:26:34:c4,nd_tll=00:00:00:00:00:00 icmp6_csum:2e3e +NXT_PACKET_IN2 (xid=0x0): table_id=1 cookie=0x0 total_len=86 ct_state=new|trk,in_port=LOCAL (via action) data_len=86 (unbuffered) +icmp6,vlan_tci=0x0000,dl_src=56:5b:b3:68:8f:09,dl_dst=e6:6f:38:26:34:c4,ipv6_src=fc00::2,ipv6_dst=fc00::1,ipv6_label=0x00000,nw_tos=0,nw_ecn=0,nw_ttl=255,icmp_type=136,icmp_code=0,nd_target=fc00::2,nd_sll=00:00:00:00:00:00,nd_tll=56:5b:b3:68:8f:09 icmp6_csum:88ce +]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([conntrack - commit, recirc]) CHECK_CONNTRACK() OVS_TRAFFIC_VSWITCHD_START()
The userspace connection tracker treats Neighbor Discovery packets as invalid, because they're not checked against any connection. This in inconsistent with the kernel connection tracker which always returns 'CS_NEW'. Therefore, this commit makes the userspace connection tracker conforming with the kernel. ND packets still do not create or read any state, but they're treated as NEW. To support this, the key extraction functions can now return KEY_NO_TRACK, meaning that the packet is ok, but it should be treated statelessly. We also have to remove a test that explicitly checked that neighbor discovery was treated as invalid. Reported-by: Sridhar Gaddam <sgaddam@redhat.com> Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com> --- lib/conntrack.c | 134 ++++++++++++++++++++++++++++++++---------------- tests/ofproto-dpif.at | 32 ------------ tests/system-traffic.at | 35 +++++++++++++ 3 files changed, 125 insertions(+), 76 deletions(-)