diff mbox series

[ovs-dev,v2] conntrack: add generic IP protocol support

Message ID 20200917083907.11036.82752.stgit@netdev64
State Superseded
Headers show
Series [ovs-dev,v2] conntrack: add generic IP protocol support | expand

Commit Message

Eelco Chaudron Sept. 17, 2020, 8:41 a.m. UTC
Currently, userspace conntrack only tracks TCP, UDP, and ICMP, and all
other IP protocols are discarded, and the +inv state is returned. This
is not in line with the kernel conntrack. Where if no L4 information can
be extracted it's treated as generic L3. The change below mimics the
behavior of the kernel.

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
---
v2: Small style fix suggested by Aaron Conole.

 lib/conntrack-private.h |    3 +++
 lib/conntrack.c         |   29 +++++++++++++++++++----------
 tests/system-traffic.at |   29 +++++++++++++++++++++++++++++
 3 files changed, 51 insertions(+), 10 deletions(-)

Comments

Flavio Leitner Sept. 17, 2020, 4:22 p.m. UTC | #1
On Thu, Sep 17, 2020 at 04:41:33AM -0400, Eelco Chaudron wrote:
> Currently, userspace conntrack only tracks TCP, UDP, and ICMP, and all
> other IP protocols are discarded, and the +inv state is returned. This
> is not in line with the kernel conntrack. Where if no L4 information can
> be extracted it's treated as generic L3. The change below mimics the
> behavior of the kernel.
> 
> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
> ---

LGTM
Acked-by: Flavio Leitner <fbl@sysclose.org>
Aaron Conole Sept. 21, 2020, 9:33 p.m. UTC | #2
Eelco Chaudron <echaudro@redhat.com> writes:

> Currently, userspace conntrack only tracks TCP, UDP, and ICMP, and all
> other IP protocols are discarded, and the +inv state is returned. This
> is not in line with the kernel conntrack. Where if no L4 information can
> be extracted it's treated as generic L3. The change below mimics the
> behavior of the kernel.
>
> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
> ---

Acked-by: Aaron Conole <aconole@redhat.com>
Ilya Maximets Oct. 8, 2020, 2:33 p.m. UTC | #3
On 9/17/20 10:41 AM, Eelco Chaudron wrote:
> Currently, userspace conntrack only tracks TCP, UDP, and ICMP, and all
> other IP protocols are discarded, and the +inv state is returned. This
> is not in line with the kernel conntrack. Where if no L4 information can
> be extracted it's treated as generic L3. The change below mimics the
> behavior of the kernel.
> 
> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
> ---
> v2: Small style fix suggested by Aaron Conole.
> 
>  lib/conntrack-private.h |    3 +++
>  lib/conntrack.c         |   29 +++++++++++++++++++----------
>  tests/system-traffic.at |   29 +++++++++++++++++++++++++++++
>  3 files changed, 51 insertions(+), 10 deletions(-)

Hi, Eelco.  Thanks for the patch!

Could you, please, add a NEWS entry since this is kind of user-visible change.
It should be something like:

   - Userspace datapath:
     * ...

Some small nitpicks inline. :)

Best regards, Ilya Maximets.

> 
> diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
> index 9a8ca39..85329e8 100644
> --- a/lib/conntrack-private.h
> +++ b/lib/conntrack-private.h
> @@ -59,6 +59,9 @@ struct conn_key {
>      uint8_t nw_proto;
>  };
>  
> +/* Verify that nw_proto stays uint8_t as it's used to index into l4_protos[] */
> +BUILD_ASSERT_DECL(sizeof(((struct conn_key *)0)->nw_proto) == sizeof(uint8_t));

This should be 'MEMBER_SIZEOF(struct conn_key, nw_proto) == sizeof(uint8_t)'.

> +
>  /* This is used for alg expectations; an expectation is a
>   * context created in preparation for establishing a data
>   * connection. The expectation is created by the control
> diff --git a/lib/conntrack.c b/lib/conntrack.c
> index 0cbc8f6..3597112 100644
> --- a/lib/conntrack.c
> +++ b/lib/conntrack.c
> @@ -143,12 +143,7 @@ detect_ftp_ctl_type(const struct conn_lookup_ctx *ctx,
>  static void
>  expectation_clean(struct conntrack *ct, const struct conn_key *master_key);
>  
> -static struct ct_l4_proto *l4_protos[] = {
> -    [IPPROTO_TCP] = &ct_proto_tcp,
> -    [IPPROTO_UDP] = &ct_proto_other,
> -    [IPPROTO_ICMP] = &ct_proto_icmp4,
> -    [IPPROTO_ICMPV6] = &ct_proto_icmp6,
> -};
> +static struct ct_l4_proto *l4_protos[UINT8_MAX + 1];
>  
>  static void
>  handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
> @@ -296,6 +291,7 @@ ct_print_conn_info(const struct conn *c, const char *log_msg,
>  struct conntrack *
>  conntrack_init(void)
>  {
> +    static struct ovsthread_once setup_l4_once = OVSTHREAD_ONCE_INITIALIZER;
>      struct conntrack *ct = xzalloc(sizeof *ct);
>  
>      ovs_rwlock_init(&ct->resources_lock);
> @@ -322,6 +318,18 @@ conntrack_init(void)
>      ct->clean_thread = ovs_thread_create("ct_clean", clean_thread_main, ct);
>      ct->ipf = ipf_init();
>  
> +    /* Initialize the l4 protocols. */
> +    if (ovsthread_once_start(&setup_l4_once)) {
> +        for (int i = 0; i < ARRAY_SIZE(l4_protos); i++) {
> +            l4_protos[i] = &ct_proto_other;
> +        }
> +        /* IPPROTO_UDP uses ct_proto_other, so no need to initialize it. */
> +        l4_protos[IPPROTO_TCP]    = &ct_proto_tcp;
> +        l4_protos[IPPROTO_ICMP]   = &ct_proto_icmp4;
> +        l4_protos[IPPROTO_ICMPV6] = &ct_proto_icmp6;
> +
> +        ovsthread_once_done(&setup_l4_once);
> +    }
>      return ct;
>  }
>  
> @@ -1970,9 +1978,10 @@ extract_l4(struct conn_key *key, const void *data, size_t size, bool *related,
>          return (!related || check_l4_icmp6(key, data, size, l3,
>                  validate_checksum))
>                 && extract_l4_icmp6(key, data, size, related);
> -    } else {
> -        return false;
>      }
> +
> +    /* For all other protocols we do not have L4 keys, so keep them zero */

Period in the end of a comment.

> +    return true;
>  }
>  
>  static bool
> @@ -2255,8 +2264,8 @@ nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
>                conn->nat_info->nat_action & NAT_ACTION_SRC_PORT
>            ? true : false;
>      union ct_addr first_addr = ct_addr;
> -    bool pat_enabled = conn->key.nw_proto != IPPROTO_ICMP &&
> -                       conn->key.nw_proto != IPPROTO_ICMPV6;
> +    bool pat_enabled = conn->key.nw_proto == IPPROTO_TCP ||
> +                       conn->key.nw_proto == IPPROTO_UDP;
>  
>      while (true) {
>          if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
> diff --git a/tests/system-traffic.at b/tests/system-traffic.at
> index 3ed03d9..b7aca93 100644
> --- a/tests/system-traffic.at
> +++ b/tests/system-traffic.at
> @@ -2341,6 +2341,35 @@ NXST_FLOW reply:
>  OVS_TRAFFIC_VSWITCHD_STOP
>  AT_CLEANUP
>  
> +AT_SETUP([conntrack - generic IP protocol])
> +CHECK_CONNTRACK()
> +OVS_TRAFFIC_VSWITCHD_START()
> +AT_CHECK([ovs-appctl vlog/set dpif:dbg dpif_netdev:dbg ofproto_dpif_upcall:dbg])
> +
> +ADD_NAMESPACES(at_ns0, at_ns1)
> +
> +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
> +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
> +
> +AT_DATA([flows.txt], [dnl
> +table=0, priority=1,action=drop
> +table=0, priority=10,arp,action=normal
> +table=0, priority=100,ip,action=ct(table=1)
> +table=1, priority=100,in_port=1,ip,ct_state=+trk+new,action=ct(commit)
> +table=1, priority=100,in_port=1,ct_state=+trk+est,action=normal
> +])
> +
> +AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])
> +
> +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=01005e00001200005e000101080045c0002800000000ff7019cdc0a8001ee0000012210164010001ba52c0a800010000000000000000000000000000 actions=resubmit(,0)"])
> +
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "orig=.src=192\.168\.0\.30,"], [], [dnl
> +112,orig=(src=192.168.0.30,dst=224.0.0.18,sport=0,dport=0),reply=(src=224.0.0.18,dst=192.168.0.30,sport=0,dport=0)
> +])
> +
> +OVS_TRAFFIC_VSWITCHD_STOP
> +AT_CLEANUP
> +
>  AT_SETUP([conntrack - ICMP related])
>  AT_SKIP_IF([test $HAVE_NC = no])
>  CHECK_CONNTRACK()
Eelco Chaudron Oct. 12, 2020, 11:39 a.m. UTC | #4
On 8 Oct 2020, at 16:33, Ilya Maximets wrote:

> On 9/17/20 10:41 AM, Eelco Chaudron wrote:
>> Currently, userspace conntrack only tracks TCP, UDP, and ICMP, and 
>> all
>> other IP protocols are discarded, and the +inv state is returned. 
>> This
>> is not in line with the kernel conntrack. Where if no L4 information 
>> can
>> be extracted it's treated as generic L3. The change below mimics the
>> behavior of the kernel.
>>
>> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
>> ---
>> v2: Small style fix suggested by Aaron Conole.
>>
>>  lib/conntrack-private.h |    3 +++
>>  lib/conntrack.c         |   29 +++++++++++++++++++----------
>>  tests/system-traffic.at |   29 +++++++++++++++++++++++++++++
>>  3 files changed, 51 insertions(+), 10 deletions(-)
>
> Hi, Eelco.  Thanks for the patch!
>
> Could you, please, add a NEWS entry since this is kind of user-visible 
> change.
> It should be something like:
>
>    - Userspace datapath:
>      * ...
>
> Some small nitpicks inline. :)

Thanks for the review. Just sent out a V3, including the two nitpicks 
below, and adding the NEWS section as suggested here.

Cheers,

Eelco

>>
>> diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
>> index 9a8ca39..85329e8 100644
>> --- a/lib/conntrack-private.h
>> +++ b/lib/conntrack-private.h
>> @@ -59,6 +59,9 @@ struct conn_key {
>>      uint8_t nw_proto;
>>  };
>>
>> +/* Verify that nw_proto stays uint8_t as it's used to index into 
>> l4_protos[] */
>> +BUILD_ASSERT_DECL(sizeof(((struct conn_key *)0)->nw_proto) == 
>> sizeof(uint8_t));
>
> This should be 'MEMBER_SIZEOF(struct conn_key, nw_proto) == 
> sizeof(uint8_t)'.
>
>> +
>>  /* This is used for alg expectations; an expectation is a
>>   * context created in preparation for establishing a data
>>   * connection. The expectation is created by the control
>> diff --git a/lib/conntrack.c b/lib/conntrack.c
>> index 0cbc8f6..3597112 100644
>> --- a/lib/conntrack.c
>> +++ b/lib/conntrack.c
>> @@ -143,12 +143,7 @@ detect_ftp_ctl_type(const struct conn_lookup_ctx 
>> *ctx,
>>  static void
>>  expectation_clean(struct conntrack *ct, const struct conn_key 
>> *master_key);
>>
>> -static struct ct_l4_proto *l4_protos[] = {
>> -    [IPPROTO_TCP] = &ct_proto_tcp,
>> -    [IPPROTO_UDP] = &ct_proto_other,
>> -    [IPPROTO_ICMP] = &ct_proto_icmp4,
>> -    [IPPROTO_ICMPV6] = &ct_proto_icmp6,
>> -};
>> +static struct ct_l4_proto *l4_protos[UINT8_MAX + 1];
>>
>>  static void
>>  handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx 
>> *ctx,
>> @@ -296,6 +291,7 @@ ct_print_conn_info(const struct conn *c, const 
>> char *log_msg,
>>  struct conntrack *
>>  conntrack_init(void)
>>  {
>> +    static struct ovsthread_once setup_l4_once = 
>> OVSTHREAD_ONCE_INITIALIZER;
>>      struct conntrack *ct = xzalloc(sizeof *ct);
>>
>>      ovs_rwlock_init(&ct->resources_lock);
>> @@ -322,6 +318,18 @@ conntrack_init(void)
>>      ct->clean_thread = ovs_thread_create("ct_clean", 
>> clean_thread_main, ct);
>>      ct->ipf = ipf_init();
>>
>> +    /* Initialize the l4 protocols. */
>> +    if (ovsthread_once_start(&setup_l4_once)) {
>> +        for (int i = 0; i < ARRAY_SIZE(l4_protos); i++) {
>> +            l4_protos[i] = &ct_proto_other;
>> +        }
>> +        /* IPPROTO_UDP uses ct_proto_other, so no need to initialize 
>> it. */
>> +        l4_protos[IPPROTO_TCP]    = &ct_proto_tcp;
>> +        l4_protos[IPPROTO_ICMP]   = &ct_proto_icmp4;
>> +        l4_protos[IPPROTO_ICMPV6] = &ct_proto_icmp6;
>> +
>> +        ovsthread_once_done(&setup_l4_once);
>> +    }
>>      return ct;
>>  }
>>
>> @@ -1970,9 +1978,10 @@ extract_l4(struct conn_key *key, const void 
>> *data, size_t size, bool *related,
>>          return (!related || check_l4_icmp6(key, data, size, l3,
>>                  validate_checksum))
>>                 && extract_l4_icmp6(key, data, size, related);
>> -    } else {
>> -        return false;
>>      }
>> +
>> +    /* For all other protocols we do not have L4 keys, so keep them 
>> zero */
>
> Period in the end of a comment.
>
>> +    return true;
>>  }
>>
>>  static bool
>> @@ -2255,8 +2264,8 @@ nat_select_range_tuple(struct conntrack *ct, 
>> const struct conn *conn,
>>                conn->nat_info->nat_action & NAT_ACTION_SRC_PORT
>>            ? true : false;
>>      union ct_addr first_addr = ct_addr;
>> -    bool pat_enabled = conn->key.nw_proto != IPPROTO_ICMP &&
>> -                       conn->key.nw_proto != IPPROTO_ICMPV6;
>> +    bool pat_enabled = conn->key.nw_proto == IPPROTO_TCP ||
>> +                       conn->key.nw_proto == IPPROTO_UDP;
>>
>>      while (true) {
>>          if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
>> diff --git a/tests/system-traffic.at b/tests/system-traffic.at
>> index 3ed03d9..b7aca93 100644
>> --- a/tests/system-traffic.at
>> +++ b/tests/system-traffic.at
>> @@ -2341,6 +2341,35 @@ NXST_FLOW reply:
>>  OVS_TRAFFIC_VSWITCHD_STOP
>>  AT_CLEANUP
>>
>> +AT_SETUP([conntrack - generic IP protocol])
>> +CHECK_CONNTRACK()
>> +OVS_TRAFFIC_VSWITCHD_START()
>> +AT_CHECK([ovs-appctl vlog/set dpif:dbg dpif_netdev:dbg 
>> ofproto_dpif_upcall:dbg])
>> +
>> +ADD_NAMESPACES(at_ns0, at_ns1)
>> +
>> +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
>> +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
>> +
>> +AT_DATA([flows.txt], [dnl
>> +table=0, priority=1,action=drop
>> +table=0, priority=10,arp,action=normal
>> +table=0, priority=100,ip,action=ct(table=1)
>> +table=1, 
>> priority=100,in_port=1,ip,ct_state=+trk+new,action=ct(commit)
>> +table=1, priority=100,in_port=1,ct_state=+trk+est,action=normal
>> +])
>> +
>> +AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])
>> +
>> +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 
>> packet=01005e00001200005e000101080045c0002800000000ff7019cdc0a8001ee0000012210164010001ba52c0a800010000000000000000000000000000 
>> actions=resubmit(,0)"])
>> +
>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 
>> "orig=.src=192\.168\.0\.30,"], [], [dnl
>> +112,orig=(src=192.168.0.30,dst=224.0.0.18,sport=0,dport=0),reply=(src=224.0.0.18,dst=192.168.0.30,sport=0,dport=0)
>> +])
>> +
>> +OVS_TRAFFIC_VSWITCHD_STOP
>> +AT_CLEANUP
>> +
>>  AT_SETUP([conntrack - ICMP related])
>>  AT_SKIP_IF([test $HAVE_NC = no])
>>  CHECK_CONNTRACK()
diff mbox series

Patch

diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
index 9a8ca39..85329e8 100644
--- a/lib/conntrack-private.h
+++ b/lib/conntrack-private.h
@@ -59,6 +59,9 @@  struct conn_key {
     uint8_t nw_proto;
 };
 
+/* Verify that nw_proto stays uint8_t as it's used to index into l4_protos[] */
+BUILD_ASSERT_DECL(sizeof(((struct conn_key *)0)->nw_proto) == sizeof(uint8_t));
+
 /* This is used for alg expectations; an expectation is a
  * context created in preparation for establishing a data
  * connection. The expectation is created by the control
diff --git a/lib/conntrack.c b/lib/conntrack.c
index 0cbc8f6..3597112 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -143,12 +143,7 @@  detect_ftp_ctl_type(const struct conn_lookup_ctx *ctx,
 static void
 expectation_clean(struct conntrack *ct, const struct conn_key *master_key);
 
-static struct ct_l4_proto *l4_protos[] = {
-    [IPPROTO_TCP] = &ct_proto_tcp,
-    [IPPROTO_UDP] = &ct_proto_other,
-    [IPPROTO_ICMP] = &ct_proto_icmp4,
-    [IPPROTO_ICMPV6] = &ct_proto_icmp6,
-};
+static struct ct_l4_proto *l4_protos[UINT8_MAX + 1];
 
 static void
 handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
@@ -296,6 +291,7 @@  ct_print_conn_info(const struct conn *c, const char *log_msg,
 struct conntrack *
 conntrack_init(void)
 {
+    static struct ovsthread_once setup_l4_once = OVSTHREAD_ONCE_INITIALIZER;
     struct conntrack *ct = xzalloc(sizeof *ct);
 
     ovs_rwlock_init(&ct->resources_lock);
@@ -322,6 +318,18 @@  conntrack_init(void)
     ct->clean_thread = ovs_thread_create("ct_clean", clean_thread_main, ct);
     ct->ipf = ipf_init();
 
+    /* Initialize the l4 protocols. */
+    if (ovsthread_once_start(&setup_l4_once)) {
+        for (int i = 0; i < ARRAY_SIZE(l4_protos); i++) {
+            l4_protos[i] = &ct_proto_other;
+        }
+        /* IPPROTO_UDP uses ct_proto_other, so no need to initialize it. */
+        l4_protos[IPPROTO_TCP] = &ct_proto_tcp;
+        l4_protos[IPPROTO_ICMP] = &ct_proto_icmp4;
+        l4_protos[IPPROTO_ICMPV6] = &ct_proto_icmp6;
+
+        ovsthread_once_done(&setup_l4_once);
+    }
     return ct;
 }
 
@@ -1970,9 +1978,10 @@  extract_l4(struct conn_key *key, const void *data, size_t size, bool *related,
         return (!related || check_l4_icmp6(key, data, size, l3,
                 validate_checksum))
                && extract_l4_icmp6(key, data, size, related);
-    } else {
-        return false;
     }
+
+    /* For all other protocols we do not have L4 keys, so keep them zero */
+    return true;
 }
 
 static bool
@@ -2255,8 +2264,8 @@  nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
               conn->nat_info->nat_action & NAT_ACTION_SRC_PORT
           ? true : false;
     union ct_addr first_addr = ct_addr;
-    bool pat_enabled = conn->key.nw_proto != IPPROTO_ICMP &&
-                       conn->key.nw_proto != IPPROTO_ICMPV6;
+    bool pat_enabled = conn->key.nw_proto == IPPROTO_TCP ||
+                       conn->key.nw_proto == IPPROTO_UDP;
 
     while (true) {
         if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
diff --git a/tests/system-traffic.at b/tests/system-traffic.at
index 3ed03d9..b7aca93 100644
--- a/tests/system-traffic.at
+++ b/tests/system-traffic.at
@@ -2341,6 +2341,35 @@  NXST_FLOW reply:
 OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
+AT_SETUP([conntrack - generic IP protocol])
+CHECK_CONNTRACK()
+OVS_TRAFFIC_VSWITCHD_START()
+AT_CHECK([ovs-appctl vlog/set dpif:dbg dpif_netdev:dbg ofproto_dpif_upcall:dbg])
+
+ADD_NAMESPACES(at_ns0, at_ns1)
+
+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
+ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
+
+AT_DATA([flows.txt], [dnl
+table=0, priority=1,action=drop
+table=0, priority=10,arp,action=normal
+table=0, priority=100,ip,action=ct(table=1)
+table=1, priority=100,in_port=1,ip,ct_state=+trk+new,action=ct(commit)
+table=1, priority=100,in_port=1,ct_state=+trk+est,action=normal
+])
+
+AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])
+
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=01005e00001200005e000101080045c0002800000000ff7019cdc0a8001ee0000012210164010001ba52c0a800010000000000000000000000000000 actions=resubmit(,0)"])
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "orig=.src=192\.168\.0\.30,"], [], [dnl
+112,orig=(src=192.168.0.30,dst=224.0.0.18,sport=0,dport=0),reply=(src=224.0.0.18,dst=192.168.0.30,sport=0,dport=0)
+])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
 AT_SETUP([conntrack - ICMP related])
 AT_SKIP_IF([test $HAVE_NC = no])
 CHECK_CONNTRACK()