diff mbox series

[ovs-dev] conntrack: document NULL SNAT behavior and add a test case

Message ID 161597728286.68850.15375221431540116773.stgit@ebuild
State Superseded, archived
Headers show
Series [ovs-dev] conntrack: document NULL SNAT behavior and add a test case | expand

Commit Message

Eelco Chaudron March 17, 2021, 10:35 a.m. UTC
Currently, conntrack in the kernel has an undocumented feature referred
to as NULL SNAT. Basically, when a source port collision is detected
during the commit, the source port will be translated to an ephemeral
port. If there is no collision, no SNAT is performed.

This patchset documents this behavior and adds a self-test to verify
it's not changing.

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
---
 lib/ovs-actions.xml              |   10 ++++++++
 tests/system-kmod-macros.at      |    7 ++++++
 tests/system-traffic.at          |   45 ++++++++++++++++++++++++++++++++++++++
 tests/system-userspace-macros.at |   10 ++++++++
 4 files changed, 72 insertions(+)

Comments

Paolo Valerio March 22, 2021, 6:50 p.m. UTC | #1
Hi Eelco,

Thanks for working on this, very useful indeed.
not a full review, but I have a question about a minor thing.

Eelco Chaudron <echaudro@redhat.com> writes:

> Currently, conntrack in the kernel has an undocumented feature referred
> to as NULL SNAT. Basically, when a source port collision is detected
> during the commit, the source port will be translated to an ephemeral
> port. If there is no collision, no SNAT is performed.
>
> This patchset documents this behavior and adds a self-test to verify
> it's not changing.
>
> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
> ---
>  lib/ovs-actions.xml              |   10 ++++++++
>  tests/system-kmod-macros.at      |    7 ++++++
>  tests/system-traffic.at          |   45 ++++++++++++++++++++++++++++++++++++++
>  tests/system-userspace-macros.at |   10 ++++++++
>  4 files changed, 72 insertions(+)
>
> diff --git a/lib/ovs-actions.xml b/lib/ovs-actions.xml
> index a2778de4b..a0070e6c6 100644
> --- a/lib/ovs-actions.xml
> +++ b/lib/ovs-actions.xml
> @@ -1833,6 +1833,16 @@ for <var>i</var> in [1,<var>n_members</var>]:
>              connection, will behave the same as a bare <code>nat</code>.
>            </p>
>  
> +          <p>
> +            For SNAT, there is a special case when the <code>src</code> IP
> +            address is configured as all 0's, i.e.,
> +            <code>nat(src=0.0.0.0)</code>. In this case, when a source port
> +            collision is detected during the commit, the source port will be
> +            translated to an ephemeral port. If there is no collision, no SNAT
> +            is performed. Note that this is currently only implemented in the
> +            Linux kernel datapath.
> +          </p>
> +
>            <p>
>              Open vSwitch 2.6 introduced <code>nat</code>.  Linux 4.6 was the
>              earliest upstream kernel that implemented <code>ct</code> support for
> diff --git a/tests/system-kmod-macros.at b/tests/system-kmod-macros.at
> index 15628a7c6..38bb1c55c 100644
> --- a/tests/system-kmod-macros.at
> +++ b/tests/system-kmod-macros.at
> @@ -99,6 +99,13 @@ m4_define([CHECK_CONNTRACK_FRAG_OVERLAP],
>  #
>  m4_define([CHECK_CONNTRACK_NAT])
>  
> +# CHECK_CONNTRACK_NULL_SNAT()
> +#
> +# Perform requirements checks for running conntrack SNAT NULL tests.
> +# The kernel always supports NULL SNAT, so no check is needed.
> +#
> +m4_define([CHECK_CONNTRACK_NULL_SNAT])
> +
>  # CHECK_CONNTRACK_TIMEOUT()
>  #
>  # Perform requirements checks for running conntrack customized timeout tests.
> diff --git a/tests/system-traffic.at b/tests/system-traffic.at
> index fb5b9a36d..1be425bb4 100644
> --- a/tests/system-traffic.at
> +++ b/tests/system-traffic.at
> @@ -4433,6 +4433,51 @@ tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=
>  OVS_TRAFFIC_VSWITCHD_STOP
>  AT_CLEANUP
>  
> +
> +AT_SETUP([conntrack - NULL SNAT])
> +AT_SKIP_IF([test $HAVE_NC = no])
> +CHECK_CONNTRACK()
> +CHECK_CONNTRACK_NULL_SNAT()
> +OVS_TRAFFIC_VSWITCHD_START()
> +
> +ADD_NAMESPACES(at_ns0, at_ns1)
> +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
> +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
> +NS_CHECK_EXEC([at_ns0], [ip route add 172.1.1.0/24 via 10.1.1.2])
> +
> +OVS_START_L7([at_ns1], [http])
> +

I noticed you use nc clients, is there any specific reason you preferred
httpd over something like:

NETNS_DAEMONIZE([at_ns1], [nc -l -k 80 > /dev/null], [nc0.pid])

> +AT_DATA([flows.txt], [dnl
> +table=0,priority=30,ct_state=-trk,ip,action=ct(table=0)
> +table=0,priority=20,ip,nw_dst=10.1.1.0/24,actions=ct(commit,nat(src=0.0.0.0),table=10)
> +table=0,priority=20,ip,nw_dst=172.1.1.2,actions=ct(commit,nat(dst=10.1.1.2),table=10)
> +table=0,priority=10,arp,action=normal
> +table=0,priority=1,action=drop
> +table=10,priority=20,ct_state=+rpl,ip,nw_dst=10.1.1.0/24 actions=ct(table=20,nat)
> +table=10,priority=10,ip,nw_dst=10.1.1.0/24 actions=resubmit(,20)
> +table=20,priority=10,ip,nw_dst=10.1.1.1,action=1
> +table=20,priority=10,ip,nw_dst=10.1.1.2,action=2
> +])
> +AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])
> +
> +dnl - Test to make sure src nat is NOT done when not needed
> +NS_CHECK_EXEC([at_ns0], [echo "TEST" | nc -p 30000 10.1.1.2 80 > nc-1.log])
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "orig=.src=10\.1\.1\.1,"], [0], [dnl
> +tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=30000,dport=80),reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=30000),protoinfo=(state=TIME_WAIT)
> +])
> +
> +dnl - Test to make sure src nat is done when needed
> +NS_CHECK_EXEC([at_ns0], [echo "TEST2" | nc -p 30001 172.1.1.2 80 > nc-2.log])
> +NS_CHECK_EXEC([at_ns0], [echo "TEST3" | nc -p 30001 10.1.1.2 80 > nc-3.log])
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 30001 | grep "orig=.src=10\.1\.1\.1," | sed -e 's/port=30001/port=<clnt_s_port>/g' -e 's/sport=80,dport=[[0-9]]\+/sport=80,dport=<rnd_port>/g' | sort], [0], [dnl
> +tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<clnt_s_port>,dport=80),reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=<rnd_port>),protoinfo=(state=TIME_WAIT)
> +tcp,orig=(src=10.1.1.1,dst=172.1.1.2,sport=<clnt_s_port>,dport=80),reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=<clnt_s_port>),protoinfo=(state=TIME_WAIT)
> +])
> +
> +OVS_TRAFFIC_VSWITCHD_STOP
> +AT_CLEANUP
> +
> +
>  AT_SETUP([conntrack - simple DNAT])
>  CHECK_CONNTRACK()
>  CHECK_CONNTRACK_NAT()
> diff --git a/tests/system-userspace-macros.at b/tests/system-userspace-macros.at
> index 34f82cee3..71acc8618 100644
> --- a/tests/system-userspace-macros.at
> +++ b/tests/system-userspace-macros.at
> @@ -96,6 +96,16 @@ m4_define([CHECK_CONNTRACK_FRAG_OVERLAP])
>  #
>  m4_define([CHECK_CONNTRACK_NAT])
>  
> +# CHECK_CONNTRACK_NULL_SNAT()
> +#
> +# Perform requirements checks for running conntrack SNAT NULL tests.
> +# The userspace datapath does not support NULL SNAT.
> +#
> +m4_define([CHECK_CONNTRACK_NULL_SNAT],
> +[
> +    AT_SKIP_IF([:])
> +])
> +
>  # CHECK_CONNTRACK_TIMEOUT()
>  #
>  # Perform requirements checks for running conntrack customized timeout tests.
Eelco Chaudron March 23, 2021, 8:52 a.m. UTC | #2
On 22 Mar 2021, at 19:50, Paolo Valerio wrote:

> Hi Eelco,
>
> Thanks for working on this, very useful indeed.
> not a full review, but I have a question about a minor thing.
>
> Eelco Chaudron <echaudro@redhat.com> writes:
>
>> Currently, conntrack in the kernel has an undocumented feature 
>> referred
>> to as NULL SNAT. Basically, when a source port collision is detected
>> during the commit, the source port will be translated to an ephemeral
>> port. If there is no collision, no SNAT is performed.
>>
>> This patchset documents this behavior and adds a self-test to verify
>> it's not changing.
>>
>> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
>> ---
>>  lib/ovs-actions.xml              |   10 ++++++++
>>  tests/system-kmod-macros.at      |    7 ++++++
>>  tests/system-traffic.at          |   45 
>> ++++++++++++++++++++++++++++++++++++++
>>  tests/system-userspace-macros.at |   10 ++++++++
>>  4 files changed, 72 insertions(+)
>>
>> diff --git a/lib/ovs-actions.xml b/lib/ovs-actions.xml
>> index a2778de4b..a0070e6c6 100644
>> --- a/lib/ovs-actions.xml
>> +++ b/lib/ovs-actions.xml
>> @@ -1833,6 +1833,16 @@ for <var>i</var> in [1,<var>n_members</var>]:
>>              connection, will behave the same as a bare 
>> <code>nat</code>.
>>            </p>
>>
>> +          <p>
>> +            For SNAT, there is a special case when the 
>> <code>src</code> IP
>> +            address is configured as all 0's, i.e.,
>> +            <code>nat(src=0.0.0.0)</code>. In this case, when a 
>> source port
>> +            collision is detected during the commit, the source port 
>> will be
>> +            translated to an ephemeral port. If there is no 
>> collision, no SNAT
>> +            is performed. Note that this is currently only 
>> implemented in the
>> +            Linux kernel datapath.
>> +          </p>
>> +
>>            <p>
>>              Open vSwitch 2.6 introduced <code>nat</code>.  Linux 4.6 
>> was the
>>              earliest upstream kernel that implemented 
>> <code>ct</code> support for
>> diff --git a/tests/system-kmod-macros.at 
>> b/tests/system-kmod-macros.at
>> index 15628a7c6..38bb1c55c 100644
>> --- a/tests/system-kmod-macros.at
>> +++ b/tests/system-kmod-macros.at
>> @@ -99,6 +99,13 @@ m4_define([CHECK_CONNTRACK_FRAG_OVERLAP],
>>  #
>>  m4_define([CHECK_CONNTRACK_NAT])
>>
>> +# CHECK_CONNTRACK_NULL_SNAT()
>> +#
>> +# Perform requirements checks for running conntrack SNAT NULL tests.
>> +# The kernel always supports NULL SNAT, so no check is needed.
>> +#
>> +m4_define([CHECK_CONNTRACK_NULL_SNAT])
>> +
>>  # CHECK_CONNTRACK_TIMEOUT()
>>  #
>>  # Perform requirements checks for running conntrack customized 
>> timeout tests.
>> diff --git a/tests/system-traffic.at b/tests/system-traffic.at
>> index fb5b9a36d..1be425bb4 100644
>> --- a/tests/system-traffic.at
>> +++ b/tests/system-traffic.at
>> @@ -4433,6 +4433,51 @@ 
>> tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=
>>  OVS_TRAFFIC_VSWITCHD_STOP
>>  AT_CLEANUP
>>
>> +
>> +AT_SETUP([conntrack - NULL SNAT])
>> +AT_SKIP_IF([test $HAVE_NC = no])
>> +CHECK_CONNTRACK()
>> +CHECK_CONNTRACK_NULL_SNAT()
>> +OVS_TRAFFIC_VSWITCHD_START()
>> +
>> +ADD_NAMESPACES(at_ns0, at_ns1)
>> +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
>> +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
>> +NS_CHECK_EXEC([at_ns0], [ip route add 172.1.1.0/24 via 10.1.1.2])
>> +
>> +OVS_START_L7([at_ns1], [http])
>> +
>
> I noticed you use nc clients, is there any specific reason you 
> preferred
> httpd over something like:
>
> NETNS_DAEMONIZE([at_ns1], [nc -l -k 80 > /dev/null], [nc0.pid])

No specific reason other than the http server python script was used in 
all the other tests above.

>> +AT_DATA([flows.txt], [dnl
>> +table=0,priority=30,ct_state=-trk,ip,action=ct(table=0)
>> +table=0,priority=20,ip,nw_dst=10.1.1.0/24,actions=ct(commit,nat(src=0.0.0.0),table=10)
>> +table=0,priority=20,ip,nw_dst=172.1.1.2,actions=ct(commit,nat(dst=10.1.1.2),table=10)
>> +table=0,priority=10,arp,action=normal
>> +table=0,priority=1,action=drop
>> +table=10,priority=20,ct_state=+rpl,ip,nw_dst=10.1.1.0/24 
>> actions=ct(table=20,nat)
>> +table=10,priority=10,ip,nw_dst=10.1.1.0/24 actions=resubmit(,20)
>> +table=20,priority=10,ip,nw_dst=10.1.1.1,action=1
>> +table=20,priority=10,ip,nw_dst=10.1.1.2,action=2
>> +])
>> +AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])
>> +
>> +dnl - Test to make sure src nat is NOT done when not needed
>> +NS_CHECK_EXEC([at_ns0], [echo "TEST" | nc -p 30000 10.1.1.2 80 > 
>> nc-1.log])
>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 
>> "orig=.src=10\.1\.1\.1,"], [0], [dnl
>> +tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=30000,dport=80),reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=30000),protoinfo=(state=TIME_WAIT)
>> +])
>> +
>> +dnl - Test to make sure src nat is done when needed
>> +NS_CHECK_EXEC([at_ns0], [echo "TEST2" | nc -p 30001 172.1.1.2 80 > 
>> nc-2.log])
>> +NS_CHECK_EXEC([at_ns0], [echo "TEST3" | nc -p 30001 10.1.1.2 80 > 
>> nc-3.log])
>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 30001 | grep 
>> "orig=.src=10\.1\.1\.1," | sed -e 's/port=30001/port=<clnt_s_port>/g' 
>> -e 's/sport=80,dport=[[0-9]]\+/sport=80,dport=<rnd_port>/g' | sort], 
>> [0], [dnl
>> +tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<clnt_s_port>,dport=80),reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=<rnd_port>),protoinfo=(state=TIME_WAIT)
>> +tcp,orig=(src=10.1.1.1,dst=172.1.1.2,sport=<clnt_s_port>,dport=80),reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=<clnt_s_port>),protoinfo=(state=TIME_WAIT)
>> +])
>> +
>> +OVS_TRAFFIC_VSWITCHD_STOP
>> +AT_CLEANUP
>> +
>> +
>>  AT_SETUP([conntrack - simple DNAT])
>>  CHECK_CONNTRACK()
>>  CHECK_CONNTRACK_NAT()
>> diff --git a/tests/system-userspace-macros.at 
>> b/tests/system-userspace-macros.at
>> index 34f82cee3..71acc8618 100644
>> --- a/tests/system-userspace-macros.at
>> +++ b/tests/system-userspace-macros.at
>> @@ -96,6 +96,16 @@ m4_define([CHECK_CONNTRACK_FRAG_OVERLAP])
>>  #
>>  m4_define([CHECK_CONNTRACK_NAT])
>>
>> +# CHECK_CONNTRACK_NULL_SNAT()
>> +#
>> +# Perform requirements checks for running conntrack SNAT NULL tests.
>> +# The userspace datapath does not support NULL SNAT.
>> +#
>> +m4_define([CHECK_CONNTRACK_NULL_SNAT],
>> +[
>> +    AT_SKIP_IF([:])
>> +])
>> +
>>  # CHECK_CONNTRACK_TIMEOUT()
>>  #
>>  # Perform requirements checks for running conntrack customized 
>> timeout tests.
Paolo Valerio March 30, 2021, 11:38 a.m. UTC | #3
"Eelco Chaudron" <echaudro@redhat.com> writes:

> On 22 Mar 2021, at 19:50, Paolo Valerio wrote:
>
>> Hi Eelco,
>>
>> Thanks for working on this, very useful indeed.
>> not a full review, but I have a question about a minor thing.
>>
>> Eelco Chaudron <echaudro@redhat.com> writes:
>>
>>> Currently, conntrack in the kernel has an undocumented feature 
>>> referred
>>> to as NULL SNAT. Basically, when a source port collision is detected
>>> during the commit, the source port will be translated to an ephemeral
>>> port. If there is no collision, no SNAT is performed.
>>>
>>> This patchset documents this behavior and adds a self-test to verify
>>> it's not changing.
>>>
>>> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
>>> ---
>>>  lib/ovs-actions.xml              |   10 ++++++++
>>>  tests/system-kmod-macros.at      |    7 ++++++
>>>  tests/system-traffic.at          |   45 
>>> ++++++++++++++++++++++++++++++++++++++
>>>  tests/system-userspace-macros.at |   10 ++++++++
>>>  4 files changed, 72 insertions(+)
>>>
>>> diff --git a/lib/ovs-actions.xml b/lib/ovs-actions.xml
>>> index a2778de4b..a0070e6c6 100644
>>> --- a/lib/ovs-actions.xml
>>> +++ b/lib/ovs-actions.xml
>>> @@ -1833,6 +1833,16 @@ for <var>i</var> in [1,<var>n_members</var>]:
>>>              connection, will behave the same as a bare 
>>> <code>nat</code>.
>>>            </p>
>>>
>>> +          <p>
>>> +            For SNAT, there is a special case when the 
>>> <code>src</code> IP
>>> +            address is configured as all 0's, i.e.,
>>> +            <code>nat(src=0.0.0.0)</code>. In this case, when a 
>>> source port
>>> +            collision is detected during the commit, the source port 
>>> will be
>>> +            translated to an ephemeral port. If there is no 
>>> collision, no SNAT
>>> +            is performed. Note that this is currently only 
>>> implemented in the
>>> +            Linux kernel datapath.
>>> +          </p>
>>> +
>>>            <p>
>>>              Open vSwitch 2.6 introduced <code>nat</code>.  Linux 4.6 
>>> was the
>>>              earliest upstream kernel that implemented 
>>> <code>ct</code> support for
>>> diff --git a/tests/system-kmod-macros.at 
>>> b/tests/system-kmod-macros.at
>>> index 15628a7c6..38bb1c55c 100644
>>> --- a/tests/system-kmod-macros.at
>>> +++ b/tests/system-kmod-macros.at
>>> @@ -99,6 +99,13 @@ m4_define([CHECK_CONNTRACK_FRAG_OVERLAP],
>>>  #
>>>  m4_define([CHECK_CONNTRACK_NAT])
>>>
>>> +# CHECK_CONNTRACK_NULL_SNAT()
>>> +#
>>> +# Perform requirements checks for running conntrack SNAT NULL tests.
>>> +# The kernel always supports NULL SNAT, so no check is needed.
>>> +#
>>> +m4_define([CHECK_CONNTRACK_NULL_SNAT])
>>> +
>>>  # CHECK_CONNTRACK_TIMEOUT()
>>>  #
>>>  # Perform requirements checks for running conntrack customized 
>>> timeout tests.
>>> diff --git a/tests/system-traffic.at b/tests/system-traffic.at
>>> index fb5b9a36d..1be425bb4 100644
>>> --- a/tests/system-traffic.at
>>> +++ b/tests/system-traffic.at
>>> @@ -4433,6 +4433,51 @@ 
>>> tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=
>>>  OVS_TRAFFIC_VSWITCHD_STOP
>>>  AT_CLEANUP
>>>
>>> +
>>> +AT_SETUP([conntrack - NULL SNAT])
>>> +AT_SKIP_IF([test $HAVE_NC = no])
>>> +CHECK_CONNTRACK()
>>> +CHECK_CONNTRACK_NULL_SNAT()
>>> +OVS_TRAFFIC_VSWITCHD_START()
>>> +
>>> +ADD_NAMESPACES(at_ns0, at_ns1)
>>> +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
>>> +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
>>> +NS_CHECK_EXEC([at_ns0], [ip route add 172.1.1.0/24 via 10.1.1.2])
>>> +
>>> +OVS_START_L7([at_ns1], [http])
>>> +
>>
>> I noticed you use nc clients, is there any specific reason you 
>> preferred
>> httpd over something like:
>>
>> NETNS_DAEMONIZE([at_ns1], [nc -l -k 80 > /dev/null], [nc0.pid])
>
> No specific reason other than the http server python script was used in 
> all the other tests above.
>

ok, thanks.

>>> +AT_DATA([flows.txt], [dnl
>>> +table=0,priority=30,ct_state=-trk,ip,action=ct(table=0)
>>> +table=0,priority=20,ip,nw_dst=10.1.1.0/24,actions=ct(commit,nat(src=0.0.0.0),table=10)

While at it, I was noticing the rule above.
This would be matched in both directions.
I think it would be better to split the logic here matching -rpl (just like the bare
nat rule below matches replies only).

WDYT?

>>> +table=0,priority=20,ip,nw_dst=172.1.1.2,actions=ct(commit,nat(dst=10.1.1.2),table=10)
>>> +table=0,priority=10,arp,action=normal
>>> +table=0,priority=1,action=drop
>>> +table=10,priority=20,ct_state=+rpl,ip,nw_dst=10.1.1.0/24 
>>> actions=ct(table=20,nat)
>>> +table=10,priority=10,ip,nw_dst=10.1.1.0/24 actions=resubmit(,20)
>>> +table=20,priority=10,ip,nw_dst=10.1.1.1,action=1
>>> +table=20,priority=10,ip,nw_dst=10.1.1.2,action=2
>>> +])
>>> +AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])
>>> +
>>> +dnl - Test to make sure src nat is NOT done when not needed
>>> +NS_CHECK_EXEC([at_ns0], [echo "TEST" | nc -p 30000 10.1.1.2 80 > 
>>> nc-1.log])
>>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 
>>> "orig=.src=10\.1\.1\.1,"], [0], [dnl
>>> +tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=30000,dport=80),reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=30000),protoinfo=(state=TIME_WAIT)
>>> +])
>>> +
>>> +dnl - Test to make sure src nat is done when needed
>>> +NS_CHECK_EXEC([at_ns0], [echo "TEST2" | nc -p 30001 172.1.1.2 80 > 
>>> nc-2.log])
>>> +NS_CHECK_EXEC([at_ns0], [echo "TEST3" | nc -p 30001 10.1.1.2 80 > 
>>> nc-3.log])
>>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 30001 | grep 
>>> "orig=.src=10\.1\.1\.1," | sed -e 's/port=30001/port=<clnt_s_port>/g' 
>>> -e 's/sport=80,dport=[[0-9]]\+/sport=80,dport=<rnd_port>/g' | sort], 
>>> [0], [dnl
>>> +tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<clnt_s_port>,dport=80),reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=<rnd_port>),protoinfo=(state=TIME_WAIT)
>>> +tcp,orig=(src=10.1.1.1,dst=172.1.1.2,sport=<clnt_s_port>,dport=80),reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=<clnt_s_port>),protoinfo=(state=TIME_WAIT)
>>> +])
>>> +
>>> +OVS_TRAFFIC_VSWITCHD_STOP
>>> +AT_CLEANUP
>>> +
>>> +
>>>  AT_SETUP([conntrack - simple DNAT])
>>>  CHECK_CONNTRACK()
>>>  CHECK_CONNTRACK_NAT()
>>> diff --git a/tests/system-userspace-macros.at 
>>> b/tests/system-userspace-macros.at
>>> index 34f82cee3..71acc8618 100644
>>> --- a/tests/system-userspace-macros.at
>>> +++ b/tests/system-userspace-macros.at
>>> @@ -96,6 +96,16 @@ m4_define([CHECK_CONNTRACK_FRAG_OVERLAP])
>>>  #
>>>  m4_define([CHECK_CONNTRACK_NAT])
>>>
>>> +# CHECK_CONNTRACK_NULL_SNAT()
>>> +#
>>> +# Perform requirements checks for running conntrack SNAT NULL tests.
>>> +# The userspace datapath does not support NULL SNAT.
>>> +#
>>> +m4_define([CHECK_CONNTRACK_NULL_SNAT],
>>> +[
>>> +    AT_SKIP_IF([:])
>>> +])
>>> +
>>>  # CHECK_CONNTRACK_TIMEOUT()
>>>  #
>>>  # Perform requirements checks for running conntrack customized 
>>> timeout tests.
Eelco Chaudron March 30, 2021, 12:22 p.m. UTC | #4
On 30 Mar 2021, at 13:38, Paolo Valerio wrote:

> "Eelco Chaudron" <echaudro@redhat.com> writes:
>
>> On 22 Mar 2021, at 19:50, Paolo Valerio wrote:
>>
>>> Hi Eelco,
>>>
>>> Thanks for working on this, very useful indeed.
>>> not a full review, but I have a question about a minor thing.
>>>
>>> Eelco Chaudron <echaudro@redhat.com> writes:
>>>
>>>> Currently, conntrack in the kernel has an undocumented feature
>>>> referred
>>>> to as NULL SNAT. Basically, when a source port collision is 
>>>> detected
>>>> during the commit, the source port will be translated to an 
>>>> ephemeral
>>>> port. If there is no collision, no SNAT is performed.
>>>>
>>>> This patchset documents this behavior and adds a self-test to 
>>>> verify
>>>> it's not changing.
>>>>
>>>> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
>>>> ---
>>>>  lib/ovs-actions.xml              |   10 ++++++++
>>>>  tests/system-kmod-macros.at      |    7 ++++++
>>>>  tests/system-traffic.at          |   45
>>>> ++++++++++++++++++++++++++++++++++++++
>>>>  tests/system-userspace-macros.at |   10 ++++++++
>>>>  4 files changed, 72 insertions(+)
>>>>
>>>> diff --git a/lib/ovs-actions.xml b/lib/ovs-actions.xml
>>>> index a2778de4b..a0070e6c6 100644
>>>> --- a/lib/ovs-actions.xml
>>>> +++ b/lib/ovs-actions.xml
>>>> @@ -1833,6 +1833,16 @@ for <var>i</var> in 
>>>> [1,<var>n_members</var>]:
>>>>              connection, will behave the same as a bare
>>>> <code>nat</code>.
>>>>            </p>
>>>>
>>>> +          <p>
>>>> +            For SNAT, there is a special case when the
>>>> <code>src</code> IP
>>>> +            address is configured as all 0's, i.e.,
>>>> +            <code>nat(src=0.0.0.0)</code>. In this case, when a
>>>> source port
>>>> +            collision is detected during the commit, the source 
>>>> port
>>>> will be
>>>> +            translated to an ephemeral port. If there is no
>>>> collision, no SNAT
>>>> +            is performed. Note that this is currently only
>>>> implemented in the
>>>> +            Linux kernel datapath.
>>>> +          </p>
>>>> +
>>>>            <p>
>>>>              Open vSwitch 2.6 introduced <code>nat</code>.  Linux 
>>>> 4.6
>>>> was the
>>>>              earliest upstream kernel that implemented
>>>> <code>ct</code> support for
>>>> diff --git a/tests/system-kmod-macros.at
>>>> b/tests/system-kmod-macros.at
>>>> index 15628a7c6..38bb1c55c 100644
>>>> --- a/tests/system-kmod-macros.at
>>>> +++ b/tests/system-kmod-macros.at
>>>> @@ -99,6 +99,13 @@ m4_define([CHECK_CONNTRACK_FRAG_OVERLAP],
>>>>  #
>>>>  m4_define([CHECK_CONNTRACK_NAT])
>>>>
>>>> +# CHECK_CONNTRACK_NULL_SNAT()
>>>> +#
>>>> +# Perform requirements checks for running conntrack SNAT NULL 
>>>> tests.
>>>> +# The kernel always supports NULL SNAT, so no check is needed.
>>>> +#
>>>> +m4_define([CHECK_CONNTRACK_NULL_SNAT])
>>>> +
>>>>  # CHECK_CONNTRACK_TIMEOUT()
>>>>  #
>>>>  # Perform requirements checks for running conntrack customized
>>>> timeout tests.
>>>> diff --git a/tests/system-traffic.at b/tests/system-traffic.at
>>>> index fb5b9a36d..1be425bb4 100644
>>>> --- a/tests/system-traffic.at
>>>> +++ b/tests/system-traffic.at
>>>> @@ -4433,6 +4433,51 @@
>>>> tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=
>>>>  OVS_TRAFFIC_VSWITCHD_STOP
>>>>  AT_CLEANUP
>>>>
>>>> +
>>>> +AT_SETUP([conntrack - NULL SNAT])
>>>> +AT_SKIP_IF([test $HAVE_NC = no])
>>>> +CHECK_CONNTRACK()
>>>> +CHECK_CONNTRACK_NULL_SNAT()
>>>> +OVS_TRAFFIC_VSWITCHD_START()
>>>> +
>>>> +ADD_NAMESPACES(at_ns0, at_ns1)
>>>> +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
>>>> +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
>>>> +NS_CHECK_EXEC([at_ns0], [ip route add 172.1.1.0/24 via 10.1.1.2])
>>>> +
>>>> +OVS_START_L7([at_ns1], [http])
>>>> +
>>>
>>> I noticed you use nc clients, is there any specific reason you
>>> preferred
>>> httpd over something like:
>>>
>>> NETNS_DAEMONIZE([at_ns1], [nc -l -k 80 > /dev/null], [nc0.pid])
>>
>> No specific reason other than the http server python script was used 
>> in
>> all the other tests above.
>>
>
> ok, thanks.
>
>>>> +AT_DATA([flows.txt], [dnl
>>>> +table=0,priority=30,ct_state=-trk,ip,action=ct(table=0)
>>>> +table=0,priority=20,ip,nw_dst=10.1.1.0/24,actions=ct(commit,nat(src=0.0.0.0),table=10)
>
> While at it, I was noticing the rule above.
> This would be matched in both directions.
> I think it would be better to split the logic here matching -rpl (just 
> like the bare
> nat rule below matches replies only).
>
> WDYT?

Makes sense as it aligns with the openshift SDN solution, will send out 
a v2.

>>>> +table=0,priority=20,ip,nw_dst=172.1.1.2,actions=ct(commit,nat(dst=10.1.1.2),table=10)
>>>> +table=0,priority=10,arp,action=normal
>>>> +table=0,priority=1,action=drop
>>>> +table=10,priority=20,ct_state=+rpl,ip,nw_dst=10.1.1.0/24
>>>> actions=ct(table=20,nat)
>>>> +table=10,priority=10,ip,nw_dst=10.1.1.0/24 actions=resubmit(,20)
>>>> +table=20,priority=10,ip,nw_dst=10.1.1.1,action=1
>>>> +table=20,priority=10,ip,nw_dst=10.1.1.2,action=2
>>>> +])
>>>> +AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])
>>>> +
>>>> +dnl - Test to make sure src nat is NOT done when not needed
>>>> +NS_CHECK_EXEC([at_ns0], [echo "TEST" | nc -p 30000 10.1.1.2 80 >
>>>> nc-1.log])
>>>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep
>>>> "orig=.src=10\.1\.1\.1,"], [0], [dnl
>>>> +tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=30000,dport=80),reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=30000),protoinfo=(state=TIME_WAIT)
>>>> +])
>>>> +
>>>> +dnl - Test to make sure src nat is done when needed
>>>> +NS_CHECK_EXEC([at_ns0], [echo "TEST2" | nc -p 30001 172.1.1.2 80 >
>>>> nc-2.log])
>>>> +NS_CHECK_EXEC([at_ns0], [echo "TEST3" | nc -p 30001 10.1.1.2 80 >
>>>> nc-3.log])
>>>> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 30001 | grep
>>>> "orig=.src=10\.1\.1\.1," | sed -e 
>>>> 's/port=30001/port=<clnt_s_port>/g'
>>>> -e 's/sport=80,dport=[[0-9]]\+/sport=80,dport=<rnd_port>/g' | 
>>>> sort],
>>>> [0], [dnl
>>>> +tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<clnt_s_port>,dport=80),reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=<rnd_port>),protoinfo=(state=TIME_WAIT)
>>>> +tcp,orig=(src=10.1.1.1,dst=172.1.1.2,sport=<clnt_s_port>,dport=80),reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=<clnt_s_port>),protoinfo=(state=TIME_WAIT)
>>>> +])
>>>> +
>>>> +OVS_TRAFFIC_VSWITCHD_STOP
>>>> +AT_CLEANUP
>>>> +
>>>> +
>>>>  AT_SETUP([conntrack - simple DNAT])
>>>>  CHECK_CONNTRACK()
>>>>  CHECK_CONNTRACK_NAT()
>>>> diff --git a/tests/system-userspace-macros.at
>>>> b/tests/system-userspace-macros.at
>>>> index 34f82cee3..71acc8618 100644
>>>> --- a/tests/system-userspace-macros.at
>>>> +++ b/tests/system-userspace-macros.at
>>>> @@ -96,6 +96,16 @@ m4_define([CHECK_CONNTRACK_FRAG_OVERLAP])
>>>>  #
>>>>  m4_define([CHECK_CONNTRACK_NAT])
>>>>
>>>> +# CHECK_CONNTRACK_NULL_SNAT()
>>>> +#
>>>> +# Perform requirements checks for running conntrack SNAT NULL 
>>>> tests.
>>>> +# The userspace datapath does not support NULL SNAT.
>>>> +#
>>>> +m4_define([CHECK_CONNTRACK_NULL_SNAT],
>>>> +[
>>>> +    AT_SKIP_IF([:])
>>>> +])
>>>> +
>>>>  # CHECK_CONNTRACK_TIMEOUT()
>>>>  #
>>>>  # Perform requirements checks for running conntrack customized
>>>> timeout tests.
diff mbox series

Patch

diff --git a/lib/ovs-actions.xml b/lib/ovs-actions.xml
index a2778de4b..a0070e6c6 100644
--- a/lib/ovs-actions.xml
+++ b/lib/ovs-actions.xml
@@ -1833,6 +1833,16 @@  for <var>i</var> in [1,<var>n_members</var>]:
             connection, will behave the same as a bare <code>nat</code>.
           </p>
 
+          <p>
+            For SNAT, there is a special case when the <code>src</code> IP
+            address is configured as all 0's, i.e.,
+            <code>nat(src=0.0.0.0)</code>. In this case, when a source port
+            collision is detected during the commit, the source port will be
+            translated to an ephemeral port. If there is no collision, no SNAT
+            is performed. Note that this is currently only implemented in the
+            Linux kernel datapath.
+          </p>
+
           <p>
             Open vSwitch 2.6 introduced <code>nat</code>.  Linux 4.6 was the
             earliest upstream kernel that implemented <code>ct</code> support for
diff --git a/tests/system-kmod-macros.at b/tests/system-kmod-macros.at
index 15628a7c6..38bb1c55c 100644
--- a/tests/system-kmod-macros.at
+++ b/tests/system-kmod-macros.at
@@ -99,6 +99,13 @@  m4_define([CHECK_CONNTRACK_FRAG_OVERLAP],
 #
 m4_define([CHECK_CONNTRACK_NAT])
 
+# CHECK_CONNTRACK_NULL_SNAT()
+#
+# Perform requirements checks for running conntrack SNAT NULL tests.
+# The kernel always supports NULL SNAT, so no check is needed.
+#
+m4_define([CHECK_CONNTRACK_NULL_SNAT])
+
 # CHECK_CONNTRACK_TIMEOUT()
 #
 # Perform requirements checks for running conntrack customized timeout tests.
diff --git a/tests/system-traffic.at b/tests/system-traffic.at
index fb5b9a36d..1be425bb4 100644
--- a/tests/system-traffic.at
+++ b/tests/system-traffic.at
@@ -4433,6 +4433,51 @@  tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src=
 OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
+
+AT_SETUP([conntrack - NULL SNAT])
+AT_SKIP_IF([test $HAVE_NC = no])
+CHECK_CONNTRACK()
+CHECK_CONNTRACK_NULL_SNAT()
+OVS_TRAFFIC_VSWITCHD_START()
+
+ADD_NAMESPACES(at_ns0, at_ns1)
+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
+ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
+NS_CHECK_EXEC([at_ns0], [ip route add 172.1.1.0/24 via 10.1.1.2])
+
+OVS_START_L7([at_ns1], [http])
+
+AT_DATA([flows.txt], [dnl
+table=0,priority=30,ct_state=-trk,ip,action=ct(table=0)
+table=0,priority=20,ip,nw_dst=10.1.1.0/24,actions=ct(commit,nat(src=0.0.0.0),table=10)
+table=0,priority=20,ip,nw_dst=172.1.1.2,actions=ct(commit,nat(dst=10.1.1.2),table=10)
+table=0,priority=10,arp,action=normal
+table=0,priority=1,action=drop
+table=10,priority=20,ct_state=+rpl,ip,nw_dst=10.1.1.0/24 actions=ct(table=20,nat)
+table=10,priority=10,ip,nw_dst=10.1.1.0/24 actions=resubmit(,20)
+table=20,priority=10,ip,nw_dst=10.1.1.1,action=1
+table=20,priority=10,ip,nw_dst=10.1.1.2,action=2
+])
+AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])
+
+dnl - Test to make sure src nat is NOT done when not needed
+NS_CHECK_EXEC([at_ns0], [echo "TEST" | nc -p 30000 10.1.1.2 80 > nc-1.log])
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "orig=.src=10\.1\.1\.1,"], [0], [dnl
+tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=30000,dport=80),reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=30000),protoinfo=(state=TIME_WAIT)
+])
+
+dnl - Test to make sure src nat is done when needed
+NS_CHECK_EXEC([at_ns0], [echo "TEST2" | nc -p 30001 172.1.1.2 80 > nc-2.log])
+NS_CHECK_EXEC([at_ns0], [echo "TEST3" | nc -p 30001 10.1.1.2 80 > nc-3.log])
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep 30001 | grep "orig=.src=10\.1\.1\.1," | sed -e 's/port=30001/port=<clnt_s_port>/g' -e 's/sport=80,dport=[[0-9]]\+/sport=80,dport=<rnd_port>/g' | sort], [0], [dnl
+tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<clnt_s_port>,dport=80),reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=<rnd_port>),protoinfo=(state=TIME_WAIT)
+tcp,orig=(src=10.1.1.1,dst=172.1.1.2,sport=<clnt_s_port>,dport=80),reply=(src=10.1.1.2,dst=10.1.1.1,sport=80,dport=<clnt_s_port>),protoinfo=(state=TIME_WAIT)
+])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
+
 AT_SETUP([conntrack - simple DNAT])
 CHECK_CONNTRACK()
 CHECK_CONNTRACK_NAT()
diff --git a/tests/system-userspace-macros.at b/tests/system-userspace-macros.at
index 34f82cee3..71acc8618 100644
--- a/tests/system-userspace-macros.at
+++ b/tests/system-userspace-macros.at
@@ -96,6 +96,16 @@  m4_define([CHECK_CONNTRACK_FRAG_OVERLAP])
 #
 m4_define([CHECK_CONNTRACK_NAT])
 
+# CHECK_CONNTRACK_NULL_SNAT()
+#
+# Perform requirements checks for running conntrack SNAT NULL tests.
+# The userspace datapath does not support NULL SNAT.
+#
+m4_define([CHECK_CONNTRACK_NULL_SNAT],
+[
+    AT_SKIP_IF([:])
+])
+
 # CHECK_CONNTRACK_TIMEOUT()
 #
 # Perform requirements checks for running conntrack customized timeout tests.