diff mbox

[ovs-dev,ovs-discuss] ovn: unsnat handling error for Distributed Gateway

Message ID 4fe04ad3-5fec-8c51-42cf-cb6aafab743b@dtdream.com
State Not Applicable
Headers show

Commit Message

Guoshuai Li April 18, 2017, 11:31 a.m. UTC
>
> On Sun, Apr 9, 2017 at 3:23 PM, Mickey Spiegel <mickeys.dev@gmail.com 
> <mailto:mickeys.dev@gmail.com>> wrote:
>
>
>
>     On Thu, Apr 6, 2017 at 7:34 AM, Guoshuai Li <ligs@dtdream.com
>     <mailto:ligs@dtdream.com>> wrote:
>
>
>         revese my topology:
>
>          +---------+--------+
>                                      |  VM 172.16.1.7  |
>          +---------+--------+
>                                                |
>          +---------+--------+
>                                      |  Logical Switch  |
>          +---------+--------+
>          |172.16.1.254
>                       10.157.142.3 +-----------+--------+
>                   +----------------+  Logical Router 1  +
>                   | +--------------------+
>         +---------+--------+
>         |  Logical Switch  |
>         +------------------+
>                   | +--------------------+
>                   +----------------+  Logical Router 2  |
>                       10.157.142.1 +--------------------+
>
>
>             Hi All, I am having a problem for ovn and need help, thanks.
>
>
>             I created two logical routers and connected them to each
>             other through an external LogicalSwitch (which is connected
>             to the external network).
>
>             LogicalRouter-1 is then connected to the VM through the
>             internal LogicalSwitch.
>
>             my topology:
>
>                                   10.157.142.3       172.16.1.254
>              +--------------------+ +---------+--------+
>              +---------+--------+
>                       +----------------+  Logical Router 1
>             +------------------|  Logical Switch 
>             +-------------------+ VM 172.16.1.7   |
>                       | +--------------------+ +------------------+
>              +------------------+
>             +---------+--------+
>             |  Logical Switch  |
>             +------------------+
>                       | +--------------------+
>                       +----------------+  Logical Router 2  |
>              +--------------------+
>                                   10.157.142.1
>
>             I tested master and branch-2.7; traffic cannot get
>             from the VM (172.16.1.7) to LogicalRouter-2's port
>             (10.157.142.
>
>             Sorry, The destination address is 10.157.142.1, And The
>         SNAT/unSNAT address is 10.157.142.3.
>
>             ) via ping.
>             My logical router is a distributed gateway, and the two
>             logical router ports that connect external LogicalSwitch
>             are on the same chassis.
>             If the two logical router ports are not on the same
>             chassis ping is also OK, And ping from VM (172.16.1.7) to
>             external network is also OK.
>
>             I looked at the OpenFlow tables on the gateway chassis, and I
>             suspect an unSNAT handling error in Router 1's input
>             processing of the ICMP reply.
>             I think it is necessary to replace the destination address
>             10.157.142.3 with 172.16.1.7 in Table 19 and route
>             172.16.1.7 in Table 21, but now the route match is
>             10.157.142.0/24 <http://10.157.142.0/24>.
>
>             cookie=0x92bd0055, duration=68.468s, table=16,
>             n_packets=1, n_bytes=98, idle_age=36,
>             priority=50,reg14=0x4,metadata=0x7,dl_dst=fa:16:3e:58:1c:8a
>             actions=resubmit(,17)
>             cookie=0x45765344, duration=68.467s, table=17,
>             n_packets=1, n_bytes=98, idle_age=36,
>             priority=0,metadata=0x7 actions=resubmit(,18)
>             cookie=0xaeaaed29, duration=68.479s, table=18,
>             n_packets=1, n_bytes=98, idle_age=36,
>             priority=0,metadata=0x7 actions=resubmit(,19)
>             cookie=0xce785d51, duration=68.479s, table=19,
>             n_packets=1, n_bytes=98, idle_age=36,
>             priority=100,ip,reg14=0x4,metadata=0x7,nw_dst=10.157.142.3
>             actions=ct(table=20,zone=NXM_NX_REG12[0..15],nat)
>             cookie=0xbd994421, duration=68.481s, table=20,
>             n_packets=1, n_bytes=98, idle_age=36,
>             priority=0,metadata=0x7 actions=resubmit(,21)
>             cookie=0xaea3a6ae, duration=68.479s, table=21,
>             n_packets=1, n_bytes=98, idle_age=36,
>             priority=49,ip,metadata=0x7,nw_dst=10.157.142.0/24
>             <http://10.157.142.0/24>
>             actions=dec_ttl(),move:NXM_OF_IP_DST[]->NXM_NX_XXREG0[96..127],load:0xa9d8e03->NXM_NX_XXREG0[64..95],mod_dl_src:fa:16:3e:58:1c:8a,load:0x4->NXM_NX_REG15[],load:0x1->NXM_NX_REG10[0],resubmit(,22)
>             cookie=0xce6e8d4e, duration=68.482s, table=22,
>             n_packets=1, n_bytes=98, idle_age=36,
>             priority=0,ip,metadata=0x7
>             actions=push:NXM_NX_REG0[],push:NXM_NX_XXREG0[96..127],pop:NXM_NX_REG0[],mod_dl_dst:00:00:00:00:00:00,resubmit(,66),pop:NXM_NX_REG0[],resubmit(,23)
>             cookie=0xce89c4ed, duration=68.481s, table=23,
>             n_packets=1, n_bytes=98, idle_age=36,
>             priority=150,reg15=0x4,metadata=0x7,dl_dst=00:00:00:00:00:00
>             actions=load:0x5->NXM_NX_REG15[],resubmit(,24)
>             cookie=0xb2d84350, duration=68.469s, table=24,
>             n_packets=1, n_bytes=98, idle_age=36,
>             priority=100,ip,metadata=0x7,dl_dst=00:00:00:00:00:00
>
>             I do not know why and need help, thanks.
>
>
>     I was able to reproduce this. I agree with your analysis. Looking
>     at ovs-ofctl dump-flows, the packet counts indicate that the
>     packet is subject to ct(...,nat), but the routing table match is
>     as if NAT never occurred.
>
>     I tried with gateway routers and it worked. There are some
>     differences in ovs-dpctl dump-flows.
>
>     For the case of gateway routers:
>
>     vagrant@compute2:~$ sudo ovs-dpctl dump-flows
>
>     recirc_id(0x14),tunnel(tun_id=0x6,src=192.168.33.31,dst=192.168.33.32,geneve({}{}),flags(-df+csum+key)),in_port(4),eth(src=00:00:00:00:00:00/01:00:00:00:00:00,dst=00:00:02:02:03:04),eth_type(0x0800),ipv4(src=172.16.1.3,dst=172.16.1.10,proto=1,ttl=62,frag=no),icmp(type=8,code=0),
>     packets:3, bytes:294, used:1.981s,
>     actions:userspace(pid=2658598031,slow_path(action))
>
>     recirc_id(0x16),tunnel(tun_id=0x6,src=192.168.33.31,dst=192.168.33.32,geneve({class=0x102,type=0x80,len=4,0x10002}),flags(-df+csum+key)),in_port(4),eth(src=00:00:02:02:03:04,dst=00:00:02:01:02:03),eth_type(0x0800),ipv4(src=172.16.1.10,dst=192.168.1.2,tos=0/0x3,ttl=254,frag=no),
>     packets:3, bytes:294, used:1.981s,
>     actions:set(tunnel(tun_id=0x3,dst=192.168.33.31,ttl=64,tp_src=24284,tp_dst=6081,geneve({class=0x102,type=0x80,len=4,0x10002}),flags(df|csum|key))),set(eth(src=00:00:01:01:02:03,dst=f0:00:00:01:02:03)),set(ipv4(src=172.16.1.10,dst=192.168.1.2,tos=0/0x3,ttl=252)),4
>
>     recirc_id(0),tunnel(tun_id=0x6,src=192.168.33.31,dst=192.168.33.32,geneve({class=0x102,type=0x80,len=4,0x10002/0x7fffffff}),flags(-df+csum+key)),in_port(4),eth(src=00:00:00:00:00:00/01:00:00:00:00:00,dst=00:00:04:01:02:04),eth_type(0x0800),ipv4(src=192.168.1.2/255.255.255.254,dst=172.16.1.10,proto=1,ttl=63,frag=no
>     <http://192.168.1.2/255.255.255.254,dst=172.16.1.10,proto=1,ttl=63,frag=no>),
>     packets:3, bytes:294, used:1.981s, actions:ct(zone=1,nat),recirc(0x13)
>
>     recirc_id(0x15),tunnel(tun_id=0x6,src=192.168.33.31,dst=192.168.33.32,geneve({}{}),flags(-df+csum+key)),in_port(4),eth(src=00:00:02:01:02:03,dst=00:00:02:02:03:04),eth_type(0x0800),ipv4(src=172.16.1.10,dst=172.16.1.3,proto=1,ttl=255,frag=no),
>     packets:3, bytes:294, used:1.981s,
>     actions:set(eth(src=00:00:02:02:03:04,dst=00:00:02:01:02:03)),set(ipv4(src=172.16.1.10,dst=172.16.1.3,ttl=254)),ct(zone=2,nat),ct(commit,zone=1,nat(dst=192.168.1.2)),recirc(0x16)
>
>     recirc_id(0x13),tunnel(tun_id=0x6,src=192.168.33.31,dst=192.168.33.32,geneve({}{}),flags(-df+csum+key)),in_port(4),eth(src=00:00:04:01:02:03,dst=00:00:04:01:02:04),eth_type(0x0800),ipv4(src=192.168.1.2,dst=172.16.1.10,ttl=63,frag=no),
>     packets:3, bytes:294, used:1.981s,
>     actions:set(eth(src=00:00:02:01:02:03,dst=00:00:02:02:03:04)),set(ipv4(src=192.168.1.2,dst=172.16.1.10,ttl=62)),ct(commit,zone=2,nat(src=172.16.1.3)),recirc(0x14)
>
>     Note that recirc_id(0x15) goes to ct() actions after the ICMP
>     response is generated in the slow path, which is required for ICMP
>     type change.
>
>
>     With distributed routers and distributed gateway ports:
>
>     vagrant@compute2:~$ sudo ovs-dpctl dump-flows
>
>     recirc_id(0),tunnel(tun_id=0x1,src=192.168.33.31,dst=192.168.33.32,geneve({class=0x102,type=0x80,len=4,0x10004/0x7fffffff}),flags(-df+csum+key)),in_port(4),eth_type(0x0800),ipv4(src=192.168.1.3,frag=no),
>     packets:3, bytes:294, used:2.388s,
>     actions:ct(commit,zone=3,nat(src=172.16.1.1)),recirc(0x3)
>
>     recirc_id(0x3),tunnel(tun_id=0x1,src=192.168.33.31,dst=192.168.33.32,geneve({}{}),flags(-df+csum+key)),in_port(4),eth(src=00:00:02:01:02:03,dst=00:00:02:01:02:04),eth_type(0x0800),ipv4(src=172.16.1.1,dst=172.16.1.10,proto=1,ttl=63,frag=no),icmp(type=8,code=0),
>     packets:3, bytes:294, used:2.389s,
>     actions:userspace(pid=2248102802,slow_path(action))
>
>     recirc_id(0x4),tunnel(tun_id=0x1,src=192.168.33.31,dst=192.168.33.32,geneve({}{}),flags(-df+csum+key)),in_port(4),eth(src=00:00:02:01:02:04,dst=00:00:02:01:02:03),eth_type(0x0800),ipv4(dst=172.16.1.1,ttl=254,frag=no),
>     packets:3, bytes:294, used:2.389s,
>     actions:userspace(pid=2248102802,slow_path(controller))
>
>     The recirc_id(0x4) entry ends up with the slow_path(controller)
>     action resulting from table 24. I do not yet know why the earlier
>     ct() actions from table 19 were skipped.
>
> The problems have to do with hitting NAT actions when already in the 
> slow path due to the change of ICMP type.
>
> When pinging from either a gateway router or a distributed router with 
> a distributed gateway port, to the IP address of another gateway 
> router, the ping works. On the second gateway router, the hit in table 
> 17 to generate the ICMP echo reply forces slow path due to the action 
> (change of ICMP type). On gateway routers, all IP packets are subject 
> to recirc in the DNAT pipeline stage (table 20), even though the ICMP 
> echo reply would never hit a DNAT entry. The recirc forced by this 
> action ends the slow path processing. By the time the packet gets back 
> to the first router (gateway router or distributed router), the packet 
> is no longer in the slow path and NAT succeeds as it should.
>
> When pinging from either a gateway router or a distributed router with 
> a distributed gateway port, to the IP address of another distributed 
> router with a distributed gateway port, the ping fails. There is no 
> recirc between the hit on the second gateway router in table 17 to 
> generate the ICMP echo reply (which forces slow path due to the ICMP 
> type change), and the reply packet's hit of the UNSNAT rule in table 
> 19 of the first router (gateway router or distributed router). The 
> latter does not find a match, and the packet ends up hitting the table 
> 24 slow path controller entry that applies when ARP could not resolve 
> a MAC address.
>
> Discussing this with Guru on IRC earlier today, the suggestion was to 
> create a system test in tests/system-traffic.at 
> <http://system-traffic.at> (without OVN) that demonstrates the 
> problem. From one namespace trying to ping a virtual IP, there should 
> be flows to apply SNAT, then apply clone and ct_clear (emulating the 
> behavior of OVN patch ports), then generate the ICMP echo reply just 
> like OVN does, back through clone and ct_clear, then UNSNAT, then back 
> to the source. Once we can demonstrate the problem in this way, then 
> we can ask for further help to debug this.
>
> Mickey
>
>     Mickey
>
>
>

I'm sorry it took me so long to finish writing this.

this is my test case patch:

 From 916a47b11316786c2fd3ccddbc564dd56bfbd665 Mon Sep 17 00:00:00 2001
From: Guoshuai Li <ligs@dtdream.com>
Date: Tue, 18 Apr 2017 19:15:54 +0800
Subject: [PATCH] system-ovn.at: Add test for ping other router's port on
  distributed router

Signed-off-by: Guoshuai Li <ligs@dtdream.com>
---
  tests/system-ovn.at     | 101 
++++++++++++++++++++++++++++++++++++++++++++++++
  tests/system-traffic.at |  20 ++++++++++
  2 files changed, 121 insertions(+)

+
+ADD_NAMESPACES(foo1)
+ADD_VETH(foo1, foo1, br0, "192.168.1.2/24", "f0:00:00:01:02:01", 
"192.168.1.1")
+NS_CHECK_EXEC([foo1], [arp -s 192.168.1.1 00:00:01:01:02:01])
+
+NS_CHECK_EXEC([foo1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.2 | 
FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP

Comments

Guoshuai Li April 18, 2017, 11:52 a.m. UTC | #1
Sorry, this patch fails to apply with "git am". Please use this one instead:
https://mail.openvswitch.org/pipermail/ovs-dev/2017-April/331034.html

>
>>
>> On Sun, Apr 9, 2017 at 3:23 PM, Mickey Spiegel <mickeys.dev@gmail.com 
>> <mailto:mickeys.dev@gmail.com>> wrote:
>>
>>
>>
>>     On Thu, Apr 6, 2017 at 7:34 AM, Guoshuai Li <ligs@dtdream.com
>>     <mailto:ligs@dtdream.com>> wrote:
>>
>>
>>         revese my topology:
>>
>>          +---------+--------+
>>                                      |  VM 172.16.1.7  |
>>          +---------+--------+
>>                                                |
>>          +---------+--------+
>>                                      |  Logical Switch  |
>>          +---------+--------+
>>          |172.16.1.254
>>                       10.157.142.3 +-----------+--------+
>>                   +----------------+  Logical Router 1  +
>>                   | +--------------------+
>>         +---------+--------+
>>         |  Logical Switch  |
>>         +------------------+
>>                   | +--------------------+
>>                   +----------------+  Logical Router 2  |
>>                       10.157.142.1 +--------------------+
>>
>>
>>             Hi All, I am having a problem for ovn and need help, thanks.
>>
>>
>>             I created two logical routers and connected them to each
>>             other through an external LogicalSwitch (which is connected
>>             to the external network).
>>
>>             LogicalRouter-1 is then connected to the VM through the
>>             internal LogicalSwitch.
>>
>>             my topology:
>>
>>                                   10.157.142.3         172.16.1.254
>>              +--------------------+ +---------+--------+
>>              +---------+--------+
>>                       +----------------+  Logical Router 1
>>             +------------------|  Logical Switch 
>>             +-------------------+ VM 172.16.1.7   |
>>                       | +--------------------+ +------------------+
>>              +------------------+
>>             +---------+--------+
>>             |  Logical Switch  |
>>             +------------------+
>>                       | +--------------------+
>>                       +----------------+  Logical Router 2  |
>>              +--------------------+
>>                                   10.157.142.1
>>
>>             I tested master and branch-2.7; traffic cannot get
>>             from the VM (172.16.1.7) to LogicalRouter-2's
>>             port (10.157.142.
>>
>>             Sorry, The destination address is 10.157.142.1, And The
>>         SNAT/unSNAT address is 10.157.142.3.
>>
>>             ) via ping.
>>             My logical router is a distributed gateway, and the two
>>             logical router ports that connect external LogicalSwitch
>>             are on the same chassis.
>>             If the two logical router ports are not on the same
>>             chassis ping is also OK, And ping from VM (172.16.1.7) to
>>             external network is also OK.
>>
>>             I looked at the OpenFlow tables on the gateway chassis, and I
>>             suspect an unSNAT handling error in Router 1's input
>>             processing of the ICMP reply.
>>             I think it is necessary to replace the destination
>>             address 10.157.142.3 with 172.16.1.7 in Table 19 and
>>             route 172.16.1.7 in Table 21, but now the route match is
>>             10.157.142.0/24 <http://10.157.142.0/24>.
>>
>>             cookie=0x92bd0055, duration=68.468s, table=16,
>>             n_packets=1, n_bytes=98, idle_age=36,
>>             priority=50,reg14=0x4,metadata=0x7,dl_dst=fa:16:3e:58:1c:8a
>>             actions=resubmit(,17)
>>             cookie=0x45765344, duration=68.467s, table=17,
>>             n_packets=1, n_bytes=98, idle_age=36,
>>             priority=0,metadata=0x7 actions=resubmit(,18)
>>             cookie=0xaeaaed29, duration=68.479s, table=18,
>>             n_packets=1, n_bytes=98, idle_age=36,
>>             priority=0,metadata=0x7 actions=resubmit(,19)
>>             cookie=0xce785d51, duration=68.479s, table=19,
>>             n_packets=1, n_bytes=98, idle_age=36,
>>             priority=100,ip,reg14=0x4,metadata=0x7,nw_dst=10.157.142.3
>>             actions=ct(table=20,zone=NXM_NX_REG12[0..15],nat)
>>             cookie=0xbd994421, duration=68.481s, table=20,
>>             n_packets=1, n_bytes=98, idle_age=36,
>>             priority=0,metadata=0x7 actions=resubmit(,21)
>>             cookie=0xaea3a6ae, duration=68.479s, table=21,
>>             n_packets=1, n_bytes=98, idle_age=36,
>>             priority=49,ip,metadata=0x7,nw_dst=10.157.142.0/24
>>             <http://10.157.142.0/24>
>>             actions=dec_ttl(),move:NXM_OF_IP_DST[]->NXM_NX_XXREG0[96..127],load:0xa9d8e03->NXM_NX_XXREG0[64..95],mod_dl_src:fa:16:3e:58:1c:8a,load:0x4->NXM_NX_REG15[],load:0x1->NXM_NX_REG10[0],resubmit(,22)
>>             cookie=0xce6e8d4e, duration=68.482s, table=22,
>>             n_packets=1, n_bytes=98, idle_age=36,
>>             priority=0,ip,metadata=0x7
>>             actions=push:NXM_NX_REG0[],push:NXM_NX_XXREG0[96..127],pop:NXM_NX_REG0[],mod_dl_dst:00:00:00:00:00:00,resubmit(,66),pop:NXM_NX_REG0[],resubmit(,23)
>>             cookie=0xce89c4ed, duration=68.481s, table=23,
>>             n_packets=1, n_bytes=98, idle_age=36,
>>             priority=150,reg15=0x4,metadata=0x7,dl_dst=00:00:00:00:00:00
>>             actions=load:0x5->NXM_NX_REG15[],resubmit(,24)
>>             cookie=0xb2d84350, duration=68.469s, table=24,
>>             n_packets=1, n_bytes=98, idle_age=36,
>>             priority=100,ip,metadata=0x7,dl_dst=00:00:00:00:00:00
>>
>>             I do not know why and need help, thanks.
>>
>>
>>     I was able to reproduce this. I agree with your analysis. Looking
>>     at ovs-ofctl dump-flows, the packet counts indicate that the
>>     packet is subject to ct(...,nat), but the routing table match is
>>     as if NAT never occurred.
>>
>>     I tried with gateway routers and it worked. There are some
>>     differences in ovs-dpctl dump-flows.
>>
>>     For the case of gateway routers:
>>
>>     vagrant@compute2:~$ sudo ovs-dpctl dump-flows
>>
>>     recirc_id(0x14),tunnel(tun_id=0x6,src=192.168.33.31,dst=192.168.33.32,geneve({}{}),flags(-df+csum+key)),in_port(4),eth(src=00:00:00:00:00:00/01:00:00:00:00:00,dst=00:00:02:02:03:04),eth_type(0x0800),ipv4(src=172.16.1.3,dst=172.16.1.10,proto=1,ttl=62,frag=no),icmp(type=8,code=0),
>>     packets:3, bytes:294, used:1.981s,
>>     actions:userspace(pid=2658598031,slow_path(action))
>>
>>     recirc_id(0x16),tunnel(tun_id=0x6,src=192.168.33.31,dst=192.168.33.32,geneve({class=0x102,type=0x80,len=4,0x10002}),flags(-df+csum+key)),in_port(4),eth(src=00:00:02:02:03:04,dst=00:00:02:01:02:03),eth_type(0x0800),ipv4(src=172.16.1.10,dst=192.168.1.2,tos=0/0x3,ttl=254,frag=no),
>>     packets:3, bytes:294, used:1.981s,
>>     actions:set(tunnel(tun_id=0x3,dst=192.168.33.31,ttl=64,tp_src=24284,tp_dst=6081,geneve({class=0x102,type=0x80,len=4,0x10002}),flags(df|csum|key))),set(eth(src=00:00:01:01:02:03,dst=f0:00:00:01:02:03)),set(ipv4(src=172.16.1.10,dst=192.168.1.2,tos=0/0x3,ttl=252)),4
>>
>>     recirc_id(0),tunnel(tun_id=0x6,src=192.168.33.31,dst=192.168.33.32,geneve({class=0x102,type=0x80,len=4,0x10002/0x7fffffff}),flags(-df+csum+key)),in_port(4),eth(src=00:00:00:00:00:00/01:00:00:00:00:00,dst=00:00:04:01:02:04),eth_type(0x0800),ipv4(src=192.168.1.2/255.255.255.254,dst=172.16.1.10,proto=1,ttl=63,frag=no
>>     <http://192.168.1.2/255.255.255.254,dst=172.16.1.10,proto=1,ttl=63,frag=no>),
>>     packets:3, bytes:294, used:1.981s,
>>     actions:ct(zone=1,nat),recirc(0x13)
>>
>>     recirc_id(0x15),tunnel(tun_id=0x6,src=192.168.33.31,dst=192.168.33.32,geneve({}{}),flags(-df+csum+key)),in_port(4),eth(src=00:00:02:01:02:03,dst=00:00:02:02:03:04),eth_type(0x0800),ipv4(src=172.16.1.10,dst=172.16.1.3,proto=1,ttl=255,frag=no),
>>     packets:3, bytes:294, used:1.981s,
>>     actions:set(eth(src=00:00:02:02:03:04,dst=00:00:02:01:02:03)),set(ipv4(src=172.16.1.10,dst=172.16.1.3,ttl=254)),ct(zone=2,nat),ct(commit,zone=1,nat(dst=192.168.1.2)),recirc(0x16)
>>
>>     recirc_id(0x13),tunnel(tun_id=0x6,src=192.168.33.31,dst=192.168.33.32,geneve({}{}),flags(-df+csum+key)),in_port(4),eth(src=00:00:04:01:02:03,dst=00:00:04:01:02:04),eth_type(0x0800),ipv4(src=192.168.1.2,dst=172.16.1.10,ttl=63,frag=no),
>>     packets:3, bytes:294, used:1.981s,
>>     actions:set(eth(src=00:00:02:01:02:03,dst=00:00:02:02:03:04)),set(ipv4(src=192.168.1.2,dst=172.16.1.10,ttl=62)),ct(commit,zone=2,nat(src=172.16.1.3)),recirc(0x14)
>>
>>     Note that recirc_id(0x15) goes to ct() actions after the ICMP
>>     response is generated in the slow path, which is required for
>>     ICMP type change.
>>
>>
>>     With distributed routers and distributed gateway ports:
>>
>>     vagrant@compute2:~$ sudo ovs-dpctl dump-flows
>>
>>     recirc_id(0),tunnel(tun_id=0x1,src=192.168.33.31,dst=192.168.33.32,geneve({class=0x102,type=0x80,len=4,0x10004/0x7fffffff}),flags(-df+csum+key)),in_port(4),eth_type(0x0800),ipv4(src=192.168.1.3,frag=no),
>>     packets:3, bytes:294, used:2.388s,
>>     actions:ct(commit,zone=3,nat(src=172.16.1.1)),recirc(0x3)
>>
>>     recirc_id(0x3),tunnel(tun_id=0x1,src=192.168.33.31,dst=192.168.33.32,geneve({}{}),flags(-df+csum+key)),in_port(4),eth(src=00:00:02:01:02:03,dst=00:00:02:01:02:04),eth_type(0x0800),ipv4(src=172.16.1.1,dst=172.16.1.10,proto=1,ttl=63,frag=no),icmp(type=8,code=0),
>>     packets:3, bytes:294, used:2.389s,
>>     actions:userspace(pid=2248102802,slow_path(action))
>>
>>     recirc_id(0x4),tunnel(tun_id=0x1,src=192.168.33.31,dst=192.168.33.32,geneve({}{}),flags(-df+csum+key)),in_port(4),eth(src=00:00:02:01:02:04,dst=00:00:02:01:02:03),eth_type(0x0800),ipv4(dst=172.16.1.1,ttl=254,frag=no),
>>     packets:3, bytes:294, used:2.389s,
>>     actions:userspace(pid=2248102802,slow_path(controller))
>>
>>     The recirc_id(0x4) entry ends up with the slow_path(controller)
>>     action resulting from table 24. I do not yet know why the earlier
>>     ct() actions from table 19 were skipped.
>>
>> The problems have to do with hitting NAT actions when already in the 
>> slow path due to the change of ICMP type.
>>
>> When pinging from either a gateway router or a distributed router 
>> with a distributed gateway port, to the IP address of another gateway 
>> router, the ping works. On the second gateway router, the hit in 
>> table 17 to generate the ICMP echo reply forces slow path due to the 
>> action (change of ICMP type). On gateway routers, all IP packets are 
>> subject to recirc in the DNAT pipeline stage (table 20), even though 
>> the ICMP echo reply would never hit a DNAT entry. The recirc forced 
>> by this action ends the slow path processing. By the time the packet 
>> gets back to the first router (gateway router or distributed router), 
>> the packet is no longer in the slow path and NAT succeeds as it should.
>>
>> When pinging from either a gateway router or a distributed router 
>> with a distributed gateway port, to the IP address of another 
>> distributed router with a distributed gateway port, the ping fails. 
>> There is no recirc between the hit on the second gateway router in 
>> table 17 to generate the ICMP echo reply (which forces slow path due 
>> to the ICMP type change), and the reply packet's hit of the UNSNAT 
>> rule in table 19 of the first router (gateway router or distributed 
>> router). The latter does not find a match, and the packet ends up 
>> hitting the table 24 slow path controller entry that applies when ARP 
>> could not resolve a MAC address.
>>
>> Discussing this with Guru on IRC earlier today, the suggestion was to 
>> create a system test in tests/system-traffic.at 
>> <http://system-traffic.at> (without OVN) that demonstrates the 
>> problem. From one namespace trying to ping a virtual IP, there should 
>> be flows to apply SNAT, then apply clone and ct_clear (emulating the 
>> behavior of OVN patch ports), then generate the ICMP echo reply just 
>> like OVN does, back through clone and ct_clear, then UNSNAT, then 
>> back to the source. Once we can demonstrate the problem in this way, 
>> then we can ask for further help to debug this.
>>
>> Mickey
>>
>>     Mickey
>>
>>
>>
>
> I'm sorry it took me so long to finish writing this.
>
> this is my test case patch:
>
> From 916a47b11316786c2fd3ccddbc564dd56bfbd665 Mon Sep 17 00:00:00 2001
> From: Guoshuai Li <ligs@dtdream.com>
> Date: Tue, 18 Apr 2017 19:15:54 +0800
> Subject: [PATCH] system-ovn.at: Add test for ping other router's port on
>  distributed router
>
> Signed-off-by: Guoshuai Li <ligs@dtdream.com>
> ---
>  tests/system-ovn.at     | 101 
> ++++++++++++++++++++++++++++++++++++++++++++++++
>  tests/system-traffic.at |  20 ++++++++++
>  2 files changed, 121 insertions(+)
>
> diff --git a/tests/system-ovn.at b/tests/system-ovn.at
> index dd62bd1..fc6a923 100644
> --- a/tests/system-ovn.at
> +++ b/tests/system-ovn.at
> @@ -1396,3 +1396,104 @@ as
>  OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
>  /connection dropped.*/d"])
>  AT_CLEANUP
> +
> +AT_SETUP([ovn -- ping other router port on distributed router])
> +AT_KEYWORDS([ovnnat])
> +
> +CHECK_CONNTRACK()
> +CHECK_CONNTRACK_NAT()
> +ovn_start
> +OVS_TRAFFIC_VSWITCHD_START()
> +ADD_BR([br-int])
> +
> +# Set external-ids in br-int needed for ovn-controller
> +ovs-vsctl \
> +        -- set Open_vSwitch . external-ids:system-id=hv1 \
> +        -- set Open_vSwitch . 
> external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
> +        -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
> +        -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
> +        -- set bridge br-int fail-mode=secure 
> other-config:disable-in-band=true
> +
> +# Start ovn-controller
> +start_daemon ovn-controller
> +
> +# Logical network:
> +# Two LRs - R1 and R2 that are connected to LS "join" (20.0.0.0/24).
> +# R1 has switchess foo (192.168.1.0/24).
> +# The port between R1/R2 and "join" is the router gateway port where
> +# the NAT rules are applied.
> +#
> +#    foo -- R1 -- join -- R2
> +#
> +
> +ovn-nbctl lr-add R1
> +ovn-nbctl lr-add R2
> +
> +ovn-nbctl ls-add foo
> +ovn-nbctl ls-add join
> +
> +ovn-nbctl lrp-add R1 foo 00:00:01:01:02:01 192.168.1.1/24
> +ovn-nbctl lrp-add R1 join1 00:00:02:01:02:01 20.0.0.1/24 \
> +    -- set Logical_Router_Port join1 options:redirect-chassis=hv1
> +ovn-nbctl lrp-add R2 join2 00:00:02:01:02:02 20.0.0.2/24 \
> +    -- set Logical_Router_Port join2 options:redirect-chassis=hv1
> +
> +# Connect foo to R1
> +ovn-nbctl lsp-add foo rp-foo -- set Logical_Switch_Port rp-foo \
> +    type=router options:router-port=foo \
> +    -- lsp-set-addresses rp-foo router
> +
> +# Connect join to R1
> +ovn-nbctl lsp-add join rp-join1 -- set Logical_Switch_Port rp-join1 \
> +    type=router options:router-port=join1 \
> +    -- lsp-set-addresses rp-join1 router
> +
> +# Connect join to R2
> +ovn-nbctl lsp-add join rp-join2 -- set Logical_Switch_Port rp-join2 \
> +    type=router options:router-port=join2 \
> +    -- lsp-set-addresses rp-join2 router
> +
> +# Logical port 'foo1' in switch 'foo'.
> +ADD_NAMESPACES(foo1)
> +ADD_VETH(foo1, foo1, br-int, "192.168.1.2/24", "f0:00:00:01:02:01", \
> +         "192.168.1.1")
> +ovn-nbctl lsp-add foo foo1 \
> +-- lsp-set-addresses foo1 "f0:00:00:01:02:01 192.168.1.2"
> +
> +# Add SNAT rule
> +ovn-nbctl lr-nat-add R1 snat 20.0.0.1 192.168.1.0/24
> +
> +ovn-nbctl --wait=hv sync
> +
> +echo "------ hv dump ------"
> +ovs-ofctl show br-int
> +ovs-ofctl dump-flows br-int
> +echo "---------------------"
> +
> +# East-West No NAT: 'foo1' pings 'R2' using 20.0.0.2
> +NS_CHECK_EXEC([foo1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.2 | 
> FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +# We verify that SNAT indeed happened via 'dump-conntrack' command.
> +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(192.168.1.2) | \
> +sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
> +icmp,orig=(src=192.168.1.2,dst=20.0.0.2,id=<cleared>,type=8,code=0),reply=(src=20.0.0.2,dst=20.0.0.1,id=<cleared>,type=0,code=0),zone=<cleared>
> +])
> +
> +OVS_APP_EXIT_AND_WAIT([ovn-controller])
> +
> +as ovn-sb
> +OVS_APP_EXIT_AND_WAIT([ovsdb-server])
> +
> +as ovn-nb
> +OVS_APP_EXIT_AND_WAIT([ovsdb-server])
> +
> +as northd
> +OVS_APP_EXIT_AND_WAIT([ovn-northd])
> +
> +as
> +OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
> +/connection dropped.*/d"])
> +AT_CLEANUP
> diff --git a/tests/system-traffic.at b/tests/system-traffic.at
> index c042773..295e606 100644
> --- a/tests/system-traffic.at
> +++ b/tests/system-traffic.at
> @@ -3678,3 +3678,23 @@ NS_CHECK_EXEC([at_ns0], [ping -q -c 1 -w 3 
> 10.4.2.2], [1], [ignore])
>
>  OVS_TRAFFIC_VSWITCHD_STOP(["/dropping VLAN \(0\|300\) packet received 
> on dot1q-tunnel port/d"])
>  AT_CLEANUP
> +
> +AT_SETUP([datapath - SNAT and UNSNAT])
> +OVS_TRAFFIC_VSWITCHD_START()
> +
> +AT_CHECK([ovs-ofctl add-flow br0 "table=0, 
> priority=100,in_port=1,ip,nw_dst=20.0.0.2 
> actions=dec_ttl(),mod_dl_src:00:00:02:01:02:01,mod_dl_dst:00:00:02:01:02:02,resubmit(,1)"])
> +AT_CHECK([ovs-ofctl add-flow br0 "table=1, 
> priority=100,ip,nw_src=192.168.1.2 
> actions=ct(commit,table=2,zone=6,nat(src=20.0.0.1))"])
> +AT_CHECK([ovs-ofctl add-flow br0 "table=2, 
> priority=100,icmp,nw_dst=20.0.0.2,icmp_type=8,icmp_code=0 
> actions=push:NXM_OF_IP_SRC[],push:NXM_OF_IP_DST[],pop:NXM_OF_IP_SRC[],pop:NXM_OF_IP_DST[],load:0xff->NXM_NX_IP_TTL[],load:0->NXM_OF_ICMP_TYPE[],dec_ttl(),mod_dl_src:00:00:02:01:02:02,mod_dl_dst:00:00:02:01:02:01,resubmit(,3)"])
> +AT_CHECK([ovs-ofctl add-flow br0 "table=3, 
> priority=100,ip,nw_dst=20.0.0.1 actions=ct(table=4,zone=6,nat)"])
> +AT_CHECK([ovs-ofctl add-flow br0 "table=4, 
> priority=100,ip,nw_dst=192.168.1.2 
> actions=dec_ttl(),mod_dl_src:00:00:01:01:02:01,mod_dl_dst:f0:00:00:01:02:01,load:0->NXM_OF_IN_PORT[],output:1"])
> +
> +ADD_NAMESPACES(foo1)
> +ADD_VETH(foo1, foo1, br0, "192.168.1.2/24", "f0:00:00:01:02:01", 
> "192.168.1.1")
> +NS_CHECK_EXEC([foo1], [arp -s 192.168.1.1 00:00:01:01:02:01])
> +
> +NS_CHECK_EXEC([foo1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.2 | 
> FORMAT_PING], [0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +OVS_TRAFFIC_VSWITCHD_STOP
> +AT_CLEANUP
> -- 
> 2.10.1.windows.1
>
> I wrote two test cases: one that uses OVN and one that does not.
>
> In the system-traffic.at test case, I tried duplicating the UNSNAT flow 
> in two consecutive tables (see below), and with that the ICMP traffic works!
>
>   AT_CHECK([ovs-ofctl add-flow br0 "table=3, 
> priority=100,ip,nw_dst=20.0.0.1 actions=ct(table=4,zone=6,nat)"])
>   AT_CHECK([ovs-ofctl add-flow br0 "table=4, 
> priority=100,ip,nw_dst=20.0.0.1 actions=ct(table=5,zone=6,nat)"])
>   AT_CHECK([ovs-ofctl add-flow br0 "table=5, 
> priority=100,ip,nw_dst=192.168.1.2 
> actions=dec_ttl(),mod_dl_src:00:00:01:01:02:01,mod_dl_dst:f0:00:00:01:02:01,load:0->NXM_OF_IN_PORT[],output:1"])
>
> Learning to write test cases took me a long time, so I have not analyzed 
> the problem any further.  I would appreciate help with further analysis.
>
diff mbox

Patch

diff --git a/tests/system-ovn.at b/tests/system-ovn.at
index dd62bd1..fc6a923 100644
--- a/tests/system-ovn.at
+++ b/tests/system-ovn.at
@@ -1396,3 +1396,104 @@  as
  OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
  /connection dropped.*/d"])
  AT_CLEANUP
+
+AT_SETUP([ovn -- ping other router port on distributed router])
+AT_KEYWORDS([ovnnat])
+
+CHECK_CONNTRACK()
+CHECK_CONNTRACK_NAT()
+ovn_start
+OVS_TRAFFIC_VSWITCHD_START()
+ADD_BR([br-int])
+
+# Set external-ids in br-int needed for ovn-controller
+ovs-vsctl \
+        -- set Open_vSwitch . external-ids:system-id=hv1 \
+        -- set Open_vSwitch . 
external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
+        -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
+        -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
+        -- set bridge br-int fail-mode=secure 
other-config:disable-in-band=true
+
+# Start ovn-controller
+start_daemon ovn-controller
+
+# Logical network:
+# Two LRs - R1 and R2 that are connected to LS "join" (20.0.0.0/24).
+# R1 has switch foo (192.168.1.0/24).
+# The port between R1/R2 and "join" is the router gateway port where
+# the NAT rules are applied.
+#
+#    foo -- R1 -- join -- R2
+#
+
+ovn-nbctl lr-add R1
+ovn-nbctl lr-add R2
+
+ovn-nbctl ls-add foo
+ovn-nbctl ls-add join
+
+ovn-nbctl lrp-add R1 foo 00:00:01:01:02:01 192.168.1.1/24
+ovn-nbctl lrp-add R1 join1 00:00:02:01:02:01 20.0.0.1/24 \
+    -- set Logical_Router_Port join1 options:redirect-chassis=hv1
+ovn-nbctl lrp-add R2 join2 00:00:02:01:02:02 20.0.0.2/24 \
+    -- set Logical_Router_Port join2 options:redirect-chassis=hv1
+
+# Connect foo to R1
+ovn-nbctl lsp-add foo rp-foo -- set Logical_Switch_Port rp-foo \
+    type=router options:router-port=foo \
+    -- lsp-set-addresses rp-foo router
+
+# Connect join to R1
+ovn-nbctl lsp-add join rp-join1 -- set Logical_Switch_Port rp-join1 \
+    type=router options:router-port=join1 \
+    -- lsp-set-addresses rp-join1 router
+
+# Connect join to R2
+ovn-nbctl lsp-add join rp-join2 -- set Logical_Switch_Port rp-join2 \
+    type=router options:router-port=join2 \
+    -- lsp-set-addresses rp-join2 router
+
+# Logical port 'foo1' in switch 'foo'.
+ADD_NAMESPACES(foo1)
+ADD_VETH(foo1, foo1, br-int, "192.168.1.2/24", "f0:00:00:01:02:01", \
+         "192.168.1.1")
+ovn-nbctl lsp-add foo foo1 \
+-- lsp-set-addresses foo1 "f0:00:00:01:02:01 192.168.1.2"
+
+# Add SNAT rule
+ovn-nbctl lr-nat-add R1 snat 20.0.0.1 192.168.1.0/24
+
+ovn-nbctl --wait=hv sync
+
+echo "------ hv dump ------"
+ovs-ofctl show br-int
+ovs-ofctl dump-flows br-int
+echo "---------------------"
+
+# East-West with SNAT: 'foo1' pings 'R2' using 20.0.0.2
+NS_CHECK_EXEC([foo1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.2 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+# We verify that SNAT indeed happened via 'dump-conntrack' command.
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(192.168.1.2) | \
+sed -e 's/zone=[[0-9]]*/zone=<cleared>/'], [0], [dnl
+icmp,orig=(src=192.168.1.2,dst=20.0.0.2,id=<cleared>,type=8,code=0),reply=(src=20.0.0.2,dst=20.0.0.1,id=<cleared>,type=0,code=0),zone=<cleared>
+])
+
+OVS_APP_EXIT_AND_WAIT([ovn-controller])
+
+as ovn-sb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as ovn-nb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as northd
+OVS_APP_EXIT_AND_WAIT([ovn-northd])
+
+as
+OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
+/connection dropped.*/d"])
+AT_CLEANUP
diff --git a/tests/system-traffic.at b/tests/system-traffic.at
index c042773..295e606 100644
--- a/tests/system-traffic.at
+++ b/tests/system-traffic.at
@@ -3678,3 +3678,23 @@  NS_CHECK_EXEC([at_ns0], [ping -q -c 1 -w 3 
10.4.2.2], [1], [ignore])

  OVS_TRAFFIC_VSWITCHD_STOP(["/dropping VLAN \(0\|300\) packet received 
on dot1q-tunnel port/d"])
  AT_CLEANUP
+
+AT_SETUP([datapath - SNAT and UNSNAT])
+OVS_TRAFFIC_VSWITCHD_START()
+
+AT_CHECK([ovs-ofctl add-flow br0 "table=0, 
priority=100,in_port=1,ip,nw_dst=20.0.0.2 
actions=dec_ttl(),mod_dl_src:00:00:02:01:02:01,mod_dl_dst:00:00:02:01:02:02,resubmit(,1)"])
+AT_CHECK([ovs-ofctl add-flow br0 "table=1, 
priority=100,ip,nw_src=192.168.1.2 
actions=ct(commit,table=2,zone=6,nat(src=20.0.0.1))"])
+AT_CHECK([ovs-ofctl add-flow br0 "table=2, 
priority=100,icmp,nw_dst=20.0.0.2,icmp_type=8,icmp_code=0 
actions=push:NXM_OF_IP_SRC[],push:NXM_OF_IP_DST[],pop:NXM_OF_IP_SRC[],pop:NXM_OF_IP_DST[],load:0xff->NXM_NX_IP_TTL[],load:0->NXM_OF_ICMP_TYPE[],dec_ttl(),mod_dl_src:00:00:02:01:02:02,mod_dl_dst:00:00:02:01:02:01,resubmit(,3)"])
+AT_CHECK([ovs-ofctl add-flow br0 "table=3, 
priority=100,ip,nw_dst=20.0.0.1 actions=ct(table=4,zone=6,nat)"])
+AT_CHECK([ovs-ofctl add-flow br0 "table=4, 
priority=100,ip,nw_dst=192.168.1.2 
actions=dec_ttl(),mod_dl_src:00:00:01:01:02:01,mod_dl_dst:f0:00:00:01:02:01,load:0->NXM_OF_IN_PORT[],output:1"])