diff mbox series

[ovs-dev,v3] northd: Fix pmtud for non routed traffic.

Message ID 6390c21acbc18c8d01695f973e0433b94f2dd4ef.1713357427.git.lorenzo.bianconi@redhat.com
State Changes Requested
Delegated to: Dumitru Ceara
Headers show
Series [ovs-dev,v3] northd: Fix pmtud for non routed traffic. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/github-robot-_ovn-kubernetes success github build: passed

Commit Message

Lorenzo Bianconi April 17, 2024, 12:41 p.m. UTC
Similar to what is already implemented for routed e/w traffic,
introduce pmtud support for e/w traffic between two logical switch ports
connected to the same logical switch, but running on two different
hypervisors.

Acked-by: Mark Michelson <mmichels@redhat.com>
Reported-at: https://issues.redhat.com/browse/FDP-362
Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
---
Changes since v2:
- minor changes
Changes since v1:
- move logic into consider_port_binding
- add more self-tests
- fix typos
---
 controller/lflow.h        |   1 +
 controller/physical.c     |  30 +++++++-
 northd/northd.c           |  33 +++++++--
 northd/ovn-northd.8.xml   |  16 +++-
 tests/multinode-macros.at |   4 +
 tests/multinode.at        | 151 ++++++++++++++++++++++++++++++++++++++
 tests/ovn-controller.at   |  63 ++++++++++++++++
 tests/ovn-macros.at       |   1 +
 tests/ovn-northd.at       |  24 ++++--
 tests/ovn.at              |   5 +-
 10 files changed, 307 insertions(+), 21 deletions(-)

Comments

Dumitru Ceara April 22, 2024, 8:43 p.m. UTC | #1
On 4/17/24 14:41, Lorenzo Bianconi wrote:
> Similar to what is already implemented for routed e/w traffic,
> introduce pmtud support for e/w traffic between two logical switch ports
> connected to the same logical switch, but running on two different
> hypervisors.
> 
> Acked-by: Mark Michelson <mmichels@redhat.com>
> Reported-at: https://issues.redhat.com/browse/FDP-362
> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
> ---
> Changes since v2:
> - minor changes

Hi Lorenzo,

Thanks for the new version.  However, this fails multinode CI:

https://github.com/dceara/ovn/actions/runs/8786825542/job/24112259604

It hangs when cleaning up the second test, please see below.

> Changes since v1:
> - move logic in consider_port_binding
> - add more self-test
> - fix typos
> ---

[...]

>  
> diff --git a/tests/multinode-macros.at b/tests/multinode-macros.at
> index c04506a52..7a3b5cb50 100644
> --- a/tests/multinode-macros.at
> +++ b/tests/multinode-macros.at
> @@ -7,6 +7,10 @@
>  m4_define([M_NS_EXEC],
>      [podman exec $1 ip netns exec $2 $3])
>  
> +# M_NS_DAEMONIZE([fake_node],[namespace], [command], [pidfile])
> +m4_define([M_NS_DAEMONIZE],
> +    [podman exec $1 ip netns exec $2 $3 & echo $! > $4])
> +

This actually stores the PID (in the host pid namespace) of "podman
exec" into $4.  Later we wrongly call "m_as <container> kill <pid>",
but that host PID is meaningless inside the container.

One way to make this work seems to be to automatically clean up (similar
to NETNS_DAEMONIZE()):

# M_NS_DAEMONIZE([fake_node],[namespace], [command], [pidfile])
m4_define([M_NS_DAEMONIZE],
    [podman exec $1 ip netns exec $2 $3 & echo $! > $4
     echo "kill \`cat $4\`" >> cleanup
    ]
)

The "cleanup" script gets executed at the end of the test, outside any
container, so it will kill all "podman exec" commands.  If we go this
way, we don't need the explicit "kill" calls below (podman exec is
killed, so the command it runs, nc, is also killed).

What do you think, Lorenzo?  Mark, Numan, do you have any other ideas?


>  # M_NS_CHECK_EXEC([fake_node], [namespace], [command], other_params...)
>  #
>  # Wrapper for AT_CHECK that executes 'command' inside 'fake_node''s namespace'.
> diff --git a/tests/multinode.at b/tests/multinode.at
> index 0187382be..d9085b64d 100644
> --- a/tests/multinode.at
> +++ b/tests/multinode.at
> @@ -154,6 +154,11 @@ check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
>  check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'  allow-related
>  check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'  allow-related
>  
> +# create LB
> +check multinode_nbctl lb-add lb0 10.0.0.1:8080 10.0.0.4:8080 udp
> +check multinode_nbctl ls-lb-add sw0 lb0
> +M_NS_DAEMONIZE([ovn-chassis-2], [sw0p2], [nc -u -l 8080 >/dev/null 2>&1], [nc.pid])
> +
>  m_as ovn-gw-1 ip netns add ovn-ext0
>  m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0 type=internal
>  m_as ovn-gw-1 ip link set ext0 netns ovn-ext0
> @@ -207,6 +212,14 @@ M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 |
>  3 packets transmitted, 3 received, 0% packet loss, time 0ms
>  ])
>  
> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1
> +for i in $(seq 30); do
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2 if=/dev/urandom |nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore])
> +done
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev sw0p1 | grep -q 'mtu 942'])
> +
> +m_as ovn-chassis-2 kill $(cat nc.pid)

This is wrong: nc.pid holds a host PID, which doesn't make sense inside ovn-chassis-2.

> +
>  AT_CLEANUP
>  
>  AT_SETUP([ovn multinode pmtu - distributed router - vxlan])
> @@ -696,6 +709,11 @@ check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
>  check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'  allow-related
>  check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'  allow-related
>  
> +# create LB
> +check multinode_nbctl lb-add lb0 10.0.0.1:8080 20.0.0.3:8080 udp
> +check multinode_nbctl lr-lb-add lr0 lb0
> +M_NS_DAEMONIZE([ovn-chassis-2], [sw1p1], [nc -u -l 8080 >/dev/null 2>&1], [nc.pid])
> +
>  m_as ovn-gw-1 ip netns add ovn-ext0
>  m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0 type=internal
>  m_as ovn-gw-1 ip link set ext0 netns ovn-ext0
> @@ -751,6 +769,18 @@ M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 |
>  M_NS_CHECK_EXEC([ovn-gw-1], [ovn-ext0], [ip link set dev ext1 mtu 1100])
>  M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu = 1100"])
>  
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> +
> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1
> +for i in $(seq 30); do
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2 if=/dev/urandom |nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore])
> +done
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev sw0p1 | grep -q 'mtu 942'])
> +
> +m_as ovn-chassis-2 kill $(cat nc.pid)

Same reasoning about wrong nc.pid inside the container.

> +
>  AT_CLEANUP
>  
>  AT_SETUP([ovn multinode pmtu - gw router - vxlan])
> @@ -834,6 +864,11 @@ check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
>  check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'  allow-related
>  check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'  allow-related
>  
> +# create LB
> +check multinode_nbctl lb-add lb0 10.0.0.1:8080 20.0.0.3:8080 udp
> +check multinode_nbctl lr-lb-add lr0 lb0
> +M_NS_DAEMONIZE([ovn-chassis-2], [sw1p1], [nc -u -l 8080 >/dev/null 2>&1], [nc.pid])
> +
>  m_as ovn-gw-1 ip netns add ovn-ext0
>  m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0 type=internal
>  m_as ovn-gw-1 ip link set ext0 netns ovn-ext0
> @@ -882,4 +917,120 @@ M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 |
>  
>  M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu = 1150"])
>  
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> +
> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1
> +for i in $(seq 30); do
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2 if=/dev/urandom |nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore])
> +done
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev sw0p1 | grep -q 'mtu 950'])
> +
> +m_as ovn-chassis-2 kill $(cat nc.pid)

Here too.

> +
> +AT_CLEANUP
> +
> +AT_SETUP([ovn multinode pmtu - logical switch - geneve])
> +
> +# Check that ovn-fake-multinode setup is up and running
> +check_fake_multinode_setup
> +
> +# Delete the multinode NB and OVS resources before starting the test.
> +cleanup_multinode_resources
> +
> +m_as ovn-chassis-1 ip link del sw0p1-p
> +m_as ovn-chassis-2 ip link del sw0p2-p
> +
> +# Reset geneve tunnels
> +for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1
> +do
> +    m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=geneve
> +done
> +
> +OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q genev_sys])
> +OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q genev_sys])
> +OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q genev_sys])
> +
> +# Test East-West switching
> +check multinode_nbctl ls-add sw0
> +check multinode_nbctl lsp-add sw0 sw0-port1
> +check multinode_nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:03 10.0.0.3 1000::3"
> +check multinode_nbctl lsp-add sw0 sw0-port2
> +check multinode_nbctl lsp-set-addresses sw0-port2 "50:54:00:00:00:04 10.0.0.4 1000::4"
> +
> +m_as ovn-chassis-1 /data/create_fake_vm.sh sw0-port1 sw0p1 50:54:00:00:00:03 10.0.0.3 24 10.0.0.1 1000::3/64 1000::a
> +m_as ovn-chassis-2 /data/create_fake_vm.sh sw0-port2 sw0p2 50:54:00:00:00:04 10.0.0.4 24 10.0.0.1 1000::4/64 1000::a
> +
> +# Create the second logical switch with one port
> +check multinode_nbctl ls-add sw1
> +check multinode_nbctl lsp-add sw1 sw1-port1
> +check multinode_nbctl lsp-set-addresses sw1-port1 "40:54:00:00:00:03 20.0.0.3 2000::3"
> +
> +# Create a logical router and attach both logical switches
> +check multinode_nbctl lr-add lr0
> +check multinode_nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 1000::a/64
> +check multinode_nbctl lsp-add sw0 sw0-lr0
> +check multinode_nbctl lsp-set-type sw0-lr0 router
> +check multinode_nbctl lsp-set-addresses sw0-lr0 router
> +check multinode_nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0
> +
> +check multinode_nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 2000::a/64
> +check multinode_nbctl lsp-add sw1 sw1-lr0
> +check multinode_nbctl lsp-set-type sw1-lr0 router
> +check multinode_nbctl lsp-set-addresses sw1-lr0 router
> +check multinode_nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1
> +
> +m_as ovn-chassis-2 /data/create_fake_vm.sh sw1-port1 sw1p1 40:54:00:00:00:03 20.0.0.3 24 20.0.0.1 2000::3/64 2000::a
> +
> +check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 10.0.0.0/24
> +check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
> +
> +check multinode_nbctl lrp-set-gateway-chassis lr0-sw0 ovn-chassis-1 10
> +check multinode_nbctl lrp-set-gateway-chassis lr0-sw1 ovn-chassis-2 10
> +
> +# create some ACLs
> +check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'  allow-related
> +check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'  allow-related
> +
> +check multinode_nbctl lb-add lb0 10.0.0.1:8080 10.0.0.4:8080 udp
> +check multinode_nbctl ls-lb-add sw0 lb0
> +M_NS_DAEMONIZE([ovn-chassis-2], [sw0p2], [nc -u -l 8080 >/dev/null 2>&1], [nc.pid])
> +
> +m_wait_for_ports_up
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +# Change ptmu for the geneve tunnel
> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 10.0.0.4 2>&1 |grep -q "message too long, mtu=1142"])
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +# Change ptmu for the geneve tunnel
> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1100 dev eth1
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 20.0.0.3 2>&1 |grep -q "message too long, mtu=1042"])
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> +
> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1
> +for i in $(seq 30); do
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2 if=/dev/urandom |nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore])
> +done
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev sw0p1 | grep -q 'mtu 942'])
> +
> +m_as ovn-chassis-2 kill $(cat nc.pid)

This too.

> +
>  AT_CLEANUP
> diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
> index f2c792c9c..0d694b1d9 100644
> --- a/tests/ovn-controller.at
> +++ b/tests/ovn-controller.at
> @@ -2880,3 +2880,66 @@ AT_CHECK([test x"$port_uuid"=$(ovs-vsctl get port $fakech_tunnel _uuid)])
>  OVN_CLEANUP([hv1])
>  AT_CLEANUP
>  ])
> +
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([ovn-controller - pmtud flows])
> +AT_KEYWORDS([pmtud])
> +
> +ovn_start
> +
> +net_add n1
> +sim_add hv1
> +ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.1
> +
> +check ovn-nbctl ls-add ls1 \
> +    -- lsp-add ls1 lsp1 \
> +    -- lsp-set-addresses lsp1 "00:00:00:00:00:01 192.168.1.1" \
> +    -- lsp-add ls1 lsp2 \
> +    -- lsp-set-addresses lsp2 "00:00:00:00:00:02 192.168.1.2"
> +
> +as hv1
> +check ovs-vsctl \
> +    -- add-port br-int vif1 \
> +    -- set Interface vif1 external_ids:iface-id=lsp1 \
> +    -- add-port br-int vif2 \
> +    -- set Interface vif2 external_ids:iface-id=lsp2
> +
> +AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=OFTABLE_CT_ZONE_LOOKUP | \
> +          sed -e 's/cookie=0x.*, duration=.*, table/cookie=??, duration=??, table/' | \
> +          sed -e 's/actions=load:0x.*->NXM_NX_REG13/actions=load:0x?->NXM_NX_REG13/' | \
> +          grep -v NXST_FLOW |sort], [0], [dnl
> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=0 actions=resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x1,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x2,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
> +])
> +
> +check ovn-nbctl lsp-add ls1 lsp3 \
> +    -- lsp-set-addresses lsp3 "00:00:00:00:00:03 192.168.1.3"
> +check ovs-vsctl \
> +    -- add-port br-int vif3 \
> +    -- set Interface vif3 external_ids:iface-id=lsp3
> +
> +AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=OFTABLE_CT_ZONE_LOOKUP | \
> +          sed -e 's/cookie=0x.*, duration=.*, table/cookie=??, duration=??, table/' | \
> +          sed -e 's/actions=load:0x.*->NXM_NX_REG13/actions=load:0x?->NXM_NX_REG13/' | \
> +          grep -v NXST_FLOW |sort], [0], [dnl
> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=0 actions=resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x1,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x2,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x3,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
> +])
> +
> +check ovn-nbctl lsp-del lsp3
> +AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=OFTABLE_CT_ZONE_LOOKUP | \
> +          sed -e 's/cookie=0x.*, duration=.*, table/cookie=??, duration=??, table/' | \
> +          sed -e 's/actions=load:0x.*->NXM_NX_REG13/actions=load:0x?->NXM_NX_REG13/' |
> +          grep -v NXST_FLOW |sort], [0], [dnl
> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=0 actions=resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x1,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x2,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
> +])
> +
> +OVN_CLEANUP([hv1])
> +AT_CLEANUP
> +])

Regards,
Dumitru
Dumitru Ceara April 22, 2024, 8:50 p.m. UTC | #2
On 4/22/24 22:43, Dumitru Ceara wrote:
> On 4/17/24 14:41, Lorenzo Bianconi wrote:
>> Similar to what is already implemented for routed e/w traffic,
>> introduce pmtud support for e/w traffic between two logical switch ports
>> connected to the same logical switch, but running on two different
>> hypervisors.
>>
>> Acked-by: Mark Michelson <mmichels@redhat.com>
>> Reported-at: https://issues.redhat.com/browse/FDP-362
>> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
>> ---
>> Changes since v2:
>> - minor changes
> 
> Hi Lorenzo,
> 
> Thanks for the new version.  However, this fails multinode CI:
> 
> https://github.com/dceara/ovn/actions/runs/8786825542/job/24112259604
> 
> It hangs when cleaning up the second test, please see below.
> 
>> Changes since v1:
>> - move logic in consider_port_binding
>> - add more self-test
>> - fix typos
>> ---
> 
> [...]
> 
>>  
>> diff --git a/tests/multinode-macros.at b/tests/multinode-macros.at
>> index c04506a52..7a3b5cb50 100644
>> --- a/tests/multinode-macros.at
>> +++ b/tests/multinode-macros.at
>> @@ -7,6 +7,10 @@
>>  m4_define([M_NS_EXEC],
>>      [podman exec $1 ip netns exec $2 $3])
>>  
>> +# M_NS_DAEMONIZE([fake_node],[namespace], [command], [pidfile])
>> +m4_define([M_NS_DAEMONIZE],
>> +    [podman exec $1 ip netns exec $2 $3 & echo $! > $4])
>> +
> 
> This actually stores the PID (in the host pid namespace) of "podman
> exec" into $4.  Later we wrongfully call "as <container> kill <pid>".
> 
> One way to make this work seems to be to automatically cleanup (similar
> to NETNS_DAEMONIZE()):
> 
> # M_NS_DAEMONIZE([fake_node],[namespace], [command], [pidfile])
> m4_define([M_NS_DAEMONIZE],
>     [podman exec $1 ip netns exec $2 $3 & echo $! > $4
>      echo "kill \`cat $4\`" >> cleanup
>     ]
> )
> 
> The "cleanup" script gets executed at the end of the test, outside any
> container so it will kill all "podman exec" commands.  If we go this
> way, we don't need the explicit "kill" calls below (podman exec is
> killed so the command it run, nc, is also killed).
> 
> What do you think, Lorenzo?  Mark, Numan, do you have any other ideas?
> 
> 
>>  # M_NS_CHECK_EXEC([fake_node], [namespace], [command], other_params...)
>>  #
>>  # Wrapper for AT_CHECK that executes 'command' inside 'fake_node''s namespace'.
>> diff --git a/tests/multinode.at b/tests/multinode.at
>> index 0187382be..d9085b64d 100644
>> --- a/tests/multinode.at
>> +++ b/tests/multinode.at
>> @@ -154,6 +154,11 @@ check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
>>  check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'  allow-related
>>  check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'  allow-related
>>  
>> +# create LB
>> +check multinode_nbctl lb-add lb0 10.0.0.1:8080 10.0.0.4:8080 udp
>> +check multinode_nbctl ls-lb-add sw0 lb0
>> +M_NS_DAEMONIZE([ovn-chassis-2], [sw0p2], [nc -u -l 8080 >/dev/null 2>&1], [nc.pid])
>> +
>>  m_as ovn-gw-1 ip netns add ovn-ext0
>>  m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0 type=internal
>>  m_as ovn-gw-1 ip link set ext0 netns ovn-ext0
>> @@ -207,6 +212,14 @@ M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 |
>>  3 packets transmitted, 3 received, 0% packet loss, time 0ms
>>  ])
>>  
>> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1
>> +for i in $(seq 30); do
>> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2 if=/dev/urandom |nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore])
>> +done
>> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev sw0p1 | grep -q 'mtu 942'])
>> +
>> +m_as ovn-chassis-2 kill $(cat nc.pid)
> 
> This is wrong, nc.pid doesn't make sense inside ovn-chassis-2
> 
>> +
>>  AT_CLEANUP
>>  
>>  AT_SETUP([ovn multinode pmtu - distributed router - vxlan])
>> @@ -696,6 +709,11 @@ check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
>>  check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'  allow-related
>>  check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'  allow-related
>>  
>> +# create LB
>> +check multinode_nbctl lb-add lb0 10.0.0.1:8080 20.0.0.3:8080 udp
>> +check multinode_nbctl lr-lb-add lr0 lb0
>> +M_NS_DAEMONIZE([ovn-chassis-2], [sw1p1], [nc -u -l 8080 >/dev/null 2>&1], [nc.pid])
>> +
>>  m_as ovn-gw-1 ip netns add ovn-ext0
>>  m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0 type=internal
>>  m_as ovn-gw-1 ip link set ext0 netns ovn-ext0
>> @@ -751,6 +769,18 @@ M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 |
>>  M_NS_CHECK_EXEC([ovn-gw-1], [ovn-ext0], [ip link set dev ext1 mtu 1100])
>>  M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu = 1100"])

Missing space before "grep".

>>  
>> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
>> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
>> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
>> +
>> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1
>> +for i in $(seq 30); do
>> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2 if=/dev/urandom |nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore])
>> +done
>> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev sw0p1 | grep -q 'mtu 942'])
>> +
>> +m_as ovn-chassis-2 kill $(cat nc.pid)
> 
> Same reasoning about wrong nc.pid inside the container.
> 
>> +
>>  AT_CLEANUP
>>  
>>  AT_SETUP([ovn multinode pmtu - gw router - vxlan])
>> @@ -834,6 +864,11 @@ check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
>>  check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'  allow-related
>>  check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'  allow-related
>>  
>> +# create LB
>> +check multinode_nbctl lb-add lb0 10.0.0.1:8080 20.0.0.3:8080 udp
>> +check multinode_nbctl lr-lb-add lr0 lb0
>> +M_NS_DAEMONIZE([ovn-chassis-2], [sw1p1], [nc -u -l 8080 >/dev/null 2>&1], [nc.pid])
>> +
>>  m_as ovn-gw-1 ip netns add ovn-ext0
>>  m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0 type=internal
>>  m_as ovn-gw-1 ip link set ext0 netns ovn-ext0
>> @@ -882,4 +917,120 @@ M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 |
>>  
>>  M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu = 1150"])

Same here.

>>  
>> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
>> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
>> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
>> +
>> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1
>> +for i in $(seq 30); do
>> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2 if=/dev/urandom |nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore])
>> +done
>> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev sw0p1 | grep -q 'mtu 950'])
>> +
>> +m_as ovn-chassis-2 kill $(cat nc.pid)
> 
> Here too.
> 
>> +
>> +AT_CLEANUP
>> +
>> +AT_SETUP([ovn multinode pmtu - logical switch - geneve])
>> +
>> +# Check that ovn-fake-multinode setup is up and running
>> +check_fake_multinode_setup
>> +
>> +# Delete the multinode NB and OVS resources before starting the test.
>> +cleanup_multinode_resources
>> +
>> +m_as ovn-chassis-1 ip link del sw0p1-p
>> +m_as ovn-chassis-2 ip link del sw0p2-p
>> +
>> +# Reset geneve tunnels
>> +for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1
>> +do
>> +    m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=geneve
>> +done
>> +
>> +OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q genev_sys])
>> +OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q genev_sys])
>> +OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q genev_sys])
>> +
>> +# Test East-West switching
>> +check multinode_nbctl ls-add sw0
>> +check multinode_nbctl lsp-add sw0 sw0-port1
>> +check multinode_nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:03 10.0.0.3 1000::3"
>> +check multinode_nbctl lsp-add sw0 sw0-port2
>> +check multinode_nbctl lsp-set-addresses sw0-port2 "50:54:00:00:00:04 10.0.0.4 1000::4"
>> +
>> +m_as ovn-chassis-1 /data/create_fake_vm.sh sw0-port1 sw0p1 50:54:00:00:00:03 10.0.0.3 24 10.0.0.1 1000::3/64 1000::a
>> +m_as ovn-chassis-2 /data/create_fake_vm.sh sw0-port2 sw0p2 50:54:00:00:00:04 10.0.0.4 24 10.0.0.1 1000::4/64 1000::a
>> +
>> +# Create the second logical switch with one port
>> +check multinode_nbctl ls-add sw1
>> +check multinode_nbctl lsp-add sw1 sw1-port1
>> +check multinode_nbctl lsp-set-addresses sw1-port1 "40:54:00:00:00:03 20.0.0.3 2000::3"
>> +
>> +# Create a logical router and attach both logical switches
>> +check multinode_nbctl lr-add lr0
>> +check multinode_nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 1000::a/64
>> +check multinode_nbctl lsp-add sw0 sw0-lr0
>> +check multinode_nbctl lsp-set-type sw0-lr0 router
>> +check multinode_nbctl lsp-set-addresses sw0-lr0 router
>> +check multinode_nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0
>> +
>> +check multinode_nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 2000::a/64
>> +check multinode_nbctl lsp-add sw1 sw1-lr0
>> +check multinode_nbctl lsp-set-type sw1-lr0 router
>> +check multinode_nbctl lsp-set-addresses sw1-lr0 router
>> +check multinode_nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1
>> +
>> +m_as ovn-chassis-2 /data/create_fake_vm.sh sw1-port1 sw1p1 40:54:00:00:00:03 20.0.0.3 24 20.0.0.1 2000::3/64 2000::a
>> +
>> +check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 10.0.0.0/24
>> +check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
>> +
>> +check multinode_nbctl lrp-set-gateway-chassis lr0-sw0 ovn-chassis-1 10
>> +check multinode_nbctl lrp-set-gateway-chassis lr0-sw1 ovn-chassis-2 10
>> +
>> +# create some ACLs
>> +check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'  allow-related
>> +check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'  allow-related
>> +
>> +check multinode_nbctl lb-add lb0 10.0.0.1:8080 10.0.0.4:8080 udp
>> +check multinode_nbctl ls-lb-add sw0 lb0
>> +M_NS_DAEMONIZE([ovn-chassis-2], [sw0p2], [nc -u -l 8080 >/dev/null 2>&1], [nc.pid])
>> +
>> +m_wait_for_ports_up
>> +
>> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | FORMAT_PING], \
>> +[0], [dnl
>> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
>> +])
>> +
>> +# Change ptmu for the geneve tunnel
>> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1
>> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 10.0.0.4 2>&1 |grep -q "message too long, mtu=1142"])

Same here.

>> +
>> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
>> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
>> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
>> +
>> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
>> +[0], [dnl
>> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
>> +])
>> +
>> +# Change ptmu for the geneve tunnel
>> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1100 dev eth1
>> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 20.0.0.3 2>&1 |grep -q "message too long, mtu=1042"])

Same here.

>> +
>> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
>> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
>> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
>> +
>> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1
>> +for i in $(seq 30); do
>> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2 if=/dev/urandom |nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore])
>> +done
>> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev sw0p1 | grep -q 'mtu 942'])
>> +
>> +m_as ovn-chassis-2 kill $(cat nc.pid)
> 
> This too.
> 
>> +
>>  AT_CLEANUP
>> diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
>> index f2c792c9c..0d694b1d9 100644
>> --- a/tests/ovn-controller.at
>> +++ b/tests/ovn-controller.at
>> @@ -2880,3 +2880,66 @@ AT_CHECK([test x"$port_uuid"=$(ovs-vsctl get port $fakech_tunnel _uuid)])
>>  OVN_CLEANUP([hv1])
>>  AT_CLEANUP
>>  ])
>> +
>> +OVN_FOR_EACH_NORTHD([
>> +AT_SETUP([ovn-controller - pmtud flows])
>> +AT_KEYWORDS([pmtud])
>> +
>> +ovn_start
>> +
>> +net_add n1
>> +sim_add hv1
>> +ovs-vsctl add-br br-phys
>> +ovn_attach n1 br-phys 192.168.0.1
>> +
>> +check ovn-nbctl ls-add ls1 \
>> +    -- lsp-add ls1 lsp1 \
>> +    -- lsp-set-addresses lsp1 "00:00:00:00:00:01 192.168.1.1" \
>> +    -- lsp-add ls1 lsp2 \
>> +    -- lsp-set-addresses lsp2 "00:00:00:00:00:02 192.168.1.2"
>> +
>> +as hv1
>> +check ovs-vsctl \
>> +    -- add-port br-int vif1 \
>> +    -- set Interface vif1 external_ids:iface-id=lsp1 \
>> +    -- add-port br-int vif2 \
>> +    -- set Interface vif2 external_ids:iface-id=lsp2
>> +
>> +AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=OFTABLE_CT_ZONE_LOOKUP | \
>> +          sed -e 's/cookie=0x.*, duration=.*, table/cookie=??, duration=??, table/' | \
>> +          sed -e 's/actions=load:0x.*->NXM_NX_REG13/actions=load:0x?->NXM_NX_REG13/' | \
>> +          grep -v NXST_FLOW |sort], [0], [dnl
>> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=0 actions=resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
>> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x1,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
>> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x2,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
>> +])
>> +
>> +check ovn-nbctl lsp-add ls1 lsp3 \
>> +    -- lsp-set-addresses lsp3 "00:00:00:00:00:03 192.168.1.3"
>> +check ovs-vsctl \
>> +    -- add-port br-int vif3 \
>> +    -- set Interface vif3 external_ids:iface-id=lsp3
>> +
>> +AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=OFTABLE_CT_ZONE_LOOKUP | \
>> +          sed -e 's/cookie=0x.*, duration=.*, table/cookie=??, duration=??, table/' | \
>> +          sed -e 's/actions=load:0x.*->NXM_NX_REG13/actions=load:0x?->NXM_NX_REG13/' | \
>> +          grep -v NXST_FLOW |sort], [0], [dnl
>> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=0 actions=resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
>> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x1,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
>> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x2,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
>> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x3,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
>> +])
>> +
>> +check ovn-nbctl lsp-del lsp3
>> +AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=OFTABLE_CT_ZONE_LOOKUP | \
>> +          sed -e 's/cookie=0x.*, duration=.*, table/cookie=??, duration=??, table/' | \
>> +          sed -e 's/actions=load:0x.*->NXM_NX_REG13/actions=load:0x?->NXM_NX_REG13/' |
>> +          grep -v NXST_FLOW |sort], [0], [dnl
>> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=0 actions=resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
>> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x1,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
>> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x2,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
>> +])
>> +
>> +OVN_CLEANUP([hv1])
>> +AT_CLEANUP
>> +])
> 
> Regards,
> Dumitru
>
Numan Siddique April 22, 2024, 9:47 p.m. UTC | #3
On Mon, Apr 22, 2024 at 4:51 PM Dumitru Ceara <dceara@redhat.com> wrote:

> On 4/22/24 22:43, Dumitru Ceara wrote:
> > On 4/17/24 14:41, Lorenzo Bianconi wrote:
> >> Similar to what is already implemented for routed e/w traffic,
> >> introduce pmtud support for e/w traffic between two logical switch ports
> >> connected to the same logical switch, but running on two different
> >> hypervisors.
> >>
> >> Acked-by: Mark Michelson <mmichels@redhat.com>
> >> Reported-at: https://issues.redhat.com/browse/FDP-362
> >> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
> >> ---
> >> Changes since v2:
> >> - minor changes
> >
> > Hi Lorenzo,
> >
> > Thanks for the new version.  However, this fails multinode CI:
> >
> > https://github.com/dceara/ovn/actions/runs/8786825542/job/24112259604
> >
> > It hangs when cleaning up the second test, please see below.
> >
> >> Changes since v1:
> >> - move logic in consider_port_binding
> >> - add more self-test
> >> - fix typos
> >> ---
> >
> > [...]
> >
> >>
> >> diff --git a/tests/multinode-macros.at b/tests/multinode-macros.at
> >> index c04506a52..7a3b5cb50 100644
> >> --- a/tests/multinode-macros.at
> >> +++ b/tests/multinode-macros.at
> >> @@ -7,6 +7,10 @@
> >>  m4_define([M_NS_EXEC],
> >>      [podman exec $1 ip netns exec $2 $3])
> >>
> >> +# M_NS_DAEMONIZE([fake_node],[namespace], [command], [pidfile])
> >> +m4_define([M_NS_DAEMONIZE],
> >> +    [podman exec $1 ip netns exec $2 $3 & echo $! > $4])
> >> +
> >
> > This actually stores the PID (in the host pid namespace) of "podman
> > exec" into $4.  Later we wrongly call "m_as <container> kill <pid>".
> >
> > One way to make this work seems to be to automatically clean up (similar
> > to NETNS_DAEMONIZE()):
> >
> > # M_NS_DAEMONIZE([fake_node],[namespace], [command], [pidfile])
> > m4_define([M_NS_DAEMONIZE],
> >     [podman exec $1 ip netns exec $2 $3 & echo $! > $4
> >      echo "kill \`cat $4\`" >> cleanup
> >     ]
> > )
> >
> > The "cleanup" script gets executed at the end of the test, outside any
> > container, so it will kill all "podman exec" commands.  If we go this
> > way, we don't need the explicit "kill" calls below (podman exec is
> > killed, so the command it runs, nc, is also killed).
> >
> > What do you think, Lorenzo?  Mark, Numan, do you have any other ideas?
>

+1.  Sounds good to me.

Numan

>
> >
> >>  # M_NS_CHECK_EXEC([fake_node], [namespace], [command], other_params...)
> >>  #
> >>  # Wrapper for AT_CHECK that executes 'command' inside 'fake_node''s
> namespace'.
> >> diff --git a/tests/multinode.at b/tests/multinode.at
> >> index 0187382be..d9085b64d 100644
> >> --- a/tests/multinode.at
> >> +++ b/tests/multinode.at
> >> @@ -154,6 +154,11 @@ check multinode_nbctl lr-nat-add lr0 snat
> 172.20.0.100 20.0.0.0/24
> >>  check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'
> allow-related
> >>  check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'
> allow-related
> >>
> >> +# create LB
> >> +check multinode_nbctl lb-add lb0 10.0.0.1:8080 10.0.0.4:8080 udp
> >> +check multinode_nbctl ls-lb-add sw0 lb0
> >> +M_NS_DAEMONIZE([ovn-chassis-2], [sw0p2], [nc -u -l 8080 >/dev/null
> 2>&1], [nc.pid])
> >> +
> >>  m_as ovn-gw-1 ip netns add ovn-ext0
> >>  m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0
> type=internal
> >>  m_as ovn-gw-1 ip link set ext0 netns ovn-ext0
> >> @@ -207,6 +212,14 @@ M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q
> -c 3 -i 0.3 -w 2 172.20.1.2 |
> >>  3 packets transmitted, 3 received, 0% packet loss, time 0ms
> >>  ])
> >>
> >> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1
> >> +for i in $(seq 30); do
> >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2
> if=/dev/urandom |nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore])
> >> +done
> >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev
> sw0p1 | grep -q 'mtu 942'])
> >> +
> >> +m_as ovn-chassis-2 kill $(cat nc.pid)
> >
> > This is wrong: nc.pid holds a host PID, meaningless inside ovn-chassis-2.
> >
> >> +
> >>  AT_CLEANUP
> >>
> >>  AT_SETUP([ovn multinode pmtu - distributed router - vxlan])
> >> @@ -696,6 +709,11 @@ check multinode_nbctl lr-nat-add lr0 snat
> 172.20.0.100 20.0.0.0/24
> >>  check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'
> allow-related
> >>  check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'
> allow-related
> >>
> >> +# create LB
> >> +check multinode_nbctl lb-add lb0 10.0.0.1:8080 20.0.0.3:8080 udp
> >> +check multinode_nbctl lr-lb-add lr0 lb0
> >> +M_NS_DAEMONIZE([ovn-chassis-2], [sw1p1], [nc -u -l 8080 >/dev/null
> 2>&1], [nc.pid])
> >> +
> >>  m_as ovn-gw-1 ip netns add ovn-ext0
> >>  m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0
> type=internal
> >>  m_as ovn-gw-1 ip link set ext0 netns ovn-ext0
> >> @@ -751,6 +769,18 @@ M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q
> -c 3 -i 0.3 -w 2 172.20.1.2 |
> >>  M_NS_CHECK_EXEC([ovn-gw-1], [ovn-ext0], [ip link set dev ext1 mtu
> 1100])
> >>  M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300
> -M do 172.20.1.2 2>&1 |grep -q "mtu = 1100"])
>
> Missing space before "grep".
>
> >>
> >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24
> dev sw0p1])
> >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via
> 10.0.0.1 dev sw0p1])
> >> +
> >> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1
> >> +for i in $(seq 30); do
> >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2
> if=/dev/urandom |nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore])
> >> +done
> >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev
> sw0p1 | grep -q 'mtu 942'])
> >> +
> >> +m_as ovn-chassis-2 kill $(cat nc.pid)
> >
> > Same reasoning about wrong nc.pid inside the container.
> >
> >> +
> >>  AT_CLEANUP
> >>
> >>  AT_SETUP([ovn multinode pmtu - gw router - vxlan])
> >> @@ -834,6 +864,11 @@ check multinode_nbctl lr-nat-add lr0 snat
> 172.20.0.100 20.0.0.0/24
> >>  check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'
> allow-related
> >>  check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'
> allow-related
> >>
> >> +# create LB
> >> +check multinode_nbctl lb-add lb0 10.0.0.1:8080 20.0.0.3:8080 udp
> >> +check multinode_nbctl lr-lb-add lr0 lb0
> >> +M_NS_DAEMONIZE([ovn-chassis-2], [sw1p1], [nc -u -l 8080 >/dev/null
> 2>&1], [nc.pid])
> >> +
> >>  m_as ovn-gw-1 ip netns add ovn-ext0
> >>  m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0
> type=internal
> >>  m_as ovn-gw-1 ip link set ext0 netns ovn-ext0
> >> @@ -882,4 +917,120 @@ M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping
> -q -c 3 -i 0.3 -w 2 172.20.1.2 |
> >>
> >>  M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300
> -M do 172.20.1.2 2>&1 |grep -q "mtu = 1150"])
>
> Same here.
>
> >>
> >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24
> dev sw0p1])
> >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via
> 10.0.0.1 dev sw0p1])
> >> +
> >> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1
> >> +for i in $(seq 30); do
> >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2
> if=/dev/urandom |nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore])
> >> +done
> >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev
> sw0p1 | grep -q 'mtu 950'])
> >> +
> >> +m_as ovn-chassis-2 kill $(cat nc.pid)
> >
> > Here too.
> >
> >> +
> >> +AT_CLEANUP
> >> +
> >> +AT_SETUP([ovn multinode pmtu - logical switch - geneve])
> >> +
> >> +# Check that ovn-fake-multinode setup is up and running
> >> +check_fake_multinode_setup
> >> +
> >> +# Delete the multinode NB and OVS resources before starting the test.
> >> +cleanup_multinode_resources
> >> +
> >> +m_as ovn-chassis-1 ip link del sw0p1-p
> >> +m_as ovn-chassis-2 ip link del sw0p2-p
> >> +
> >> +# Reset geneve tunnels
> >> +for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1
> >> +do
> >> +    m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=geneve
> >> +done
> >> +
> >> +OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q genev_sys])
> >> +OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q genev_sys])
> >> +OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q genev_sys])
> >> +
> >> +# Test East-West switching
> >> +check multinode_nbctl ls-add sw0
> >> +check multinode_nbctl lsp-add sw0 sw0-port1
> >> +check multinode_nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:03
> 10.0.0.3 1000::3"
> >> +check multinode_nbctl lsp-add sw0 sw0-port2
> >> +check multinode_nbctl lsp-set-addresses sw0-port2 "50:54:00:00:00:04
> 10.0.0.4 1000::4"
> >> +
> >> +m_as ovn-chassis-1 /data/create_fake_vm.sh sw0-port1 sw0p1
> 50:54:00:00:00:03 10.0.0.3 24 10.0.0.1 1000::3/64 1000::a
> >> +m_as ovn-chassis-2 /data/create_fake_vm.sh sw0-port2 sw0p2
> 50:54:00:00:00:04 10.0.0.4 24 10.0.0.1 1000::4/64 1000::a
> >> +
> >> +# Create the second logical switch with one port
> >> +check multinode_nbctl ls-add sw1
> >> +check multinode_nbctl lsp-add sw1 sw1-port1
> >> +check multinode_nbctl lsp-set-addresses sw1-port1 "40:54:00:00:00:03
> 20.0.0.3 2000::3"
> >> +
> >> +# Create a logical router and attach both logical switches
> >> +check multinode_nbctl lr-add lr0
> >> +check multinode_nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01
> 10.0.0.1/24 1000::a/64
> >> +check multinode_nbctl lsp-add sw0 sw0-lr0
> >> +check multinode_nbctl lsp-set-type sw0-lr0 router
> >> +check multinode_nbctl lsp-set-addresses sw0-lr0 router
> >> +check multinode_nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0
> >> +
> >> +check multinode_nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02
> 20.0.0.1/24 2000::a/64
> >> +check multinode_nbctl lsp-add sw1 sw1-lr0
> >> +check multinode_nbctl lsp-set-type sw1-lr0 router
> >> +check multinode_nbctl lsp-set-addresses sw1-lr0 router
> >> +check multinode_nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1
> >> +
> >> +m_as ovn-chassis-2 /data/create_fake_vm.sh sw1-port1 sw1p1
> 40:54:00:00:00:03 20.0.0.3 24 20.0.0.1 2000::3/64 2000::a
> >> +
> >> +check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 10.0.0.0/24
> >> +check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
> >> +
> >> +check multinode_nbctl lrp-set-gateway-chassis lr0-sw0 ovn-chassis-1 10
> >> +check multinode_nbctl lrp-set-gateway-chassis lr0-sw1 ovn-chassis-2 10
> >> +
> >> +# create some ACLs
> >> +check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'
> allow-related
> >> +check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'
> allow-related
> >> +
> >> +check multinode_nbctl lb-add lb0 10.0.0.1:8080 10.0.0.4:8080 udp
> >> +check multinode_nbctl ls-lb-add sw0 lb0
> >> +M_NS_DAEMONIZE([ovn-chassis-2], [sw0p2], [nc -u -l 8080 >/dev/null
> 2>&1], [nc.pid])
> >> +
> >> +m_wait_for_ports_up
> >> +
> >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2
> 10.0.0.4 | FORMAT_PING], \
> >> +[0], [dnl
> >> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> >> +])
> >> +
> >> +# Change ptmu for the geneve tunnel
> >> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1
> >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do
> 10.0.0.4 2>&1 |grep -q "message too long, mtu=1142"])
>
> Same here.
>
> >> +
> >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24
> dev sw0p1])
> >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via
> 10.0.0.1 dev sw0p1])
> >> +
> >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2
> 20.0.0.3 | FORMAT_PING], \
> >> +[0], [dnl
> >> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> >> +])
> >> +
> >> +# Change ptmu for the geneve tunnel
> >> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1100 dev eth1
> >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do
> 20.0.0.3 2>&1 |grep -q "message too long, mtu=1042"])
>
> Same here.
>
> >> +
> >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24
> dev sw0p1])
> >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via
> 10.0.0.1 dev sw0p1])
> >> +
> >> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1
> >> +for i in $(seq 30); do
> >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2
> if=/dev/urandom |nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore])
> >> +done
> >> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev
> sw0p1 | grep -q 'mtu 942'])
> >> +
> >> +m_as ovn-chassis-2 kill $(cat nc.pid)
> >
> > This too.
> >
> >> +
> >>  AT_CLEANUP
> >> diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
> >> index f2c792c9c..0d694b1d9 100644
> >> --- a/tests/ovn-controller.at
> >> +++ b/tests/ovn-controller.at
> >> @@ -2880,3 +2880,66 @@ AT_CHECK([test x"$port_uuid"=$(ovs-vsctl get
> port $fakech_tunnel _uuid)])
> >>  OVN_CLEANUP([hv1])
> >>  AT_CLEANUP
> >>  ])
> >> +
> >> +OVN_FOR_EACH_NORTHD([
> >> +AT_SETUP([ovn-controller - pmtud flows])
> >> +AT_KEYWORDS([pmtud])
> >> +
> >> +ovn_start
> >> +
> >> +net_add n1
> >> +sim_add hv1
> >> +ovs-vsctl add-br br-phys
> >> +ovn_attach n1 br-phys 192.168.0.1
> >> +
> >> +check ovn-nbctl ls-add ls1 \
> >> +    -- lsp-add ls1 lsp1 \
> >> +    -- lsp-set-addresses lsp1 "00:00:00:00:00:01 192.168.1.1" \
> >> +    -- lsp-add ls1 lsp2 \
> >> +    -- lsp-set-addresses lsp2 "00:00:00:00:00:02 192.168.1.2"
> >> +
> >> +as hv1
> >> +check ovs-vsctl \
> >> +    -- add-port br-int vif1 \
> >> +    -- set Interface vif1 external_ids:iface-id=lsp1 \
> >> +    -- add-port br-int vif2 \
> >> +    -- set Interface vif2 external_ids:iface-id=lsp2
> >> +
> >> +AT_CHECK([as hv1 ovs-ofctl dump-flows br-int
> table=OFTABLE_CT_ZONE_LOOKUP | \
> >> +          sed -e 's/cookie=0x.*, duration=.*, table/cookie=??,
> duration=??, table/' | \
> >> +          sed -e
> 's/actions=load:0x.*->NXM_NX_REG13/actions=load:0x?->NXM_NX_REG13/' | \
> >> +          grep -v NXST_FLOW |sort], [0], [dnl
> >> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0,
> n_bytes=0, idle_age=0, priority=0
> actions=resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
> >> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0,
> n_bytes=0, idle_age=0, priority=100,reg14=0x1,metadata=0x1
> actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
> >> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0,
> n_bytes=0, idle_age=0, priority=100,reg14=0x2,metadata=0x1
> actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
> >> +])
> >> +
> >> +check ovn-nbctl lsp-add ls1 lsp3 \
> >> +    -- lsp-set-addresses lsp3 "00:00:00:00:00:03 192.168.1.3"
> >> +check ovs-vsctl \
> >> +    -- add-port br-int vif3 \
> >> +    -- set Interface vif3 external_ids:iface-id=lsp3
> >> +
> >> +AT_CHECK([as hv1 ovs-ofctl dump-flows br-int
> table=OFTABLE_CT_ZONE_LOOKUP | \
> >> +          sed -e 's/cookie=0x.*, duration=.*, table/cookie=??,
> duration=??, table/' | \
> >> +          sed -e
> 's/actions=load:0x.*->NXM_NX_REG13/actions=load:0x?->NXM_NX_REG13/' | \
> >> +          grep -v NXST_FLOW |sort], [0], [dnl
> >> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0,
> n_bytes=0, idle_age=0, priority=0
> actions=resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
> >> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0,
> n_bytes=0, idle_age=0, priority=100,reg14=0x1,metadata=0x1
> actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
> >> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0,
> n_bytes=0, idle_age=0, priority=100,reg14=0x2,metadata=0x1
> actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
> >> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0,
> n_bytes=0, idle_age=0, priority=100,reg14=0x3,metadata=0x1
> actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
> >> +])
> >> +
> >> +check ovn-nbctl lsp-del lsp3
> >> +AT_CHECK([as hv1 ovs-ofctl dump-flows br-int
> table=OFTABLE_CT_ZONE_LOOKUP | \
> >> +          sed -e 's/cookie=0x.*, duration=.*, table/cookie=??,
> duration=??, table/' | \
> >> +          sed -e
> 's/actions=load:0x.*->NXM_NX_REG13/actions=load:0x?->NXM_NX_REG13/' |
> >> +          grep -v NXST_FLOW |sort], [0], [dnl
> >> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0,
> n_bytes=0, idle_age=0, priority=0
> actions=resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
> >> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0,
> n_bytes=0, idle_age=0, priority=100,reg14=0x1,metadata=0x1
> actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
> >> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0,
> n_bytes=0, idle_age=0, priority=100,reg14=0x2,metadata=0x1
> actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
> >> +])
> >> +
> >> +OVN_CLEANUP([hv1])
> >> +AT_CLEANUP
> >> +])
> >
> > Regards,
> > Dumitru
> >
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
>
diff mbox series

Patch

diff --git a/controller/lflow.h b/controller/lflow.h
index 9b7ffa19c..906a26280 100644
--- a/controller/lflow.h
+++ b/controller/lflow.h
@@ -94,6 +94,7 @@  struct uuid;
 #define OFTABLE_ECMP_NH                  77
 #define OFTABLE_CHK_LB_AFFINITY          78
 #define OFTABLE_MAC_CACHE_USE            79
+#define OFTABLE_CT_ZONE_LOOKUP           80
 
 struct lflow_ctx_in {
     struct ovsdb_idl_index *sbrec_multicast_group_by_name_datapath;
diff --git a/controller/physical.c b/controller/physical.c
index 7ee308694..25da789f0 100644
--- a/controller/physical.c
+++ b/controller/physical.c
@@ -1498,6 +1498,26 @@  consider_port_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name,
         return;
     }
 
+    if (get_lport_type(binding) == LP_VIF) {
+        /* Table 80, priority 100.
+         * =======================
+         *
+         * Process ICMP{4,6} error packets too big locally generated from the
+         * kernel in order to lookup proper ct_zone. */
+        struct match match = MATCH_CATCHALL_INITIALIZER;
+        match_set_metadata(&match, htonll(dp_key));
+        match_set_reg(&match, MFF_LOG_INPORT - MFF_REG0, port_key);
+
+        struct zone_ids icmp_zone_ids = get_zone_ids(binding, ct_zones);
+        ofpbuf_clear(ofpacts_p);
+        put_zones_ofpacts(&icmp_zone_ids, ofpacts_p);
+        put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, ofpacts_p);
+        ofctrl_add_flow(flow_table, OFTABLE_CT_ZONE_LOOKUP, 100,
+                        binding->header_.uuid.parts[0], &match,
+                        ofpacts_p, &binding->header_.uuid);
+        ofpbuf_clear(ofpacts_p);
+    }
+
     struct match match;
     if (!strcmp(binding->type, "patch")
         || (!strcmp(binding->type, "l3gateway")
@@ -2464,6 +2484,14 @@  physical_run(struct physical_ctx *p_ctx,
                               flow_table, &ofpacts);
     }
 
+    /* Default flow for CT_ZONE_LOOKUP Table. */
+    struct match ct_look_def_match;
+    match_init_catchall(&ct_look_def_match);
+    ofpbuf_clear(&ofpacts);
+    put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, &ofpacts);
+    ofctrl_add_flow(flow_table, OFTABLE_CT_ZONE_LOOKUP, 0, 0,
+                    &ct_look_def_match, &ofpacts, hc_uuid);
+
     /* Handle output to multicast groups, in tables 40 and 41. */
     const struct sbrec_multicast_group *mc;
     SBREC_MULTICAST_GROUP_TABLE_FOR_EACH (mc, p_ctx->mc_group_table) {
@@ -2522,7 +2550,7 @@  physical_run(struct physical_ctx *p_ctx,
         /* Add specif flows for E/W ICMPv{4,6} packets if tunnelled packets
          * do not fit path MTU.
          */
-        put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, &ofpacts);
+        put_resubmit(OFTABLE_CT_ZONE_LOOKUP, &ofpacts);
 
         /* IPv4 */
         match_init_catchall(&match);
diff --git a/northd/northd.c b/northd/northd.c
index 37f443e70..82fdfbd5d 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -8662,7 +8662,7 @@  build_lswitch_lflows_admission_control(struct ovn_datapath *od,
     ovs_assert(od->nbs);
 
     /* Default action for recirculated ICMP error 'packet too big'. */
-    ovn_lflow_add(lflows, od, S_SWITCH_IN_CHECK_PORT_SEC, 110,
+    ovn_lflow_add(lflows, od, S_SWITCH_IN_CHECK_PORT_SEC, 105,
                   "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||"
                   " (ip6 && icmp6.type == 2 && icmp6.code == 0)) &&"
                   " flags.tunnel_rx == 1", debug_drop_action(), lflow_ref);
@@ -11858,7 +11858,22 @@  build_lswitch_icmp_packet_toobig_admin_flows(
 {
     ovs_assert(op->nbsp);
 
+    ds_clear(match);
     if (!lsp_is_router(op->nbsp)) {
+        if (!op->n_lsp_addrs) {
+            return;
+        }
+
+        ds_put_format(match,
+                      "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||"
+                      " (ip6 && icmp6.type == 2 && icmp6.code == 0)) &&"
+                      " eth.src == "ETH_ADDR_FMT" && outport == %s &&"
+                      " !is_chassis_resident(%s) && flags.tunnel_rx == 1",
+                      ETH_ADDR_ARGS(op->lsp_addrs[0].ea), op->json_key,
+                      op->json_key);
+        ovn_lflow_add(lflows, op->od, S_SWITCH_IN_CHECK_PORT_SEC, 110,
+                      ds_cstr(match), "outport <-> inport; next;",
+                      op->lflow_ref);
         return;
     }
 
@@ -11867,26 +11882,28 @@  build_lswitch_icmp_packet_toobig_admin_flows(
         return;
     }
 
-    ds_clear(match);
     if (peer->od->is_gw_router) {
         ds_put_format(match,
                       "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||"
                       " (ip6 && icmp6.type == 2 && icmp6.code == 0)) && "
                       "eth.src == %s && outport == %s && flags.tunnel_rx == 1",
                       peer->nbrp->mac, op->json_key);
+        ovn_lflow_add(lflows, op->od, S_SWITCH_IN_CHECK_PORT_SEC, 120,
+                      ds_cstr(match), "outport <-> inport; next;",
+                      op->lflow_ref);
     } else {
         ds_put_format(match,
                       "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||"
                       " (ip6 && icmp6.type == 2 && icmp6.code == 0)) && "
                       "eth.dst == %s && flags.tunnel_rx == 1",
                       peer->nbrp->mac);
+        ds_clear(actions);
+        ds_put_format(actions,
+                      "outport <-> inport; next(pipeline=ingress,table=%d);",
+                      ovn_stage_get_table(S_SWITCH_IN_L2_LKUP));
+        ovn_lflow_add(lflows, op->od, S_SWITCH_IN_CHECK_PORT_SEC, 120,
+                      ds_cstr(match), ds_cstr(actions), op->lflow_ref);
     }
-    ds_clear(actions);
-    ds_put_format(actions,
-                  "outport <-> inport; next(pipeline=ingress,table=%d);",
-                  ovn_stage_get_table(S_SWITCH_IN_L2_LKUP));
-    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_CHECK_PORT_SEC, 120,
-                  ds_cstr(match), ds_cstr(actions), op->lflow_ref);
 }
 
 static void
diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
index b14a30285..a63d3d2da 100644
--- a/northd/ovn-northd.8.xml
+++ b/northd/ovn-northd.8.xml
@@ -324,8 +324,7 @@ 
       'packet too big' and <code>eth.src == <var>D</var> &amp;&amp;
       outport == <var>P</var> &amp;&amp; flags.tunnel_rx == 1</code> where
       <var>D</var> is the peer logical router port <var>RP</var> mac address,
-      swaps inport and outport and applies the action <code>
-      next(pipeline=S_SWITCH_IN_L2_LKUP)</code>.
+      swaps inport and outport and applies the action <code>next</code>.
     </p>
 
     <p>
@@ -338,7 +337,16 @@ 
     </p>
 
     <p>
-      This table adds a priority-110 flow that matches 'recirculated' icmp{4,6}
+      For each logical switch port <var>P</var> a priority-110 flow that
+      matches 'recirculated' icmp{4,6} error 'packet too big' and <code>
+      eth.src == <var>D</var> &amp;&amp; outport == <var>P</var> &amp;&amp;
+      !is_chassis_resident("<var>P</var>") &amp;&amp; flags.tunnel_rx == 1
+      </code> where <var>D</var> is the logical switch port mac address,
+      swaps inport and outport and applies the action <code>next</code>.
+    </p>
+
+    <p>
+      This table adds a priority-105 flow that matches 'recirculated' icmp{4,6}
       error 'packet too big' to drop the packet.
     </p>
 
@@ -2467,7 +2475,7 @@  output;
         </p>
 
         <p>
-          This table adds a priority-110 flow that matches 'recirculated'
+          This table adds a priority-105 flow that matches 'recirculated'
           icmp{4,6} error 'packet too big' to drop the packet.
         </p>
 
diff --git a/tests/multinode-macros.at b/tests/multinode-macros.at
index c04506a52..7a3b5cb50 100644
--- a/tests/multinode-macros.at
+++ b/tests/multinode-macros.at
@@ -7,6 +7,10 @@ 
 m4_define([M_NS_EXEC],
     [podman exec $1 ip netns exec $2 $3])
 
+# M_NS_DAEMONIZE([fake_node],[namespace], [command], [pidfile])
+m4_define([M_NS_DAEMONIZE],
+    [podman exec $1 ip netns exec $2 $3 & echo $! > $4])
+
 # M_NS_CHECK_EXEC([fake_node], [namespace], [command], other_params...)
 #
 # Wrapper for AT_CHECK that executes 'command' inside 'fake_node''s namespace'.
diff --git a/tests/multinode.at b/tests/multinode.at
index 0187382be..d9085b64d 100644
--- a/tests/multinode.at
+++ b/tests/multinode.at
@@ -154,6 +154,11 @@  check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
 check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'  allow-related
 check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'  allow-related
 
+# create LB
+check multinode_nbctl lb-add lb0 10.0.0.1:8080 10.0.0.4:8080 udp
+check multinode_nbctl ls-lb-add sw0 lb0
+M_NS_DAEMONIZE([ovn-chassis-2], [sw0p2], [nc -u -l 8080 >/dev/null 2>&1], [nc.pid])
+
 m_as ovn-gw-1 ip netns add ovn-ext0
 m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0 type=internal
 m_as ovn-gw-1 ip link set ext0 netns ovn-ext0
@@ -207,6 +212,14 @@  M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 |
 3 packets transmitted, 3 received, 0% packet loss, time 0ms
 ])
 
+m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1
+for i in $(seq 30); do
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2 if=/dev/urandom |nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore])
+done
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev sw0p1 | grep -q 'mtu 942'])
+
+m_as ovn-chassis-2 kill $(cat nc.pid)
+
 AT_CLEANUP
 
 AT_SETUP([ovn multinode pmtu - distributed router - vxlan])
@@ -696,6 +709,11 @@  check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
 check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'  allow-related
 check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'  allow-related
 
+# create LB
+check multinode_nbctl lb-add lb0 10.0.0.1:8080 20.0.0.3:8080 udp
+check multinode_nbctl lr-lb-add lr0 lb0
+M_NS_DAEMONIZE([ovn-chassis-2], [sw1p1], [nc -u -l 8080 >/dev/null 2>&1], [nc.pid])
+
 m_as ovn-gw-1 ip netns add ovn-ext0
 m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0 type=internal
 m_as ovn-gw-1 ip link set ext0 netns ovn-ext0
@@ -751,6 +769,18 @@  M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 |
 M_NS_CHECK_EXEC([ovn-gw-1], [ovn-ext0], [ip link set dev ext1 mtu 1100])
 M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu = 1100"])
 
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
+
+m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1
+for i in $(seq 30); do
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2 if=/dev/urandom |nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore])
+done
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev sw0p1 | grep -q 'mtu 942'])
+
+m_as ovn-chassis-2 kill $(cat nc.pid)
+
 AT_CLEANUP
 
 AT_SETUP([ovn multinode pmtu - gw router - vxlan])
@@ -834,6 +864,11 @@  check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
 check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'  allow-related
 check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'  allow-related
 
+# create LB
+check multinode_nbctl lb-add lb0 10.0.0.1:8080 20.0.0.3:8080 udp
+check multinode_nbctl lr-lb-add lr0 lb0
+M_NS_DAEMONIZE([ovn-chassis-2], [sw1p1], [nc -u -l 8080 >/dev/null 2>&1], [nc.pid])
+
 m_as ovn-gw-1 ip netns add ovn-ext0
 m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0 type=internal
 m_as ovn-gw-1 ip link set ext0 netns ovn-ext0
@@ -882,4 +917,120 @@  M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 |
 
 M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu = 1150"])
 
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
+
+m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1
+for i in $(seq 30); do
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2 if=/dev/urandom |nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore])
+done
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev sw0p1 | grep -q 'mtu 950'])
+
+m_as ovn-chassis-2 kill $(cat nc.pid)
+
+AT_CLEANUP
+
+AT_SETUP([ovn multinode pmtu - logical switch - geneve])
+
+# Check that ovn-fake-multinode setup is up and running
+check_fake_multinode_setup
+
+# Delete the multinode NB and OVS resources before starting the test.
+cleanup_multinode_resources
+
+m_as ovn-chassis-1 ip link del sw0p1-p
+m_as ovn-chassis-2 ip link del sw0p2-p
+
+# Reset geneve tunnels
+for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1
+do
+    m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=geneve
+done
+
+OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q genev_sys])
+OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q genev_sys])
+OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q genev_sys])
+
+# Test East-West switching
+check multinode_nbctl ls-add sw0
+check multinode_nbctl lsp-add sw0 sw0-port1
+check multinode_nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:03 10.0.0.3 1000::3"
+check multinode_nbctl lsp-add sw0 sw0-port2
+check multinode_nbctl lsp-set-addresses sw0-port2 "50:54:00:00:00:04 10.0.0.4 1000::4"
+
+m_as ovn-chassis-1 /data/create_fake_vm.sh sw0-port1 sw0p1 50:54:00:00:00:03 10.0.0.3 24 10.0.0.1 1000::3/64 1000::a
+m_as ovn-chassis-2 /data/create_fake_vm.sh sw0-port2 sw0p2 50:54:00:00:00:04 10.0.0.4 24 10.0.0.1 1000::4/64 1000::a
+
+# Create the second logical switch with one port
+check multinode_nbctl ls-add sw1
+check multinode_nbctl lsp-add sw1 sw1-port1
+check multinode_nbctl lsp-set-addresses sw1-port1 "40:54:00:00:00:03 20.0.0.3 2000::3"
+
+# Create a logical router and attach both logical switches
+check multinode_nbctl lr-add lr0
+check multinode_nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 1000::a/64
+check multinode_nbctl lsp-add sw0 sw0-lr0
+check multinode_nbctl lsp-set-type sw0-lr0 router
+check multinode_nbctl lsp-set-addresses sw0-lr0 router
+check multinode_nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0
+
+check multinode_nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 2000::a/64
+check multinode_nbctl lsp-add sw1 sw1-lr0
+check multinode_nbctl lsp-set-type sw1-lr0 router
+check multinode_nbctl lsp-set-addresses sw1-lr0 router
+check multinode_nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1
+
+m_as ovn-chassis-2 /data/create_fake_vm.sh sw1-port1 sw1p1 40:54:00:00:00:03 20.0.0.3 24 20.0.0.1 2000::3/64 2000::a
+
+check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 10.0.0.0/24
+check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
+
+check multinode_nbctl lrp-set-gateway-chassis lr0-sw0 ovn-chassis-1 10
+check multinode_nbctl lrp-set-gateway-chassis lr0-sw1 ovn-chassis-2 10
+
+# create some ACLs
+check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'  allow-related
+check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'  allow-related
+
+check multinode_nbctl lb-add lb0 10.0.0.1:8080 10.0.0.4:8080 udp
+check multinode_nbctl ls-lb-add sw0 lb0
+M_NS_DAEMONIZE([ovn-chassis-2], [sw0p2], [nc -u -l 8080 >/dev/null 2>&1], [nc.pid])
+
+m_wait_for_ports_up
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+# Change pmtu for the geneve tunnel
+m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 10.0.0.4 2>&1 |grep -q "message too long, mtu=1142"])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+# Change pmtu for the geneve tunnel
+m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1100 dev eth1
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 20.0.0.3 2>&1 |grep -q "message too long, mtu=1042"])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
+
+m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1
+for i in $(seq 30); do
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2 if=/dev/urandom |nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore])
+done
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev sw0p1 | grep -q 'mtu 942'])
+
+m_as ovn-chassis-2 kill $(cat nc.pid)
+
 AT_CLEANUP
diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
index f2c792c9c..0d694b1d9 100644
--- a/tests/ovn-controller.at
+++ b/tests/ovn-controller.at
@@ -2880,3 +2880,66 @@  AT_CHECK([test x"$port_uuid"=$(ovs-vsctl get port $fakech_tunnel _uuid)])
 OVN_CLEANUP([hv1])
 AT_CLEANUP
 ])
+
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([ovn-controller - pmtud flows])
+AT_KEYWORDS([pmtud])
+
+ovn_start
+
+net_add n1
+sim_add hv1
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.1
+
+check ovn-nbctl ls-add ls1 \
+    -- lsp-add ls1 lsp1 \
+    -- lsp-set-addresses lsp1 "00:00:00:00:00:01 192.168.1.1" \
+    -- lsp-add ls1 lsp2 \
+    -- lsp-set-addresses lsp2 "00:00:00:00:00:02 192.168.1.2"
+
+as hv1
+check ovs-vsctl \
+    -- add-port br-int vif1 \
+    -- set Interface vif1 external_ids:iface-id=lsp1 \
+    -- add-port br-int vif2 \
+    -- set Interface vif2 external_ids:iface-id=lsp2
+
+AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=OFTABLE_CT_ZONE_LOOKUP | \
+          sed -e 's/cookie=0x.*, duration=.*, table/cookie=??, duration=??, table/' | \
+          sed -e 's/actions=load:0x.*->NXM_NX_REG13/actions=load:0x?->NXM_NX_REG13/' | \
+          grep -v NXST_FLOW |sort], [0], [dnl
+ cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=0 actions=resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
+ cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x1,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
+ cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x2,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
+])
+
+check ovn-nbctl lsp-add ls1 lsp3 \
+    -- lsp-set-addresses lsp3 "00:00:00:00:00:03 192.168.1.3"
+check ovs-vsctl \
+    -- add-port br-int vif3 \
+    -- set Interface vif3 external_ids:iface-id=lsp3
+
+AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=OFTABLE_CT_ZONE_LOOKUP | \
+          sed -e 's/cookie=0x.*, duration=.*, table/cookie=??, duration=??, table/' | \
+          sed -e 's/actions=load:0x.*->NXM_NX_REG13/actions=load:0x?->NXM_NX_REG13/' | \
+          grep -v NXST_FLOW |sort], [0], [dnl
+ cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=0 actions=resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
+ cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x1,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
+ cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x2,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
+ cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x3,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
+])
+
+check ovn-nbctl lsp-del lsp3
+AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=OFTABLE_CT_ZONE_LOOKUP | \
+          sed -e 's/cookie=0x.*, duration=.*, table/cookie=??, duration=??, table/' | \
+          sed -e 's/actions=load:0x.*->NXM_NX_REG13/actions=load:0x?->NXM_NX_REG13/' |
+          grep -v NXST_FLOW |sort], [0], [dnl
+ cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=0 actions=resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
+ cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x1,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
+ cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x2,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
+])
+
+OVN_CLEANUP([hv1])
+AT_CLEANUP
+])
diff --git a/tests/ovn-macros.at b/tests/ovn-macros.at
index 32ab3b69f..47ada5c70 100644
--- a/tests/ovn-macros.at
+++ b/tests/ovn-macros.at
@@ -1130,5 +1130,6 @@  m4_define([OFTABLE_ECMP_NH_MAC], [76])
 m4_define([OFTABLE_ECMP_NH], [77])
 m4_define([OFTABLE_CHK_LB_AFFINITY], [78])
 m4_define([OFTABLE_MAC_CACHE_USE], [79])
+m4_define([OFTABLE_CT_ZONE_LOOKUP], [80])
 
 m4_define([OFTABLE_SAVE_INPORT_HEX], [m4_eval(OFTABLE_SAVE_INPORT, 16)])
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index be006fb32..e684fdbdd 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -8532,7 +8532,7 @@  delete_switch_ports() {
     RUN_OVN_NBCTL()
 }
 
-m4_define([DUMP_FLOWS_SORTED], [sed -e 's/arp.tpa == 10.1.0.[[0-9]]\{1,3\}/arp.tpa == 10.1.0.??/;s/eth.dst == ..:..:..:..:..:../??:??:??:??:??:??/' | sort])
+m4_define([DUMP_FLOWS_SORTED], [sed -e 's/arp.tpa == 10.1.0.[[0-9]]\{1,3\}/arp.tpa == 10.1.0.??/;s/eth.dst == ..:..:..:..:..:../??:??:??:??:??:??/;s/eth.src == ..:..:..:..:..:../??:??:??:??:??:??/' | sort])
 
 # Build some rather heavy config and modify number of threads in the middle
 check ovn-nbctl ls-add ls1
@@ -8597,7 +8597,7 @@  ovn_strip_lflows ], [0], [dnl
   table=??(ls_in_apply_port_sec), priority=50   , match=(reg0[[15]] == 1), action=(drop;)
   table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
   table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
-  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
+  table=??(ls_in_check_port_sec), priority=105  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
   table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
   table=??(ls_in_l2_lkup      ), priority=0    , match=(1), action=(outport = get_fdb(eth.dst); next;)
   table=??(ls_in_l2_lkup      ), priority=110  , match=(eth.dst == $svc_monitor_mac && (tcp || icmp || icmp6)), action=(handle_svc_check(inport);)
@@ -8623,7 +8623,9 @@  ovn_strip_lflows ], [0], [dnl
   table=??(ls_in_apply_port_sec), priority=50   , match=(reg0[[15]] == 1), action=(drop;)
   table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
   table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
-  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
+  table=??(ls_in_check_port_sec), priority=105  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
+  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:01 && outport == "sw0p1" && !is_chassis_resident("sw0p1") && flags.tunnel_rx == 1), action=(outport <-> inport; next;)
+  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:02 && outport == "sw0p2" && !is_chassis_resident("sw0p2") && flags.tunnel_rx == 1), action=(outport <-> inport; next;)
   table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
   table=??(ls_in_l2_lkup      ), priority=0    , match=(1), action=(outport = get_fdb(eth.dst); next;)
   table=??(ls_in_l2_lkup      ), priority=110  , match=(eth.dst == $svc_monitor_mac && (tcp || icmp || icmp6)), action=(handle_svc_check(inport);)
@@ -8650,7 +8652,9 @@  ovn_strip_lflows ], [0], [dnl
   table=??(ls_in_apply_port_sec), priority=50   , match=(reg0[[15]] == 1), action=(drop;)
   table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
   table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
-  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
+  table=??(ls_in_check_port_sec), priority=105  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
+  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:01 && outport == "sw0p1" && !is_chassis_resident("sw0p1") && flags.tunnel_rx == 1), action=(outport <-> inport; next;)
+  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:02 && outport == "sw0p2" && !is_chassis_resident("sw0p2") && flags.tunnel_rx == 1), action=(outport <-> inport; next;)
   table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
   table=??(ls_in_l2_lkup      ), priority=0    , match=(1), action=(outport = get_fdb(eth.dst); next;)
   table=??(ls_in_l2_lkup      ), priority=110  , match=(eth.dst == $svc_monitor_mac && (tcp || icmp || icmp6)), action=(handle_svc_check(inport);)
@@ -8678,7 +8682,9 @@  ovn_strip_lflows ], [0], [dnl
   table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
   table=??(ls_in_check_port_sec), priority=100  , match=(inport == "sw0p1"), action=(reg0[[15]] = 1; next;)
   table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
-  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
+  table=??(ls_in_check_port_sec), priority=105  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
+  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:01 && outport == "sw0p1" && !is_chassis_resident("sw0p1") && flags.tunnel_rx == 1), action=(outport <-> inport; next;)
+  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:02 && outport == "sw0p2" && !is_chassis_resident("sw0p2") && flags.tunnel_rx == 1), action=(outport <-> inport; next;)
   table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
   table=??(ls_in_l2_lkup      ), priority=0    , match=(1), action=(outport = get_fdb(eth.dst); next;)
   table=??(ls_in_l2_lkup      ), priority=110  , match=(eth.dst == $svc_monitor_mac && (tcp || icmp || icmp6)), action=(handle_svc_check(inport);)
@@ -8705,7 +8711,9 @@  ovn_strip_lflows ], [0], [dnl
   table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
   table=??(ls_in_check_port_sec), priority=100  , match=(inport == "sw0p1"), action=(reg0[[15]] = 1; next;)
   table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
-  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
+  table=??(ls_in_check_port_sec), priority=105  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
+  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:01 && outport == "sw0p1" && !is_chassis_resident("sw0p1") && flags.tunnel_rx == 1), action=(outport <-> inport; next;)
+  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:02 && outport == "sw0p2" && !is_chassis_resident("sw0p2") && flags.tunnel_rx == 1), action=(outport <-> inport; next;)
   table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
   table=??(ls_in_check_port_sec), priority=70   , match=(inport == "sw0p2"), action=(set_queue(10); reg0[[15]] = check_in_port_sec(); next;)
   table=??(ls_in_l2_lkup      ), priority=0    , match=(1), action=(outport = get_fdb(eth.dst); next;)
@@ -8735,7 +8743,9 @@  ovn_strip_lflows ], [0], [dnl
   table=??(ls_in_apply_port_sec), priority=50   , match=(reg0[[15]] == 1), action=(drop;)
   table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
   table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
-  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
+  table=??(ls_in_check_port_sec), priority=105  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
+  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:01 && outport == "sw0p1" && !is_chassis_resident("sw0p1") && flags.tunnel_rx == 1), action=(outport <-> inport; next;)
+  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:02 && outport == "sw0p2" && !is_chassis_resident("sw0p2") && flags.tunnel_rx == 1), action=(outport <-> inport; next;)
   table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
   table=??(ls_in_check_port_sec), priority=70   , match=(inport == "localnetport"), action=(set_queue(10); reg0[[15]] = check_in_port_sec(); next;)
   table=??(ls_in_check_port_sec), priority=70   , match=(inport == "sw0p1"), action=(reg0[[14]] = 1; next(pipeline=ingress, table=??);)
diff --git a/tests/ovn.at b/tests/ovn.at
index c8cc1d37f..79a9a733c 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -34900,6 +34900,9 @@  m4_define([MULTIPLE_OVS_INT],
    check ovn-nbctl lsp-add ls lp
    if test X$1 != X; then
        check ovn-nbctl lsp-set-type lp $1
+       nb_flows_ref=0
+   else
+       nb_flows_ref=1
    fi
    check ovn-nbctl lsp-set-addresses lp "00:00:00:01:01:02 192.168.1.2"
 
@@ -34981,7 +34984,7 @@  m4_define([MULTIPLE_OVS_INT],
    check ovs-vsctl del-port br-int lpnew
    OVS_WAIT_UNTIL([
        nb_flows=`ovs-ofctl dump-flows br-int | grep $COOKIE | wc -l`
-       test "${nb_flows}" = 0
+       test "${nb_flows}" = $nb_flows_ref
    ])
 
    echo ======================================================