diff mbox series

[ovs-dev,v4] northd: Fix pmtud for non routed traffic.

Message ID 34b5f1939425728b51a5d8796d6ae5d094e45153.1713973732.git.lorenzo.bianconi@redhat.com
State Accepted
Headers show
Series [ovs-dev,v4] northd: Fix pmtud for non routed traffic. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/github-robot-_ovn-kubernetes success github build: passed

Commit Message

Lorenzo Bianconi April 24, 2024, 3:51 p.m. UTC
Similar to what is already implemented for routed e/w traffic,
introduce pmtud support for e/w traffic between two logical switch ports
connected to the same logical switch, but running on two different
hypervisors.

Acked-by: Mark Michelson <mmichels@redhat.com>
Reported-at: https://issues.redhat.com/browse/FDP-524
Reported-at: https://issues.redhat.com/browse/FDP-362
Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
---
Changes since v3:
- fix M_NS_DAEMONIZE macro
Changes since v2:
- minor changes
Changes since v1:
- move logic in consider_port_binding
- add more self-test
- fix typos
---
 controller/lflow.h        |   1 +
 controller/physical.c     |  30 +++++++-
 northd/northd.c           |  33 ++++++---
 northd/ovn-northd.8.xml   |  16 +++--
 tests/multinode-macros.at |   7 ++
 tests/multinode.at        | 147 +++++++++++++++++++++++++++++++++++++-
 tests/ovn-controller.at   |  63 ++++++++++++++++
 tests/ovn-macros.at       |   1 +
 tests/ovn-northd.at       |  24 +++++--
 tests/ovn.at              |   5 +-
 10 files changed, 304 insertions(+), 23 deletions(-)

Comments

Numan Siddique May 28, 2024, 9:24 p.m. UTC | #1
On Wed, Apr 24, 2024 at 11:51 AM Lorenzo Bianconi
<lorenzo.bianconi@redhat.com> wrote:
>
> Similar to what is already implemented for routed e/w traffic,
> introduce pmtud support for e/w traffic between two logical switch ports
> connected to the same logical switch, but running on two different
> hypervisors.
>
> Acked-by: Mark Michelson <mmichels@redhat.com>
> Reported-at: https://issues.redhat.com/browse/FDP-524
> Reported-at: https://issues.redhat.com/browse/FDP-362
> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>

Thanks for the patch.

I applied this to main with the below changes.

--------------------------------------------------------------------------------
diff --git a/northd/northd.c b/northd/northd.c
index 75a8187e72..495b838fc5 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -11991,20 +11991,18 @@ build_lswitch_icmp_packet_toobig_admin_flows(

     ds_clear(match);
     if (!lsp_is_router(op->nbsp)) {
-        if (!op->n_lsp_addrs) {
-            return;
+        for (size_t i = 0; i < op->n_lsp_addrs; i++) {
+            ds_put_format(match,
+                          "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||"
+                          " (ip6 && icmp6.type == 2 && icmp6.code == 0)) &&"
+                          " eth.src == %s && outport == %s &&"
+                          " !is_chassis_resident(%s) && flags.tunnel_rx == 1",
+                          op->lsp_addrs[i].ea_s, op->json_key,
+                          op->json_key);
+            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_CHECK_PORT_SEC, 110,
+                          ds_cstr(match), "outport <-> inport; next;",
+                          op->lflow_ref);
         }
-
-        ds_put_format(match,
-                      "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||"
-                      " (ip6 && icmp6.type == 2 && icmp6.code == 0)) &&"
-                      " eth.src == "ETH_ADDR_FMT" && outport == %s &&"
-                      " !is_chassis_resident(%s) && flags.tunnel_rx == 1",
-                      ETH_ADDR_ARGS(op->lsp_addrs[0].ea), op->json_key,
-                      op->json_key);
-        ovn_lflow_add(lflows, op->od, S_SWITCH_IN_CHECK_PORT_SEC, 110,
-                      ds_cstr(match), "outport <-> inport; next;",
-                      op->lflow_ref);
         return;
     }

--------------------------------------------------------------------------------

Numan

> ---
> Changes since v3:
> - fix M_NS_DAEMONIZE macro
> Changes since v2:
> - minor changes
> Changes since v1:
> - move logic in consider_port_binding
> - add more self-test
> - fix typos
> ---
>  controller/lflow.h        |   1 +
>  controller/physical.c     |  30 +++++++-
>  northd/northd.c           |  33 ++++++---
>  northd/ovn-northd.8.xml   |  16 +++--
>  tests/multinode-macros.at |   7 ++
>  tests/multinode.at        | 147 +++++++++++++++++++++++++++++++++++++-
>  tests/ovn-controller.at   |  63 ++++++++++++++++
>  tests/ovn-macros.at       |   1 +
>  tests/ovn-northd.at       |  24 +++++--
>  tests/ovn.at              |   5 +-
>  10 files changed, 304 insertions(+), 23 deletions(-)
>
> diff --git a/controller/lflow.h b/controller/lflow.h
> index 295d004f4..1d20cae35 100644
> --- a/controller/lflow.h
> +++ b/controller/lflow.h
> @@ -94,6 +94,7 @@ struct uuid;
>  #define OFTABLE_ECMP_NH                  77
>  #define OFTABLE_CHK_LB_AFFINITY          78
>  #define OFTABLE_MAC_CACHE_USE            79
> +#define OFTABLE_CT_ZONE_LOOKUP           80
>
>  struct lflow_ctx_in {
>      struct ovsdb_idl_index *sbrec_multicast_group_by_name_datapath;
> diff --git a/controller/physical.c b/controller/physical.c
> index 7ee308694..25da789f0 100644
> --- a/controller/physical.c
> +++ b/controller/physical.c
> @@ -1498,6 +1498,26 @@ consider_port_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name,
>          return;
>      }
>
> +    if (get_lport_type(binding) == LP_VIF) {
> +        /* Table 80, priority 100.
> +         * =======================
> +         *
> +         * Process ICMP{4,6} 'packet too big' errors generated locally by the
> +         * kernel in order to look up the proper ct_zone. */
> +        struct match match = MATCH_CATCHALL_INITIALIZER;
> +        match_set_metadata(&match, htonll(dp_key));
> +        match_set_reg(&match, MFF_LOG_INPORT - MFF_REG0, port_key);
> +
> +        struct zone_ids icmp_zone_ids = get_zone_ids(binding, ct_zones);
> +        ofpbuf_clear(ofpacts_p);
> +        put_zones_ofpacts(&icmp_zone_ids, ofpacts_p);
> +        put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, ofpacts_p);
> +        ofctrl_add_flow(flow_table, OFTABLE_CT_ZONE_LOOKUP, 100,
> +                        binding->header_.uuid.parts[0], &match,
> +                        ofpacts_p, &binding->header_.uuid);
> +        ofpbuf_clear(ofpacts_p);
> +    }
> +
>      struct match match;
>      if (!strcmp(binding->type, "patch")
>          || (!strcmp(binding->type, "l3gateway")
> @@ -2464,6 +2484,14 @@ physical_run(struct physical_ctx *p_ctx,
>                                flow_table, &ofpacts);
>      }
>
> +    /* Default flow for CT_ZONE_LOOKUP Table. */
> +    struct match ct_look_def_match;
> +    match_init_catchall(&ct_look_def_match);
> +    ofpbuf_clear(&ofpacts);
> +    put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, &ofpacts);
> +    ofctrl_add_flow(flow_table, OFTABLE_CT_ZONE_LOOKUP, 0, 0,
> +                    &ct_look_def_match, &ofpacts, hc_uuid);
> +
>      /* Handle output to multicast groups, in tables 40 and 41. */
>      const struct sbrec_multicast_group *mc;
>      SBREC_MULTICAST_GROUP_TABLE_FOR_EACH (mc, p_ctx->mc_group_table) {
> @@ -2522,7 +2550,7 @@ physical_run(struct physical_ctx *p_ctx,
>          /* Add specif flows for E/W ICMPv{4,6} packets if tunnelled packets
>           * do not fit path MTU.
>           */
> -        put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, &ofpacts);
> +        put_resubmit(OFTABLE_CT_ZONE_LOOKUP, &ofpacts);
>
>          /* IPv4 */
>          match_init_catchall(&match);
> diff --git a/northd/northd.c b/northd/northd.c
> index d30ff9da5..ec5f44c16 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -8667,7 +8667,7 @@ build_lswitch_lflows_admission_control(struct ovn_datapath *od,
>      ovs_assert(od->nbs);
>
>      /* Default action for recirculated ICMP error 'packet too big'. */
> -    ovn_lflow_add(lflows, od, S_SWITCH_IN_CHECK_PORT_SEC, 110,
> +    ovn_lflow_add(lflows, od, S_SWITCH_IN_CHECK_PORT_SEC, 105,
>                    "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||"
>                    " (ip6 && icmp6.type == 2 && icmp6.code == 0)) &&"
>                    " flags.tunnel_rx == 1", debug_drop_action(), lflow_ref);
> @@ -11863,7 +11863,22 @@ build_lswitch_icmp_packet_toobig_admin_flows(
>  {
>      ovs_assert(op->nbsp);
>
> +    ds_clear(match);
>      if (!lsp_is_router(op->nbsp)) {
> +        if (!op->n_lsp_addrs) {
> +            return;
> +        }
> +
> +        ds_put_format(match,
> +                      "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||"
> +                      " (ip6 && icmp6.type == 2 && icmp6.code == 0)) &&"
> +                      " eth.src == "ETH_ADDR_FMT" && outport == %s &&"
> +                      " !is_chassis_resident(%s) && flags.tunnel_rx == 1",
> +                      ETH_ADDR_ARGS(op->lsp_addrs[0].ea), op->json_key,
> +                      op->json_key);
> +        ovn_lflow_add(lflows, op->od, S_SWITCH_IN_CHECK_PORT_SEC, 110,
> +                      ds_cstr(match), "outport <-> inport; next;",
> +                      op->lflow_ref);
>          return;
>      }
>
> @@ -11872,26 +11887,28 @@ build_lswitch_icmp_packet_toobig_admin_flows(
>          return;
>      }
>
> -    ds_clear(match);
>      if (peer->od->is_gw_router) {
>          ds_put_format(match,
>                        "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||"
>                        " (ip6 && icmp6.type == 2 && icmp6.code == 0)) && "
>                        "eth.src == %s && outport == %s && flags.tunnel_rx == 1",
>                        peer->nbrp->mac, op->json_key);
> +        ovn_lflow_add(lflows, op->od, S_SWITCH_IN_CHECK_PORT_SEC, 120,
> +                      ds_cstr(match), "outport <-> inport; next;",
> +                      op->lflow_ref);
>      } else {
>          ds_put_format(match,
>                        "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||"
>                        " (ip6 && icmp6.type == 2 && icmp6.code == 0)) && "
>                        "eth.dst == %s && flags.tunnel_rx == 1",
>                        peer->nbrp->mac);
> +        ds_clear(actions);
> +        ds_put_format(actions,
> +                      "outport <-> inport; next(pipeline=ingress,table=%d);",
> +                      ovn_stage_get_table(S_SWITCH_IN_L2_LKUP));
> +        ovn_lflow_add(lflows, op->od, S_SWITCH_IN_CHECK_PORT_SEC, 120,
> +                      ds_cstr(match), ds_cstr(actions), op->lflow_ref);
>      }
> -    ds_clear(actions);
> -    ds_put_format(actions,
> -                  "outport <-> inport; next(pipeline=ingress,table=%d);",
> -                  ovn_stage_get_table(S_SWITCH_IN_L2_LKUP));
> -    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_CHECK_PORT_SEC, 120,
> -                  ds_cstr(match), ds_cstr(actions), op->lflow_ref);
>  }
>
>  static void
> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
> index b8e542fcf..9c256736d 100644
> --- a/northd/ovn-northd.8.xml
> +++ b/northd/ovn-northd.8.xml
> @@ -324,8 +324,7 @@
>        'packet too big' and <code>eth.src == <var>D</var> &amp;&amp;
>        outport == <var>P</var> &amp;&amp; flags.tunnel_rx == 1</code> where
>        <var>D</var> is the peer logical router port <var>RP</var> mac address,
> -      swaps inport and outport and applies the action <code>
> -      next(pipeline=S_SWITCH_IN_L2_LKUP)</code>.
> +      swaps inport and outport and applies the action <code>next</code>.
>      </p>
>
>      <p>
> @@ -338,7 +337,16 @@
>      </p>
>
>      <p>
> -      This table adds a priority-110 flow that matches 'recirculated' icmp{4,6}
> +      For each logical switch port <var>P</var> a priority-110 flow that
> +      matches 'recirculated' icmp{4,6} error 'packet too big' and <code>
> +      eth.src == <var>D</var> &amp;&amp; outport == <var>P</var> &amp;&amp;
> +      !is_chassis_resident("<var>P</var>") &amp;&amp; flags.tunnel_rx == 1
> +      </code> where <var>D</var> is the logical switch port mac address,
> +      swaps inport and outport and applies the action <code>next</code>.
> +    </p>
> +
> +    <p>
> +      This table adds a priority-105 flow that matches 'recirculated' icmp{4,6}
>        error 'packet too big' to drop the packet.
>      </p>
>
> @@ -2467,7 +2475,7 @@ output;
>          </p>
>
>          <p>
> -          This table adds a priority-110 flow that matches 'recirculated'
> +          This table adds a priority-105 flow that matches 'recirculated'
>            icmp{4,6} error 'packet too big' to drop the packet.
>          </p>
>
> diff --git a/tests/multinode-macros.at b/tests/multinode-macros.at
> index c04506a52..ef41087ae 100644
> --- a/tests/multinode-macros.at
> +++ b/tests/multinode-macros.at
> @@ -7,6 +7,13 @@
>  m4_define([M_NS_EXEC],
>      [podman exec $1 ip netns exec $2 $3])
>
> +# M_NS_DAEMONIZE([fake_node],[namespace],[command],[pidfile])
> +m4_define([M_NS_DAEMONIZE],
> +    [podman exec $1 ip netns exec $2 $3 & echo $! > $4
> +     echo "kill \`cat $4\`" >> cleanup
> +    ]
> +)
> +
>  # M_NS_CHECK_EXEC([fake_node], [namespace], [command], other_params...)
>  #
>  # Wrapper for AT_CHECK that executes 'command' inside 'fake_node''s namespace'.
> diff --git a/tests/multinode.at b/tests/multinode.at
> index b959a2550..1e6eeb661 100644
> --- a/tests/multinode.at
> +++ b/tests/multinode.at
> @@ -162,6 +162,11 @@ check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
>  check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'  allow-related
>  check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'  allow-related
>
> +# create LB
> +check multinode_nbctl lb-add lb0 10.0.0.1:8080 10.0.0.4:8080 udp
> +check multinode_nbctl ls-lb-add sw0 lb0
> +M_NS_DAEMONIZE([ovn-chassis-2], [sw0p2], [nc -u -l 8080 >/dev/null 2>&1], [nc.pid])
> +
>  m_as ovn-gw-1 ip netns add ovn-ext0
>  m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0 type=internal
>  m_as ovn-gw-1 ip link set ext0 netns ovn-ext0
> @@ -215,6 +220,12 @@ M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 |
>  3 packets transmitted, 3 received, 0% packet loss, time 0ms
>  ])
>
> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1
> +for i in $(seq 30); do
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2 if=/dev/urandom | nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore])
> +done
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev sw0p1 | grep -q 'mtu 942'])
> +
>  AT_CLEANUP
>
>  AT_SETUP([ovn multinode pmtu - distributed router - vxlan])
> @@ -704,6 +715,11 @@ check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
>  check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'  allow-related
>  check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'  allow-related
>
> +# create LB
> +check multinode_nbctl lb-add lb0 10.0.0.1:8080 20.0.0.3:8080 udp
> +check multinode_nbctl lr-lb-add lr0 lb0
> +M_NS_DAEMONIZE([ovn-chassis-2], [sw1p1], [nc -u -l 8080 >/dev/null 2>&1], [nc.pid])
> +
>  m_as ovn-gw-1 ip netns add ovn-ext0
>  m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0 type=internal
>  m_as ovn-gw-1 ip link set ext0 netns ovn-ext0
> @@ -757,7 +773,17 @@ M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 |
>  ])
>
>  M_NS_CHECK_EXEC([ovn-gw-1], [ovn-ext0], [ip link set dev ext1 mtu 1100])
> -M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu = 1100"])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 172.20.1.2 2>&1 | grep -q "mtu = 1100"])
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> +
> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1
> +for i in $(seq 30); do
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2 if=/dev/urandom | nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore])
> +done
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev sw0p1 | grep -q 'mtu 942'])
>
>  AT_CLEANUP
>
> @@ -842,6 +868,11 @@ check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
>  check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'  allow-related
>  check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'  allow-related
>
> +# create LB
> +check multinode_nbctl lb-add lb0 10.0.0.1:8080 20.0.0.3:8080 udp
> +check multinode_nbctl lr-lb-add lr0 lb0
> +M_NS_DAEMONIZE([ovn-chassis-2], [sw1p1], [nc -u -l 8080 >/dev/null 2>&1], [nc.pid])
> +
>  m_as ovn-gw-1 ip netns add ovn-ext0
>  m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0 type=internal
>  m_as ovn-gw-1 ip link set ext0 netns ovn-ext0
> @@ -888,6 +919,118 @@ M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 |
>  3 packets transmitted, 3 received, 0% packet loss, time 0ms
>  ])
>
> -M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu = 1150"])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 172.20.1.2 2>&1 | grep -q "mtu = 1150"])
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> +
> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1
> +for i in $(seq 30); do
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2 if=/dev/urandom | nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore])
> +done
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev sw0p1 | grep -q 'mtu 950'])
> +
> +AT_CLEANUP
> +
> +AT_SETUP([ovn multinode pmtu - logical switch - geneve])
> +
> +# Check that ovn-fake-multinode setup is up and running
> +check_fake_multinode_setup
> +
> +# Delete the multinode NB and OVS resources before starting the test.
> +cleanup_multinode_resources
> +
> +m_as ovn-chassis-1 ip link del sw0p1-p
> +m_as ovn-chassis-2 ip link del sw0p2-p
> +
> +# Reset geneve tunnels
> +for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1
> +do
> +    m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=geneve
> +done
> +
> +OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q genev_sys])
> +OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q genev_sys])
> +OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q genev_sys])
> +
> +# Test East-West switching
> +check multinode_nbctl ls-add sw0
> +check multinode_nbctl lsp-add sw0 sw0-port1
> +check multinode_nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:03 10.0.0.3 1000::3"
> +check multinode_nbctl lsp-add sw0 sw0-port2
> +check multinode_nbctl lsp-set-addresses sw0-port2 "50:54:00:00:00:04 10.0.0.4 1000::4"
> +
> +m_as ovn-chassis-1 /data/create_fake_vm.sh sw0-port1 sw0p1 50:54:00:00:00:03 10.0.0.3 24 10.0.0.1 1000::3/64 1000::a
> +m_as ovn-chassis-2 /data/create_fake_vm.sh sw0-port2 sw0p2 50:54:00:00:00:04 10.0.0.4 24 10.0.0.1 1000::4/64 1000::a
> +
> +# Create the second logical switch with one port
> +check multinode_nbctl ls-add sw1
> +check multinode_nbctl lsp-add sw1 sw1-port1
> +check multinode_nbctl lsp-set-addresses sw1-port1 "40:54:00:00:00:03 20.0.0.3 2000::3"
> +
> +# Create a logical router and attach both logical switches
> +check multinode_nbctl lr-add lr0
> +check multinode_nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 1000::a/64
> +check multinode_nbctl lsp-add sw0 sw0-lr0
> +check multinode_nbctl lsp-set-type sw0-lr0 router
> +check multinode_nbctl lsp-set-addresses sw0-lr0 router
> +check multinode_nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0
> +
> +check multinode_nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 2000::a/64
> +check multinode_nbctl lsp-add sw1 sw1-lr0
> +check multinode_nbctl lsp-set-type sw1-lr0 router
> +check multinode_nbctl lsp-set-addresses sw1-lr0 router
> +check multinode_nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1
> +
> +m_as ovn-chassis-2 /data/create_fake_vm.sh sw1-port1 sw1p1 40:54:00:00:00:03 20.0.0.3 24 20.0.0.1 2000::3/64 2000::a
> +
> +check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 10.0.0.0/24
> +check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
> +
> +check multinode_nbctl lrp-set-gateway-chassis lr0-sw0 ovn-chassis-1 10
> +check multinode_nbctl lrp-set-gateway-chassis lr0-sw1 ovn-chassis-2 10
> +
> +# create some ACLs
> +check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'  allow-related
> +check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'  allow-related
> +
> +check multinode_nbctl lb-add lb0 10.0.0.1:8080 10.0.0.4:8080 udp
> +check multinode_nbctl ls-lb-add sw0 lb0
> +M_NS_DAEMONIZE([ovn-chassis-2], [sw0p2], [nc -u -l 8080 >/dev/null 2>&1], [nc.pid])
> +
> +m_wait_for_ports_up
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +# Change pmtu for the geneve tunnel
> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 10.0.0.4 2>&1 | grep -q "message too long, mtu=1142"])
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
> +[0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +# Change pmtu for the geneve tunnel
> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1100 dev eth1
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 20.0.0.3 2>&1 | grep -q "message too long, mtu=1042"])
> +
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
> +
> +m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1
> +for i in $(seq 30); do
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2 if=/dev/urandom | nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore])
> +done
> +M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev sw0p1 | grep -q 'mtu 942'])
>
>  AT_CLEANUP
> diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
> index be198e00d..a7dc277c1 100644
> --- a/tests/ovn-controller.at
> +++ b/tests/ovn-controller.at
> @@ -2906,3 +2906,66 @@ OVN_CLEANUP([hv1],[hv2])
>
>  AT_CLEANUP
>  ])
> +
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([ovn-controller - pmtud flows])
> +AT_KEYWORDS([pmtud])
> +
> +ovn_start
> +
> +net_add n1
> +sim_add hv1
> +ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.1
> +
> +check ovn-nbctl ls-add ls1 \
> +    -- lsp-add ls1 lsp1 \
> +    -- lsp-set-addresses lsp1 "00:00:00:00:00:01 192.168.1.1" \
> +    -- lsp-add ls1 lsp2 \
> +    -- lsp-set-addresses lsp2 "00:00:00:00:00:02 192.168.1.2"
> +
> +as hv1
> +check ovs-vsctl \
> +    -- add-port br-int vif1 \
> +    -- set Interface vif1 external_ids:iface-id=lsp1 \
> +    -- add-port br-int vif2 \
> +    -- set Interface vif2 external_ids:iface-id=lsp2
> +
> +AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=OFTABLE_CT_ZONE_LOOKUP | \
> +          sed -e 's/cookie=0x.*, duration=.*, table/cookie=??, duration=??, table/' | \
> +          sed -e 's/actions=load:0x.*->NXM_NX_REG13/actions=load:0x?->NXM_NX_REG13/' | \
> +          grep -v NXST_FLOW |sort], [0], [dnl
> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=0 actions=resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x1,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x2,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
> +])
> +
> +check ovn-nbctl lsp-add ls1 lsp3 \
> +    -- lsp-set-addresses lsp3 "00:00:00:00:00:03 192.168.1.3"
> +check ovs-vsctl \
> +    -- add-port br-int vif3 \
> +    -- set Interface vif3 external_ids:iface-id=lsp3
> +
> +AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=OFTABLE_CT_ZONE_LOOKUP | \
> +          sed -e 's/cookie=0x.*, duration=.*, table/cookie=??, duration=??, table/' | \
> +          sed -e 's/actions=load:0x.*->NXM_NX_REG13/actions=load:0x?->NXM_NX_REG13/' | \
> +          grep -v NXST_FLOW |sort], [0], [dnl
> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=0 actions=resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x1,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x2,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x3,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
> +])
> +
> +check ovn-nbctl lsp-del lsp3
> +AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=OFTABLE_CT_ZONE_LOOKUP | \
> +          sed -e 's/cookie=0x.*, duration=.*, table/cookie=??, duration=??, table/' | \
> +          sed -e 's/actions=load:0x.*->NXM_NX_REG13/actions=load:0x?->NXM_NX_REG13/' |
> +          grep -v NXST_FLOW |sort], [0], [dnl
> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=0 actions=resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x1,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
> + cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x2,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
> +])
> +
> +OVN_CLEANUP([hv1])
> +AT_CLEANUP
> +])
> diff --git a/tests/ovn-macros.at b/tests/ovn-macros.at
> index 32ab3b69f..47ada5c70 100644
> --- a/tests/ovn-macros.at
> +++ b/tests/ovn-macros.at
> @@ -1130,5 +1130,6 @@ m4_define([OFTABLE_ECMP_NH_MAC], [76])
>  m4_define([OFTABLE_ECMP_NH], [77])
>  m4_define([OFTABLE_CHK_LB_AFFINITY], [78])
>  m4_define([OFTABLE_MAC_CACHE_USE], [79])
> +m4_define([OFTABLE_CT_ZONE_LOOKUP], [80])
>
>  m4_define([OFTABLE_SAVE_INPORT_HEX], [m4_eval(OFTABLE_SAVE_INPORT, 16)])
> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> index 3d944a3ae..37c9ec2a1 100644
> --- a/tests/ovn-northd.at
> +++ b/tests/ovn-northd.at
> @@ -8599,7 +8599,7 @@ delete_switch_ports() {
>      RUN_OVN_NBCTL()
>  }
>
> -m4_define([DUMP_FLOWS_SORTED], [sed -e 's/arp.tpa == 10.1.0.[[0-9]]\{1,3\}/arp.tpa == 10.1.0.??/;s/eth.dst == ..:..:..:..:..:../??:??:??:??:??:??/' | sort])
> +m4_define([DUMP_FLOWS_SORTED], [sed -e 's/arp.tpa == 10.1.0.[[0-9]]\{1,3\}/arp.tpa == 10.1.0.??/;s/eth.dst == ..:..:..:..:..:../??:??:??:??:??:??/;s/eth.src == ..:..:..:..:..:../??:??:??:??:??:??/' | sort])
>
>  # Build some rather heavy config and modify number of threads in the middle
>  check ovn-nbctl ls-add ls1
> @@ -8664,7 +8664,7 @@ ovn_strip_lflows ], [0], [dnl
>    table=??(ls_in_apply_port_sec), priority=50   , match=(reg0[[15]] == 1), action=(drop;)
>    table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
>    table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
> -  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
> +  table=??(ls_in_check_port_sec), priority=105  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
>    table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
>    table=??(ls_in_l2_lkup      ), priority=0    , match=(1), action=(outport = get_fdb(eth.dst); next;)
>    table=??(ls_in_l2_lkup      ), priority=110  , match=(eth.dst == $svc_monitor_mac && (tcp || icmp || icmp6)), action=(handle_svc_check(inport);)
> @@ -8690,7 +8690,9 @@ ovn_strip_lflows ], [0], [dnl
>    table=??(ls_in_apply_port_sec), priority=50   , match=(reg0[[15]] == 1), action=(drop;)
>    table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
>    table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
> -  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
> +  table=??(ls_in_check_port_sec), priority=105  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
> +  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:01 && outport == "sw0p1" && !is_chassis_resident("sw0p1") && flags.tunnel_rx == 1), action=(outport <-> inport; next;)
> +  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:02 && outport == "sw0p2" && !is_chassis_resident("sw0p2") && flags.tunnel_rx == 1), action=(outport <-> inport; next;)
>    table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
>    table=??(ls_in_l2_lkup      ), priority=0    , match=(1), action=(outport = get_fdb(eth.dst); next;)
>    table=??(ls_in_l2_lkup      ), priority=110  , match=(eth.dst == $svc_monitor_mac && (tcp || icmp || icmp6)), action=(handle_svc_check(inport);)
> @@ -8717,7 +8719,9 @@ ovn_strip_lflows ], [0], [dnl
>    table=??(ls_in_apply_port_sec), priority=50   , match=(reg0[[15]] == 1), action=(drop;)
>    table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
>    table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
> -  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
> +  table=??(ls_in_check_port_sec), priority=105  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
> +  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:01 && outport == "sw0p1" && !is_chassis_resident("sw0p1") && flags.tunnel_rx == 1), action=(outport <-> inport; next;)
> +  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:02 && outport == "sw0p2" && !is_chassis_resident("sw0p2") && flags.tunnel_rx == 1), action=(outport <-> inport; next;)
>    table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
>    table=??(ls_in_l2_lkup      ), priority=0    , match=(1), action=(outport = get_fdb(eth.dst); next;)
>    table=??(ls_in_l2_lkup      ), priority=110  , match=(eth.dst == $svc_monitor_mac && (tcp || icmp || icmp6)), action=(handle_svc_check(inport);)
> @@ -8745,7 +8749,9 @@ ovn_strip_lflows ], [0], [dnl
>    table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
>    table=??(ls_in_check_port_sec), priority=100  , match=(inport == "sw0p1"), action=(reg0[[15]] = 1; next;)
>    table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
> -  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
> +  table=??(ls_in_check_port_sec), priority=105  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
> +  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:01 && outport == "sw0p1" && !is_chassis_resident("sw0p1") && flags.tunnel_rx == 1), action=(outport <-> inport; next;)
> +  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:02 && outport == "sw0p2" && !is_chassis_resident("sw0p2") && flags.tunnel_rx == 1), action=(outport <-> inport; next;)
>    table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
>    table=??(ls_in_l2_lkup      ), priority=0    , match=(1), action=(outport = get_fdb(eth.dst); next;)
>    table=??(ls_in_l2_lkup      ), priority=110  , match=(eth.dst == $svc_monitor_mac && (tcp || icmp || icmp6)), action=(handle_svc_check(inport);)
> @@ -8772,7 +8778,9 @@ ovn_strip_lflows ], [0], [dnl
>    table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
>    table=??(ls_in_check_port_sec), priority=100  , match=(inport == "sw0p1"), action=(reg0[[15]] = 1; next;)
>    table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
> -  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
> +  table=??(ls_in_check_port_sec), priority=105  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
> +  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:01 && outport == "sw0p1" && !is_chassis_resident("sw0p1") && flags.tunnel_rx == 1), action=(outport <-> inport; next;)
> +  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:02 && outport == "sw0p2" && !is_chassis_resident("sw0p2") && flags.tunnel_rx == 1), action=(outport <-> inport; next;)
>    table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
>    table=??(ls_in_check_port_sec), priority=70   , match=(inport == "sw0p2"), action=(set_queue(10); reg0[[15]] = check_in_port_sec(); next;)
>    table=??(ls_in_l2_lkup      ), priority=0    , match=(1), action=(outport = get_fdb(eth.dst); next;)
> @@ -8802,7 +8810,9 @@ ovn_strip_lflows ], [0], [dnl
>    table=??(ls_in_apply_port_sec), priority=50   , match=(reg0[[15]] == 1), action=(drop;)
>    table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
>    table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
> -  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
> +  table=??(ls_in_check_port_sec), priority=105  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
> +  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:01 && outport == "sw0p1" && !is_chassis_resident("sw0p1") && flags.tunnel_rx == 1), action=(outport <-> inport; next;)
> +  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:02 && outport == "sw0p2" && !is_chassis_resident("sw0p2") && flags.tunnel_rx == 1), action=(outport <-> inport; next;)
>    table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
>    table=??(ls_in_check_port_sec), priority=70   , match=(inport == "localnetport"), action=(set_queue(10); reg0[[15]] = check_in_port_sec(); next;)
>    table=??(ls_in_check_port_sec), priority=70   , match=(inport == "sw0p1"), action=(reg0[[14]] = 1; next(pipeline=ingress, table=??);)
> diff --git a/tests/ovn.at b/tests/ovn.at
> index e81cd4f45..546f54225 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -34916,6 +34916,9 @@ m4_define([MULTIPLE_OVS_INT],
>     check ovn-nbctl lsp-add ls lp
>     if test X$1 != X; then
>         check ovn-nbctl lsp-set-type lp $1
> +       nb_flows_ref=0
> +   else
> +       nb_flows_ref=1
>     fi
>     check ovn-nbctl lsp-set-addresses lp "00:00:00:01:01:02 192.168.1.2"
>
> @@ -34997,7 +35000,7 @@ m4_define([MULTIPLE_OVS_INT],
>     check ovs-vsctl del-port br-int lpnew
>     OVS_WAIT_UNTIL([
>         nb_flows=`ovs-ofctl dump-flows br-int | grep $COOKIE | wc -l`
> -       test "${nb_flows}" = 0
> +       test "${nb_flows}" = $nb_flows_ref
>     ])
>
>     echo ======================================================
> --
> 2.44.0
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
diff mbox series

Patch

diff --git a/controller/lflow.h b/controller/lflow.h
index 295d004f4..1d20cae35 100644
--- a/controller/lflow.h
+++ b/controller/lflow.h
@@ -94,6 +94,7 @@  struct uuid;
 #define OFTABLE_ECMP_NH                  77
 #define OFTABLE_CHK_LB_AFFINITY          78
 #define OFTABLE_MAC_CACHE_USE            79
+#define OFTABLE_CT_ZONE_LOOKUP           80
 
 struct lflow_ctx_in {
     struct ovsdb_idl_index *sbrec_multicast_group_by_name_datapath;
diff --git a/controller/physical.c b/controller/physical.c
index 7ee308694..25da789f0 100644
--- a/controller/physical.c
+++ b/controller/physical.c
@@ -1498,6 +1498,26 @@  consider_port_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name,
         return;
     }
 
+    if (get_lport_type(binding) == LP_VIF) {
+        /* Table 80, priority 100.
+         * =======================
+         *
+         * Process ICMP{4,6} 'packet too big' errors generated locally by the
+         * kernel in order to look up the proper ct_zone. */
+        struct match match = MATCH_CATCHALL_INITIALIZER;
+        match_set_metadata(&match, htonll(dp_key));
+        match_set_reg(&match, MFF_LOG_INPORT - MFF_REG0, port_key);
+
+        struct zone_ids icmp_zone_ids = get_zone_ids(binding, ct_zones);
+        ofpbuf_clear(ofpacts_p);
+        put_zones_ofpacts(&icmp_zone_ids, ofpacts_p);
+        put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, ofpacts_p);
+        ofctrl_add_flow(flow_table, OFTABLE_CT_ZONE_LOOKUP, 100,
+                        binding->header_.uuid.parts[0], &match,
+                        ofpacts_p, &binding->header_.uuid);
+        ofpbuf_clear(ofpacts_p);
+    }
+
     struct match match;
     if (!strcmp(binding->type, "patch")
         || (!strcmp(binding->type, "l3gateway")
@@ -2464,6 +2484,14 @@  physical_run(struct physical_ctx *p_ctx,
                               flow_table, &ofpacts);
     }
 
+    /* Default flow for CT_ZONE_LOOKUP Table. */
+    struct match ct_look_def_match;
+    match_init_catchall(&ct_look_def_match);
+    ofpbuf_clear(&ofpacts);
+    put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, &ofpacts);
+    ofctrl_add_flow(flow_table, OFTABLE_CT_ZONE_LOOKUP, 0, 0,
+                    &ct_look_def_match, &ofpacts, hc_uuid);
+
     /* Handle output to multicast groups, in tables 40 and 41. */
     const struct sbrec_multicast_group *mc;
     SBREC_MULTICAST_GROUP_TABLE_FOR_EACH (mc, p_ctx->mc_group_table) {
@@ -2522,7 +2550,7 @@  physical_run(struct physical_ctx *p_ctx,
         /* Add specif flows for E/W ICMPv{4,6} packets if tunnelled packets
          * do not fit path MTU.
          */
-        put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, &ofpacts);
+        put_resubmit(OFTABLE_CT_ZONE_LOOKUP, &ofpacts);
 
         /* IPv4 */
         match_init_catchall(&match);
diff --git a/northd/northd.c b/northd/northd.c
index d30ff9da5..ec5f44c16 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -8667,7 +8667,7 @@  build_lswitch_lflows_admission_control(struct ovn_datapath *od,
     ovs_assert(od->nbs);
 
     /* Default action for recirculated ICMP error 'packet too big'. */
-    ovn_lflow_add(lflows, od, S_SWITCH_IN_CHECK_PORT_SEC, 110,
+    ovn_lflow_add(lflows, od, S_SWITCH_IN_CHECK_PORT_SEC, 105,
                   "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||"
                   " (ip6 && icmp6.type == 2 && icmp6.code == 0)) &&"
                   " flags.tunnel_rx == 1", debug_drop_action(), lflow_ref);
@@ -11863,7 +11863,22 @@  build_lswitch_icmp_packet_toobig_admin_flows(
 {
     ovs_assert(op->nbsp);
 
+    ds_clear(match);
     if (!lsp_is_router(op->nbsp)) {
+        if (!op->n_lsp_addrs) {
+            return;
+        }
+
+        ds_put_format(match,
+                      "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||"
+                      " (ip6 && icmp6.type == 2 && icmp6.code == 0)) &&"
+                      " eth.src == "ETH_ADDR_FMT" && outport == %s &&"
+                      " !is_chassis_resident(%s) && flags.tunnel_rx == 1",
+                      ETH_ADDR_ARGS(op->lsp_addrs[0].ea), op->json_key,
+                      op->json_key);
+        ovn_lflow_add(lflows, op->od, S_SWITCH_IN_CHECK_PORT_SEC, 110,
+                      ds_cstr(match), "outport <-> inport; next;",
+                      op->lflow_ref);
         return;
     }
 
@@ -11872,26 +11887,28 @@  build_lswitch_icmp_packet_toobig_admin_flows(
         return;
     }
 
-    ds_clear(match);
     if (peer->od->is_gw_router) {
         ds_put_format(match,
                       "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||"
                       " (ip6 && icmp6.type == 2 && icmp6.code == 0)) && "
                       "eth.src == %s && outport == %s && flags.tunnel_rx == 1",
                       peer->nbrp->mac, op->json_key);
+        ovn_lflow_add(lflows, op->od, S_SWITCH_IN_CHECK_PORT_SEC, 120,
+                      ds_cstr(match), "outport <-> inport; next;",
+                      op->lflow_ref);
     } else {
         ds_put_format(match,
                       "((ip4 && icmp4.type == 3 && icmp4.code == 4) ||"
                       " (ip6 && icmp6.type == 2 && icmp6.code == 0)) && "
                       "eth.dst == %s && flags.tunnel_rx == 1",
                       peer->nbrp->mac);
+        ds_clear(actions);
+        ds_put_format(actions,
+                      "outport <-> inport; next(pipeline=ingress,table=%d);",
+                      ovn_stage_get_table(S_SWITCH_IN_L2_LKUP));
+        ovn_lflow_add(lflows, op->od, S_SWITCH_IN_CHECK_PORT_SEC, 120,
+                      ds_cstr(match), ds_cstr(actions), op->lflow_ref);
     }
-    ds_clear(actions);
-    ds_put_format(actions,
-                  "outport <-> inport; next(pipeline=ingress,table=%d);",
-                  ovn_stage_get_table(S_SWITCH_IN_L2_LKUP));
-    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_CHECK_PORT_SEC, 120,
-                  ds_cstr(match), ds_cstr(actions), op->lflow_ref);
 }
 
 static void
diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
index b8e542fcf..9c256736d 100644
--- a/northd/ovn-northd.8.xml
+++ b/northd/ovn-northd.8.xml
@@ -324,8 +324,7 @@ 
       'packet too big' and <code>eth.src == <var>D</var> &amp;&amp;
       outport == <var>P</var> &amp;&amp; flags.tunnel_rx == 1</code> where
       <var>D</var> is the peer logical router port <var>RP</var> mac address,
-      swaps inport and outport and applies the action <code>
-      next(pipeline=S_SWITCH_IN_L2_LKUP)</code>.
+      swaps inport and outport and applies the action <code>next</code>.
     </p>
 
     <p>
@@ -338,7 +337,16 @@ 
     </p>
 
     <p>
-      This table adds a priority-110 flow that matches 'recirculated' icmp{4,6}
+      For each logical switch port <var>P</var> a priority-110 flow that
+      matches 'recirculated' icmp{4,6} error 'packet too big' and <code>
+      eth.src == <var>D</var> &amp;&amp; outport == <var>P</var> &amp;&amp;
+      !is_chassis_resident("<var>P</var>") &amp;&amp; flags.tunnel_rx == 1
+      </code> where <var>D</var> is the logical switch port mac address,
+      swaps inport and outport and applies the action <code>next</code>.
+    </p>
+
+    <p>
+      This table adds a priority-105 flow that matches 'recirculated' icmp{4,6}
       error 'packet too big' to drop the packet.
     </p>
 
@@ -2467,7 +2475,7 @@  output;
         </p>
 
         <p>
-          This table adds a priority-110 flow that matches 'recirculated'
+          This table adds a priority-105 flow that matches 'recirculated'
           icmp{4,6} error 'packet too big' to drop the packet.
         </p>
 
diff --git a/tests/multinode-macros.at b/tests/multinode-macros.at
index c04506a52..ef41087ae 100644
--- a/tests/multinode-macros.at
+++ b/tests/multinode-macros.at
@@ -7,6 +7,13 @@ 
 m4_define([M_NS_EXEC],
     [podman exec $1 ip netns exec $2 $3])
 
+# M_NS_DAEMONIZE([fake_node],[namespace],[command],[pidfile])
+m4_define([M_NS_DAEMONIZE],
+    [podman exec $1 ip netns exec $2 $3 & echo $! > $4
+     echo "kill \`cat $4\`" >> cleanup
+    ]
+)
+
 # M_NS_CHECK_EXEC([fake_node], [namespace], [command], other_params...)
 #
 # Wrapper for AT_CHECK that executes 'command' inside 'fake_node''s namespace'.
diff --git a/tests/multinode.at b/tests/multinode.at
index b959a2550..1e6eeb661 100644
--- a/tests/multinode.at
+++ b/tests/multinode.at
@@ -162,6 +162,11 @@  check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
 check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'  allow-related
 check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'  allow-related
 
+# create LB
+check multinode_nbctl lb-add lb0 10.0.0.1:8080 10.0.0.4:8080 udp
+check multinode_nbctl ls-lb-add sw0 lb0
+M_NS_DAEMONIZE([ovn-chassis-2], [sw0p2], [nc -u -l 8080 >/dev/null 2>&1], [nc.pid])
+
 m_as ovn-gw-1 ip netns add ovn-ext0
 m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0 type=internal
 m_as ovn-gw-1 ip link set ext0 netns ovn-ext0
@@ -215,6 +220,12 @@  M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 |
 3 packets transmitted, 3 received, 0% packet loss, time 0ms
 ])
 
+m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1
+for i in $(seq 30); do
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2 if=/dev/urandom | nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore])
+done
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev sw0p1 | grep -q 'mtu 942'])
+
 AT_CLEANUP
 
 AT_SETUP([ovn multinode pmtu - distributed router - vxlan])
@@ -704,6 +715,11 @@  check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
 check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'  allow-related
 check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'  allow-related
 
+# create LB
+check multinode_nbctl lb-add lb0 10.0.0.1:8080 20.0.0.3:8080 udp
+check multinode_nbctl lr-lb-add lr0 lb0
+M_NS_DAEMONIZE([ovn-chassis-2], [sw1p1], [nc -u -l 8080 >/dev/null 2>&1], [nc.pid])
+
 m_as ovn-gw-1 ip netns add ovn-ext0
 m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0 type=internal
 m_as ovn-gw-1 ip link set ext0 netns ovn-ext0
@@ -757,7 +773,17 @@  M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 |
 ])
 
 M_NS_CHECK_EXEC([ovn-gw-1], [ovn-ext0], [ip link set dev ext1 mtu 1100])
-M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu = 1100"])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 172.20.1.2 2>&1 | grep -q "mtu = 1100"])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
+
+m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1
+for i in $(seq 30); do
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2 if=/dev/urandom | nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore])
+done
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev sw0p1 | grep -q 'mtu 942'])
 
 AT_CLEANUP
 
@@ -842,6 +868,11 @@  check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
 check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'  allow-related
 check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'  allow-related
 
+# create LB
+check multinode_nbctl lb-add lb0 10.0.0.1:8080 20.0.0.3:8080 udp
+check multinode_nbctl lr-lb-add lr0 lb0
+M_NS_DAEMONIZE([ovn-chassis-2], [sw1p1], [nc -u -l 8080 >/dev/null 2>&1], [nc.pid])
+
 m_as ovn-gw-1 ip netns add ovn-ext0
 m_as ovn-gw-1 ovs-vsctl add-port br-ex ext0 -- set interface ext0 type=internal
 m_as ovn-gw-1 ip link set ext0 netns ovn-ext0
@@ -888,6 +919,118 @@  M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 172.20.1.2 |
 3 packets transmitted, 3 received, 0% packet loss, time 0ms
 ])
 
-M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 172.20.1.2 2>&1 |grep -q "mtu = 1150"])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 20 -i 0.5 -s 1300 -M do 172.20.1.2 2>&1 | grep -q "mtu = 1150"])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
+
+m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1
+for i in $(seq 30); do
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2 if=/dev/urandom | nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore])
+done
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev sw0p1 | grep -q 'mtu 950'])
+
+AT_CLEANUP
+
+AT_SETUP([ovn multinode pmtu - logical switch - geneve])
+
+# Check that ovn-fake-multinode setup is up and running
+check_fake_multinode_setup
+
+# Delete the multinode NB and OVS resources before starting the test.
+cleanup_multinode_resources
+
+m_as ovn-chassis-1 ip link del sw0p1-p
+m_as ovn-chassis-2 ip link del sw0p2-p
+
+# Reset geneve tunnels
+for c in ovn-chassis-1 ovn-chassis-2 ovn-gw-1
+do
+    m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=geneve
+done
+
+OVS_WAIT_UNTIL([m_as ovn-chassis-1 ip link show | grep -q genev_sys])
+OVS_WAIT_UNTIL([m_as ovn-chassis-2 ip link show | grep -q genev_sys])
+OVS_WAIT_UNTIL([m_as ovn-gw-1 ip link show | grep -q genev_sys])
+
+# Test East-West switching
+check multinode_nbctl ls-add sw0
+check multinode_nbctl lsp-add sw0 sw0-port1
+check multinode_nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:03 10.0.0.3 1000::3"
+check multinode_nbctl lsp-add sw0 sw0-port2
+check multinode_nbctl lsp-set-addresses sw0-port2 "50:54:00:00:00:04 10.0.0.4 1000::4"
+
+m_as ovn-chassis-1 /data/create_fake_vm.sh sw0-port1 sw0p1 50:54:00:00:00:03 10.0.0.3 24 10.0.0.1 1000::3/64 1000::a
+m_as ovn-chassis-2 /data/create_fake_vm.sh sw0-port2 sw0p2 50:54:00:00:00:04 10.0.0.4 24 10.0.0.1 1000::4/64 1000::a
+
+# Create the second logical switch with one port
+check multinode_nbctl ls-add sw1
+check multinode_nbctl lsp-add sw1 sw1-port1
+check multinode_nbctl lsp-set-addresses sw1-port1 "40:54:00:00:00:03 20.0.0.3 2000::3"
+
+# Create a logical router and attach both logical switches
+check multinode_nbctl lr-add lr0
+check multinode_nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 1000::a/64
+check multinode_nbctl lsp-add sw0 sw0-lr0
+check multinode_nbctl lsp-set-type sw0-lr0 router
+check multinode_nbctl lsp-set-addresses sw0-lr0 router
+check multinode_nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0
+
+check multinode_nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 2000::a/64
+check multinode_nbctl lsp-add sw1 sw1-lr0
+check multinode_nbctl lsp-set-type sw1-lr0 router
+check multinode_nbctl lsp-set-addresses sw1-lr0 router
+check multinode_nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1
+
+m_as ovn-chassis-2 /data/create_fake_vm.sh sw1-port1 sw1p1 40:54:00:00:00:03 20.0.0.3 24 20.0.0.1 2000::3/64 2000::a
+
+check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 10.0.0.0/24
+check multinode_nbctl lr-nat-add lr0 snat 172.20.0.100 20.0.0.0/24
+
+check multinode_nbctl lrp-set-gateway-chassis lr0-sw0 ovn-chassis-1 10
+check multinode_nbctl lrp-set-gateway-chassis lr0-sw1 ovn-chassis-2 10
+
+# create some ACLs
+check multinode_nbctl acl-add sw0 from-lport 1002 'ip4 || ip6'  allow-related
+check multinode_nbctl acl-add sw1 from-lport 1002 'ip4 || ip6'  allow-related
+
+check multinode_nbctl lb-add lb0 10.0.0.1:8080 10.0.0.4:8080 udp
+check multinode_nbctl ls-lb-add sw0 lb0
+M_NS_DAEMONIZE([ovn-chassis-2], [sw0p2], [nc -u -l 8080 >/dev/null 2>&1], [nc.pid])
+
+m_wait_for_ports_up
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 10.0.0.4 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+# Change pmtu for the geneve tunnel
+m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1200 dev eth1
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 10.0.0.4 2>&1 | grep -q "message too long, mtu=1142"])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -q -c 3 -i 0.3 -w 2 20.0.0.3 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+# Change pmtu for the geneve tunnel
+m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1100 dev eth1
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ping -c 5 -s 1300 -M do 20.0.0.3 2>&1 | grep -q "message too long, mtu=1042"])
+
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route flush dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add 10.0.0.0/24 dev sw0p1])
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route add default via 10.0.0.1 dev sw0p1])
+
+m_as ovn-chassis-1 ip route change 170.168.0.0/16 mtu 1000 dev eth1
+for i in $(seq 30); do
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [sh -c 'dd bs=512 count=2 if=/dev/urandom | nc -u 10.0.0.1 8080'], [ignore], [ignore], [ignore])
+done
+M_NS_CHECK_EXEC([ovn-chassis-1], [sw0p1], [ip route get 10.0.0.1 dev sw0p1 | grep -q 'mtu 942'])
 
 AT_CLEANUP
diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
index be198e00d..a7dc277c1 100644
--- a/tests/ovn-controller.at
+++ b/tests/ovn-controller.at
@@ -2906,3 +2906,66 @@  OVN_CLEANUP([hv1],[hv2])
 
 AT_CLEANUP
 ])
+
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([ovn-controller - pmtud flows])
+AT_KEYWORDS([pmtud])
+
+ovn_start
+
+net_add n1
+sim_add hv1
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.1
+
+check ovn-nbctl ls-add ls1 \
+    -- lsp-add ls1 lsp1 \
+    -- lsp-set-addresses lsp1 "00:00:00:00:00:01 192.168.1.1" \
+    -- lsp-add ls1 lsp2 \
+    -- lsp-set-addresses lsp2 "00:00:00:00:00:02 192.168.1.2"
+
+as hv1
+check ovs-vsctl \
+    -- add-port br-int vif1 \
+    -- set Interface vif1 external_ids:iface-id=lsp1 \
+    -- add-port br-int vif2 \
+    -- set Interface vif2 external_ids:iface-id=lsp2
+
+AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=OFTABLE_CT_ZONE_LOOKUP | \
+          sed -e 's/cookie=0x.*, duration=.*, table/cookie=??, duration=??, table/' | \
+          sed -e 's/actions=load:0x.*->NXM_NX_REG13/actions=load:0x?->NXM_NX_REG13/' | \
+          grep -v NXST_FLOW |sort], [0], [dnl
+ cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=0 actions=resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
+ cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x1,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
+ cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x2,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
+])
+
+check ovn-nbctl lsp-add ls1 lsp3 \
+    -- lsp-set-addresses lsp3 "00:00:00:00:00:03 192.168.1.3"
+check ovs-vsctl \
+    -- add-port br-int vif3 \
+    -- set Interface vif3 external_ids:iface-id=lsp3
+
+AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=OFTABLE_CT_ZONE_LOOKUP | \
+          sed -e 's/cookie=0x.*, duration=.*, table/cookie=??, duration=??, table/' | \
+          sed -e 's/actions=load:0x.*->NXM_NX_REG13/actions=load:0x?->NXM_NX_REG13/' | \
+          grep -v NXST_FLOW |sort], [0], [dnl
+ cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=0 actions=resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
+ cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x1,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
+ cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x2,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
+ cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x3,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
+])
+
+check ovn-nbctl lsp-del lsp3
+AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=OFTABLE_CT_ZONE_LOOKUP | \
+          sed -e 's/cookie=0x.*, duration=.*, table/cookie=??, duration=??, table/' | \
+          sed -e 's/actions=load:0x.*->NXM_NX_REG13/actions=load:0x?->NXM_NX_REG13/' |
+          grep -v NXST_FLOW |sort], [0], [dnl
+ cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=0 actions=resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
+ cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x1,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
+ cookie=??, duration=??, table=OFTABLE_CT_ZONE_LOOKUP, n_packets=0, n_bytes=0, idle_age=0, priority=100,reg14=0x2,metadata=0x1 actions=load:0x?->NXM_NX_REG13[[0..15]],load:0x2->NXM_NX_REG11[[]],load:0x1->NXM_NX_REG12[[]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
+])
+
+OVN_CLEANUP([hv1])
+AT_CLEANUP
+])
diff --git a/tests/ovn-macros.at b/tests/ovn-macros.at
index 32ab3b69f..47ada5c70 100644
--- a/tests/ovn-macros.at
+++ b/tests/ovn-macros.at
@@ -1130,5 +1130,6 @@  m4_define([OFTABLE_ECMP_NH_MAC], [76])
 m4_define([OFTABLE_ECMP_NH], [77])
 m4_define([OFTABLE_CHK_LB_AFFINITY], [78])
 m4_define([OFTABLE_MAC_CACHE_USE], [79])
+m4_define([OFTABLE_CT_ZONE_LOOKUP], [80])
 
 m4_define([OFTABLE_SAVE_INPORT_HEX], [m4_eval(OFTABLE_SAVE_INPORT, 16)])
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index 3d944a3ae..37c9ec2a1 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -8599,7 +8599,7 @@  delete_switch_ports() {
     RUN_OVN_NBCTL()
 }
 
-m4_define([DUMP_FLOWS_SORTED], [sed -e 's/arp.tpa == 10.1.0.[[0-9]]\{1,3\}/arp.tpa == 10.1.0.??/;s/eth.dst == ..:..:..:..:..:../??:??:??:??:??:??/' | sort])
+m4_define([DUMP_FLOWS_SORTED], [sed -e 's/arp.tpa == 10.1.0.[[0-9]]\{1,3\}/arp.tpa == 10.1.0.??/;s/eth.dst == ..:..:..:..:..:../??:??:??:??:??:??/;s/eth.src == ..:..:..:..:..:../??:??:??:??:??:??/' | sort])
 
 # Build some rather heavy config and modify number of threads in the middle
 check ovn-nbctl ls-add ls1
@@ -8664,7 +8664,7 @@  ovn_strip_lflows ], [0], [dnl
   table=??(ls_in_apply_port_sec), priority=50   , match=(reg0[[15]] == 1), action=(drop;)
   table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
   table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
-  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
+  table=??(ls_in_check_port_sec), priority=105  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
   table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
   table=??(ls_in_l2_lkup      ), priority=0    , match=(1), action=(outport = get_fdb(eth.dst); next;)
   table=??(ls_in_l2_lkup      ), priority=110  , match=(eth.dst == $svc_monitor_mac && (tcp || icmp || icmp6)), action=(handle_svc_check(inport);)
@@ -8690,7 +8690,9 @@  ovn_strip_lflows ], [0], [dnl
   table=??(ls_in_apply_port_sec), priority=50   , match=(reg0[[15]] == 1), action=(drop;)
   table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
   table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
-  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
+  table=??(ls_in_check_port_sec), priority=105  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
+  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:01 && outport == "sw0p1" && !is_chassis_resident("sw0p1") && flags.tunnel_rx == 1), action=(outport <-> inport; next;)
+  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:02 && outport == "sw0p2" && !is_chassis_resident("sw0p2") && flags.tunnel_rx == 1), action=(outport <-> inport; next;)
   table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
   table=??(ls_in_l2_lkup      ), priority=0    , match=(1), action=(outport = get_fdb(eth.dst); next;)
   table=??(ls_in_l2_lkup      ), priority=110  , match=(eth.dst == $svc_monitor_mac && (tcp || icmp || icmp6)), action=(handle_svc_check(inport);)
@@ -8717,7 +8719,9 @@  ovn_strip_lflows ], [0], [dnl
   table=??(ls_in_apply_port_sec), priority=50   , match=(reg0[[15]] == 1), action=(drop;)
   table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
   table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
-  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
+  table=??(ls_in_check_port_sec), priority=105  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
+  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:01 && outport == "sw0p1" && !is_chassis_resident("sw0p1") && flags.tunnel_rx == 1), action=(outport <-> inport; next;)
+  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:02 && outport == "sw0p2" && !is_chassis_resident("sw0p2") && flags.tunnel_rx == 1), action=(outport <-> inport; next;)
   table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
   table=??(ls_in_l2_lkup      ), priority=0    , match=(1), action=(outport = get_fdb(eth.dst); next;)
   table=??(ls_in_l2_lkup      ), priority=110  , match=(eth.dst == $svc_monitor_mac && (tcp || icmp || icmp6)), action=(handle_svc_check(inport);)
@@ -8745,7 +8749,9 @@  ovn_strip_lflows ], [0], [dnl
   table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
   table=??(ls_in_check_port_sec), priority=100  , match=(inport == "sw0p1"), action=(reg0[[15]] = 1; next;)
   table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
-  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
+  table=??(ls_in_check_port_sec), priority=105  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
+  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:01 && outport == "sw0p1" && !is_chassis_resident("sw0p1") && flags.tunnel_rx == 1), action=(outport <-> inport; next;)
+  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:02 && outport == "sw0p2" && !is_chassis_resident("sw0p2") && flags.tunnel_rx == 1), action=(outport <-> inport; next;)
   table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
   table=??(ls_in_l2_lkup      ), priority=0    , match=(1), action=(outport = get_fdb(eth.dst); next;)
   table=??(ls_in_l2_lkup      ), priority=110  , match=(eth.dst == $svc_monitor_mac && (tcp || icmp || icmp6)), action=(handle_svc_check(inport);)
@@ -8772,7 +8778,9 @@  ovn_strip_lflows ], [0], [dnl
   table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
   table=??(ls_in_check_port_sec), priority=100  , match=(inport == "sw0p1"), action=(reg0[[15]] = 1; next;)
   table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
-  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
+  table=??(ls_in_check_port_sec), priority=105  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
+  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:01 && outport == "sw0p1" && !is_chassis_resident("sw0p1") && flags.tunnel_rx == 1), action=(outport <-> inport; next;)
+  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:02 && outport == "sw0p2" && !is_chassis_resident("sw0p2") && flags.tunnel_rx == 1), action=(outport <-> inport; next;)
   table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
   table=??(ls_in_check_port_sec), priority=70   , match=(inport == "sw0p2"), action=(set_queue(10); reg0[[15]] = check_in_port_sec(); next;)
   table=??(ls_in_l2_lkup      ), priority=0    , match=(1), action=(outport = get_fdb(eth.dst); next;)
@@ -8802,7 +8810,9 @@  ovn_strip_lflows ], [0], [dnl
   table=??(ls_in_apply_port_sec), priority=50   , match=(reg0[[15]] == 1), action=(drop;)
   table=??(ls_in_check_port_sec), priority=100  , match=(eth.src[[40]]), action=(drop;)
   table=??(ls_in_check_port_sec), priority=100  , match=(vlan.present), action=(drop;)
-  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
+  table=??(ls_in_check_port_sec), priority=105  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && flags.tunnel_rx == 1), action=(drop;)
+  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:01 && outport == "sw0p1" && !is_chassis_resident("sw0p1") && flags.tunnel_rx == 1), action=(outport <-> inport; next;)
+  table=??(ls_in_check_port_sec), priority=110  , match=(((ip4 && icmp4.type == 3 && icmp4.code == 4) || (ip6 && icmp6.type == 2 && icmp6.code == 0)) && eth.src == 00:00:00:00:00:02 && outport == "sw0p2" && !is_chassis_resident("sw0p2") && flags.tunnel_rx == 1), action=(outport <-> inport; next;)
   table=??(ls_in_check_port_sec), priority=50   , match=(1), action=(reg0[[15]] = check_in_port_sec(); next;)
   table=??(ls_in_check_port_sec), priority=70   , match=(inport == "localnetport"), action=(set_queue(10); reg0[[15]] = check_in_port_sec(); next;)
   table=??(ls_in_check_port_sec), priority=70   , match=(inport == "sw0p1"), action=(reg0[[14]] = 1; next(pipeline=ingress, table=??);)
diff --git a/tests/ovn.at b/tests/ovn.at
index e81cd4f45..546f54225 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -34916,6 +34916,9 @@  m4_define([MULTIPLE_OVS_INT],
    check ovn-nbctl lsp-add ls lp
    if test X$1 != X; then
        check ovn-nbctl lsp-set-type lp $1
+       nb_flows_ref=0
+   else
+       nb_flows_ref=1
    fi
    check ovn-nbctl lsp-set-addresses lp "00:00:00:01:01:02 192.168.1.2"
 
@@ -34997,7 +35000,7 @@  m4_define([MULTIPLE_OVS_INT],
    check ovs-vsctl del-port br-int lpnew
    OVS_WAIT_UNTIL([
        nb_flows=`ovs-ofctl dump-flows br-int | grep $COOKIE | wc -l`
-       test "${nb_flows}" = 0
+       test "${nb_flows}" = $nb_flows_ref
    ])
 
    echo ======================================================