diff mbox series

[ovs-dev,6/6] Update multichassis physical flows on interface MTU update

Message ID 20230503011239.2100488-7-ihrachys@redhat.com
State New, archived
Headers show
Series Implement MTU Path Discovery for multichassis ports | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test fail github build: failed
ovsrobot/github-robot-_ovn-kubernetes fail github build: failed

Commit Message

Ihar Hrachyshka May 3, 2023, 1:12 a.m. UTC
Make ICMP Path MTU Discovery flows in table=38 react to underlying
interface MTU update.

NOTE: ideally, OVN would support Logical_Port MTU, in which case we
wouldn't have to track OVSDB for interfaces, and we would also be able
to react to MTU changes regardless of interface location. This patch is
best effort and doesn't handle all scenarios. (E.g. a scenario where
MTUs are not changed consistently for all switch interfaces across all
chassis.)

Signed-off-by: Ihar Hrachyshka <ihrachys@redhat.com>
---
 controller/if-status.c      | 23 +++++++-----
 controller/if-status.h      |  3 ++
 controller/ovn-controller.c | 73 +++++++++++++++++++++++++++++++++++++
 controller/ovsport.c        |  9 +++++
 controller/ovsport.h        |  2 +
 controller/physical.c       |  1 -
 controller/physical.h       |  1 +
 tests/ovn.at                | 66 +++++++++++++++++++++++++++++++--
 8 files changed, 164 insertions(+), 14 deletions(-)
diff mbox series

Patch

diff --git a/controller/if-status.c b/controller/if-status.c
index e60156c4a..1cdd893ab 100644
--- a/controller/if-status.c
+++ b/controller/if-status.c
@@ -18,6 +18,7 @@ 
 #include "binding.h"
 #include "if-status.h"
 #include "ofctrl-seqno.h"
+#include "ovsport.h"
 #include "simap.h"
 
 #include "lib/hmapx.h"
@@ -500,15 +501,6 @@  ovs_iface_account_mem(const char *iface_id, bool erase)
     }
 }
 
-static uint16_t
-get_iface_mtu(const struct ovsrec_interface *iface)
-{
-    if (!iface || !iface->n_mtu || iface->mtu[0] <= 0) {
-        return 0;
-    }
-    return (uint16_t) iface->mtu[0];
-}
-
 uint16_t
 if_status_mgr_iface_get_mtu(const struct if_status_mgr *mgr,
                             const char *iface_id)
@@ -517,6 +509,19 @@  if_status_mgr_iface_get_mtu(const struct if_status_mgr *mgr,
     return iface ? iface->mtu : 0;
 }
 
+bool
+if_status_mgr_iface_set_mtu(const struct if_status_mgr *mgr,
+                            const char *iface_id,
+                            uint16_t mtu)
+{
+    struct ovs_iface *iface = shash_find_data(&mgr->ifaces, iface_id);
+    if (iface && iface->mtu != mtu) {
+        iface->mtu = mtu;
+        return true;
+    }
+    return false;
+}
+
 static struct ovs_iface *
 ovs_iface_create(struct if_status_mgr *mgr, const char *iface_id,
                  const struct ovsrec_interface *iface_rec,
diff --git a/controller/if-status.h b/controller/if-status.h
index 8186bdf08..b11d4cd61 100644
--- a/controller/if-status.h
+++ b/controller/if-status.h
@@ -47,6 +47,9 @@  bool if_status_mgr_iface_is_present(struct if_status_mgr *mgr,
                                     const char *iface_id);
 uint16_t if_status_mgr_iface_get_mtu(const struct if_status_mgr *mgr,
                                      const char *iface_id);
+bool if_status_mgr_iface_set_mtu(const struct if_status_mgr *mgr,
+                                 const char *iface_id,
+                                 uint16_t mtu);
 bool if_status_handle_claims(struct if_status_mgr *mgr,
                              struct local_binding_data *binding_data,
                              const struct sbrec_chassis *chassis_rec,
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 9359925fa..fb6091fae 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -60,6 +60,7 @@ 
 #include "lib/ovn-dirs.h"
 #include "lib/ovn-sb-idl.h"
 #include "lib/ovn-util.h"
+#include "ovsport.h"
 #include "patch.h"
 #include "vif-plug.h"
 #include "vif-plug-provider.h"
@@ -1056,6 +1057,7 @@  ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl)
     ovsdb_idl_track_add_column(ovs_idl, &ovsrec_interface_col_name);
     ovsdb_idl_track_add_column(ovs_idl, &ovsrec_interface_col_bfd);
     ovsdb_idl_track_add_column(ovs_idl, &ovsrec_interface_col_bfd_status);
+    ovsdb_idl_track_add_column(ovs_idl, &ovsrec_interface_col_mtu);
     ovsdb_idl_track_add_column(ovs_idl, &ovsrec_interface_col_type);
     ovsdb_idl_track_add_column(ovs_idl, &ovsrec_interface_col_options);
     ovsdb_idl_track_add_column(ovs_idl, &ovsrec_interface_col_ofport);
@@ -4046,6 +4048,9 @@  static void init_physical_ctx(struct engine_node *node,
     const struct ed_type_mff_ovn_geneve *ed_mff_ovn_geneve =
         engine_get_input_data("mff_ovn_geneve", node);
 
+    const struct ovsrec_interface_table *ovs_interface_table =
+        EN_OVSDB_GET(engine_get_input("OVS_interface", node));
+
     const struct ovsrec_open_vswitch_table *ovs_table =
         EN_OVSDB_GET(engine_get_input("OVS_open_vswitch", node));
     const struct ovsrec_bridge_table *bridge_table =
@@ -4070,6 +4075,7 @@  static void init_physical_ctx(struct engine_node *node,
     p_ctx->sbrec_port_binding_by_name = sbrec_port_binding_by_name;
     p_ctx->sbrec_port_binding_by_datapath = sbrec_port_binding_by_datapath;
     p_ctx->port_binding_table = port_binding_table;
+    p_ctx->ovs_interface_table = ovs_interface_table;
     p_ctx->mc_group_table = multicast_group_table;
     p_ctx->br_int = br_int;
     p_ctx->chassis_table = chassis_table;
@@ -4129,6 +4135,71 @@  en_pflow_output_run(struct engine_node *node, void *data)
     engine_set_node_state(node, EN_UPDATED);
 }
 
+static bool
+pflow_output_ovs_interface_handler(struct engine_node *node,
+                                   void *data)
+{
+    enum engine_node_state state = EN_UNCHANGED;
+
+    struct ed_type_pflow_output *pfo = data;
+    struct ed_type_runtime_data *rt_data =
+        engine_get_input_data("runtime_data", node);
+    struct ed_type_non_vif_data *non_vif_data =
+        engine_get_input_data("non_vif_data", node);
+
+    struct physical_ctx p_ctx;
+    init_physical_ctx(node, rt_data, non_vif_data, &p_ctx);
+
+    const struct ovsrec_interface *iface;
+    const struct ovsrec_interface_table *ovs_interface_table =
+        p_ctx.ovs_interface_table;
+    OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED (iface, ovs_interface_table) {
+        const char *iface_id = smap_get(&iface->external_ids, "iface-id");
+        if (!iface_id) {
+            continue;
+        }
+
+        uint16_t mtu = get_iface_mtu(iface);
+        if (!if_status_mgr_iface_set_mtu(p_ctx.if_mgr, iface_id, mtu)) {
+            continue;
+        }
+        const struct sbrec_port_binding *pb = lport_lookup_by_name(
+            p_ctx.sbrec_port_binding_by_name, iface_id);
+        if (!pb) {
+            continue;
+        }
+        if (pb->n_additional_chassis) {
+            /* Update flows for all ports in datapath. */
+            struct sbrec_port_binding *target =
+                sbrec_port_binding_index_init_row(
+                    p_ctx.sbrec_port_binding_by_datapath);
+            sbrec_port_binding_index_set_datapath(target, pb->datapath);
+
+            const struct sbrec_port_binding *binding;
+            SBREC_PORT_BINDING_FOR_EACH_EQUAL (
+                    binding, target, p_ctx.sbrec_port_binding_by_datapath) {
+                bool removed = sbrec_port_binding_is_deleted(binding);
+                if (!physical_handle_flows_for_lport(binding, removed, &p_ctx,
+                                                     &pfo->flow_table)) {
+                    return false;
+                }
+                state = EN_UPDATED;
+            }
+            sbrec_port_binding_index_destroy_row(target);
+        } else {
+            /* If any multichassis ports, update flows for the port. */
+            bool removed = sbrec_port_binding_is_deleted(pb);
+            if (!physical_handle_flows_for_lport(pb, removed, &p_ctx,
+                                                 &pfo->flow_table)) {
+                return false;
+            }
+            state = EN_UPDATED;
+        }
+    }
+    engine_set_node_state(node, state);
+    return true;
+}
+
 static bool
 pflow_output_sb_port_binding_handler(struct engine_node *node,
                                      void *data)
@@ -4661,6 +4732,8 @@  main(int argc, char *argv[])
     engine_add_input(&en_pflow_output, &en_sb_chassis,
                      pflow_lflow_output_sb_chassis_handler);
 
+    engine_add_input(&en_pflow_output, &en_ovs_interface,
+                     pflow_output_ovs_interface_handler);
     engine_add_input(&en_pflow_output, &en_sb_port_binding,
                      pflow_output_sb_port_binding_handler);
     engine_add_input(&en_pflow_output, &en_sb_multicast_group,
diff --git a/controller/ovsport.c b/controller/ovsport.c
index ec38c3fca..ebcb9cb6d 100644
--- a/controller/ovsport.c
+++ b/controller/ovsport.c
@@ -264,3 +264,12 @@  maintain_interface_smap_column(
         }
     }
 }
+
+uint16_t
+get_iface_mtu(const struct ovsrec_interface *iface)
+{
+    if (!iface || !iface->n_mtu || iface->mtu[0] <= 0) {
+        return 0;
+    }
+    return (uint16_t) iface->mtu[0];
+}
diff --git a/controller/ovsport.h b/controller/ovsport.h
index e355ff7ff..c40c1855a 100644
--- a/controller/ovsport.h
+++ b/controller/ovsport.h
@@ -57,4 +57,6 @@  const struct ovsrec_port * ovsport_lookup_by_interfaces(
 const struct ovsrec_port * ovsport_lookup_by_interface(
         struct ovsdb_idl_index *, struct ovsrec_interface *);
 
+uint16_t get_iface_mtu(const struct ovsrec_interface *);
+
 #endif /* lib/ovsport.h */
diff --git a/controller/physical.c b/controller/physical.c
index 1c1018616..e9ee3582a 100644
--- a/controller/physical.c
+++ b/controller/physical.c
@@ -92,7 +92,6 @@  physical_register_ovs_idl(struct ovsdb_idl *ovs_idl)
 
     ovsdb_idl_add_table(ovs_idl, &ovsrec_table_interface);
     ovsdb_idl_track_add_column(ovs_idl, &ovsrec_interface_col_name);
-    ovsdb_idl_track_add_column(ovs_idl, &ovsrec_interface_col_mtu);
     ovsdb_idl_track_add_column(ovs_idl, &ovsrec_interface_col_ofport);
     ovsdb_idl_track_add_column(ovs_idl, &ovsrec_interface_col_external_ids);
 }
diff --git a/controller/physical.h b/controller/physical.h
index 396bcb138..1f1ed55ef 100644
--- a/controller/physical.h
+++ b/controller/physical.h
@@ -52,6 +52,7 @@  struct physical_ctx {
     struct ovsdb_idl_index *sbrec_port_binding_by_name;
     struct ovsdb_idl_index *sbrec_port_binding_by_datapath;
     const struct sbrec_port_binding_table *port_binding_table;
+    const struct ovsrec_interface_table *ovs_interface_table;
     const struct sbrec_multicast_group_table *mc_group_table;
     const struct ovsrec_bridge *br_int;
     const struct sbrec_chassis_table *chassis_table;
diff --git a/tests/ovn.at b/tests/ovn.at
index 99ce3dd90..99a177c96 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -15270,7 +15270,7 @@  m4_define([MULTICHASSIS_PATH_MTU_DISCOVERY_TEST],
    done
 
    send_ip_packet() {
-       local inport=${1} hv=${2} eth_src=${3} eth_dst=${4} ipv4_src=${5} ipv4_dst=${6} data=${7} fail=${8}
+       local inport=${1} hv=${2} eth_src=${3} eth_dst=${4} ipv4_src=${5} ipv4_dst=${6} data=${7} fail=${8} mtu=${9:-$3}
        packet=$(fmt_pkt "
            Ether(dst='${eth_dst}', src='${eth_src}') /
            IP(src='${ipv4_src}', dst='${ipv4_dst}') /
@@ -15287,7 +15287,7 @@  m4_define([MULTICHASSIS_PATH_MTU_DISCOVERY_TEST],
          packet=$(fmt_pkt "
              Ether(dst='${eth_src}', src='${eth_dst}') /
              IP(src='${ipv4_dst}', dst='${ipv4_src}', ttl=255, flags=2, id=0) /
-             ICMP(type=3, code=4, nexthopmtu=$3) /
+             ICMP(type=3, code=4, nexthopmtu=${mtu}) /
              bytes.fromhex('${original_ip_frame:0:$((534 * 2))}')
          ")
        fi
@@ -15295,7 +15295,7 @@  m4_define([MULTICHASSIS_PATH_MTU_DISCOVERY_TEST],
    }
 
    send_ip6_packet() {
-       local inport=${1} hv=${2} eth_src=${3} eth_dst=${4} ipv6_src=${5} ipv6_dst=${6} data=${7} fail=${8}
+       local inport=${1} hv=${2} eth_src=${3} eth_dst=${4} ipv6_src=${5} ipv6_dst=${6} data=${7} fail=${8} mtu=${9:-$3}
        packet=$(fmt_pkt "
            Ether(dst='${eth_dst}', src='${eth_src}') /
            IPv6(src='${ipv6_src}', dst='${ipv6_dst}') /
@@ -15310,7 +15310,7 @@  m4_define([MULTICHASSIS_PATH_MTU_DISCOVERY_TEST],
          packet=$(fmt_pkt "
              Ether(dst='${eth_src}', src='${eth_dst}') /
              IPv6(src='${ipv6_dst}', dst='${ipv6_src}', hlim=255) /
-             ICMPv6PacketTooBig(mtu=$3) /
+             ICMPv6PacketTooBig(mtu=${mtu}) /
              bytes.fromhex('${original_ip_frame:0:$((1218 * 2))}')
          ")
        fi
@@ -15443,6 +15443,64 @@  m4_define([MULTICHASSIS_PATH_MTU_DISCOVERY_TEST],
    packet=$(send_ip6_packet multi1 1 $multi1_mac $multi2_mac $multi1_ip6 $multi2_ip6 $(payload $len) 1)
    echo $packet >> hv1/multi1.expected
 
+   check_pkts
+   reset_env
+
+   AS_BOX([MTU updates are honored in ICMP Path MTU calculation])
+
+   set_mtu() {
+       local hv=${1} iface=${2} new_mtu=${3}
+
+       iface_uuid=$(as ${hv} ovs-vsctl --bare --columns _uuid find Interface name=${iface})
+       check as ${hv} ovs-vsctl set interface ${iface_uuid} mtu_request=${new_mtu}
+   }
+
+   set_mtu_for_all_ports() {
+       for port in multi1 multi2 first; do
+           set_mtu hv1 ${port} ${1}
+       done
+       for port in multi1 multi2 second; do
+           set_mtu hv2 ${port} ${1}
+       done
+   }
+
+   initial_mtu=1500  # all interfaces are 1500 by default
+   new_mtu=1400
+   set_mtu_for_all_ports ${new_mtu}
+   mtu_diff=$((${initial_mtu} - ${new_mtu}))
+
+   len=3000
+   expected_ip_mtu=$(($3 - ${mtu_diff}))
+   packet=$(send_ip_packet first 1 $first_mac $multi1_mac $first_ip $multi1_ip $(payload $len) 1 ${expected_ip_mtu})
+   echo $packet >> hv1/first.expected
+
+   packet=$(send_ip_packet second 2 $second_mac $multi1_mac $second_ip $multi1_ip $(payload $len) 1 ${expected_ip_mtu})
+   echo $packet >> hv2/second.expected
+
+   packet=$(send_ip6_packet first 1 $first_mac $multi1_mac $first_ip6 $multi1_ip6 $(payload $len) 1 ${expected_ip_mtu})
+   echo $packet >> hv1/first.expected
+
+   packet=$(send_ip6_packet second 2 $second_mac $multi1_mac $second_ip6 $multi1_ip6 $(payload $len) 1 ${expected_ip_mtu})
+   echo $packet >> hv2/second.expected
+
+   packet=$(send_ip_packet multi1 1 $multi1_mac $first_mac $multi1_ip $first_ip $(payload $len) 1 ${expected_ip_mtu})
+   echo $packet >> hv1/multi1.expected
+
+   packet=$(send_ip_packet multi1 1 $multi1_mac $second_mac $multi1_ip $second_ip $(payload $len) 1 ${expected_ip_mtu})
+   echo $packet >> hv1/multi1.expected
+
+   packet=$(send_ip6_packet multi1 1 $multi1_mac $first_mac $multi1_ip6 $first_ip6 $(payload $len) 1 ${expected_ip_mtu})
+   echo $packet >> hv1/multi1.expected
+
+   packet=$(send_ip6_packet multi1 1 $multi1_mac $second_mac $multi1_ip6 $second_ip6 $(payload $len) 1 ${expected_ip_mtu})
+   echo $packet >> hv1/multi1.expected
+
+   packet=$(send_ip_packet multi1 1 $multi1_mac $multi2_mac $multi1_ip $multi2_ip $(payload $len) 1 ${expected_ip_mtu})
+   echo $packet >> hv1/multi1.expected
+
+   packet=$(send_ip6_packet multi1 1 $multi1_mac $multi2_mac $multi1_ip6 $multi2_ip6 $(payload $len) 1 ${expected_ip_mtu})
+   echo $packet >> hv1/multi1.expected
+
    check_pkts
 
    OVN_CLEANUP([hv1],[hv2])