@@ -1907,6 +1907,71 @@ is_iface_in_int_bridge(const struct ovsrec_interface *iface,
return false;
}
+static bool
+is_ext_id_changed(const struct smap *a, const struct smap *b, const char *key)
+{
+ const char *value_a = smap_get(a, key);
+ const char *value_b = smap_get(b, key);
+ if ((value_a && !value_b)
+ || (!value_a && value_b)
+ || (value_a && value_b && strcmp(value_a, value_b))) {
+ return true;
+ }
+ return false;
+}
+
+/* Check if the change in 'iface_rec' is something we are interested in from
+ * port binding perspective. Return true if the change needs to be handled,
+ * otherwise return false.
+ *
+ * The 'iface_rec' must be change tracked, i.e. iterator from
+ * OVSREC_INTERFACE_TABLE_FOR_EACH_TRACKED. */
+static bool
+ovs_interface_change_need_handle(const struct ovsrec_interface *iface_rec,
+ struct shash *iface_table_external_ids_old)
+{
+ if (ovsrec_interface_is_updated(iface_rec,
+ OVSREC_INTERFACE_COL_NAME)) {
+ return true;
+ }
+ if (ovsrec_interface_is_updated(iface_rec,
+ OVSREC_INTERFACE_COL_OFPORT)) {
+ return true;
+ }
+ if (ovsrec_interface_is_updated(iface_rec,
+ OVSREC_INTERFACE_COL_TYPE)) {
+ return true;
+ }
+ if (ovsrec_interface_is_updated(iface_rec,
+ OVSREC_INTERFACE_COL_EXTERNAL_IDS)) {
+ /* Compare the external_ids that we are interested in with the old
+ * values:
+ * - iface-id
+ * - iface-id-ver
+ * - encap-ip
+ * For any other changes, such as ovn-installed, ovn-installed-ts, etc,
+ * we don't need to handle. */
+ struct smap *external_ids_old =
+ shash_find_data(iface_table_external_ids_old, iface_rec->name);
+ if (!external_ids_old) {
+ return true;
+ }
+ if (is_ext_id_changed(&iface_rec->external_ids, external_ids_old,
+ "iface-id")) {
+ return true;
+ }
+ if (is_ext_id_changed(&iface_rec->external_ids, external_ids_old,
+ "iface-id-ver")) {
+ return true;
+ }
+ if (is_ext_id_changed(&iface_rec->external_ids, external_ids_old,
+ "encap-ip")) {
+ return true;
+ }
+ }
+ return false;
+}
+
/* Returns true if the ovs interface changes were handled successfully,
* false otherwise.
*/
@@ -2018,6 +2083,11 @@ binding_handle_ovs_interface_changes(struct binding_ctx_in *b_ctx_in,
continue;
}
+ if (!ovs_interface_change_need_handle(
+ iface_rec, b_ctx_in->iface_table_external_ids_old)) {
+ continue;
+ }
+
const char *iface_id = smap_get(&iface_rec->external_ids, "iface-id");
int64_t ofport = iface_rec->n_ofport ? *iface_rec->ofport : 0;
if (iface_id && ofport > 0 &&
@@ -55,6 +55,7 @@ struct binding_ctx_in {
const struct ovsrec_bridge_table *bridge_table;
const struct ovsrec_open_vswitch_table *ovs_table;
const struct ovsrec_interface_table *iface_table;
+ struct shash *iface_table_external_ids_old;
};
/* Locally relevant port bindings, e.g., VIFs that might be bound locally,
@@ -1042,6 +1042,79 @@ en_ofctrl_is_connected_run(struct engine_node *node, void *data)
engine_set_node_state(node, EN_UNCHANGED);
}
+struct ed_type_ovs_interface_shadow {
+ struct ovsrec_interface_table *iface_table;
+ struct shash iface_table_external_ids_old;
+};
+
+static void *
+en_ovs_interface_shadow_init(struct engine_node *node OVS_UNUSED,
+ struct engine_arg *arg OVS_UNUSED)
+{
+ struct ed_type_ovs_interface_shadow *data = xzalloc(sizeof *data);
+ data->iface_table = NULL;
+ shash_init(&data->iface_table_external_ids_old);
+
+ return data;
+}
+
+static void
+iface_table_external_ids_old_destroy(struct shash *table_ext_ids)
+{
+ struct shash_node *node;
+ SHASH_FOR_EACH_SAFE (node, table_ext_ids) {
+ struct smap *ext_ids = node->data;
+ smap_destroy(ext_ids);
+ }
+ shash_destroy_free_data(table_ext_ids);
+}
+
+static void
+en_ovs_interface_shadow_cleanup(void *data_)
+{
+ struct ed_type_ovs_interface_shadow *data = data_;
+ iface_table_external_ids_old_destroy(&data->iface_table_external_ids_old);
+}
+
+static void
+en_ovs_interface_shadow_clear_tracked_data(void *data_)
+{
+ struct ed_type_ovs_interface_shadow *data = data_;
+ iface_table_external_ids_old_destroy(&data->iface_table_external_ids_old);
+ shash_init(&data->iface_table_external_ids_old);
+
+ if (!data->iface_table) {
+ return;
+ }
+
+ const struct ovsrec_interface *iface_rec;
+ OVSREC_INTERFACE_TABLE_FOR_EACH (iface_rec, data->iface_table) {
+ struct smap *external_ids = xmalloc(sizeof *external_ids);
+ smap_clone(external_ids, &iface_rec->external_ids);
+ shash_add(&data->iface_table_external_ids_old, iface_rec->name,
+ external_ids);
+ }
+}
+
+static void
+en_ovs_interface_shadow_run(struct engine_node *node, void *data_)
+{
+ struct ed_type_ovs_interface_shadow *data = data_;
+ struct ovsrec_interface_table *iface_table =
+ (struct ovsrec_interface_table *)EN_OVSDB_GET(
+ engine_get_input("OVS_interface", node));
+ data->iface_table = iface_table;
+ engine_set_node_state(node, EN_UPDATED);
+}
+
+static bool
+ovs_interface_shadow_ovs_interface_handler(struct engine_node *node,
+ void *data_)
+{
+ en_ovs_interface_shadow_run(node, data_);
+ return true;
+}
+
struct ed_type_runtime_data {
/* Contains "struct local_datapath" nodes. */
struct hmap local_datapaths;
@@ -1214,9 +1287,8 @@ init_binding_ctx(struct engine_node *node,
(struct ovsrec_port_table *)EN_OVSDB_GET(
engine_get_input("OVS_port", node));
- struct ovsrec_interface_table *iface_table =
- (struct ovsrec_interface_table *)EN_OVSDB_GET(
- engine_get_input("OVS_interface", node));
+ struct ed_type_ovs_interface_shadow *iface_shadow =
+ engine_get_input_data("ovs_interface_shadow", node);
struct ovsrec_qos_table *qos_table =
(struct ovsrec_qos_table *)EN_OVSDB_GET(
@@ -1249,7 +1321,9 @@ init_binding_ctx(struct engine_node *node,
b_ctx_in->sbrec_port_binding_by_datapath = sbrec_port_binding_by_datapath;
b_ctx_in->sbrec_port_binding_by_name = sbrec_port_binding_by_name;
b_ctx_in->port_table = port_table;
- b_ctx_in->iface_table = iface_table;
+ b_ctx_in->iface_table = iface_shadow->iface_table;
+ b_ctx_in->iface_table_external_ids_old =
+ &iface_shadow->iface_table_external_ids_old;
b_ctx_in->qos_table = qos_table;
b_ctx_in->port_binding_table = pb_table;
b_ctx_in->br_int = br_int;
@@ -1331,7 +1405,7 @@ en_runtime_data_run(struct engine_node *node, void *data)
}
static bool
-runtime_data_ovs_interface_handler(struct engine_node *node, void *data)
+runtime_data_ovs_interface_shadow_handler(struct engine_node *node, void *data)
{
struct ed_type_runtime_data *rt_data = data;
struct binding_ctx_in b_ctx_in;
@@ -3338,6 +3412,8 @@ main(int argc, char *argv[])
/* Define inc-proc-engine nodes. */
ENGINE_NODE_WITH_CLEAR_TRACK_DATA_IS_VALID(ct_zones, "ct_zones");
+ ENGINE_NODE_WITH_CLEAR_TRACK_DATA(ovs_interface_shadow,
+ "ovs_interface_shadow");
ENGINE_NODE_WITH_CLEAR_TRACK_DATA(runtime_data, "runtime_data");
ENGINE_NODE(non_vif_data, "non_vif_data");
ENGINE_NODE(mff_ovn_geneve, "mff_ovn_geneve");
@@ -3459,6 +3535,9 @@ main(int argc, char *argv[])
engine_add_input(&en_ct_zones, &en_runtime_data,
ct_zones_runtime_data_handler);
+ engine_add_input(&en_ovs_interface_shadow, &en_ovs_interface,
+ ovs_interface_shadow_ovs_interface_handler);
+
engine_add_input(&en_runtime_data, &en_ofctrl_is_connected, NULL);
engine_add_input(&en_runtime_data, &en_ovs_open_vswitch, NULL);
@@ -3480,8 +3559,8 @@ main(int argc, char *argv[])
*/
engine_add_input(&en_runtime_data, &en_ovs_port,
engine_noop_handler);
- engine_add_input(&en_runtime_data, &en_ovs_interface,
- runtime_data_ovs_interface_handler);
+ engine_add_input(&en_runtime_data, &en_ovs_interface_shadow,
+ runtime_data_ovs_interface_shadow_handler);
engine_add_input(&en_flow_output, &en_lflow_output,
flow_output_lflow_output_handler);
@@ -366,6 +366,23 @@ for i in 1 2; do
ovn-nbctl wait-until Logical_Switch_Port $lp 'up=true' &&
ovn-nbctl --wait=hv sync]
)
+
+ # Delete and recreate $lp to make it unbind and rebind multiple times, and
+ # ensure no recompute is triggered.
+ for k in $(seq 10); do
+ OVN_CONTROLLER_EXPECT_NO_HIT(
+ [hv$i hv$j], [lflow_run],
+ [as hv$i ovs-vsctl set Interface $vif external-ids:iface-id=xxxx &&
+ ovn-nbctl wait-until Logical_Switch_Port $lp 'up=false' &&
+ ovn-nbctl --wait=hv sync]
+ )
+ OVN_CONTROLLER_EXPECT_NO_HIT(
+ [hv$i hv$j], [lflow_run],
+ [as hv$i ovs-vsctl set Interface $vif external-ids:iface-id=$lp &&
+ ovn-nbctl wait-until Logical_Switch_Port $lp 'up=true' &&
+ ovn-nbctl --wait=hv sync]
+ )
+ done
done
for i in 1 2; do
During the process of claiming a VIF port on a chassis, there are multiple SB and local OVSDB updates by ovn-controller, which may trigger ovn-controller recompute, depending on the timing between the completion of SB and local OVSDB updates: After port claiming, ovn-controller sends an update to the local OVSDB to clean the Interface:external_ids:ovn-installed, and also sends an update to the SB DB to update the Port_Binding:chassis. If the local OVSDB update notification comes back before the SB update completes, ovn-controller's SB IDL is still "in transaction" and read-only, but the local Interface update triggers the I-P engine runtime_data node to handle the OVS_interface change, which considers the VIF to be claimed (again) potentially, which requires SB updates while it is not possible at the moment, and so the I-P handler returns failure and the engine aborts, which triggers recompute at the next I-P engine iteration when SB DB is writable. If we are lucky at the above step that the SB update completes before the local OVSDB update is received, ovn-controller would handle the change incrementally (and figures out that the SB Port_Binding chassis is already the current one thus wouldn't reclaim the VIF). But the next step after it completes flow computing for the newly bound VIF it sends another update to the local OVSDB to set the Interface:external_ids:ovn-installed and ovn-installed-ts, while at the same time it also sends an update to the SB DB to set the Port_Binding:up. These two parallel updates would lead to the same situation as the above step and potentially cause ovn-controll recompute. The recompute is very likely to be triggered in production because the SB DB is remote (on central nodes) and has heavier load, which requires longer time to complete the updates than the local OVSDB server. The problem is that the runtime_data's OVS_interface change handler couldn't handle the update of the Interface:external_ids:ovn-installed incrementally, because ovn-controller assumes it could require an SB update that is not possible at the moment. However, the assumption is not true. In fact it shouldn't even care about the change because this external_id:ovn-installed is written by ovn-controller itself and it is write-only (no need to read it as an input for any further processing). So, the idea to fix the problem is to ignore such changes that happen to the "write-only" fields only. OVSDB change tracking provides APIs to check if a specific column is updated, but the difficulty here is that the column external_ids is a k-v map which contains not only the data that ovn-controller can ignore, such as ovn-installed, ovn-installed-ts, but also the data ovn-controller cares about, such as iface-id, iface-id-ver and encap-ip. To solve this problem, we need to know what's changed exactly inside the external_ids map. This patch solves it by introducing a new I-P engine node ovs_interface_shadow to wrap the OVSDB input ovs_interface with an extra copy of the older version of the column external_ids, and replace the input OVS_interface of the runtime_data node. In the change handler of the runtime_data node we evaluates if the change needs to be handled before calling consider_iface_claim(), so in the above scenarios the changes don't need to be handled and won't trigger recompute any more. A test case is also updated to capture this (which is very likely, if not 100%, to fail without the fix). Signed-off-by: Han Zhou <hzhou@ovn.org> --- controller/binding.c | 70 ++++++++++++++++++++++++++++ controller/binding.h | 1 + controller/ovn-controller.c | 93 ++++++++++++++++++++++++++++++++++--- tests/ovn-performance.at | 17 +++++++ 4 files changed, 174 insertions(+), 7 deletions(-)