diff mbox series

[ovs-dev,2/2,branch-24.03] northd, controller: Handle tunnel_key change consistently.

Message ID 20240509073543.563418-2-amusil@redhat.com
State Accepted
Headers show
Series [ovs-dev,1/2,branch-24.03] tests: Add macro for checking flows after recompute. | expand

Checks

Context Check Description
ovsrobot/apply-robot warning apply and check: warning
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/github-robot-_ovn-kubernetes success github build: passed

Commit Message

Ales Musil May 9, 2024, 7:35 a.m. UTC
Currently the tunnel_key change for either LS/LR/LSP/LRP wasn't
consistent. That would lead to a situations when some old would still
be present, breaking the connection especially for already existing
FDBs and MAC bindings.

Make sure the FDB entries are up to date by removing them from DB
when there is a tunnel_key change as those entries have only tunnel_key
refrences (dp_key, port_key).

MAC bindings have references to the datapath and port name, instead of
removing those entries do recompute in the controller when we detect
tunnel_key change. This can be costly at scale, however the tunnel_key
is not expected to change constantly, in most cases it shouldn't change
at all.

Fixes: b337750e45be ("northd: Incremental processing of VIF changes in 'northd' node.")
Fixes: 425f699e2b20 ("controller: fixed potential segfault when changing tunnel_key and deleting ls.")
Reported-at: https://issues.redhat.com/browse/FDP-393
Acked-by: Mark Michelson <mmichels@redhat.com>
Signed-off-by: Ales Musil <amusil@redhat.com>
Signed-off-by: Numan Siddique <numans@ovn.org>
(cherry picked from commit ddf051cbc6af24c303bf88970750e5c5fe285400)
---
 controller/binding.c        | 13 ++++++++--
 controller/ovn-controller.c | 27 +++++++------------
 northd/northd.c             |  7 +++++
 tests/ovn.at                | 52 +++++++++++++++++++++++++++++++++++++
 4 files changed, 79 insertions(+), 20 deletions(-)

Comments

Numan Siddique May 9, 2024, 9:30 p.m. UTC | #1
On Thu, May 9, 2024 at 3:36 AM Ales Musil <amusil@redhat.com> wrote:
>
> Currently the tunnel_key change for either LS/LR/LSP/LRP wasn't
> consistent. That would lead to a situations when some old would still
> be present, breaking the connection especially for already existing
> FDBs and MAC bindings.
>
> Make sure the FDB entries are up to date by removing them from DB
> when there is a tunnel_key change as those entries have only tunnel_key
> refrences (dp_key, port_key).
>
> MAC bindings have references to the datapath and port name, instead of
> removing those entries do recompute in the controller when we detect
> tunnel_key change. This can be costly at scale, however the tunnel_key
> is not expected to change constantly, in most cases it shouldn't change
> at all.
>
> Fixes: b337750e45be ("northd: Incremental processing of VIF changes in 'northd' node.")
> Fixes: 425f699e2b20 ("controller: fixed potential segfault when changing tunnel_key and deleting ls.")
> Reported-at: https://issues.redhat.com/browse/FDP-393
> Acked-by: Mark Michelson <mmichels@redhat.com>
> Signed-off-by: Ales Musil <amusil@redhat.com>
> Signed-off-by: Numan Siddique <numans@ovn.org>
> (cherry picked from commit ddf051cbc6af24c303bf88970750e5c5fe285400)

Thanks.  Applied both the patches to branch-24.03.

Numan

> ---
>  controller/binding.c        | 13 ++++++++--
>  controller/ovn-controller.c | 27 +++++++------------
>  northd/northd.c             |  7 +++++
>  tests/ovn.at                | 52 +++++++++++++++++++++++++++++++++++++
>  4 files changed, 79 insertions(+), 20 deletions(-)
>
> diff --git a/controller/binding.c b/controller/binding.c
> index 8ac2ce3e2..0712d7030 100644
> --- a/controller/binding.c
> +++ b/controller/binding.c
> @@ -3126,8 +3126,17 @@ delete_done:
>              update_ld_peers(pb, b_ctx_out->local_datapaths);
>          }
>
> -        handled = handle_updated_port(b_ctx_in, b_ctx_out, pb);
> -        if (!handled) {
> +        if (!handle_updated_port(b_ctx_in, b_ctx_out, pb)) {
> +            handled = false;
> +            break;
> +        }
> +
> +        if (!sbrec_port_binding_is_new(pb) &&
> +            sbrec_port_binding_is_updated(pb,
> +                                          SBREC_PORT_BINDING_COL_TUNNEL_KEY) &&
> +            get_local_datapath(b_ctx_out->local_datapaths,
> +                               pb->datapath->tunnel_key)) {
> +            handled = false;
>              break;
>          }
>      }
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index a40712e53..113d3e05c 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -1893,7 +1893,6 @@ runtime_data_sb_datapath_binding_handler(struct engine_node *node OVS_UNUSED,
>              engine_get_input("SB_datapath_binding", node));
>      const struct sbrec_datapath_binding *dp;
>      struct ed_type_runtime_data *rt_data = data;
> -    struct local_datapath *ld;
>
>      SBREC_DATAPATH_BINDING_TABLE_FOR_EACH_TRACKED (dp, dp_table) {
>          if (sbrec_datapath_binding_is_deleted(dp)) {
> @@ -1901,27 +1900,19 @@ runtime_data_sb_datapath_binding_handler(struct engine_node *node OVS_UNUSED,
>                                     dp->tunnel_key)) {
>                  return false;
>              }
> +
> +        }
> +
> +        if (sbrec_datapath_binding_is_updated(
> +                dp, SBREC_DATAPATH_BINDING_COL_TUNNEL_KEY) &&
> +            !sbrec_datapath_binding_is_new(dp)) {
>              /* If the tunnel key got updated, get_local_datapath will not find
>               * the ld. Use get_local_datapath_no_hash which does not
>               * rely on the hash.
>               */
> -            if (sbrec_datapath_binding_is_updated(
> -                    dp, SBREC_DATAPATH_BINDING_COL_TUNNEL_KEY)) {
> -                if (get_local_datapath_no_hash(&rt_data->local_datapaths,
> -                                               dp->tunnel_key)) {
> -                    return false;
> -                }
> -            }
> -        } else if (sbrec_datapath_binding_is_updated(
> -                        dp, SBREC_DATAPATH_BINDING_COL_TUNNEL_KEY)
> -                   && !sbrec_datapath_binding_is_new(dp)) {
> -            /* If the tunnel key is updated, remove the entry (with a wrong
> -             * hash) from the map. It will be (properly) added back later.
> -             */
> -            if ((ld = get_local_datapath_no_hash(&rt_data->local_datapaths,
> -                                                 dp->tunnel_key))) {
> -                hmap_remove(&rt_data->local_datapaths, &ld->hmap_node);
> -                local_datapath_destroy(ld);
> +            if (get_local_datapath_no_hash(&rt_data->local_datapaths,
> +                                           dp->tunnel_key)) {
> +                return false;
>              }
>          }
>      }
> diff --git a/northd/northd.c b/northd/northd.c
> index 8f20c4be3..a4bd3798b 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -4520,6 +4520,8 @@ ls_handle_lsp_changes(struct ovsdb_idl_txn *ovnsb_idl_txn,
>                  op->visited = true;
>                  continue;
>              }
> +
> +            uint32_t old_tunnel_key = op->tunnel_key;
>              if (!ls_port_reinit(op, ovnsb_idl_txn, &nd->ls_ports,
>                                  new_nbsp, NULL,
>                                  od, sb, ni->sbrec_mirror_table,
> @@ -4529,6 +4531,11 @@ ls_handle_lsp_changes(struct ovsdb_idl_txn *ovnsb_idl_txn,
>                  goto fail;
>              }
>              add_op_to_northd_tracked_ports(&trk_lsps->updated, op);
> +
> +            if (old_tunnel_key != op->tunnel_key) {
> +                delete_fdb_entry(ni->sbrec_fdb_by_dp_and_port, od->tunnel_key,
> +                                 old_tunnel_key);
> +            }
>          }
>          op->visited = true;
>      }
> diff --git a/tests/ovn.at b/tests/ovn.at
> index c36fadb90..a64e09bb1 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -37383,6 +37383,8 @@ sim_add hv1
>  as hv1
>  check ovs-vsctl add-br br-phys
>  ovn_attach n1 br-phys 192.168.0.11
> +ovs-vsctl -- add-port br-int vif1 -- \
> +    set interface vif1 external-ids:iface-id=lsp1
>
>  check ovn-nbctl --wait=hv ls-add ls \
>            -- lsp-add ls lsp1 \
> @@ -37395,6 +37397,56 @@ check ovn-nbctl --wait=hv ls-add ls \
>                 addresses=router \
>            -- lrp-set-gateway-chassis lr-ls hv1
>
> +dp_uuid=$(fetch_column datapath _uuid external_ids:name=lr)
> +ovn-sbctl create MAC_Binding ip=192.168.1.10 datapath=$dp_uuid logical_port=lr-ls mac='"00:00:00:00:00:01"'
> +
> +OVN_POPULATE_ARP
> +wait_for_ports_up
> +check ovn-nbctl --wait=hv sync
> +
> +create_fdb() {
> +    ls_key=$(fetch_column datapath tunnel_key external_ids:name=ls)
> +    lsp_key=$(fetch_column port_binding tunnel_key logical_port=lsp1)
> +
> +    ovn-sbctl create FDB mac='"00:00:00:00:00:01"' dp_key=$ls_key port_key=$lsp_key
> +}
> +
> +AS_BOX([Logical switch tunnel_key change])
> +create_fdb
> +
> +check ovn-nbctl --wait=hv set Logical_Switch ls other_config:requested-tnl-key=10
> +ovn-sbctl list datapath
> +CHECK_FLOWS_AFTER_RECOMPUTE([hv1], [hv1])
> +
> +check_row_count FDB 0 mac='"00:00:00:00:00:01"'
> +
> +AS_BOX([Logical switch port tunnel_key change])
> +create_fdb
> +
> +check ovn-nbctl --wait=hv set Logical_Switch_Port lsp1 options:requested-tnl-key=10
> +CHECK_FLOWS_AFTER_RECOMPUTE([hv1], [hv1])
> +
> +check_row_count FDB 0 mac='"00:00:00:00:00:01"'
> +
> +AS_BOX([Logical router tunnel_key change])
> +check ovn-nbctl --wait=hv set Logical_Router lr options:requested-tnl-key=20
> +CHECK_FLOWS_AFTER_RECOMPUTE([hv1], [hv1])
> +
> +check_row_count Mac_Binding 1 ip=192.168.1.10
> +AT_CHECK([ovs-ofctl dump-flows br-int table=67 | grep -c metadata=0x14], [0], [dnl
> +1
> +])
> +
> +AS_BOX([Logical router port tunnel_key change])
> +check ovn-nbctl --wait=hv set Logical_Router_Port lr-ls options:requested-tnl-key=20
> +CHECK_FLOWS_AFTER_RECOMPUTE([hv1], [hv1])
> +
> +check_row_count Mac_Binding 1 ip=192.168.1.10
> +AT_CHECK([ovs-ofctl dump-flows br-int table=67 | grep -c reg14=0x14], [0], [dnl
> +1
> +])
> +
> +AS_BOX([Logical switch tunnel_key change, potential segfault])
>  sleep_controller hv1
>
>  check ovn-nbctl --wait=sb set Logical_Switch ls other_config:requested-tnl-key=1000
> --
> 2.44.0
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
diff mbox series

Patch

diff --git a/controller/binding.c b/controller/binding.c
index 8ac2ce3e2..0712d7030 100644
--- a/controller/binding.c
+++ b/controller/binding.c
@@ -3126,8 +3126,17 @@  delete_done:
             update_ld_peers(pb, b_ctx_out->local_datapaths);
         }
 
-        handled = handle_updated_port(b_ctx_in, b_ctx_out, pb);
-        if (!handled) {
+        if (!handle_updated_port(b_ctx_in, b_ctx_out, pb)) {
+            handled = false;
+            break;
+        }
+
+        if (!sbrec_port_binding_is_new(pb) &&
+            sbrec_port_binding_is_updated(pb,
+                                          SBREC_PORT_BINDING_COL_TUNNEL_KEY) &&
+            get_local_datapath(b_ctx_out->local_datapaths,
+                               pb->datapath->tunnel_key)) {
+            handled = false;
             break;
         }
     }
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index a40712e53..113d3e05c 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -1893,7 +1893,6 @@  runtime_data_sb_datapath_binding_handler(struct engine_node *node OVS_UNUSED,
             engine_get_input("SB_datapath_binding", node));
     const struct sbrec_datapath_binding *dp;
     struct ed_type_runtime_data *rt_data = data;
-    struct local_datapath *ld;
 
     SBREC_DATAPATH_BINDING_TABLE_FOR_EACH_TRACKED (dp, dp_table) {
         if (sbrec_datapath_binding_is_deleted(dp)) {
@@ -1901,27 +1900,19 @@  runtime_data_sb_datapath_binding_handler(struct engine_node *node OVS_UNUSED,
                                    dp->tunnel_key)) {
                 return false;
             }
+
+        }
+
+        if (sbrec_datapath_binding_is_updated(
+                dp, SBREC_DATAPATH_BINDING_COL_TUNNEL_KEY) &&
+            !sbrec_datapath_binding_is_new(dp)) {
             /* If the tunnel key got updated, get_local_datapath will not find
              * the ld. Use get_local_datapath_no_hash which does not
              * rely on the hash.
              */
-            if (sbrec_datapath_binding_is_updated(
-                    dp, SBREC_DATAPATH_BINDING_COL_TUNNEL_KEY)) {
-                if (get_local_datapath_no_hash(&rt_data->local_datapaths,
-                                               dp->tunnel_key)) {
-                    return false;
-                }
-            }
-        } else if (sbrec_datapath_binding_is_updated(
-                        dp, SBREC_DATAPATH_BINDING_COL_TUNNEL_KEY)
-                   && !sbrec_datapath_binding_is_new(dp)) {
-            /* If the tunnel key is updated, remove the entry (with a wrong
-             * hash) from the map. It will be (properly) added back later.
-             */
-            if ((ld = get_local_datapath_no_hash(&rt_data->local_datapaths,
-                                                 dp->tunnel_key))) {
-                hmap_remove(&rt_data->local_datapaths, &ld->hmap_node);
-                local_datapath_destroy(ld);
+            if (get_local_datapath_no_hash(&rt_data->local_datapaths,
+                                           dp->tunnel_key)) {
+                return false;
             }
         }
     }
diff --git a/northd/northd.c b/northd/northd.c
index 8f20c4be3..a4bd3798b 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -4520,6 +4520,8 @@  ls_handle_lsp_changes(struct ovsdb_idl_txn *ovnsb_idl_txn,
                 op->visited = true;
                 continue;
             }
+
+            uint32_t old_tunnel_key = op->tunnel_key;
             if (!ls_port_reinit(op, ovnsb_idl_txn, &nd->ls_ports,
                                 new_nbsp, NULL,
                                 od, sb, ni->sbrec_mirror_table,
@@ -4529,6 +4531,11 @@  ls_handle_lsp_changes(struct ovsdb_idl_txn *ovnsb_idl_txn,
                 goto fail;
             }
             add_op_to_northd_tracked_ports(&trk_lsps->updated, op);
+
+            if (old_tunnel_key != op->tunnel_key) {
+                delete_fdb_entry(ni->sbrec_fdb_by_dp_and_port, od->tunnel_key,
+                                 old_tunnel_key);
+            }
         }
         op->visited = true;
     }
diff --git a/tests/ovn.at b/tests/ovn.at
index c36fadb90..a64e09bb1 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -37383,6 +37383,8 @@  sim_add hv1
 as hv1
 check ovs-vsctl add-br br-phys
 ovn_attach n1 br-phys 192.168.0.11
+ovs-vsctl -- add-port br-int vif1 -- \
+    set interface vif1 external-ids:iface-id=lsp1
 
 check ovn-nbctl --wait=hv ls-add ls \
           -- lsp-add ls lsp1 \
@@ -37395,6 +37397,56 @@  check ovn-nbctl --wait=hv ls-add ls \
                addresses=router \
           -- lrp-set-gateway-chassis lr-ls hv1
 
+dp_uuid=$(fetch_column datapath _uuid external_ids:name=lr)
+ovn-sbctl create MAC_Binding ip=192.168.1.10 datapath=$dp_uuid logical_port=lr-ls mac='"00:00:00:00:00:01"'
+
+OVN_POPULATE_ARP
+wait_for_ports_up
+check ovn-nbctl --wait=hv sync
+
+create_fdb() {
+    ls_key=$(fetch_column datapath tunnel_key external_ids:name=ls)
+    lsp_key=$(fetch_column port_binding tunnel_key logical_port=lsp1)
+
+    ovn-sbctl create FDB mac='"00:00:00:00:00:01"' dp_key=$ls_key port_key=$lsp_key
+}
+
+AS_BOX([Logical switch tunnel_key change])
+create_fdb
+
+check ovn-nbctl --wait=hv set Logical_Switch ls other_config:requested-tnl-key=10
+ovn-sbctl list datapath
+CHECK_FLOWS_AFTER_RECOMPUTE([hv1], [hv1])
+
+check_row_count FDB 0 mac='"00:00:00:00:00:01"'
+
+AS_BOX([Logical switch port tunnel_key change])
+create_fdb
+
+check ovn-nbctl --wait=hv set Logical_Switch_Port lsp1 options:requested-tnl-key=10
+CHECK_FLOWS_AFTER_RECOMPUTE([hv1], [hv1])
+
+check_row_count FDB 0 mac='"00:00:00:00:00:01"'
+
+AS_BOX([Logical router tunnel_key change])
+check ovn-nbctl --wait=hv set Logical_Router lr options:requested-tnl-key=20
+CHECK_FLOWS_AFTER_RECOMPUTE([hv1], [hv1])
+
+check_row_count Mac_Binding 1 ip=192.168.1.10
+AT_CHECK([ovs-ofctl dump-flows br-int table=67 | grep -c metadata=0x14], [0], [dnl
+1
+])
+
+AS_BOX([Logical router port tunnel_key change])
+check ovn-nbctl --wait=hv set Logical_Router_Port lr-ls options:requested-tnl-key=20
+CHECK_FLOWS_AFTER_RECOMPUTE([hv1], [hv1])
+
+check_row_count Mac_Binding 1 ip=192.168.1.10
+AT_CHECK([ovs-ofctl dump-flows br-int table=67 | grep -c reg14=0x14], [0], [dnl
+1
+])
+
+AS_BOX([Logical switch tunnel_key change, potential segfault])
 sleep_controller hv1
 
 check ovn-nbctl --wait=sb set Logical_Switch ls other_config:requested-tnl-key=1000