diff mbox series

[ovs-dev] ovn-controller: Allow two datapaths to monitor the same vrf.

Message ID 20260518214403.1223287-1-jtanenba@redhat.com
State Changes Requested
Headers show
Series [ovs-dev] ovn-controller: Allow two datapaths to monitor the same vrf. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/github-robot-_ovn-kubernetes success github build: passed

Commit Message

Jacob Tanenbaum May 18, 2026, 9:44 p.m. UTC
Users should be able to configure two logical routers that
simultaneously interact with the same host routing table (vrf).

This is accomplished by maintaining an hmap while route_exchange_run()
is running that keeps track of all the advertised routes that we have
seen per table_id. Each loop over r_ctx_in->announce_routes is a
different datapath if we try to add advertised routes to an entry that
already has advertised routes then we know two different datapaths are
trying to write advertised routes to the vrf table.

Everytime we re_nl_sync_routes() routes that are not passed to it are
deleted as stale, so passing any advertised previously advertised route
on that table was a way to ensure that two routers can interact with a
vrf table.

Reported-at: https://redhat.atlassian.net/browse/FDP-3472
Reported-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Jacob Tanenbaum <jtanenba@redhat.com>

Comments

Mairtin O'Loingsigh May 20, 2026, 9:46 a.m. UTC | #1
On Mon, May 18, 2026 at 05:44:03PM -0400, Jacob Tanenbaum via dev wrote:
> Users should be able to configure two logical routers that
> simultaneously interact with the same host routing table (vrf).
> 
> This is accomplished by maintaining an hmap while route_exchange_run()
> is running that keeps track of all the advertised routes that we have
> seen per table_id. Each loop over r_ctx_in->announce_routes is a
> different datapath if we try to add advertised routes to an entry that
> already has advertised routes then we know two different datapaths are
> trying to write advertised routes to the vrf table.
> 
> Everytime we re_nl_sync_routes() routes that are not passed to it are
> deleted as stale, so passing any advertised previously advertised route
> on that table was a way to ensure that two routers can interact with a
> vrf table.
> 
> Reported-at: https://redhat.atlassian.net/browse/FDP-3472
> Reported-by: Dumitru Ceara <dceara@redhat.com>
> Signed-off-by: Jacob Tanenbaum <jtanenba@redhat.com>
> 
> diff --git a/controller/route-exchange.c b/controller/route-exchange.c
> index 82727f4e4..b36e08c53 100644
> --- a/controller/route-exchange.c
> +++ b/controller/route-exchange.c
> @@ -243,11 +243,18 @@ static int route_exchange_nl_status;
>          }                                       \
>      } while (0)
>  
> +struct advertised_routes_for_table_id_entry{
nit: add space before {
> +    struct hmap_node node;
> +
> +    uint32_t table_id;
> +    struct hmap routes;
> +};
> +
>  void
>  route_exchange_run(const struct route_exchange_ctx_in *r_ctx_in,
>                     struct route_exchange_ctx_out *r_ctx_out)
>  {
> -    struct hmap table_ids = HMAP_INITIALIZER(&table_ids);
> +    struct hmap advertised_routes = HMAP_INITIALIZER(&advertised_routes);
>      struct sset old_maintained_vrfs = SSET_INITIALIZER(&old_maintained_vrfs);
>      sset_swap(&_maintained_vrfs, &old_maintained_vrfs);
>      struct hmap old_maintained_route_table =
> @@ -259,15 +266,52 @@ route_exchange_run(const struct route_exchange_ctx_in *r_ctx_in,
>      const struct advertise_datapath_entry *ad;
>      HMAP_FOR_EACH (ad, node, r_ctx_in->announce_routes) {
>          uint32_t table_id = route_get_table_id(ad->db);
> -
> -        bool valid = TABLE_ID_VALID(table_id);
> -        if (!valid || !ovn_add_tnlid(&table_ids, table_id)) {
> +        if (!TABLE_ID_VALID(table_id)) {
>              VLOG_WARN_RL(&rl, "Unable to sync routes for datapath "UUID_FMT": "
> -                         "%s table id: %"PRIu32,
> -                         UUID_ARGS(&ad->db->header_.uuid),
> -                         !valid ? "invalid" : "duplicate", table_id);
> +                         "invalid table id: %"PRIu32,
> +                         UUID_ARGS(&ad->db->header_.uuid), table_id);
>              continue;
>          }
> +        struct advertised_routes_for_table_id_entry *entry;
> +        uint32_t hash = maintained_route_table_hash(table_id);
> +        HMAP_FOR_EACH_WITH_HASH (entry, node, hash, &advertised_routes) {
> +            if (entry->table_id == table_id) {
> +                if (!hmap_is_empty(&ad->routes) &&
> +                    !hmap_is_empty(&entry->routes)) {
> +                    VLOG_WARN_RL(&rl, "Multiple datapaths are distributing "
> +                                 "routes on vni table %"PRIu32"",
> +                                  table_id);
> +                }
> +                break;
> +
> +            }
> +        }
> +        /* In order to properly call re_nl_sync_routes() we need to
> +         * store any previously processed advirtised routes in order
> +         * to prevent those routes from being erroneously deleted as
> +         * stale routes.
> +         */
> +        if (entry == NULL) {
> +            entry = xmalloc(sizeof *entry);
> +            *entry = (struct advertised_routes_for_table_id_entry) {
> +                .table_id = table_id,
> +                .routes = HMAP_INITIALIZER(&entry->routes),
> +            };
> +            hmap_insert(&advertised_routes, &entry->node, hash);
> +        }
> +        struct advertise_route_entry *are;
> +        HMAP_FOR_EACH (are, node, &ad->routes) {
> +            struct advertise_route_entry *copy = xmalloc(sizeof(*copy));
> +            *copy = (struct advertise_route_entry) {
> +                .addr = are->addr,
> +                .plen = are->plen,
> +                .priority = are->priority,
> +                .nexthop = are->nexthop,
> +            };
> +            hmap_insert(&entry->routes,
> +                        &copy->node,
> +                        hmap_node_hash(&are->node));
> +        }
>  
>          if (ad->maintain_vrf) {
>              if (!sset_contains(&old_maintained_vrfs, ad->vrf_name)) {
> @@ -295,7 +339,7 @@ route_exchange_run(const struct route_exchange_ctx_in *r_ctx_in,
>          struct vector received_routes =
>              VECTOR_EMPTY_INITIALIZER(struct re_nl_received_route_node);
>  
> -        error = re_nl_sync_routes(table_id, &ad->routes,
> +        error = re_nl_sync_routes(table_id, &entry->routes,
>                                    &received_routes, ad->db);
>          SET_ROUTE_EXCHANGE_NL_STATUS(error);
>  
> @@ -339,7 +383,16 @@ route_exchange_run(const struct route_exchange_ctx_in *r_ctx_in,
>          sset_delete(&old_maintained_vrfs, SSET_NODE_FROM_NAME(vrf_name));
>      }
>      sset_destroy(&old_maintained_vrfs);
> -    ovn_destroy_tnlids(&table_ids);
> +    struct advertised_routes_for_table_id_entry *arte;
> +    HMAP_FOR_EACH_POP (arte, node, &advertised_routes) {
> +        struct advertise_route_entry *ade;
> +        HMAP_FOR_EACH_POP (ade, node, &arte->routes) {
> +            free(ade);
> +        }
> +        hmap_destroy(&arte->routes);
> +        free(arte);
> +    }
> +    hmap_destroy(&advertised_routes);
>  }
>  
>  void
> diff --git a/tests/system-ovn.at b/tests/system-ovn.at
> index 49aada46d..cce55a058 100644
> --- a/tests/system-ovn.at
> +++ b/tests/system-ovn.at
> @@ -20486,6 +20486,11 @@ check ovn-nbctl --wait=hv set Logical_Router lr-frr options:dynamic-routing-no-l
>  # Verify routes do not appear in SB database.
>  wait_row_count Learned_Route 0
>  
> +ovn-sbctl find Datapath_Binding
> +
> +ovn-sbctl find Learned_Route
> +check ovn-nbctl --wait=hv remove Logical_Router lr-tenant options dynamic-routing dynamic-routing-vrf-id
> +
>  check ovn-nbctl --wait=hv set Logical_Router lr-frr options:dynamic-routing-no-learning=false
>  
>  # Verify learned route appears in SB database
> @@ -20583,6 +20588,38 @@ ip_prefix           : "10.10.3.1"
>  ip_prefix           : "10.10.4.1"
>  ])
>  
> +# Verify that we can have one router read and another write from the same vrf table
> +#
> +# The router that advertises needs to have the gateway chassis set so remove the
> +# to redestribute routes from lr-frr
> +
> +check ovn-nbctl remove Logical_Router lr-frr options dynamic-routing-redistribute
> +check ovn-nbctl --wait=hv set Logical_Router lr-frr \
> +    options:dynamic-routing-vrf-id=$vni
> +
> +check ovn-nbctl --wait=hv set Logical_Router lr-tenant \
> +    options:dynamic-routing-vrf-id=$vni \
> +    options:dynamic-routing=true \
> +    options:dynamic-routing-redistribute=static \
> +    options:dynamic-routing-no-learning=true
> +
> +#verify that lr-frr has Learned_Routes but lr-tenant does not
> +wait_row_count Learned_Route 2
> +wait_row_count Advertised_Route 1
> +
> +OVN_ROUTE_EQUAL([vrf-$vni], [dnl
> +blackhole 10.10.1.1 proto ovn metric 1000
> +10.10.3.1 via 20.0.0.25 dev local-bgp-port proto zebra
> +10.10.4.1 via 20.0.0.25 dev local-bgp-port proto zebra
> +20.0.0.0/8 dev local-bgp-port proto kernel scope link src 20.0.0.4])
> +
> +OVS_WAIT_FOR_OUTPUT([ip route show table $vni type blackhole | wc -l], [0], [dnl
> +1
> +])
> +
> +check ovn-nbctl remove Logical_Router lr-tenant options dynamic-routing-vrf-id dynamic-routing
> +
> +
>  # Remove lrp-local-bgp-port port.
>  AS_BOX([$(date +%H:%M:%S.%03N) Remove lrp])
>  check ovn-nbctl --wait=hv lrp-del lrp-local-bgp-port
> -- 
> 2.54.0
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> 
Hi Jacob,

One concern I have with above is code is, if multiple datapaths share
the same VRF, each will sync the same table_id. Would it be better to sort
the datapaths by table_id first, then accumulate the routes for matching
table_ids. And finally, sync the final set of routes.

Regards,
Mairtin
diff mbox series

Patch

diff --git a/controller/route-exchange.c b/controller/route-exchange.c
index 82727f4e4..b36e08c53 100644
--- a/controller/route-exchange.c
+++ b/controller/route-exchange.c
@@ -243,11 +243,18 @@  static int route_exchange_nl_status;
         }                                       \
     } while (0)
 
+struct advertised_routes_for_table_id_entry{
+    struct hmap_node node;
+
+    uint32_t table_id;
+    struct hmap routes;
+};
+
 void
 route_exchange_run(const struct route_exchange_ctx_in *r_ctx_in,
                    struct route_exchange_ctx_out *r_ctx_out)
 {
-    struct hmap table_ids = HMAP_INITIALIZER(&table_ids);
+    struct hmap advertised_routes = HMAP_INITIALIZER(&advertised_routes);
     struct sset old_maintained_vrfs = SSET_INITIALIZER(&old_maintained_vrfs);
     sset_swap(&_maintained_vrfs, &old_maintained_vrfs);
     struct hmap old_maintained_route_table =
@@ -259,15 +266,52 @@  route_exchange_run(const struct route_exchange_ctx_in *r_ctx_in,
     const struct advertise_datapath_entry *ad;
     HMAP_FOR_EACH (ad, node, r_ctx_in->announce_routes) {
         uint32_t table_id = route_get_table_id(ad->db);
-
-        bool valid = TABLE_ID_VALID(table_id);
-        if (!valid || !ovn_add_tnlid(&table_ids, table_id)) {
+        if (!TABLE_ID_VALID(table_id)) {
             VLOG_WARN_RL(&rl, "Unable to sync routes for datapath "UUID_FMT": "
-                         "%s table id: %"PRIu32,
-                         UUID_ARGS(&ad->db->header_.uuid),
-                         !valid ? "invalid" : "duplicate", table_id);
+                         "invalid table id: %"PRIu32,
+                         UUID_ARGS(&ad->db->header_.uuid), table_id);
             continue;
         }
+        struct advertised_routes_for_table_id_entry *entry;
+        uint32_t hash = maintained_route_table_hash(table_id);
+        HMAP_FOR_EACH_WITH_HASH (entry, node, hash, &advertised_routes) {
+            if (entry->table_id == table_id) {
+                if (!hmap_is_empty(&ad->routes) &&
+                    !hmap_is_empty(&entry->routes)) {
+                    VLOG_WARN_RL(&rl, "Multiple datapaths are distributing "
+                                 "routes on vni table %"PRIu32"",
+                                  table_id);
+                }
+                break;
+
+            }
+        }
+        /* In order to properly call re_nl_sync_routes() we need to
+         * store any previously processed advirtised routes in order
+         * to prevent those routes from being erroneously deleted as
+         * stale routes.
+         */
+        if (entry == NULL) {
+            entry = xmalloc(sizeof *entry);
+            *entry = (struct advertised_routes_for_table_id_entry) {
+                .table_id = table_id,
+                .routes = HMAP_INITIALIZER(&entry->routes),
+            };
+            hmap_insert(&advertised_routes, &entry->node, hash);
+        }
+        struct advertise_route_entry *are;
+        HMAP_FOR_EACH (are, node, &ad->routes) {
+            struct advertise_route_entry *copy = xmalloc(sizeof(*copy));
+            *copy = (struct advertise_route_entry) {
+                .addr = are->addr,
+                .plen = are->plen,
+                .priority = are->priority,
+                .nexthop = are->nexthop,
+            };
+            hmap_insert(&entry->routes,
+                        &copy->node,
+                        hmap_node_hash(&are->node));
+        }
 
         if (ad->maintain_vrf) {
             if (!sset_contains(&old_maintained_vrfs, ad->vrf_name)) {
@@ -295,7 +339,7 @@  route_exchange_run(const struct route_exchange_ctx_in *r_ctx_in,
         struct vector received_routes =
             VECTOR_EMPTY_INITIALIZER(struct re_nl_received_route_node);
 
-        error = re_nl_sync_routes(table_id, &ad->routes,
+        error = re_nl_sync_routes(table_id, &entry->routes,
                                   &received_routes, ad->db);
         SET_ROUTE_EXCHANGE_NL_STATUS(error);
 
@@ -339,7 +383,16 @@  route_exchange_run(const struct route_exchange_ctx_in *r_ctx_in,
         sset_delete(&old_maintained_vrfs, SSET_NODE_FROM_NAME(vrf_name));
     }
     sset_destroy(&old_maintained_vrfs);
-    ovn_destroy_tnlids(&table_ids);
+    struct advertised_routes_for_table_id_entry *arte;
+    HMAP_FOR_EACH_POP (arte, node, &advertised_routes) {
+        struct advertise_route_entry *ade;
+        HMAP_FOR_EACH_POP (ade, node, &arte->routes) {
+            free(ade);
+        }
+        hmap_destroy(&arte->routes);
+        free(arte);
+    }
+    hmap_destroy(&advertised_routes);
 }
 
 void
diff --git a/tests/system-ovn.at b/tests/system-ovn.at
index 49aada46d..cce55a058 100644
--- a/tests/system-ovn.at
+++ b/tests/system-ovn.at
@@ -20486,6 +20486,11 @@  check ovn-nbctl --wait=hv set Logical_Router lr-frr options:dynamic-routing-no-l
 # Verify routes do not appear in SB database.
 wait_row_count Learned_Route 0
 
+ovn-sbctl find Datapath_Binding
+
+ovn-sbctl find Learned_Route
+check ovn-nbctl --wait=hv remove Logical_Router lr-tenant options dynamic-routing dynamic-routing-vrf-id
+
 check ovn-nbctl --wait=hv set Logical_Router lr-frr options:dynamic-routing-no-learning=false
 
 # Verify learned route appears in SB database
@@ -20583,6 +20588,38 @@  ip_prefix           : "10.10.3.1"
 ip_prefix           : "10.10.4.1"
 ])
 
+# Verify that we can have one router read and another write from the same vrf table
+#
+# The router that advertises needs to have the gateway chassis set so remove the
+# to redestribute routes from lr-frr
+
+check ovn-nbctl remove Logical_Router lr-frr options dynamic-routing-redistribute
+check ovn-nbctl --wait=hv set Logical_Router lr-frr \
+    options:dynamic-routing-vrf-id=$vni
+
+check ovn-nbctl --wait=hv set Logical_Router lr-tenant \
+    options:dynamic-routing-vrf-id=$vni \
+    options:dynamic-routing=true \
+    options:dynamic-routing-redistribute=static \
+    options:dynamic-routing-no-learning=true
+
+#verify that lr-frr has Learned_Routes but lr-tenant does not
+wait_row_count Learned_Route 2
+wait_row_count Advertised_Route 1
+
+OVN_ROUTE_EQUAL([vrf-$vni], [dnl
+blackhole 10.10.1.1 proto ovn metric 1000
+10.10.3.1 via 20.0.0.25 dev local-bgp-port proto zebra
+10.10.4.1 via 20.0.0.25 dev local-bgp-port proto zebra
+20.0.0.0/8 dev local-bgp-port proto kernel scope link src 20.0.0.4])
+
+OVS_WAIT_FOR_OUTPUT([ip route show table $vni type blackhole | wc -l], [0], [dnl
+1
+])
+
+check ovn-nbctl remove Logical_Router lr-tenant options dynamic-routing-vrf-id dynamic-routing
+
+
 # Remove lrp-local-bgp-port port.
 AS_BOX([$(date +%H:%M:%S.%03N) Remove lrp])
 check ovn-nbctl --wait=hv lrp-del lrp-local-bgp-port