diff mbox series

[ovs-dev,v3,1/3] northd: Introduce ECMP_Nexthop table in SB db.

Message ID 2cd1ba01c63b1dcfcdb0bdaba897db53b8e50685.1717698646.git.lorenzo.bianconi@redhat.com
State Changes Requested
Headers show
Series Introduce ECMP_nexthop monitor in ovn-controller | expand

Checks

Context Check Description
ovsrobot/apply-robot fail apply and check: fail

Commit Message

Lorenzo Bianconi June 6, 2024, 6:34 p.m. UTC
Introduce ECMP_Nexthop table in the SB db in order to track active
ecmp-symmetric-reply connections and flush stale ones.

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
---
 northd/en-northd.c       |  33 +++++++++++
 northd/en-northd.h       |   4 ++
 northd/inc-proc-northd.c |   7 ++-
 northd/northd.c          | 117 +++++++++++++++++++++++++++++++++++++++
 northd/northd.h          |  10 ++++
 ovn-sb.ovsschema         |  16 +++++-
 ovn-sb.xml               |  31 +++++++++++
 tests/ovn-northd.at      |   4 ++
 8 files changed, 220 insertions(+), 2 deletions(-)

Comments

Ales Musil June 24, 2024, 8:34 a.m. UTC | #1
On Thu, Jun 6, 2024 at 8:34 PM Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
wrote:

> Introduce ECMP_Nexthop table in the SB db in order to track active
> ecmp-symmetric-reply connections and flush stale ones.
>
> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
> ---
>

Hi Lorenzo,

thank you for the patch. I have a couple of comments see below.


>  northd/en-northd.c       |  33 +++++++++++
>  northd/en-northd.h       |   4 ++
>  northd/inc-proc-northd.c |   7 ++-
>  northd/northd.c          | 117 +++++++++++++++++++++++++++++++++++++++
>  northd/northd.h          |  10 ++++
>  ovn-sb.ovsschema         |  16 +++++-
>  ovn-sb.xml               |  31 +++++++++++
>  tests/ovn-northd.at      |   4 ++
>  8 files changed, 220 insertions(+), 2 deletions(-)
>
> diff --git a/northd/en-northd.c b/northd/en-northd.c
> index a4de71ba1..a2823ab2b 100644
> --- a/northd/en-northd.c
> +++ b/northd/en-northd.c
> @@ -380,6 +380,23 @@ en_bfd_run(struct engine_node *node, void *data)
>      engine_set_node_state(node, EN_UPDATED);
>  }
>
> +void
> +en_ecmp_nexthop_run(struct engine_node *node, void *data)
> +{
> +    const struct engine_context *eng_ctx = engine_get_context();
> +    struct static_routes_data *static_routes_data =
> +        engine_get_input_data("static_routes", node);
> +    struct ecmp_nexthop_data *enh_data = data;
> +    const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table =
> +        EN_OVSDB_GET(engine_get_input("SB_ecmp_nexthop", node));
> +
> +    build_ecmp_nexthop_table(eng_ctx->ovnsb_idl_txn,
> +                             &static_routes_data->parsed_routes,
> +                             &enh_data->nexthops,
> +                             sbrec_ecmp_nexthop_table);
> +    engine_set_node_state(node, EN_UPDATED);
> +}
> +
>  void
>  *en_northd_init(struct engine_node *node OVS_UNUSED,
>                  struct engine_arg *arg OVS_UNUSED)
> @@ -421,6 +438,16 @@ void
>      return data;
>  }
>
> +void
> +*en_ecmp_nexthop_init(struct engine_node *node OVS_UNUSED,
> +                      struct engine_arg *arg OVS_UNUSED)
> +{
> +    struct ecmp_nexthop_data *data = xzalloc(sizeof *data);
> +
> +    ecmp_nexthop_init(data);
> +    return data;
> +}
> +
>  void
>  en_northd_cleanup(void *data)
>  {
> @@ -451,3 +478,9 @@ en_bfd_cleanup(void *data)
>  {
>      bfd_destroy(data);
>  }
> +
> +void
> +en_ecmp_nexthop_cleanup(void *data)
> +{
> +    ecmp_nexthop_destroy(data);
> +}
> diff --git a/northd/en-northd.h b/northd/en-northd.h
> index 424209c2f..c6d520f71 100644
> --- a/northd/en-northd.h
> +++ b/northd/en-northd.h
> @@ -34,5 +34,9 @@ void *en_bfd_init(struct engine_node *node OVS_UNUSED,
>  void en_bfd_cleanup(void *data);
>  bool bfd_change_handler(struct engine_node *node, void *data);
>  void en_bfd_run(struct engine_node *node, void *data);
> +void en_ecmp_nexthop_run(struct engine_node *node, void *data);
> +void *en_ecmp_nexthop_init(struct engine_node *node OVS_UNUSED,
> +                           struct engine_arg *arg OVS_UNUSED);
> +void en_ecmp_nexthop_cleanup(void *data);
>
>  #endif /* EN_NORTHD_H */
> diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
> index d907da14d..c4e5b9bf6 100644
> --- a/northd/inc-proc-northd.c
> +++ b/northd/inc-proc-northd.c
> @@ -103,7 +103,8 @@ static unixctl_cb_func chassis_features_list;
>      SB_NODE(fdb, "fdb") \
>      SB_NODE(static_mac_binding, "static_mac_binding") \
>      SB_NODE(chassis_template_var, "chassis_template_var") \
> -    SB_NODE(logical_dp_group, "logical_dp_group")
> +    SB_NODE(logical_dp_group, "logical_dp_group") \
> +    SB_NODE(ecmp_nexthop, "ecmp_nexthop")
>
>  enum sb_engine_node {
>  #define SB_NODE(NAME, NAME_STR) SB_##NAME,
> @@ -160,6 +161,7 @@ static ENGINE_NODE_WITH_CLEAR_TRACK_DATA(ls_stateful,
> "ls_stateful");
>  static ENGINE_NODE(route_policies, "route_policies");
>  static ENGINE_NODE(static_routes, "static_routes");
>  static ENGINE_NODE(bfd, "bfd");
> +static ENGINE_NODE(ecmp_nexthop, "ecmp_nexthop");
>
>  void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
>                            struct ovsdb_idl_loop *sb)
> @@ -261,6 +263,9 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
>      engine_add_input(&en_static_routes,
>                       &en_nb_logical_router_static_route, NULL);
>
> +    engine_add_input(&en_ecmp_nexthop, &en_sb_ecmp_nexthop, NULL);
> +    engine_add_input(&en_ecmp_nexthop, &en_static_routes, NULL);
> +
>      engine_add_input(&en_sync_meters, &en_nb_acl, NULL);
>      engine_add_input(&en_sync_meters, &en_nb_meter, NULL);
>      engine_add_input(&en_sync_meters, &en_sb_meter, NULL);
> diff --git a/northd/northd.c b/northd/northd.c
> index 2eb5f2be8..efe1e3f46 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -10039,6 +10039,105 @@ build_bfd_table(
>      return ret;
>  }
>
> +struct ecmp_nexthop_entry {
> +    struct hmap_node hmap_node;
> +
> +    char *nexthop;
> +    int id;
> +    bool stale;
> +};
>

I feel like simap would be enough in this case, basically anything that is
in the DB but not in the simap is stale.


> +
> +static struct ecmp_nexthop_entry *
> +ecmp_nexthop_lookup(const struct hmap *map, const char *nexthop, size_t
> hash)
> +{
> +    struct ecmp_nexthop_entry *e;
> +
> +    HMAP_FOR_EACH_WITH_HASH (e, hmap_node, hash, map) {
> +        if (!strcmp(e->nexthop, nexthop)) {
> +            return e;
> +        }
> +    }
> +    return NULL;
> +}
> +
> +#define NEXTHOP_IDS_LEN        65535
> +bool
> +build_ecmp_nexthop_table(
> +        struct ovsdb_idl_txn *ovnsb_txn,
> +        struct hmap *routes,
> +        struct hmap *nexthops,
> +        const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table)
> +{
> +    unsigned long *nexthop_ids = bitmap_allocate(NEXTHOP_IDS_LEN);
> +    bool ret = false;
> +
> +    if (!ovnsb_txn) {
> +        return false;
> +    }
> +
> +    struct ecmp_nexthop_entry *e;
> +    HMAP_FOR_EACH (e, hmap_node, nexthops) {
> +        bitmap_set1(nexthop_ids, e->id);
> +        e->stale = true;
> +    }
> +
> +    struct parsed_route *pr;
> +    HMAP_FOR_EACH (pr, key_node, routes) {
> +        if (!pr->ecmp_symmetric_reply) {
> +            continue;
> +        }
> +
> +        const struct nbrec_logical_router_static_route *r = pr->route;
> +        size_t hash = hash_string(r->nexthop, 0);
> +        e = ecmp_nexthop_lookup(nexthops, r->nexthop, hash);
> +        if (!e) {
> +            int id = bitmap_scan(nexthop_ids, 0, 1, NEXTHOP_IDS_LEN);
>
>
nit: You can store the id + 1 to pass it into the next bitmap_scan() so the
search doesn't always start from beginning.


> +            if (id == NEXTHOP_IDS_LEN) {
> +                static struct vlog_rate_limit rl =
> VLOG_RATE_LIMIT_INIT(1, 1);
> +                VLOG_WARN_RL(&rl, "nexthop id address space is
> exhausted");
> +                continue;
> +            }
> +            bitmap_set1(nexthop_ids, id);
> +            ret = true;
> +
> +            e = xzalloc(sizeof *e);
> +            e->nexthop = xstrdup(r->nexthop);
> +            e->id = id;
> +            hmap_insert(nexthops, &e->hmap_node, hash);
> +
> +            const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop
> +                = sbrec_ecmp_nexthop_insert(ovnsb_txn);
> +            sbrec_ecmp_nexthop_set_nexthop(sb_ecmp_nexthop, e->nexthop);
> +            sbrec_ecmp_nexthop_set_id(sb_ecmp_nexthop, id);
> +        } else {
> +            e->stale = false;
> +        }
> +    }
> +
> +    HMAP_FOR_EACH_SAFE (e, hmap_node, nexthops) {
> +        if (!e->stale) {
> +            continue;
> +        }
> +
> +        const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop;
> +        SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sb_ecmp_nexthop,
> +                                           sbrec_ecmp_nexthop_table) {
> +            if (!strcmp(sb_ecmp_nexthop->nexthop, e->nexthop)) {
> +                ret = true;
> +                sbrec_ecmp_nexthop_delete(sb_ecmp_nexthop);
> +                hmap_remove(nexthops, &e->hmap_node);
> +                free(e->nexthop);
> +                free(e);
> +                break;
> +            }
> +        }
> +    }
>

By using simap you can simplify this to two loops, first one that walks the
routes and creates the simap, second that removes entries from DB that are
not in the current simap.

+
> +    bitmap_free(nexthop_ids);
> +
> +    return ret;
> +}
> +
>  /* Returns a string of the IP address of the router port 'op' that
>   * overlaps with 'ip_s".  If one is not found, returns NULL.
>   *
> @@ -17815,6 +17914,12 @@ bfd_init(struct bfd_data *data)
>      hmap_init(&data->bfd_connections);
>  }
>
> +void
> +ecmp_nexthop_init(struct ecmp_nexthop_data *data)
> +{
> +    hmap_init(&data->nexthops);
> +}
> +
>  void
>  northd_destroy(struct northd_data *data)
>  {
> @@ -17888,6 +17993,18 @@ bfd_destroy(struct bfd_data *data)
>      hmap_destroy(&data->bfd_connections);
>  }
>
> +void
> +ecmp_nexthop_destroy(struct ecmp_nexthop_data *data)
> +{
> +    struct ecmp_nexthop_entry *e;
> +
> +    HMAP_FOR_EACH_POP (e, hmap_node, &data->nexthops) {
> +        free(e->nexthop);
> +        free(e);
> +    }
> +    hmap_destroy(&data->nexthops);
> +}
> +
>  void
>  ovnnb_db_run(struct northd_input *input_data,
>               struct northd_data *data,
> diff --git a/northd/northd.h b/northd/northd.h
> index 4c761f0a3..1e82a1a48 100644
> --- a/northd/northd.h
> +++ b/northd/northd.h
> @@ -183,6 +183,10 @@ struct bfd_data {
>      struct hmap bfd_connections;
>  };
>
> +struct ecmp_nexthop_data {
> +    struct hmap nexthops;
> +};
> +
>  struct lr_nat_table;
>
>  struct lflow_input {
> @@ -729,6 +733,12 @@ void static_routes_destroy(struct static_routes_data
> *);
>  void bfd_init(struct bfd_data *);
>  void bfd_destroy(struct bfd_data *);
>
> +bool build_ecmp_nexthop_table(struct ovsdb_idl_txn *,
> +                              struct hmap *, struct hmap *,
> +                              const struct sbrec_ecmp_nexthop_table *);
> +void ecmp_nexthop_init(struct ecmp_nexthop_data *);
> +void ecmp_nexthop_destroy(struct ecmp_nexthop_data *);
> +
>  struct lflow_table;
>  struct lr_stateful_tracked_data;
>  struct ls_stateful_tracked_data;
> diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema
> index b6c051ae6..12771c5d8 100644
> --- a/ovn-sb.ovsschema
> +++ b/ovn-sb.ovsschema
> @@ -1,7 +1,7 @@
>  {
>      "name": "OVN_Southbound",
>      "version": "20.34.0",
>
>
We should also bump the DB version number.


> -    "cksum": "2786607656 31376",
> +    "cksum": "439804439 32037",
>      "tables": {
>          "SB_Global": {
>              "columns": {
> @@ -608,6 +608,20 @@
>                                            "refTable":
> "Datapath_Binding"}}}},
>              "indexes": [["logical_port", "ip"]],
>              "isRoot": true},
> +        "ECMP_Nexthop": {
> +            "columns": {
> +                "nexthop": {"type": "string"},
> +                "id": {"type": {"key": {"type": "integer",
> +                                        "minInteger": 0,
> +                                        "maxInteger": 65535}}},
> +                "external_ids": {
> +                    "type": {"key": "string", "value": "string",
> +                             "min": 0, "max": "unlimited"}},
> +                "options": {
> +                    "type": {"key": "string", "value": "string",
> +                             "min": 0, "max": "unlimited"}}},
> +            "indexes": [["nexthop"]],
> +            "isRoot": true},
>          "Chassis_Template_Var": {
>              "columns": {
>                  "chassis": {"type": "string"},
> diff --git a/ovn-sb.xml b/ovn-sb.xml
> index 73a1be5ed..d84e956c5 100644
> --- a/ovn-sb.xml
> +++ b/ovn-sb.xml
> @@ -5179,4 +5179,35 @@ tcp.flags = RST;
>        The set of variable values for a given chassis.
>      </column>
>    </table>
> +
> +  <table name="ECMP_Nexthop">
> +    <p>
> +      Each record in this table represents an active ECMP route committed
> by
> +      <code>ovn-northd</code> to <code>ovs</code> connection-tracking
> table.
> +      <code>ECMP_Nexthop</code> table is used by
> <code>ovn-controller</code>
> +      to track active ct entries and to flush stale ones.
> +    </p>
> +    <column name="nexthop">
> +      <p>
> +        Nexthop IP address for this ECMP route. Nexthop IP address should
> +        be the IP address of a connected router port or the IP address of
> +        an external device used as nexthop for the given destination.
> +      </p>
> +    </column>
> +
> +    <column name="id">
> +      <p>
> +        Nexthop unique identifier. Nexthop ID is used to track active
> +        ecmp-symmetric-reply connections and flush stale ones.
> +      </p>
> +    </column>
> +
> +    <column name="options">
> +      Reserved for future use.
> +    </column>
> +
> +    <column name="external_ids">
> +      See <em>External IDs</em> at the beginning of this document.
> +    </column>
> +  </table>
>  </database>
> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> index 8a1db5fc0..ed550033c 100644
> --- a/tests/ovn-northd.at
> +++ b/tests/ovn-northd.at
> @@ -6666,6 +6666,7 @@ check ovn-nbctl lsp-set-addresses public-lr0 router
>  check ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public
>
>  check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1
> 192.168.0.10
> +check_row_count ECMP_Nexthop 1
>
>  ovn-sbctl dump-flows lr0 > lr0flows
>
> @@ -6677,6 +6678,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows |
> ovn_strip_lflows], [0], [dn
>  ])
>
>  check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1
> 192.168.0.20
> +check_row_count ECMP_Nexthop 2
>
>  ovn-sbctl dump-flows lr0 > lr0flows
>  AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows |
> ovn_strip_lflows], [0], [dnl
> @@ -6713,6 +6715,7 @@ AT_CHECK([grep -e "lr_in_arp_resolve.*ecmp" lr0flows
> | ovn_strip_lflows], [0], [
>
>  # add ecmp route with wrong nexthop
>  check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1
> 192.168.1.20
> +check_row_count ECMP_Nexthop 2
>
>  ovn-sbctl dump-flows lr0 > lr0flows
>  AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows |
> ovn_strip_lflows], [0], [dnl
> @@ -6727,6 +6730,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows |
> sed 's/192\.168\.0\..0/192.
>
>  check ovn-nbctl lr-route-del lr0
>  wait_row_count nb:Logical_Router_Static_Route 0
> +check_row_count ECMP_Nexthop 0
>
>  check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10
>  ovn-sbctl dump-flows lr0 > lr0flows
> --
> 2.45.1
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
>
Thanks,
Ales
diff mbox series

Patch

diff --git a/northd/en-northd.c b/northd/en-northd.c
index a4de71ba1..a2823ab2b 100644
--- a/northd/en-northd.c
+++ b/northd/en-northd.c
@@ -380,6 +380,23 @@  en_bfd_run(struct engine_node *node, void *data)
     engine_set_node_state(node, EN_UPDATED);
 }
 
+void
+en_ecmp_nexthop_run(struct engine_node *node, void *data)
+{
+    const struct engine_context *eng_ctx = engine_get_context();
+    struct static_routes_data *static_routes_data =
+        engine_get_input_data("static_routes", node);
+    struct ecmp_nexthop_data *enh_data = data;
+    const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table =
+        EN_OVSDB_GET(engine_get_input("SB_ecmp_nexthop", node));
+
+    build_ecmp_nexthop_table(eng_ctx->ovnsb_idl_txn,
+                             &static_routes_data->parsed_routes,
+                             &enh_data->nexthops,
+                             sbrec_ecmp_nexthop_table);
+    engine_set_node_state(node, EN_UPDATED);
+}
+
 void
 *en_northd_init(struct engine_node *node OVS_UNUSED,
                 struct engine_arg *arg OVS_UNUSED)
@@ -421,6 +438,16 @@  void
     return data;
 }
 
+void
+*en_ecmp_nexthop_init(struct engine_node *node OVS_UNUSED,
+                      struct engine_arg *arg OVS_UNUSED)
+{
+    struct ecmp_nexthop_data *data = xzalloc(sizeof *data);
+
+    ecmp_nexthop_init(data);
+    return data;
+}
+
 void
 en_northd_cleanup(void *data)
 {
@@ -451,3 +478,9 @@  en_bfd_cleanup(void *data)
 {
     bfd_destroy(data);
 }
+
+void
+en_ecmp_nexthop_cleanup(void *data)
+{
+    ecmp_nexthop_destroy(data);
+}
diff --git a/northd/en-northd.h b/northd/en-northd.h
index 424209c2f..c6d520f71 100644
--- a/northd/en-northd.h
+++ b/northd/en-northd.h
@@ -34,5 +34,9 @@  void *en_bfd_init(struct engine_node *node OVS_UNUSED,
 void en_bfd_cleanup(void *data);
 bool bfd_change_handler(struct engine_node *node, void *data);
 void en_bfd_run(struct engine_node *node, void *data);
+void en_ecmp_nexthop_run(struct engine_node *node, void *data);
+void *en_ecmp_nexthop_init(struct engine_node *node OVS_UNUSED,
+                           struct engine_arg *arg OVS_UNUSED);
+void en_ecmp_nexthop_cleanup(void *data);
 
 #endif /* EN_NORTHD_H */
diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
index d907da14d..c4e5b9bf6 100644
--- a/northd/inc-proc-northd.c
+++ b/northd/inc-proc-northd.c
@@ -103,7 +103,8 @@  static unixctl_cb_func chassis_features_list;
     SB_NODE(fdb, "fdb") \
     SB_NODE(static_mac_binding, "static_mac_binding") \
     SB_NODE(chassis_template_var, "chassis_template_var") \
-    SB_NODE(logical_dp_group, "logical_dp_group")
+    SB_NODE(logical_dp_group, "logical_dp_group") \
+    SB_NODE(ecmp_nexthop, "ecmp_nexthop")
 
 enum sb_engine_node {
 #define SB_NODE(NAME, NAME_STR) SB_##NAME,
@@ -160,6 +161,7 @@  static ENGINE_NODE_WITH_CLEAR_TRACK_DATA(ls_stateful, "ls_stateful");
 static ENGINE_NODE(route_policies, "route_policies");
 static ENGINE_NODE(static_routes, "static_routes");
 static ENGINE_NODE(bfd, "bfd");
+static ENGINE_NODE(ecmp_nexthop, "ecmp_nexthop");
 
 void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
                           struct ovsdb_idl_loop *sb)
@@ -261,6 +263,9 @@  void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
     engine_add_input(&en_static_routes,
                      &en_nb_logical_router_static_route, NULL);
 
+    engine_add_input(&en_ecmp_nexthop, &en_sb_ecmp_nexthop, NULL);
+    engine_add_input(&en_ecmp_nexthop, &en_static_routes, NULL);
+
     engine_add_input(&en_sync_meters, &en_nb_acl, NULL);
     engine_add_input(&en_sync_meters, &en_nb_meter, NULL);
     engine_add_input(&en_sync_meters, &en_sb_meter, NULL);
diff --git a/northd/northd.c b/northd/northd.c
index 2eb5f2be8..efe1e3f46 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -10039,6 +10039,105 @@  build_bfd_table(
     return ret;
 }
 
+struct ecmp_nexthop_entry {
+    struct hmap_node hmap_node;
+
+    char *nexthop;
+    int id;
+    bool stale;
+};
+
+static struct ecmp_nexthop_entry *
+ecmp_nexthop_lookup(const struct hmap *map, const char *nexthop, size_t hash)
+{
+    struct ecmp_nexthop_entry *e;
+
+    HMAP_FOR_EACH_WITH_HASH (e, hmap_node, hash, map) {
+        if (!strcmp(e->nexthop, nexthop)) {
+            return e;
+        }
+    }
+    return NULL;
+}
+
+#define NEXTHOP_IDS_LEN	65535
+bool
+build_ecmp_nexthop_table(
+        struct ovsdb_idl_txn *ovnsb_txn,
+        struct hmap *routes,
+        struct hmap *nexthops,
+        const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table)
+{
+    unsigned long *nexthop_ids = bitmap_allocate(NEXTHOP_IDS_LEN);
+    bool ret = false;
+
+    if (!ovnsb_txn) {
+        return false;
+    }
+
+    struct ecmp_nexthop_entry *e;
+    HMAP_FOR_EACH (e, hmap_node, nexthops) {
+        bitmap_set1(nexthop_ids, e->id);
+        e->stale = true;
+    }
+
+    struct parsed_route *pr;
+    HMAP_FOR_EACH (pr, key_node, routes) {
+        if (!pr->ecmp_symmetric_reply) {
+            continue;
+        }
+
+        const struct nbrec_logical_router_static_route *r = pr->route;
+        size_t hash = hash_string(r->nexthop, 0);
+        e = ecmp_nexthop_lookup(nexthops, r->nexthop, hash);
+        if (!e) {
+            int id = bitmap_scan(nexthop_ids, 0, 1, NEXTHOP_IDS_LEN);
+            if (id == NEXTHOP_IDS_LEN) {
+                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+                VLOG_WARN_RL(&rl, "nexthop id address space is exhausted");
+                continue;
+            }
+            bitmap_set1(nexthop_ids, id);
+            ret = true;
+
+            e = xzalloc(sizeof *e);
+            e->nexthop = xstrdup(r->nexthop);
+            e->id = id;
+            hmap_insert(nexthops, &e->hmap_node, hash);
+
+            const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop
+                = sbrec_ecmp_nexthop_insert(ovnsb_txn);
+            sbrec_ecmp_nexthop_set_nexthop(sb_ecmp_nexthop, e->nexthop);
+            sbrec_ecmp_nexthop_set_id(sb_ecmp_nexthop, id);
+        } else {
+            e->stale = false;
+        }
+    }
+
+    HMAP_FOR_EACH_SAFE (e, hmap_node, nexthops) {
+        if (!e->stale) {
+            continue;
+        }
+
+        const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop;
+        SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sb_ecmp_nexthop,
+                                           sbrec_ecmp_nexthop_table) {
+            if (!strcmp(sb_ecmp_nexthop->nexthop, e->nexthop)) {
+                ret = true;
+                sbrec_ecmp_nexthop_delete(sb_ecmp_nexthop);
+                hmap_remove(nexthops, &e->hmap_node);
+                free(e->nexthop);
+                free(e);
+                break;
+            }
+        }
+    }
+
+    bitmap_free(nexthop_ids);
+
+    return ret;
+}
+
 /* Returns a string of the IP address of the router port 'op' that
  * overlaps with 'ip_s".  If one is not found, returns NULL.
  *
@@ -17815,6 +17914,12 @@  bfd_init(struct bfd_data *data)
     hmap_init(&data->bfd_connections);
 }
 
+void
+ecmp_nexthop_init(struct ecmp_nexthop_data *data)
+{
+    hmap_init(&data->nexthops);
+}
+
 void
 northd_destroy(struct northd_data *data)
 {
@@ -17888,6 +17993,18 @@  bfd_destroy(struct bfd_data *data)
     hmap_destroy(&data->bfd_connections);
 }
 
+void
+ecmp_nexthop_destroy(struct ecmp_nexthop_data *data)
+{
+    struct ecmp_nexthop_entry *e;
+
+    HMAP_FOR_EACH_POP (e, hmap_node, &data->nexthops) {
+        free(e->nexthop);
+        free(e);
+    }
+    hmap_destroy(&data->nexthops);
+}
+
 void
 ovnnb_db_run(struct northd_input *input_data,
              struct northd_data *data,
diff --git a/northd/northd.h b/northd/northd.h
index 4c761f0a3..1e82a1a48 100644
--- a/northd/northd.h
+++ b/northd/northd.h
@@ -183,6 +183,10 @@  struct bfd_data {
     struct hmap bfd_connections;
 };
 
+struct ecmp_nexthop_data {
+    struct hmap nexthops;
+};
+
 struct lr_nat_table;
 
 struct lflow_input {
@@ -729,6 +733,12 @@  void static_routes_destroy(struct static_routes_data *);
 void bfd_init(struct bfd_data *);
 void bfd_destroy(struct bfd_data *);
 
+bool build_ecmp_nexthop_table(struct ovsdb_idl_txn *,
+                              struct hmap *, struct hmap *,
+                              const struct sbrec_ecmp_nexthop_table *);
+void ecmp_nexthop_init(struct ecmp_nexthop_data *);
+void ecmp_nexthop_destroy(struct ecmp_nexthop_data *);
+
 struct lflow_table;
 struct lr_stateful_tracked_data;
 struct ls_stateful_tracked_data;
diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema
index b6c051ae6..12771c5d8 100644
--- a/ovn-sb.ovsschema
+++ b/ovn-sb.ovsschema
@@ -1,7 +1,7 @@ 
 {
     "name": "OVN_Southbound",
     "version": "20.34.0",
-    "cksum": "2786607656 31376",
+    "cksum": "439804439 32037",
     "tables": {
         "SB_Global": {
             "columns": {
@@ -608,6 +608,20 @@ 
                                           "refTable": "Datapath_Binding"}}}},
             "indexes": [["logical_port", "ip"]],
             "isRoot": true},
+        "ECMP_Nexthop": {
+            "columns": {
+                "nexthop": {"type": "string"},
+                "id": {"type": {"key": {"type": "integer",
+                                        "minInteger": 0,
+                                        "maxInteger": 65535}}},
+                "external_ids": {
+                    "type": {"key": "string", "value": "string",
+                             "min": 0, "max": "unlimited"}},
+                "options": {
+                    "type": {"key": "string", "value": "string",
+                             "min": 0, "max": "unlimited"}}},
+            "indexes": [["nexthop"]],
+            "isRoot": true},
         "Chassis_Template_Var": {
             "columns": {
                 "chassis": {"type": "string"},
diff --git a/ovn-sb.xml b/ovn-sb.xml
index 73a1be5ed..d84e956c5 100644
--- a/ovn-sb.xml
+++ b/ovn-sb.xml
@@ -5179,4 +5179,35 @@  tcp.flags = RST;
       The set of variable values for a given chassis.
     </column>
   </table>
+
+  <table name="ECMP_Nexthop">
+    <p>
+      Each record in this table represents an active ECMP route committed by
+      <code>ovn-northd</code> to <code>ovs</code> connection-tracking table.
+      <code>ECMP_Nexthop</code> table is used by <code>ovn-controller</code>
+      to track active ct entries and to flush stale ones.
+    </p>
+    <column name="nexthop">
+      <p>
+        Nexthop IP address for this ECMP route. Nexthop IP address should
+        be the IP address of a connected router port or the IP address of
+        an external device used as nexthop for the given destination.
+      </p>
+    </column>
+
+    <column name="id">
+      <p>
+        Nexthop unique identifier. Nexthop ID is used to track active
+        ecmp-symmetric-reply connections and flush stale ones.
+      </p>
+    </column>
+
+    <column name="options">
+      Reserved for future use.
+    </column>
+
+    <column name="external_ids">
+      See <em>External IDs</em> at the beginning of this document.
+    </column>
+  </table>
 </database>
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index 8a1db5fc0..ed550033c 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -6666,6 +6666,7 @@  check ovn-nbctl lsp-set-addresses public-lr0 router
 check ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public
 
 check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.0.10
+check_row_count ECMP_Nexthop 1
 
 ovn-sbctl dump-flows lr0 > lr0flows
 
@@ -6677,6 +6678,7 @@  AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | ovn_strip_lflows], [0], [dn
 ])
 
 check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.0.20
+check_row_count ECMP_Nexthop 2
 
 ovn-sbctl dump-flows lr0 > lr0flows
 AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | ovn_strip_lflows], [0], [dnl
@@ -6713,6 +6715,7 @@  AT_CHECK([grep -e "lr_in_arp_resolve.*ecmp" lr0flows | ovn_strip_lflows], [0], [
 
 # add ecmp route with wrong nexthop
 check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.1.20
+check_row_count ECMP_Nexthop 2
 
 ovn-sbctl dump-flows lr0 > lr0flows
 AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | ovn_strip_lflows], [0], [dnl
@@ -6727,6 +6730,7 @@  AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.
 
 check ovn-nbctl lr-route-del lr0
 wait_row_count nb:Logical_Router_Static_Route 0
+check_row_count ECMP_Nexthop 0
 
 check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10
 ovn-sbctl dump-flows lr0 > lr0flows