diff mbox series

[ovs-dev,v2,1/3] northd: Introduce ECMP_Nexthop table in SB db.

Message ID f31fb6630039c9b310b7fa489247d354f8a46719.1710257650.git.lorenzo.bianconi@redhat.com
State Changes Requested
Headers show
Series Introduce ECMP_nexthop monitor in ovn-controller | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_ovn-kubernetes success github build: passed
ovsrobot/github-robot-_Build_and_Test fail github build: failed

Commit Message

Lorenzo Bianconi March 12, 2024, 3:59 p.m. UTC
Introduce ECMP_Nexthop table in the SB db in order to track active
ecmp-symmetric-reply connections and flush stale ones.

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
---
 northd/en-northd.c       |  4 +++
 northd/inc-proc-northd.c |  8 +++--
 northd/northd.c          | 73 ++++++++++++++++++++++++++++++++++++++++
 northd/northd.h          |  3 ++
 ovn-sb.ovsschema         | 18 ++++++++--
 ovn-sb.xml               | 26 ++++++++++++++
 tests/ovn-northd.at      |  4 +++
 7 files changed, 132 insertions(+), 4 deletions(-)

Comments

Dumitru Ceara April 12, 2024, 3:33 p.m. UTC | #1
On 3/12/24 16:59, Lorenzo Bianconi wrote:
> Introduce ECMP_Nexthop table in the SB db in order to track active
> ecmp-symmetric-reply connections and flush stale ones.
> 
> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
> ---

Hi Lorenzo,

>  northd/en-northd.c       |  4 +++
>  northd/inc-proc-northd.c |  8 +++--
>  northd/northd.c          | 73 ++++++++++++++++++++++++++++++++++++++++
>  northd/northd.h          |  3 ++
>  ovn-sb.ovsschema         | 18 ++++++++--
>  ovn-sb.xml               | 26 ++++++++++++++
>  tests/ovn-northd.at      |  4 +++
>  7 files changed, 132 insertions(+), 4 deletions(-)
> 
> diff --git a/northd/en-northd.c b/northd/en-northd.c
> index 4479b4aff..2f8408fbc 100644
> --- a/northd/en-northd.c
> +++ b/northd/en-northd.c
> @@ -76,6 +76,8 @@ northd_get_input_data(struct engine_node *node,
>          EN_OVSDB_GET(engine_get_input("NB_chassis_template_var", node));
>      input_data->nbrec_mirror_table =
>          EN_OVSDB_GET(engine_get_input("NB_mirror", node));
> +    input_data->nbrec_static_route_table =
> +        EN_OVSDB_GET(engine_get_input("NB_logical_router_static_route", node));
>  
>      input_data->sbrec_datapath_binding_table =
>          EN_OVSDB_GET(engine_get_input("SB_datapath_binding", node));
> @@ -101,6 +103,8 @@ northd_get_input_data(struct engine_node *node,
>          EN_OVSDB_GET(engine_get_input("SB_chassis_template_var", node));
>      input_data->sbrec_mirror_table =
>          EN_OVSDB_GET(engine_get_input("SB_mirror", node));
> +    input_data->sbrec_ecmp_nh_table =
> +        EN_OVSDB_GET(engine_get_input("SB_ecmp_nexthop", node));
>  
>      struct ed_type_lb_data *lb_data =
>          engine_get_input_data("lb_data", node);
> diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
> index e1073812c..1c58da0bf 100644
> --- a/northd/inc-proc-northd.c
> +++ b/northd/inc-proc-northd.c
> @@ -61,7 +61,8 @@ static unixctl_cb_func chassis_features_list;
>      NB_NODE(meter, "meter") \
>      NB_NODE(bfd, "bfd") \
>      NB_NODE(static_mac_binding, "static_mac_binding") \
> -    NB_NODE(chassis_template_var, "chassis_template_var")
> +    NB_NODE(chassis_template_var, "chassis_template_var") \
> +    NB_NODE(logical_router_static_route, "logical_router_static_route")
>  
>      enum nb_engine_node {
>  #define NB_NODE(NAME, NAME_STR) NB_##NAME,
> @@ -101,7 +102,8 @@ static unixctl_cb_func chassis_features_list;
>      SB_NODE(fdb, "fdb") \
>      SB_NODE(static_mac_binding, "static_mac_binding") \
>      SB_NODE(chassis_template_var, "chassis_template_var") \
> -    SB_NODE(logical_dp_group, "logical_dp_group")
> +    SB_NODE(logical_dp_group, "logical_dp_group") \
> +    SB_NODE(ecmp_nexthop, "ecmp_nexthop")
>  
>  enum sb_engine_node {
>  #define SB_NODE(NAME, NAME_STR) SB_##NAME,
> @@ -180,6 +182,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
>      engine_add_input(&en_northd, &en_nb_mirror, NULL);
>      engine_add_input(&en_northd, &en_nb_static_mac_binding, NULL);
>      engine_add_input(&en_northd, &en_nb_chassis_template_var, NULL);
> +    engine_add_input(&en_northd, &en_nb_logical_router_static_route, NULL);
>  
>      engine_add_input(&en_northd, &en_sb_chassis, NULL);
>      engine_add_input(&en_northd, &en_sb_mirror, NULL);
> @@ -192,6 +195,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
>      engine_add_input(&en_northd, &en_sb_fdb, NULL);
>      engine_add_input(&en_northd, &en_sb_static_mac_binding, NULL);
>      engine_add_input(&en_northd, &en_sb_chassis_template_var, NULL);
> +    engine_add_input(&en_northd, &en_sb_ecmp_nexthop, NULL);
>      engine_add_input(&en_northd, &en_global_config,
>                       northd_global_config_handler);
>  
> diff --git a/northd/northd.c b/northd/northd.c
> index 1839b7d8b..7b8f442e1 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -16655,6 +16655,76 @@ sync_mirrors(struct ovsdb_idl_txn *ovnsb_txn,
>      shash_destroy(&sb_mirrors);
>  }
>  
> +struct sb_ecmp_nexthop_entry {
> +    struct hmap_node hmap_node;
> +    const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop;
> +};
> +
> +static struct sb_ecmp_nexthop_entry *
> +sb_ecmp_nexthop_lookup(const struct hmap *map, const char *nexthop)
> +{
> +    uint32_t hash = hash_string(nexthop, 0);
> +    struct sb_ecmp_nexthop_entry *enh_e;
> +
> +    HMAP_FOR_EACH_WITH_HASH (enh_e, hmap_node, hash, map) {
> +        if (!strcmp(enh_e->sb_ecmp_nexthop->nexthop, nexthop)) {
> +            return enh_e;
> +        }
> +    }
> +    return NULL;
> +}
> +
> +#define NEXTHOP_IDS_LEN	65535
> +static void
> +sync_ecmp_symmetric_reply_nexthop(struct ovsdb_idl_txn *ovnsb_txn,
> +        const struct nbrec_logical_router_static_route_table *nbrec_sr_table,
> +        const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nh_table)
> +{
> +    unsigned long *nexthop_ids = bitmap_allocate(NEXTHOP_IDS_LEN);
> +    struct hmap sb_only = HMAP_INITIALIZER(&sb_only);
> +    const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop;
> +    struct sb_ecmp_nexthop_entry *enh_e;
> +
> +    SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sb_ecmp_nexthop, sbrec_ecmp_nh_table) {
> +        uint32_t hash = hash_string(sb_ecmp_nexthop->nexthop, 0);
> +        enh_e = xmalloc(sizeof *enh_e);
> +        enh_e->sb_ecmp_nexthop = sb_ecmp_nexthop;
> +        bitmap_set1(nexthop_ids, sb_ecmp_nexthop->id);
> +        hmap_insert(&sb_only, &enh_e->hmap_node, hash);
> +    }
> +
> +    const struct nbrec_logical_router_static_route *r;
> +    NBREC_LOGICAL_ROUTER_STATIC_ROUTE_TABLE_FOR_EACH (r, nbrec_sr_table) {
> +        if (!smap_get_bool(&r->options, "ecmp_symmetric_reply", false)) {

Do we really need to re-parse route options?

> +            continue;
> +        }
> +

It's my impression that this doesn't take BFD into account at all.
Shouldn't we remove SB nexthop entries if BFD is down for them?

Or should ovn-controller take care of ignoring the next hops that have
BFD down?

> +        enh_e = sb_ecmp_nexthop_lookup(&sb_only, r->nexthop);
> +        if (!enh_e) {
> +            int id = bitmap_scan(nexthop_ids, 0, 1, NEXTHOP_IDS_LEN);
> +            if (id == NEXTHOP_IDS_LEN) {

Should we at least log something here?

> +                continue;
> +            }
> +            bitmap_set1(nexthop_ids, id);
> +
> +            sb_ecmp_nexthop = sbrec_ecmp_nexthop_insert(ovnsb_txn);
> +            sbrec_ecmp_nexthop_set_nexthop(sb_ecmp_nexthop, r->nexthop);
> +            sbrec_ecmp_nexthop_set_id(sb_ecmp_nexthop, id);
> +        } else {
> +            hmap_remove(&sb_only, &enh_e->hmap_node);
> +            free(enh_e);
> +        }
> +    }
> +
> +    HMAP_FOR_EACH_POP (enh_e, hmap_node, &sb_only) {
> +        sbrec_ecmp_nexthop_delete(enh_e->sb_ecmp_nexthop);
> +        free(enh_e);
> +    }
> +    hmap_destroy(&sb_only);
> +
> +    bitmap_free(nexthop_ids);
> +}
> +
>  /*
>   * struct 'dns_info' is used to sync the DNS records between OVN Northbound db
>   * and Southbound db.
> @@ -17335,6 +17405,9 @@ ovnnb_db_run(struct northd_input *input_data,
>                       &data->ls_datapaths.datapaths);
>      sync_template_vars(ovnsb_txn, input_data->nbrec_chassis_template_var_table,
>                         input_data->sbrec_chassis_template_var_table);
> +    sync_ecmp_symmetric_reply_nexthop(ovnsb_txn,
> +                                      input_data->nbrec_static_route_table,
> +                                      input_data->sbrec_ecmp_nh_table);
>  
>      cleanup_stale_fdb_entries(input_data->sbrec_fdb_table,
>                                &data->ls_datapaths.datapaths);
> diff --git a/northd/northd.h b/northd/northd.h
> index 3f1cd8341..2d4bc9363 100644
> --- a/northd/northd.h
> +++ b/northd/northd.h
> @@ -34,6 +34,8 @@ struct northd_input {
>      const struct nbrec_chassis_template_var_table
>          *nbrec_chassis_template_var_table;
>      const struct nbrec_mirror_table *nbrec_mirror_table;
> +    const struct nbrec_logical_router_static_route_table
> +        *nbrec_static_route_table;
>  
>      /* Southbound table references */
>      const struct sbrec_datapath_binding_table *sbrec_datapath_binding_table;
> @@ -50,6 +52,7 @@ struct northd_input {
>      const struct sbrec_chassis_template_var_table
>          *sbrec_chassis_template_var_table;
>      const struct sbrec_mirror_table *sbrec_mirror_table;
> +    const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nh_table;
>  
>      /* Northd lb data node inputs*/
>      const struct hmap *lbs;
> diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema
> index 84ae09515..c7a1cbf59 100644
> --- a/ovn-sb.ovsschema
> +++ b/ovn-sb.ovsschema
> @@ -1,7 +1,7 @@
>  {
>      "name": "OVN_Southbound",
> -    "version": "20.33.0",
> -    "cksum": "4076371179 31328",
> +    "version": "20.34.0",
> +    "cksum": "2226521861 31989",
>      "tables": {
>          "SB_Global": {
>              "columns": {
> @@ -607,6 +607,20 @@
>                                            "refTable": "Datapath_Binding"}}}},
>              "indexes": [["logical_port", "ip"]],
>              "isRoot": true},
> +        "ECMP_Nexthop": {
> +            "columns": {
> +                "nexthop": {"type": "string"},
> +                "id": {"type": {"key": {"type": "integer",
> +                                        "minInteger": 0,
> +                                        "maxInteger": 65535}}},
> +                "external_ids": {
> +                    "type": {"key": "string", "value": "string",
> +                             "min": 0, "max": "unlimited"}},
> +                "options": {
> +                    "type": {"key": "string", "value": "string",
> +                             "min": 0, "max": "unlimited"}}},
> +            "indexes": [["nexthop"]],
> +            "isRoot": true},
>          "Chassis_Template_Var": {
>              "columns": {
>                  "chassis": {"type": "string"},
> diff --git a/ovn-sb.xml b/ovn-sb.xml
> index ac4e585f2..7e51411f4 100644
> --- a/ovn-sb.xml
> +++ b/ovn-sb.xml
> @@ -5095,4 +5095,30 @@ tcp.flags = RST;
>        The set of variable values for a given chassis.
>      </column>
>    </table>
> +
> +  <table name="ECMP_Nexthop">
> +    <column name="nexthop">
> +      <p>
> +        Nexthop IP address for this route.  Nexthop IP address should be the IP
> +        address of a connected router port or the IP address of a logical port
> +        or can be set to <code>discard</code> for dropping packets which match
> +        the given route.
> +      </p>
> +    </column>
> +
> +    <column name="id">
> +      <p>
> +        Nexthop unique indetifier. Nexthop ID is used to track active
> +        ecmp-symmetric-reply connections and flush stale ones.
> +      </p>
> +    </column>
> +
> +    <column name="options">
> +      Reserved for future use.
> +    </column>
> +
> +    <column name="external_ids">
> +      See <em>External IDs</em> at the beginning of this document.
> +    </column>
> +  </table>
>  </database>
> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> index 89aed5adc..2160e8de7 100644
> --- a/tests/ovn-northd.at
> +++ b/tests/ovn-northd.at
> @@ -6542,6 +6542,7 @@ check ovn-nbctl lsp-set-addresses public-lr0 router
>  check ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public
>  
>  check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.0.10
> +check_row_count ECMP_Nexthop 1
>  
>  ovn-sbctl dump-flows lr0 > lr0flows
>  
> @@ -6553,6 +6554,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | ovn_strip_lflows], [0], [dn
>  ])
>  
>  check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.0.20
> +check_row_count ECMP_Nexthop 2
>  
>  ovn-sbctl dump-flows lr0 > lr0flows
>  AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | ovn_strip_lflows], [0], [dnl
> @@ -6589,6 +6591,7 @@ AT_CHECK([grep -e "lr_in_arp_resolve.*ecmp" lr0flows | ovn_strip_lflows], [0], [
>  
>  # add ecmp route with wrong nexthop
>  check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.1.20
> +check_row_count ECMP_Nexthop 3
>  
>  ovn-sbctl dump-flows lr0 > lr0flows
>  AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | ovn_strip_lflows], [0], [dnl
> @@ -6603,6 +6606,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.
>  
>  check ovn-nbctl lr-route-del lr0
>  wait_row_count nb:Logical_Router_Static_Route 0
> +check_row_count ECMP_Nexthop 0
>  
>  check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10
>  ovn-sbctl dump-flows lr0 > lr0flows

Regards,
Dumitru
Han Zhou May 29, 2024, 1:03 p.m. UTC | #2
On Wed, Mar 13, 2024 at 12:00 AM Lorenzo Bianconi <
lorenzo.bianconi@redhat.com> wrote:
>
> Introduce ECMP_Nexthop table in the SB db in order to track active
> ecmp-symmetric-reply connections and flush stale ones.
>
> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
> ---
>  northd/en-northd.c       |  4 +++
>  northd/inc-proc-northd.c |  8 +++--
>  northd/northd.c          | 73 ++++++++++++++++++++++++++++++++++++++++
>  northd/northd.h          |  3 ++
>  ovn-sb.ovsschema         | 18 ++++++++--
>  ovn-sb.xml               | 26 ++++++++++++++
>  tests/ovn-northd.at      |  4 +++
>  7 files changed, 132 insertions(+), 4 deletions(-)
>
> diff --git a/northd/en-northd.c b/northd/en-northd.c
> index 4479b4aff..2f8408fbc 100644
> --- a/northd/en-northd.c
> +++ b/northd/en-northd.c
> @@ -76,6 +76,8 @@ northd_get_input_data(struct engine_node *node,
>          EN_OVSDB_GET(engine_get_input("NB_chassis_template_var", node));
>      input_data->nbrec_mirror_table =
>          EN_OVSDB_GET(engine_get_input("NB_mirror", node));
> +    input_data->nbrec_static_route_table =
> +        EN_OVSDB_GET(engine_get_input("NB_logical_router_static_route",
node));
>
>      input_data->sbrec_datapath_binding_table =
>          EN_OVSDB_GET(engine_get_input("SB_datapath_binding", node));
> @@ -101,6 +103,8 @@ northd_get_input_data(struct engine_node *node,
>          EN_OVSDB_GET(engine_get_input("SB_chassis_template_var", node));
>      input_data->sbrec_mirror_table =
>          EN_OVSDB_GET(engine_get_input("SB_mirror", node));
> +    input_data->sbrec_ecmp_nh_table =
> +        EN_OVSDB_GET(engine_get_input("SB_ecmp_nexthop", node));
>
>      struct ed_type_lb_data *lb_data =
>          engine_get_input_data("lb_data", node);
> diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
> index e1073812c..1c58da0bf 100644
> --- a/northd/inc-proc-northd.c
> +++ b/northd/inc-proc-northd.c
> @@ -61,7 +61,8 @@ static unixctl_cb_func chassis_features_list;
>      NB_NODE(meter, "meter") \
>      NB_NODE(bfd, "bfd") \
>      NB_NODE(static_mac_binding, "static_mac_binding") \
> -    NB_NODE(chassis_template_var, "chassis_template_var")
> +    NB_NODE(chassis_template_var, "chassis_template_var") \
> +    NB_NODE(logical_router_static_route, "logical_router_static_route")
>
>      enum nb_engine_node {
>  #define NB_NODE(NAME, NAME_STR) NB_##NAME,
> @@ -101,7 +102,8 @@ static unixctl_cb_func chassis_features_list;
>      SB_NODE(fdb, "fdb") \
>      SB_NODE(static_mac_binding, "static_mac_binding") \
>      SB_NODE(chassis_template_var, "chassis_template_var") \
> -    SB_NODE(logical_dp_group, "logical_dp_group")
> +    SB_NODE(logical_dp_group, "logical_dp_group") \
> +    SB_NODE(ecmp_nexthop, "ecmp_nexthop")
>
>  enum sb_engine_node {
>  #define SB_NODE(NAME, NAME_STR) SB_##NAME,
> @@ -180,6 +182,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
>      engine_add_input(&en_northd, &en_nb_mirror, NULL);
>      engine_add_input(&en_northd, &en_nb_static_mac_binding, NULL);
>      engine_add_input(&en_northd, &en_nb_chassis_template_var, NULL);
> +    engine_add_input(&en_northd, &en_nb_logical_router_static_route,
NULL);
>
>      engine_add_input(&en_northd, &en_sb_chassis, NULL);
>      engine_add_input(&en_northd, &en_sb_mirror, NULL);
> @@ -192,6 +195,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
>      engine_add_input(&en_northd, &en_sb_fdb, NULL);
>      engine_add_input(&en_northd, &en_sb_static_mac_binding, NULL);
>      engine_add_input(&en_northd, &en_sb_chassis_template_var, NULL);
> +    engine_add_input(&en_northd, &en_sb_ecmp_nexthop, NULL);
>      engine_add_input(&en_northd, &en_global_config,
>                       northd_global_config_handler);
>
> diff --git a/northd/northd.c b/northd/northd.c
> index 1839b7d8b..7b8f442e1 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -16655,6 +16655,76 @@ sync_mirrors(struct ovsdb_idl_txn *ovnsb_txn,
>      shash_destroy(&sb_mirrors);
>  }
>
> +struct sb_ecmp_nexthop_entry {
> +    struct hmap_node hmap_node;
> +    const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop;
> +};
> +
> +static struct sb_ecmp_nexthop_entry *
> +sb_ecmp_nexthop_lookup(const struct hmap *map, const char *nexthop)
> +{
> +    uint32_t hash = hash_string(nexthop, 0);
> +    struct sb_ecmp_nexthop_entry *enh_e;
> +
> +    HMAP_FOR_EACH_WITH_HASH (enh_e, hmap_node, hash, map) {
> +        if (!strcmp(enh_e->sb_ecmp_nexthop->nexthop, nexthop)) {
> +            return enh_e;
> +        }
> +    }
> +    return NULL;
> +}
> +
> +#define NEXTHOP_IDS_LEN        65535
> +static void
> +sync_ecmp_symmetric_reply_nexthop(struct ovsdb_idl_txn *ovnsb_txn,
> +        const struct nbrec_logical_router_static_route_table
*nbrec_sr_table,
> +        const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nh_table)
> +{
> +    unsigned long *nexthop_ids = bitmap_allocate(NEXTHOP_IDS_LEN);
> +    struct hmap sb_only = HMAP_INITIALIZER(&sb_only);
> +    const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop;
> +    struct sb_ecmp_nexthop_entry *enh_e;
> +
> +    SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sb_ecmp_nexthop,
sbrec_ecmp_nh_table) {
> +        uint32_t hash = hash_string(sb_ecmp_nexthop->nexthop, 0);
> +        enh_e = xmalloc(sizeof *enh_e);
> +        enh_e->sb_ecmp_nexthop = sb_ecmp_nexthop;
> +        bitmap_set1(nexthop_ids, sb_ecmp_nexthop->id);
> +        hmap_insert(&sb_only, &enh_e->hmap_node, hash);
> +    }
> +
> +    const struct nbrec_logical_router_static_route *r;
> +    NBREC_LOGICAL_ROUTER_STATIC_ROUTE_TABLE_FOR_EACH (r, nbrec_sr_table)
{
> +        if (!smap_get_bool(&r->options, "ecmp_symmetric_reply", false)) {
> +            continue;
> +        }
> +
> +        enh_e = sb_ecmp_nexthop_lookup(&sb_only, r->nexthop);
> +        if (!enh_e) {
> +            int id = bitmap_scan(nexthop_ids, 0, 1, NEXTHOP_IDS_LEN);
> +            if (id == NEXTHOP_IDS_LEN) {
> +                continue;
> +            }
> +            bitmap_set1(nexthop_ids, id);
> +
> +            sb_ecmp_nexthop = sbrec_ecmp_nexthop_insert(ovnsb_txn);
> +            sbrec_ecmp_nexthop_set_nexthop(sb_ecmp_nexthop, r->nexthop);
> +            sbrec_ecmp_nexthop_set_id(sb_ecmp_nexthop, id);
> +        } else {
> +            hmap_remove(&sb_only, &enh_e->hmap_node);
> +            free(enh_e);
> +        }
> +    }
> +
> +    HMAP_FOR_EACH_POP (enh_e, hmap_node, &sb_only) {
> +        sbrec_ecmp_nexthop_delete(enh_e->sb_ecmp_nexthop);
> +        free(enh_e);
> +    }
> +    hmap_destroy(&sb_only);
> +
> +    bitmap_free(nexthop_ids);
> +}
> +
>  /*
>   * struct 'dns_info' is used to sync the DNS records between OVN
Northbound db
>   * and Southbound db.
> @@ -17335,6 +17405,9 @@ ovnnb_db_run(struct northd_input *input_data,
>                       &data->ls_datapaths.datapaths);
>      sync_template_vars(ovnsb_txn,
input_data->nbrec_chassis_template_var_table,
>                         input_data->sbrec_chassis_template_var_table);
> +    sync_ecmp_symmetric_reply_nexthop(ovnsb_txn,
> +
 input_data->nbrec_static_route_table,
> +                                      input_data->sbrec_ecmp_nh_table);
>
>      cleanup_stale_fdb_entries(input_data->sbrec_fdb_table,
>                                &data->ls_datapaths.datapaths);
> diff --git a/northd/northd.h b/northd/northd.h
> index 3f1cd8341..2d4bc9363 100644
> --- a/northd/northd.h
> +++ b/northd/northd.h
> @@ -34,6 +34,8 @@ struct northd_input {
>      const struct nbrec_chassis_template_var_table
>          *nbrec_chassis_template_var_table;
>      const struct nbrec_mirror_table *nbrec_mirror_table;
> +    const struct nbrec_logical_router_static_route_table
> +        *nbrec_static_route_table;
>
>      /* Southbound table references */
>      const struct sbrec_datapath_binding_table
*sbrec_datapath_binding_table;
> @@ -50,6 +52,7 @@ struct northd_input {
>      const struct sbrec_chassis_template_var_table
>          *sbrec_chassis_template_var_table;
>      const struct sbrec_mirror_table *sbrec_mirror_table;
> +    const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nh_table;
>
>      /* Northd lb data node inputs*/
>      const struct hmap *lbs;
> diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema
> index 84ae09515..c7a1cbf59 100644
> --- a/ovn-sb.ovsschema
> +++ b/ovn-sb.ovsschema
> @@ -1,7 +1,7 @@
>  {
>      "name": "OVN_Southbound",
> -    "version": "20.33.0",
> -    "cksum": "4076371179 31328",
> +    "version": "20.34.0",
> +    "cksum": "2226521861 31989",
>      "tables": {
>          "SB_Global": {
>              "columns": {
> @@ -607,6 +607,20 @@
>                                            "refTable":
"Datapath_Binding"}}}},
>              "indexes": [["logical_port", "ip"]],
>              "isRoot": true},
> +        "ECMP_Nexthop": {
> +            "columns": {
> +                "nexthop": {"type": "string"},
> +                "id": {"type": {"key": {"type": "integer",
> +                                        "minInteger": 0,
> +                                        "maxInteger": 65535}}},
> +                "external_ids": {
> +                    "type": {"key": "string", "value": "string",
> +                             "min": 0, "max": "unlimited"}},
> +                "options": {
> +                    "type": {"key": "string", "value": "string",
> +                             "min": 0, "max": "unlimited"}}},
> +            "indexes": [["nexthop"]],
> +            "isRoot": true},
>          "Chassis_Template_Var": {
>              "columns": {
>                  "chassis": {"type": "string"},
> diff --git a/ovn-sb.xml b/ovn-sb.xml
> index ac4e585f2..7e51411f4 100644
> --- a/ovn-sb.xml
> +++ b/ovn-sb.xml
> @@ -5095,4 +5095,30 @@ tcp.flags = RST;
>        The set of variable values for a given chassis.
>      </column>
>    </table>
> +
> +  <table name="ECMP_Nexthop">
> +    <column name="nexthop">
> +      <p>
> +        Nexthop IP address for this route.  Nexthop IP address should be
the IP
> +        address of a connected router port or the IP address of a
logical port
> +        or can be set to <code>discard</code> for dropping packets which
match
> +        the given route.
> +      </p>
> +    </column>
> +
> +    <column name="id">
> +      <p>
> +        Nexthop unique indetifier. Nexthop ID is used to track active
> +        ecmp-symmetric-reply connections and flush stale ones.
> +      </p>
> +    </column>
> +
> +    <column name="options">
> +      Reserved for future use.
> +    </column>
> +
> +    <column name="external_ids">
> +      See <em>External IDs</em> at the beginning of this document.
> +    </column>
> +  </table>
>  </database>

Hi Lorenzo, I haven't reviewed all the patches of this series yet, but it
is really hard for me to understand this new SB table just by looking at
the documentation above. Each table should have some basic description for
the table itself before describing columns, i.e. the purpose of the table
and what does each row represent. Also in the description of the columns,
it is confusing what "this route" and "the given route" mean. It may be
clear to you since you are implementing the feature, but I think the SB
documentation needs to be clear for someone new to the feature.

Thanks,
Han

> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> index 89aed5adc..2160e8de7 100644
> --- a/tests/ovn-northd.at
> +++ b/tests/ovn-northd.at
> @@ -6542,6 +6542,7 @@ check ovn-nbctl lsp-set-addresses public-lr0 router
>  check ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public
>
>  check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0
1.0.0.1 192.168.0.10
> +check_row_count ECMP_Nexthop 1
>
>  ovn-sbctl dump-flows lr0 > lr0flows
>
> @@ -6553,6 +6554,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows
| ovn_strip_lflows], [0], [dn
>  ])
>
>  check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0
1.0.0.1 192.168.0.20
> +check_row_count ECMP_Nexthop 2
>
>  ovn-sbctl dump-flows lr0 > lr0flows
>  AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows |
ovn_strip_lflows], [0], [dnl
> @@ -6589,6 +6591,7 @@ AT_CHECK([grep -e "lr_in_arp_resolve.*ecmp"
lr0flows | ovn_strip_lflows], [0], [
>
>  # add ecmp route with wrong nexthop
>  check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0
1.0.0.1 192.168.1.20
> +check_row_count ECMP_Nexthop 3
>
>  ovn-sbctl dump-flows lr0 > lr0flows
>  AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows |
ovn_strip_lflows], [0], [dnl
> @@ -6603,6 +6606,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows
| sed 's/192\.168\.0\..0/192.
>
>  check ovn-nbctl lr-route-del lr0
>  wait_row_count nb:Logical_Router_Static_Route 0
> +check_row_count ECMP_Nexthop 0
>
>  check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10
>  ovn-sbctl dump-flows lr0 > lr0flows
> --
> 2.44.0
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
diff mbox series

Patch

diff --git a/northd/en-northd.c b/northd/en-northd.c
index 4479b4aff..2f8408fbc 100644
--- a/northd/en-northd.c
+++ b/northd/en-northd.c
@@ -76,6 +76,8 @@  northd_get_input_data(struct engine_node *node,
         EN_OVSDB_GET(engine_get_input("NB_chassis_template_var", node));
     input_data->nbrec_mirror_table =
         EN_OVSDB_GET(engine_get_input("NB_mirror", node));
+    input_data->nbrec_static_route_table =
+        EN_OVSDB_GET(engine_get_input("NB_logical_router_static_route", node));
 
     input_data->sbrec_datapath_binding_table =
         EN_OVSDB_GET(engine_get_input("SB_datapath_binding", node));
@@ -101,6 +103,8 @@  northd_get_input_data(struct engine_node *node,
         EN_OVSDB_GET(engine_get_input("SB_chassis_template_var", node));
     input_data->sbrec_mirror_table =
         EN_OVSDB_GET(engine_get_input("SB_mirror", node));
+    input_data->sbrec_ecmp_nh_table =
+        EN_OVSDB_GET(engine_get_input("SB_ecmp_nexthop", node));
 
     struct ed_type_lb_data *lb_data =
         engine_get_input_data("lb_data", node);
diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
index e1073812c..1c58da0bf 100644
--- a/northd/inc-proc-northd.c
+++ b/northd/inc-proc-northd.c
@@ -61,7 +61,8 @@  static unixctl_cb_func chassis_features_list;
     NB_NODE(meter, "meter") \
     NB_NODE(bfd, "bfd") \
     NB_NODE(static_mac_binding, "static_mac_binding") \
-    NB_NODE(chassis_template_var, "chassis_template_var")
+    NB_NODE(chassis_template_var, "chassis_template_var") \
+    NB_NODE(logical_router_static_route, "logical_router_static_route")
 
     enum nb_engine_node {
 #define NB_NODE(NAME, NAME_STR) NB_##NAME,
@@ -101,7 +102,8 @@  static unixctl_cb_func chassis_features_list;
     SB_NODE(fdb, "fdb") \
     SB_NODE(static_mac_binding, "static_mac_binding") \
     SB_NODE(chassis_template_var, "chassis_template_var") \
-    SB_NODE(logical_dp_group, "logical_dp_group")
+    SB_NODE(logical_dp_group, "logical_dp_group") \
+    SB_NODE(ecmp_nexthop, "ecmp_nexthop")
 
 enum sb_engine_node {
 #define SB_NODE(NAME, NAME_STR) SB_##NAME,
@@ -180,6 +182,7 @@  void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
     engine_add_input(&en_northd, &en_nb_mirror, NULL);
     engine_add_input(&en_northd, &en_nb_static_mac_binding, NULL);
     engine_add_input(&en_northd, &en_nb_chassis_template_var, NULL);
+    engine_add_input(&en_northd, &en_nb_logical_router_static_route, NULL);
 
     engine_add_input(&en_northd, &en_sb_chassis, NULL);
     engine_add_input(&en_northd, &en_sb_mirror, NULL);
@@ -192,6 +195,7 @@  void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
     engine_add_input(&en_northd, &en_sb_fdb, NULL);
     engine_add_input(&en_northd, &en_sb_static_mac_binding, NULL);
     engine_add_input(&en_northd, &en_sb_chassis_template_var, NULL);
+    engine_add_input(&en_northd, &en_sb_ecmp_nexthop, NULL);
     engine_add_input(&en_northd, &en_global_config,
                      northd_global_config_handler);
 
diff --git a/northd/northd.c b/northd/northd.c
index 1839b7d8b..7b8f442e1 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -16655,6 +16655,76 @@  sync_mirrors(struct ovsdb_idl_txn *ovnsb_txn,
     shash_destroy(&sb_mirrors);
 }
 
+struct sb_ecmp_nexthop_entry {
+    struct hmap_node hmap_node;
+    const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop;
+};
+
+static struct sb_ecmp_nexthop_entry *
+sb_ecmp_nexthop_lookup(const struct hmap *map, const char *nexthop)
+{
+    uint32_t hash = hash_string(nexthop, 0);
+    struct sb_ecmp_nexthop_entry *enh_e;
+
+    HMAP_FOR_EACH_WITH_HASH (enh_e, hmap_node, hash, map) {
+        if (!strcmp(enh_e->sb_ecmp_nexthop->nexthop, nexthop)) {
+            return enh_e;
+        }
+    }
+    return NULL;
+}
+
+#define NEXTHOP_IDS_LEN	65535
+static void
+sync_ecmp_symmetric_reply_nexthop(struct ovsdb_idl_txn *ovnsb_txn,
+        const struct nbrec_logical_router_static_route_table *nbrec_sr_table,
+        const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nh_table)
+{
+    unsigned long *nexthop_ids = bitmap_allocate(NEXTHOP_IDS_LEN);
+    struct hmap sb_only = HMAP_INITIALIZER(&sb_only);
+    const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop;
+    struct sb_ecmp_nexthop_entry *enh_e;
+
+    SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sb_ecmp_nexthop, sbrec_ecmp_nh_table) {
+        uint32_t hash = hash_string(sb_ecmp_nexthop->nexthop, 0);
+        enh_e = xmalloc(sizeof *enh_e);
+        enh_e->sb_ecmp_nexthop = sb_ecmp_nexthop;
+        bitmap_set1(nexthop_ids, sb_ecmp_nexthop->id);
+        hmap_insert(&sb_only, &enh_e->hmap_node, hash);
+    }
+
+    const struct nbrec_logical_router_static_route *r;
+    NBREC_LOGICAL_ROUTER_STATIC_ROUTE_TABLE_FOR_EACH (r, nbrec_sr_table) {
+        if (!smap_get_bool(&r->options, "ecmp_symmetric_reply", false)) {
+            continue;
+        }
+
+        enh_e = sb_ecmp_nexthop_lookup(&sb_only, r->nexthop);
+        if (!enh_e) {
+            int id = bitmap_scan(nexthop_ids, 0, 1, NEXTHOP_IDS_LEN);
+            if (id == NEXTHOP_IDS_LEN) {
+                continue;
+            }
+            bitmap_set1(nexthop_ids, id);
+
+            sb_ecmp_nexthop = sbrec_ecmp_nexthop_insert(ovnsb_txn);
+            sbrec_ecmp_nexthop_set_nexthop(sb_ecmp_nexthop, r->nexthop);
+            sbrec_ecmp_nexthop_set_id(sb_ecmp_nexthop, id);
+        } else {
+            hmap_remove(&sb_only, &enh_e->hmap_node);
+            free(enh_e);
+        }
+    }
+
+    HMAP_FOR_EACH_POP (enh_e, hmap_node, &sb_only) {
+        sbrec_ecmp_nexthop_delete(enh_e->sb_ecmp_nexthop);
+        free(enh_e);
+    }
+    hmap_destroy(&sb_only);
+
+    bitmap_free(nexthop_ids);
+}
+
 /*
  * struct 'dns_info' is used to sync the DNS records between OVN Northbound db
  * and Southbound db.
@@ -17335,6 +17405,9 @@  ovnnb_db_run(struct northd_input *input_data,
                      &data->ls_datapaths.datapaths);
     sync_template_vars(ovnsb_txn, input_data->nbrec_chassis_template_var_table,
                        input_data->sbrec_chassis_template_var_table);
+    sync_ecmp_symmetric_reply_nexthop(ovnsb_txn,
+                                      input_data->nbrec_static_route_table,
+                                      input_data->sbrec_ecmp_nh_table);
 
     cleanup_stale_fdb_entries(input_data->sbrec_fdb_table,
                               &data->ls_datapaths.datapaths);
diff --git a/northd/northd.h b/northd/northd.h
index 3f1cd8341..2d4bc9363 100644
--- a/northd/northd.h
+++ b/northd/northd.h
@@ -34,6 +34,8 @@  struct northd_input {
     const struct nbrec_chassis_template_var_table
         *nbrec_chassis_template_var_table;
     const struct nbrec_mirror_table *nbrec_mirror_table;
+    const struct nbrec_logical_router_static_route_table
+        *nbrec_static_route_table;
 
     /* Southbound table references */
     const struct sbrec_datapath_binding_table *sbrec_datapath_binding_table;
@@ -50,6 +52,7 @@  struct northd_input {
     const struct sbrec_chassis_template_var_table
         *sbrec_chassis_template_var_table;
     const struct sbrec_mirror_table *sbrec_mirror_table;
+    const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nh_table;
 
     /* Northd lb data node inputs*/
     const struct hmap *lbs;
diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema
index 84ae09515..c7a1cbf59 100644
--- a/ovn-sb.ovsschema
+++ b/ovn-sb.ovsschema
@@ -1,7 +1,7 @@ 
 {
     "name": "OVN_Southbound",
-    "version": "20.33.0",
-    "cksum": "4076371179 31328",
+    "version": "20.34.0",
+    "cksum": "2226521861 31989",
     "tables": {
         "SB_Global": {
             "columns": {
@@ -607,6 +607,20 @@ 
                                           "refTable": "Datapath_Binding"}}}},
             "indexes": [["logical_port", "ip"]],
             "isRoot": true},
+        "ECMP_Nexthop": {
+            "columns": {
+                "nexthop": {"type": "string"},
+                "id": {"type": {"key": {"type": "integer",
+                                        "minInteger": 0,
+                                        "maxInteger": 65535}}},
+                "external_ids": {
+                    "type": {"key": "string", "value": "string",
+                             "min": 0, "max": "unlimited"}},
+                "options": {
+                    "type": {"key": "string", "value": "string",
+                             "min": 0, "max": "unlimited"}}},
+            "indexes": [["nexthop"]],
+            "isRoot": true},
         "Chassis_Template_Var": {
             "columns": {
                 "chassis": {"type": "string"},
diff --git a/ovn-sb.xml b/ovn-sb.xml
index ac4e585f2..7e51411f4 100644
--- a/ovn-sb.xml
+++ b/ovn-sb.xml
@@ -5095,4 +5095,30 @@  tcp.flags = RST;
       The set of variable values for a given chassis.
     </column>
   </table>
+
+  <table name="ECMP_Nexthop">
+    <column name="nexthop">
+      <p>
+        Nexthop IP address for this route.  Nexthop IP address should be the IP
+        address of a connected router port or the IP address of a logical port
+        or can be set to <code>discard</code> for dropping packets which match
+        the given route.
+      </p>
+    </column>
+
+    <column name="id">
+      <p>
+        Nexthop unique indetifier. Nexthop ID is used to track active
+        ecmp-symmetric-reply connections and flush stale ones.
+      </p>
+    </column>
+
+    <column name="options">
+      Reserved for future use.
+    </column>
+
+    <column name="external_ids">
+      See <em>External IDs</em> at the beginning of this document.
+    </column>
+  </table>
 </database>
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index 89aed5adc..2160e8de7 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -6542,6 +6542,7 @@  check ovn-nbctl lsp-set-addresses public-lr0 router
 check ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public
 
 check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.0.10
+check_row_count ECMP_Nexthop 1
 
 ovn-sbctl dump-flows lr0 > lr0flows
 
@@ -6553,6 +6554,7 @@  AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | ovn_strip_lflows], [0], [dn
 ])
 
 check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.0.20
+check_row_count ECMP_Nexthop 2
 
 ovn-sbctl dump-flows lr0 > lr0flows
 AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | ovn_strip_lflows], [0], [dnl
@@ -6589,6 +6591,7 @@  AT_CHECK([grep -e "lr_in_arp_resolve.*ecmp" lr0flows | ovn_strip_lflows], [0], [
 
 # add ecmp route with wrong nexthop
 check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.168.1.20
+check_row_count ECMP_Nexthop 3
 
 ovn-sbctl dump-flows lr0 > lr0flows
 AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | ovn_strip_lflows], [0], [dnl
@@ -6603,6 +6606,7 @@  AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.
 
 check ovn-nbctl lr-route-del lr0
 wait_row_count nb:Logical_Router_Static_Route 0
+check_row_count ECMP_Nexthop 0
 
 check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10
 ovn-sbctl dump-flows lr0 > lr0flows