Message ID | 20210917030538.9773-2-zhewang@nvidia.com |
---|---|
State | Changes Requested |
Delegated to: | Han Zhou |
Headers | show |
Series | [ovs-dev,1/2] Revert "northd: Don't poll ovsdb before the connection is fully established" | expand |
Context | Check | Description |
---|---|---|
ovsrobot/apply-robot | warning | apply and check: warning |
ovsrobot/github-robot-_Build_and_Test | success | github build: passed |
ovsrobot/github-robot-_ovn-kubernetes | fail | github build: failed |
On Thu, Sep 16, 2021 at 8:06 PM Zhen Wang <zhewang@nvidia.com> wrote: > > From: zhen wang <zhewang@nvidia.com> > > When ovn-northd work in HA mode, ovn-northd will not update the probe > interval in standby mode. This patch address the problem by updating > the value in main loop. > Thanks Zhen for the fix! Maybe the impact and steps to reproduce the problem can be mentioned. It may be: step1: power off the NB/SB leader step2: kill the active northd (and the standbys would never take over) Acked-by: Han Zhou <hzhou@ovn.org> > Signed-off-by: zhen wang <zhewang@nvidia.com> > --- > northd/northd.c | 25 ------------------------- > northd/ovn-northd.c | 29 +++++++++++++++++++++++++++++ > 2 files changed, 29 insertions(+), 25 deletions(-) > > diff --git a/northd/northd.c b/northd/northd.c > index b7e64470f..89b0e4921 100644 > --- a/northd/northd.c > +++ b/northd/northd.c > @@ -72,10 +72,6 @@ static struct eth_addr svc_monitor_mac_ea; > * Otherwise, it will avoid using it. The default is true. */ > static bool use_ct_inv_match = true; > > -/* Default probe interval for NB and SB DB connections. */ > -#define DEFAULT_PROBE_INTERVAL_MSEC 5000 > -static int northd_probe_interval_nb = DEFAULT_PROBE_INTERVAL_MSEC; > -static int northd_probe_interval_sb = DEFAULT_PROBE_INTERVAL_MSEC; > #define MAX_OVN_TAGS 4096 > > /* Pipeline stages. */ > @@ -14082,20 +14078,6 @@ build_meter_groups(struct northd_context *ctx, > } > } > > -static int > -get_probe_interval(const char *db, const struct nbrec_nb_global *nb) > -{ > - int default_interval = (db && !stream_or_pstream_needs_probes(db) > - ? 0 : DEFAULT_PROBE_INTERVAL_MSEC); > - int interval = smap_get_int(&nb->options, > - "northd_probe_interval", default_interval); > - > - if (interval > 0 && interval < 1000) { > - interval = 1000; > - } > - return interval; > -} > - > static void > ovnnb_db_run(struct northd_context *ctx, > struct ovsdb_idl_index *sbrec_chassis_by_name, > @@ -14182,13 +14164,6 @@ ovnnb_db_run(struct northd_context *ctx, > > smap_destroy(&options); > > - /* Update the probe interval. */ > - northd_probe_interval_nb = get_probe_interval(ctx->ovnnb_db, nb); > - northd_probe_interval_sb = get_probe_interval(ctx->ovnsb_db, nb); > - > - ovsdb_idl_set_probe_interval(ctx->ovnnb_idl, northd_probe_interval_nb); > - ovsdb_idl_set_probe_interval(ctx->ovnsb_idl, northd_probe_interval_sb); > - > use_parallel_build = > (smap_get_bool(&nb->options, "use_parallel_build", false) && > can_parallelize_hashes(false)); > diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c > index 6d4c5defc..0a9fd8190 100644 > --- a/northd/ovn-northd.c > +++ b/northd/ovn-northd.c > @@ -65,6 +65,10 @@ static const char *ssl_private_key_file; > static const char *ssl_certificate_file; > static const char *ssl_ca_cert_file; > > +/* Default probe interval for NB and SB DB connections. */ > +#define DEFAULT_PROBE_INTERVAL_MSEC 5000 > +static int northd_probe_interval_nb = DEFAULT_PROBE_INTERVAL_MSEC; > +static int northd_probe_interval_sb = DEFAULT_PROBE_INTERVAL_MSEC; > static bool use_parallel_build = true; > static struct hashrow_locks lflow_locks; > > @@ -577,6 +581,20 @@ update_ssl_config(void) > } > } > > +static int > +get_probe_interval(const char *db, const struct nbrec_nb_global *nb) > +{ > + int default_interval = (db && !stream_or_pstream_needs_probes(db) > + ? 0 : DEFAULT_PROBE_INTERVAL_MSEC); > + int interval = smap_get_int(&nb->options, > + "northd_probe_interval", default_interval); > + > + if (interval > 0 && interval < 1000) { > + interval = 1000; > + } > + return interval; > +} > + > int > main(int argc, char *argv[]) > { > @@ -911,6 +929,12 @@ main(int argc, char *argv[]) > > while (!exiting) { > update_ssl_config(); > + const struct nbrec_nb_global *nb = nbrec_nb_global_first(ovnnb_idl_loop.idl); > + /* Update the probe interval. */ > + if (nb) { > + northd_probe_interval_nb = get_probe_interval(ovnnb_db, nb); > + northd_probe_interval_sb = get_probe_interval(ovnsb_db, nb); > + } > memory_run(); > if (memory_should_report()) { > struct simap usage = SIMAP_INITIALIZER(&usage); > @@ -1000,6 +1024,11 @@ main(int argc, char *argv[]) > poll_immediate_wake(); > } > > + ovsdb_idl_set_probe_interval(ovnnb_idl_loop.idl, > + northd_probe_interval_nb); > + ovsdb_idl_set_probe_interval(ovnsb_idl_loop.idl, > + northd_probe_interval_sb); > + > if (reset_ovnsb_idl_min_index) { > VLOG_INFO("Resetting southbound database cluster state"); > ovsdb_idl_reset_min_index(ovnsb_idl_loop.idl); > -- > 2.20.1 >
On Fri, Sep 17, 2021 at 1:41 PM Han Zhou <hzhou@ovn.org> wrote: > > > > On Thu, Sep 16, 2021 at 8:06 PM Zhen Wang <zhewang@nvidia.com> wrote: > > > > From: zhen wang <zhewang@nvidia.com> > > > > When ovn-northd work in HA mode, ovn-northd will not update the probe > > interval in standby mode. This patch address the problem by updating > > the value in main loop. > > > > Thanks Zhen for the fix! Maybe the impact and steps to reproduce the problem can be mentioned. It may be: > step1: power off the NB/SB leader > step2: kill the active northd (and the standbys would never take over) > > Acked-by: Han Zhou <hzhou@ovn.org> > > > Signed-off-by: zhen wang <zhewang@nvidia.com> > > --- > > northd/northd.c | 25 ------------------------- > > northd/ovn-northd.c | 29 +++++++++++++++++++++++++++++ > > 2 files changed, 29 insertions(+), 25 deletions(-) > > > > diff --git a/northd/northd.c b/northd/northd.c > > index b7e64470f..89b0e4921 100644 > > --- a/northd/northd.c > > +++ b/northd/northd.c > > @@ -72,10 +72,6 @@ static struct eth_addr svc_monitor_mac_ea; > > * Otherwise, it will avoid using it. The default is true. */ > > static bool use_ct_inv_match = true; > > > > -/* Default probe interval for NB and SB DB connections. */ > > -#define DEFAULT_PROBE_INTERVAL_MSEC 5000 > > -static int northd_probe_interval_nb = DEFAULT_PROBE_INTERVAL_MSEC; > > -static int northd_probe_interval_sb = DEFAULT_PROBE_INTERVAL_MSEC; > > #define MAX_OVN_TAGS 4096 > > > > /* Pipeline stages. */ > > @@ -14082,20 +14078,6 @@ build_meter_groups(struct northd_context *ctx, > > } > > } > > > > -static int > > -get_probe_interval(const char *db, const struct nbrec_nb_global *nb) > > -{ > > - int default_interval = (db && !stream_or_pstream_needs_probes(db) > > - ? 0 : DEFAULT_PROBE_INTERVAL_MSEC); > > - int interval = smap_get_int(&nb->options, > > - "northd_probe_interval", default_interval); > > - > > - if (interval > 0 && interval < 1000) { > > - interval = 1000; > > - } > > - return interval; > > -} > > - > > static void > > ovnnb_db_run(struct northd_context *ctx, > > struct ovsdb_idl_index *sbrec_chassis_by_name, > > @@ -14182,13 +14164,6 @@ ovnnb_db_run(struct northd_context *ctx, > > > > smap_destroy(&options); > > > > - /* Update the probe interval. */ > > - northd_probe_interval_nb = get_probe_interval(ctx->ovnnb_db, nb); > > - northd_probe_interval_sb = get_probe_interval(ctx->ovnsb_db, nb); > > - > > - ovsdb_idl_set_probe_interval(ctx->ovnnb_idl, northd_probe_interval_nb); > > - ovsdb_idl_set_probe_interval(ctx->ovnsb_idl, northd_probe_interval_sb); > > - > > use_parallel_build = > > (smap_get_bool(&nb->options, "use_parallel_build", false) && > > can_parallelize_hashes(false)); > > diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c > > index 6d4c5defc..0a9fd8190 100644 > > --- a/northd/ovn-northd.c > > +++ b/northd/ovn-northd.c > > @@ -65,6 +65,10 @@ static const char *ssl_private_key_file; > > static const char *ssl_certificate_file; > > static const char *ssl_ca_cert_file; > > > > +/* Default probe interval for NB and SB DB connections. */ > > +#define DEFAULT_PROBE_INTERVAL_MSEC 5000 > > +static int northd_probe_interval_nb = DEFAULT_PROBE_INTERVAL_MSEC; > > +static int northd_probe_interval_sb = DEFAULT_PROBE_INTERVAL_MSEC; > > static bool use_parallel_build = true; > > static struct hashrow_locks lflow_locks; > > > > @@ -577,6 +581,20 @@ update_ssl_config(void) > > } > > } > > > > +static int > > +get_probe_interval(const char *db, const struct nbrec_nb_global *nb) > > +{ > > + int default_interval = (db && !stream_or_pstream_needs_probes(db) > > + ? 0 : DEFAULT_PROBE_INTERVAL_MSEC); > > + int interval = smap_get_int(&nb->options, > > + "northd_probe_interval", default_interval); > > + > > + if (interval > 0 && interval < 1000) { > > + interval = 1000; > > + } > > + return interval; > > +} > > + > > int > > main(int argc, char *argv[]) > > { > > @@ -911,6 +929,12 @@ main(int argc, char *argv[]) > > > > while (!exiting) { > > update_ssl_config(); > > + const struct nbrec_nb_global *nb = nbrec_nb_global_first(ovnnb_idl_loop.idl); > > + /* Update the probe interval. */ > > + if (nb) { > > + northd_probe_interval_nb = get_probe_interval(ovnnb_db, nb); > > + northd_probe_interval_sb = get_probe_interval(ovnsb_db, nb); > > + } Sorry that I forgot a minor comment here: it is better to move this down, right before setting the probe intervals. Otherwise, since this is before ovsdb_idl_loop_run(), it reads the config from the last iteration, and the new setting (if updated) will be applied at the next iteration only. > > memory_run(); > > if (memory_should_report()) { > > struct simap usage = SIMAP_INITIALIZER(&usage); > > @@ -1000,6 +1024,11 @@ main(int argc, char *argv[]) > > poll_immediate_wake(); > > } > > > > + ovsdb_idl_set_probe_interval(ovnnb_idl_loop.idl, > > + northd_probe_interval_nb); > > + ovsdb_idl_set_probe_interval(ovnsb_idl_loop.idl, > > + northd_probe_interval_sb); > > + > > if (reset_ovnsb_idl_min_index) { > > VLOG_INFO("Resetting southbound database cluster state"); > > ovsdb_idl_reset_min_index(ovnsb_idl_loop.idl); > > -- > > 2.20.1 > >
diff --git a/northd/northd.c b/northd/northd.c index b7e64470f..89b0e4921 100644 --- a/northd/northd.c +++ b/northd/northd.c @@ -72,10 +72,6 @@ static struct eth_addr svc_monitor_mac_ea; * Otherwise, it will avoid using it. The default is true. */ static bool use_ct_inv_match = true; -/* Default probe interval for NB and SB DB connections. */ -#define DEFAULT_PROBE_INTERVAL_MSEC 5000 -static int northd_probe_interval_nb = DEFAULT_PROBE_INTERVAL_MSEC; -static int northd_probe_interval_sb = DEFAULT_PROBE_INTERVAL_MSEC; #define MAX_OVN_TAGS 4096 /* Pipeline stages. */ @@ -14082,20 +14078,6 @@ build_meter_groups(struct northd_context *ctx, } } -static int -get_probe_interval(const char *db, const struct nbrec_nb_global *nb) -{ - int default_interval = (db && !stream_or_pstream_needs_probes(db) - ? 0 : DEFAULT_PROBE_INTERVAL_MSEC); - int interval = smap_get_int(&nb->options, - "northd_probe_interval", default_interval); - - if (interval > 0 && interval < 1000) { - interval = 1000; - } - return interval; -} - static void ovnnb_db_run(struct northd_context *ctx, struct ovsdb_idl_index *sbrec_chassis_by_name, @@ -14182,13 +14164,6 @@ ovnnb_db_run(struct northd_context *ctx, smap_destroy(&options); - /* Update the probe interval. */ - northd_probe_interval_nb = get_probe_interval(ctx->ovnnb_db, nb); - northd_probe_interval_sb = get_probe_interval(ctx->ovnsb_db, nb); - - ovsdb_idl_set_probe_interval(ctx->ovnnb_idl, northd_probe_interval_nb); - ovsdb_idl_set_probe_interval(ctx->ovnsb_idl, northd_probe_interval_sb); - use_parallel_build = (smap_get_bool(&nb->options, "use_parallel_build", false) && can_parallelize_hashes(false)); diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c index 6d4c5defc..0a9fd8190 100644 --- a/northd/ovn-northd.c +++ b/northd/ovn-northd.c @@ -65,6 +65,10 @@ static const char *ssl_private_key_file; static const char *ssl_certificate_file; static const char *ssl_ca_cert_file; +/* Default probe interval for NB and SB DB connections. */ +#define DEFAULT_PROBE_INTERVAL_MSEC 5000 +static int northd_probe_interval_nb = DEFAULT_PROBE_INTERVAL_MSEC; +static int northd_probe_interval_sb = DEFAULT_PROBE_INTERVAL_MSEC; static bool use_parallel_build = true; static struct hashrow_locks lflow_locks; @@ -577,6 +581,20 @@ update_ssl_config(void) } } +static int +get_probe_interval(const char *db, const struct nbrec_nb_global *nb) +{ + int default_interval = (db && !stream_or_pstream_needs_probes(db) + ? 0 : DEFAULT_PROBE_INTERVAL_MSEC); + int interval = smap_get_int(&nb->options, + "northd_probe_interval", default_interval); + + if (interval > 0 && interval < 1000) { + interval = 1000; + } + return interval; +} + int main(int argc, char *argv[]) { @@ -911,6 +929,12 @@ main(int argc, char *argv[]) while (!exiting) { update_ssl_config(); + const struct nbrec_nb_global *nb = nbrec_nb_global_first(ovnnb_idl_loop.idl); + /* Update the probe interval. */ + if (nb) { + northd_probe_interval_nb = get_probe_interval(ovnnb_db, nb); + northd_probe_interval_sb = get_probe_interval(ovnsb_db, nb); + } memory_run(); if (memory_should_report()) { struct simap usage = SIMAP_INITIALIZER(&usage); @@ -1000,6 +1024,11 @@ main(int argc, char *argv[]) poll_immediate_wake(); } + ovsdb_idl_set_probe_interval(ovnnb_idl_loop.idl, + northd_probe_interval_nb); + ovsdb_idl_set_probe_interval(ovnsb_idl_loop.idl, + northd_probe_interval_sb); + if (reset_ovnsb_idl_min_index) { VLOG_INFO("Resetting southbound database cluster state"); ovsdb_idl_reset_min_index(ovnsb_idl_loop.idl);