diff mbox series

[ovs-dev,2/2] Update the probe interval in main loop.

Message ID 20210917030538.9773-2-zhewang@nvidia.com
State Changes Requested
Delegated to: Han Zhou
Headers show
Series [ovs-dev,1/2] Revert "northd: Don't poll ovsdb before the connection is fully established" | expand

Checks

Context Check Description
ovsrobot/apply-robot warning apply and check: warning
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/github-robot-_ovn-kubernetes fail github build: failed

Commit Message

Zhen Wang Sept. 17, 2021, 3:05 a.m. UTC
From: zhen wang <zhewang@nvidia.com>

When ovn-northd work in HA mode, ovn-northd will not update the probe
interval in standby mode. This patch address the problem by updating
the value in main loop.

Signed-off-by: zhen wang <zhewang@nvidia.com>
---
 northd/northd.c     | 25 -------------------------
 northd/ovn-northd.c | 29 +++++++++++++++++++++++++++++
 2 files changed, 29 insertions(+), 25 deletions(-)

Comments

Han Zhou Sept. 17, 2021, 8:41 p.m. UTC | #1
On Thu, Sep 16, 2021 at 8:06 PM Zhen Wang <zhewang@nvidia.com> wrote:
>
> From: zhen wang <zhewang@nvidia.com>
>
> When ovn-northd work in HA mode, ovn-northd will not update the probe
> interval in standby mode. This patch address the problem by updating
> the value in main loop.
>

Thanks Zhen for the fix! Maybe the impact and steps to reproduce the
problem can be mentioned. It may be:
step1: power off the NB/SB leader
step2: kill the active northd (and the standbys would never take over)

Acked-by: Han Zhou <hzhou@ovn.org>

> Signed-off-by: zhen wang <zhewang@nvidia.com>
> ---
>  northd/northd.c     | 25 -------------------------
>  northd/ovn-northd.c | 29 +++++++++++++++++++++++++++++
>  2 files changed, 29 insertions(+), 25 deletions(-)
>
> diff --git a/northd/northd.c b/northd/northd.c
> index b7e64470f..89b0e4921 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -72,10 +72,6 @@ static struct eth_addr svc_monitor_mac_ea;
>   * Otherwise, it will avoid using it.  The default is true. */
>  static bool use_ct_inv_match = true;
>
> -/* Default probe interval for NB and SB DB connections. */
> -#define DEFAULT_PROBE_INTERVAL_MSEC 5000
> -static int northd_probe_interval_nb = DEFAULT_PROBE_INTERVAL_MSEC;
> -static int northd_probe_interval_sb = DEFAULT_PROBE_INTERVAL_MSEC;
>  #define MAX_OVN_TAGS 4096
>
>  /* Pipeline stages. */
> @@ -14082,20 +14078,6 @@ build_meter_groups(struct northd_context *ctx,
>      }
>  }
>
> -static int
> -get_probe_interval(const char *db, const struct nbrec_nb_global *nb)
> -{
> -    int default_interval = (db && !stream_or_pstream_needs_probes(db)
> -                            ? 0 : DEFAULT_PROBE_INTERVAL_MSEC);
> -    int interval = smap_get_int(&nb->options,
> -                                "northd_probe_interval",
default_interval);
> -
> -    if (interval > 0 && interval < 1000) {
> -        interval = 1000;
> -    }
> -    return interval;
> -}
> -
>  static void
>  ovnnb_db_run(struct northd_context *ctx,
>               struct ovsdb_idl_index *sbrec_chassis_by_name,
> @@ -14182,13 +14164,6 @@ ovnnb_db_run(struct northd_context *ctx,
>
>      smap_destroy(&options);
>
> -    /* Update the probe interval. */
> -    northd_probe_interval_nb = get_probe_interval(ctx->ovnnb_db, nb);
> -    northd_probe_interval_sb = get_probe_interval(ctx->ovnsb_db, nb);
> -
> -    ovsdb_idl_set_probe_interval(ctx->ovnnb_idl,
northd_probe_interval_nb);
> -    ovsdb_idl_set_probe_interval(ctx->ovnsb_idl,
northd_probe_interval_sb);
> -
>      use_parallel_build =
>          (smap_get_bool(&nb->options, "use_parallel_build", false) &&
>           can_parallelize_hashes(false));
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index 6d4c5defc..0a9fd8190 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -65,6 +65,10 @@ static const char *ssl_private_key_file;
>  static const char *ssl_certificate_file;
>  static const char *ssl_ca_cert_file;
>
> +/* Default probe interval for NB and SB DB connections. */
> +#define DEFAULT_PROBE_INTERVAL_MSEC 5000
> +static int northd_probe_interval_nb = DEFAULT_PROBE_INTERVAL_MSEC;
> +static int northd_probe_interval_sb = DEFAULT_PROBE_INTERVAL_MSEC;
>  static bool use_parallel_build = true;
>  static struct hashrow_locks lflow_locks;
>
> @@ -577,6 +581,20 @@ update_ssl_config(void)
>      }
>  }
>
> +static int
> +get_probe_interval(const char *db, const struct nbrec_nb_global *nb)
> +{
> +    int default_interval = (db && !stream_or_pstream_needs_probes(db)
> +                            ? 0 : DEFAULT_PROBE_INTERVAL_MSEC);
> +    int interval = smap_get_int(&nb->options,
> +                                "northd_probe_interval",
default_interval);
> +
> +    if (interval > 0 && interval < 1000) {
> +        interval = 1000;
> +    }
> +    return interval;
> +}
> +
>  int
>  main(int argc, char *argv[])
>  {
> @@ -911,6 +929,12 @@ main(int argc, char *argv[])
>
>      while (!exiting) {
>          update_ssl_config();
> +        const struct nbrec_nb_global *nb =
nbrec_nb_global_first(ovnnb_idl_loop.idl);
> +        /* Update the probe interval. */
> +        if (nb) {
> +            northd_probe_interval_nb = get_probe_interval(ovnnb_db, nb);
> +            northd_probe_interval_sb = get_probe_interval(ovnsb_db, nb);
> +        }
>          memory_run();
>          if (memory_should_report()) {
>              struct simap usage = SIMAP_INITIALIZER(&usage);
> @@ -1000,6 +1024,11 @@ main(int argc, char *argv[])
>              poll_immediate_wake();
>          }
>
> +        ovsdb_idl_set_probe_interval(ovnnb_idl_loop.idl,
> +                                     northd_probe_interval_nb);
> +        ovsdb_idl_set_probe_interval(ovnsb_idl_loop.idl,
> +                                     northd_probe_interval_sb);
> +
>          if (reset_ovnsb_idl_min_index) {
>              VLOG_INFO("Resetting southbound database cluster state");
>              ovsdb_idl_reset_min_index(ovnsb_idl_loop.idl);
> --
> 2.20.1
>
Han Zhou Sept. 17, 2021, 9:51 p.m. UTC | #2
On Fri, Sep 17, 2021 at 1:41 PM Han Zhou <hzhou@ovn.org> wrote:
>
>
>
> On Thu, Sep 16, 2021 at 8:06 PM Zhen Wang <zhewang@nvidia.com> wrote:
> >
> > From: zhen wang <zhewang@nvidia.com>
> >
> > When ovn-northd work in HA mode, ovn-northd will not update the probe
> > interval in standby mode. This patch address the problem by updating
> > the value in main loop.
> >
>
> Thanks Zhen for the fix! Maybe the impact and steps to reproduce the
problem can be mentioned. It may be:
> step1: power off the NB/SB leader
> step2: kill the active northd (and the standbys would never take over)
>
> Acked-by: Han Zhou <hzhou@ovn.org>
>
> > Signed-off-by: zhen wang <zhewang@nvidia.com>
> > ---
> >  northd/northd.c     | 25 -------------------------
> >  northd/ovn-northd.c | 29 +++++++++++++++++++++++++++++
> >  2 files changed, 29 insertions(+), 25 deletions(-)
> >
> > diff --git a/northd/northd.c b/northd/northd.c
> > index b7e64470f..89b0e4921 100644
> > --- a/northd/northd.c
> > +++ b/northd/northd.c
> > @@ -72,10 +72,6 @@ static struct eth_addr svc_monitor_mac_ea;
> >   * Otherwise, it will avoid using it.  The default is true. */
> >  static bool use_ct_inv_match = true;
> >
> > -/* Default probe interval for NB and SB DB connections. */
> > -#define DEFAULT_PROBE_INTERVAL_MSEC 5000
> > -static int northd_probe_interval_nb = DEFAULT_PROBE_INTERVAL_MSEC;
> > -static int northd_probe_interval_sb = DEFAULT_PROBE_INTERVAL_MSEC;
> >  #define MAX_OVN_TAGS 4096
> >
> >  /* Pipeline stages. */
> > @@ -14082,20 +14078,6 @@ build_meter_groups(struct northd_context *ctx,
> >      }
> >  }
> >
> > -static int
> > -get_probe_interval(const char *db, const struct nbrec_nb_global *nb)
> > -{
> > -    int default_interval = (db && !stream_or_pstream_needs_probes(db)
> > -                            ? 0 : DEFAULT_PROBE_INTERVAL_MSEC);
> > -    int interval = smap_get_int(&nb->options,
> > -                                "northd_probe_interval",
default_interval);
> > -
> > -    if (interval > 0 && interval < 1000) {
> > -        interval = 1000;
> > -    }
> > -    return interval;
> > -}
> > -
> >  static void
> >  ovnnb_db_run(struct northd_context *ctx,
> >               struct ovsdb_idl_index *sbrec_chassis_by_name,
> > @@ -14182,13 +14164,6 @@ ovnnb_db_run(struct northd_context *ctx,
> >
> >      smap_destroy(&options);
> >
> > -    /* Update the probe interval. */
> > -    northd_probe_interval_nb = get_probe_interval(ctx->ovnnb_db, nb);
> > -    northd_probe_interval_sb = get_probe_interval(ctx->ovnsb_db, nb);
> > -
> > -    ovsdb_idl_set_probe_interval(ctx->ovnnb_idl,
northd_probe_interval_nb);
> > -    ovsdb_idl_set_probe_interval(ctx->ovnsb_idl,
northd_probe_interval_sb);
> > -
> >      use_parallel_build =
> >          (smap_get_bool(&nb->options, "use_parallel_build", false) &&
> >           can_parallelize_hashes(false));
> > diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> > index 6d4c5defc..0a9fd8190 100644
> > --- a/northd/ovn-northd.c
> > +++ b/northd/ovn-northd.c
> > @@ -65,6 +65,10 @@ static const char *ssl_private_key_file;
> >  static const char *ssl_certificate_file;
> >  static const char *ssl_ca_cert_file;
> >
> > +/* Default probe interval for NB and SB DB connections. */
> > +#define DEFAULT_PROBE_INTERVAL_MSEC 5000
> > +static int northd_probe_interval_nb = DEFAULT_PROBE_INTERVAL_MSEC;
> > +static int northd_probe_interval_sb = DEFAULT_PROBE_INTERVAL_MSEC;
> >  static bool use_parallel_build = true;
> >  static struct hashrow_locks lflow_locks;
> >
> > @@ -577,6 +581,20 @@ update_ssl_config(void)
> >      }
> >  }
> >
> > +static int
> > +get_probe_interval(const char *db, const struct nbrec_nb_global *nb)
> > +{
> > +    int default_interval = (db && !stream_or_pstream_needs_probes(db)
> > +                            ? 0 : DEFAULT_PROBE_INTERVAL_MSEC);
> > +    int interval = smap_get_int(&nb->options,
> > +                                "northd_probe_interval",
default_interval);
> > +
> > +    if (interval > 0 && interval < 1000) {
> > +        interval = 1000;
> > +    }
> > +    return interval;
> > +}
> > +
> >  int
> >  main(int argc, char *argv[])
> >  {
> > @@ -911,6 +929,12 @@ main(int argc, char *argv[])
> >
> >      while (!exiting) {
> >          update_ssl_config();
> > +        const struct nbrec_nb_global *nb =
nbrec_nb_global_first(ovnnb_idl_loop.idl);
> > +        /* Update the probe interval. */
> > +        if (nb) {
> > +            northd_probe_interval_nb = get_probe_interval(ovnnb_db,
nb);
> > +            northd_probe_interval_sb = get_probe_interval(ovnsb_db,
nb);
> > +        }

Sorry that I forgot a minor comment here: it is better to move this down,
right before setting the probe intervals. Otherwise, since this is before
ovsdb_idl_loop_run(), it reads the config from the last iteration, and the
new setting (if updated) will be applied at the next iteration only.

> >          memory_run();
> >          if (memory_should_report()) {
> >              struct simap usage = SIMAP_INITIALIZER(&usage);
> > @@ -1000,6 +1024,11 @@ main(int argc, char *argv[])
> >              poll_immediate_wake();
> >          }
> >
> > +        ovsdb_idl_set_probe_interval(ovnnb_idl_loop.idl,
> > +                                     northd_probe_interval_nb);
> > +        ovsdb_idl_set_probe_interval(ovnsb_idl_loop.idl,
> > +                                     northd_probe_interval_sb);
> > +
> >          if (reset_ovnsb_idl_min_index) {
> >              VLOG_INFO("Resetting southbound database cluster state");
> >              ovsdb_idl_reset_min_index(ovnsb_idl_loop.idl);
> > --
> > 2.20.1
> >
diff mbox series

Patch

diff --git a/northd/northd.c b/northd/northd.c
index b7e64470f..89b0e4921 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -72,10 +72,6 @@  static struct eth_addr svc_monitor_mac_ea;
  * Otherwise, it will avoid using it.  The default is true. */
 static bool use_ct_inv_match = true;
 
-/* Default probe interval for NB and SB DB connections. */
-#define DEFAULT_PROBE_INTERVAL_MSEC 5000
-static int northd_probe_interval_nb = DEFAULT_PROBE_INTERVAL_MSEC;
-static int northd_probe_interval_sb = DEFAULT_PROBE_INTERVAL_MSEC;
 #define MAX_OVN_TAGS 4096
 
 /* Pipeline stages. */
@@ -14082,20 +14078,6 @@  build_meter_groups(struct northd_context *ctx,
     }
 }
 
-static int
-get_probe_interval(const char *db, const struct nbrec_nb_global *nb)
-{
-    int default_interval = (db && !stream_or_pstream_needs_probes(db)
-                            ? 0 : DEFAULT_PROBE_INTERVAL_MSEC);
-    int interval = smap_get_int(&nb->options,
-                                "northd_probe_interval", default_interval);
-
-    if (interval > 0 && interval < 1000) {
-        interval = 1000;
-    }
-    return interval;
-}
-
 static void
 ovnnb_db_run(struct northd_context *ctx,
              struct ovsdb_idl_index *sbrec_chassis_by_name,
@@ -14182,13 +14164,6 @@  ovnnb_db_run(struct northd_context *ctx,
 
     smap_destroy(&options);
 
-    /* Update the probe interval. */
-    northd_probe_interval_nb = get_probe_interval(ctx->ovnnb_db, nb);
-    northd_probe_interval_sb = get_probe_interval(ctx->ovnsb_db, nb);
-
-    ovsdb_idl_set_probe_interval(ctx->ovnnb_idl, northd_probe_interval_nb);
-    ovsdb_idl_set_probe_interval(ctx->ovnsb_idl, northd_probe_interval_sb);
-
     use_parallel_build =
         (smap_get_bool(&nb->options, "use_parallel_build", false) &&
          can_parallelize_hashes(false));
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 6d4c5defc..0a9fd8190 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -65,6 +65,10 @@  static const char *ssl_private_key_file;
 static const char *ssl_certificate_file;
 static const char *ssl_ca_cert_file;
 
+/* Default probe interval for NB and SB DB connections. */
+#define DEFAULT_PROBE_INTERVAL_MSEC 5000
+static int northd_probe_interval_nb = DEFAULT_PROBE_INTERVAL_MSEC;
+static int northd_probe_interval_sb = DEFAULT_PROBE_INTERVAL_MSEC;
 static bool use_parallel_build = true;
 static struct hashrow_locks lflow_locks;
 
@@ -577,6 +581,20 @@  update_ssl_config(void)
     }
 }
 
+static int
+get_probe_interval(const char *db, const struct nbrec_nb_global *nb)
+{
+    int default_interval = (db && !stream_or_pstream_needs_probes(db)
+                            ? 0 : DEFAULT_PROBE_INTERVAL_MSEC);
+    int interval = smap_get_int(&nb->options,
+                                "northd_probe_interval", default_interval);
+
+    if (interval > 0 && interval < 1000) {
+        interval = 1000;
+    }
+    return interval;
+}
+
 int
 main(int argc, char *argv[])
 {
@@ -911,6 +929,12 @@  main(int argc, char *argv[])
 
     while (!exiting) {
         update_ssl_config();
+        const struct nbrec_nb_global *nb = nbrec_nb_global_first(ovnnb_idl_loop.idl);
+        /* Update the probe interval. */
+        if (nb) {
+            northd_probe_interval_nb = get_probe_interval(ovnnb_db, nb);
+            northd_probe_interval_sb = get_probe_interval(ovnsb_db, nb);
+        }
         memory_run();
         if (memory_should_report()) {
             struct simap usage = SIMAP_INITIALIZER(&usage);
@@ -1000,6 +1024,11 @@  main(int argc, char *argv[])
             poll_immediate_wake();
         }
 
+        ovsdb_idl_set_probe_interval(ovnnb_idl_loop.idl,
+                                     northd_probe_interval_nb);
+        ovsdb_idl_set_probe_interval(ovnsb_idl_loop.idl,
+                                     northd_probe_interval_sb);
+
         if (reset_ovnsb_idl_min_index) {
             VLOG_INFO("Resetting southbound database cluster state");
             ovsdb_idl_reset_min_index(ovnsb_idl_loop.idl);