[ovs-dev,v4] ovsdb replication: Provide option to configure probe interval.
diff mbox series

Message ID 20191220131352.341240-1-numans@ovn.org
State Changes Requested
Headers show
Series
  • [ovs-dev,v4] ovsdb replication: Provide option to configure probe interval.
Related show

Commit Message

Numan Siddique Dec. 20, 2019, 1:13 p.m. UTC
From: Numan Siddique <numans@ovn.org>

When ovsdb-server is in backup mode and connects to the active
ovsdb-server for replication, and it takes more than 5 seconds to
get the dump of the whole database, it will drop the connection
soon after, as the default probe interval is 5 seconds. This
results in a snowball effect of reconnections to the active
ovsdb-server.

This patch mitigates this issue by setting the default probe
interval value to 60 seconds and providing an option to
configure this value through a unixctl command.

Another option would be to increase the value of
'RECONNECT_DEFAULT_PROBE_INTERVAL'.

Acked-by: Mark Michelson <mmichels@redhat.com>
Signed-off-by: Numan Siddique <numans@ovn.org>
Acked-by: Dumitru Ceara <dceara@redhat.com>
---
v3 -> v4
-----
  * Addressed review comments from Ben


 ovsdb/ovsdb-server.1.in |  5 +++++
 ovsdb/ovsdb-server.c    | 49 +++++++++++++++++++++++++++++++++++------
 ovsdb/replication.c     | 12 +++++++++-
 ovsdb/replication.h     |  5 ++++-
 4 files changed, 62 insertions(+), 9 deletions(-)

Comments

Ben Pfaff Jan. 6, 2020, 10:41 p.m. UTC | #1
On Fri, Dec 20, 2019 at 06:43:52PM +0530, numans@ovn.org wrote:
> From: Numan Siddique <numans@ovn.org>
> 
> When ovsdb-server is in backup mode and connects to the active
> ovsdb-server for replication, and if takes more than 5 seconds to
> get the dump of the whole database, it will drop the connection
> soon after as the default probe interval is 5 seconds. This
> results in a snowball effect of reconnections to the active
> ovsdb-server.
> 
> This patch handles or mitigates this issue by setting the
> default probe interval value to 60 seconds and provide the option to
> configure this value from the unixctl command.
> 
> Other option could be increase the value of 'RECONNECT_DEFAULT_PROBE_INTERVAL'
> to a higher value.
> 
> Acked-by: Mark Michelson <mmichels@redhat.com>
> Signed-off-by: Numan Siddique <numans@ovn.org>
> Acked-by: Dumitru Ceara <dceara@redhat.com>

This patch really has two important effects.  One is making the interval
adjustable, the other is increasing the default.  I think that probably
both of these changes should get called out in NEWS.
Numan Siddique Jan. 7, 2020, 2:22 a.m. UTC | #2
On Tue, Jan 7, 2020 at 4:12 AM Ben Pfaff <blp@ovn.org> wrote:
>
> On Fri, Dec 20, 2019 at 06:43:52PM +0530, numans@ovn.org wrote:
> > From: Numan Siddique <numans@ovn.org>
> >
> > When ovsdb-server is in backup mode and connects to the active
> > ovsdb-server for replication, and if takes more than 5 seconds to
> > get the dump of the whole database, it will drop the connection
> > soon after as the default probe interval is 5 seconds. This
> > results in a snowball effect of reconnections to the active
> > ovsdb-server.
> >
> > This patch handles or mitigates this issue by setting the
> > default probe interval value to 60 seconds and provide the option to
> > configure this value from the unixctl command.
> >
> > Other option could be increase the value of 'RECONNECT_DEFAULT_PROBE_INTERVAL'
> > to a higher value.
> >
> > Acked-by: Mark Michelson <mmichels@redhat.com>
> > Signed-off-by: Numan Siddique <numans@ovn.org>
> > Acked-by: Dumitru Ceara <dceara@redhat.com>
>
> This patch really has two important effects.  One is making the interval
> adjustable, the other is increasing the default.  I think that probably
> both of these changes should get called out in NEWS.

Sure. I submitted v5 with the NEWS entry:
https://patchwork.ozlabs.org/patch/1218539/

Thanks
Numan

> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>

Patch
diff mbox series

diff --git a/ovsdb/ovsdb-server.1.in b/ovsdb/ovsdb-server.1.in
index 21f527bc6..338f3bc29 100644
--- a/ovsdb/ovsdb-server.1.in
+++ b/ovsdb/ovsdb-server.1.in
@@ -288,6 +288,11 @@  Switches the server to an active role.  The server stops synchronizing
 its databases with an active server and closes all existing client
 connections, which requires clients to reconnect.
 .
+.IP "\fBovsdb\-server/set\-active\-ovsdb\-server\-probe\-interval \fIprobe interval"
+Sets the probe interval (in milliseconds) for the connection to
+active \fIserver\fR.
+.
+.
 .IP "\fBovsdb\-server/set\-sync\-exclude\-tables \fIdb\fB:\fItable\fR[\fB,\fIdb\fB:\fItable\fR]..."
 Sets the \fItable\fR within \fIdb\fR that will be excluded from synchronization.
 This overrides the \fB\-\-sync\-exclude-tables\fR command-line option.
diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c
index 9827320ec..b6957d730 100644
--- a/ovsdb/ovsdb-server.c
+++ b/ovsdb/ovsdb-server.c
@@ -86,6 +86,7 @@  static unixctl_cb_func ovsdb_server_set_active_ovsdb_server;
 static unixctl_cb_func ovsdb_server_get_active_ovsdb_server;
 static unixctl_cb_func ovsdb_server_connect_active_ovsdb_server;
 static unixctl_cb_func ovsdb_server_disconnect_active_ovsdb_server;
+static unixctl_cb_func ovsdb_server_set_active_ovsdb_server_probe_interval;
 static unixctl_cb_func ovsdb_server_set_sync_exclude_tables;
 static unixctl_cb_func ovsdb_server_get_sync_exclude_tables;
 static unixctl_cb_func ovsdb_server_get_sync_status;
@@ -97,6 +98,7 @@  struct server_config {
     char **sync_from;
     char **sync_exclude;
     bool *is_backup;
+    int *replication_probe_interval;
     struct ovsdb_jsonrpc_server *jsonrpc;
 };
 static unixctl_cb_func ovsdb_server_add_remote;
@@ -144,9 +146,10 @@  static void load_config(FILE *config_file, struct sset *remotes,
 
 static void
 ovsdb_replication_init(const char *sync_from, const char *exclude,
-                       struct shash *all_dbs, const struct uuid *server_uuid)
+                       struct shash *all_dbs, const struct uuid *server_uuid,
+                       int probe_interval)
 {
-    replication_init(sync_from, exclude, server_uuid);
+    replication_init(sync_from, exclude, server_uuid, probe_interval);
     struct shash_node *node;
     SHASH_FOR_EACH (node, all_dbs) {
         struct db *db = node->data;
@@ -304,6 +307,7 @@  main(int argc, char *argv[])
     struct server_config server_config;
     struct shash all_dbs;
     struct shash_node *node, *next;
+    int replication_probe_interval = REPLICATION_DEFAULT_PROBE_INTERVAL;
 
     ovs_cmdl_proctitle_init(argc, argv);
     set_program_name(argv[0]);
@@ -351,6 +355,7 @@  main(int argc, char *argv[])
     server_config.sync_from = &sync_from;
     server_config.sync_exclude = &sync_exclude;
     server_config.is_backup = &is_backup;
+    server_config.replication_probe_interval = &replication_probe_interval;
 
     perf_counters_init();
 
@@ -436,6 +441,9 @@  main(int argc, char *argv[])
     unixctl_command_register("ovsdb-server/disconnect-active-ovsdb-server", "",
                              0, 0, ovsdb_server_disconnect_active_ovsdb_server,
                              &server_config);
+    unixctl_command_register(
+        "ovsdb-server/set-active-ovsdb-server-probe-interval", "", 1, 1,
+        ovsdb_server_set_active_ovsdb_server_probe_interval, &server_config);
     unixctl_command_register("ovsdb-server/set-sync-exclude-tables", "",
                              0, 1, ovsdb_server_set_sync_exclude_tables,
                              &server_config);
@@ -454,7 +462,8 @@  main(int argc, char *argv[])
     if (is_backup) {
         const struct uuid *server_uuid;
         server_uuid = ovsdb_jsonrpc_server_get_uuid(jsonrpc);
-        ovsdb_replication_init(sync_from, sync_exclude, &all_dbs, server_uuid);
+        ovsdb_replication_init(sync_from, sync_exclude, &all_dbs, server_uuid,
+                               replication_probe_interval);
     }
 
     main_loop(&server_config, jsonrpc, &all_dbs, unixctl, &remotes,
@@ -1317,7 +1326,8 @@  ovsdb_server_connect_active_ovsdb_server(struct unixctl_conn *conn,
         const struct uuid *server_uuid;
         server_uuid = ovsdb_jsonrpc_server_get_uuid(config->jsonrpc);
         ovsdb_replication_init(*config->sync_from, *config->sync_exclude,
-                               config->all_dbs, server_uuid);
+                               config->all_dbs, server_uuid,
+                               *config->replication_probe_interval);
         if (!*config->is_backup) {
             *config->is_backup = true;
             save_config(config);
@@ -1340,6 +1350,28 @@  ovsdb_server_disconnect_active_ovsdb_server(struct unixctl_conn *conn,
     unixctl_command_reply(conn, NULL);
 }
 
+static void
+ovsdb_server_set_active_ovsdb_server_probe_interval(struct unixctl_conn *conn,
+                                                   int argc OVS_UNUSED,
+                                                   const char *argv[],
+                                                   void *config_)
+{
+    struct server_config *config = config_;
+
+    int probe_interval;
+    if (str_to_int(argv[1], 10, &probe_interval)) {
+        *config->replication_probe_interval = probe_interval;
+        save_config(config);
+        if (*config->is_backup) {
+            replication_set_probe_interval(probe_interval);
+        }
+        unixctl_command_reply(conn, NULL);
+    } else {
+        unixctl_command_reply(
+            conn, "Invalid probe interval, integer value expected");
+    }
+}
+
 static void
 ovsdb_server_set_sync_exclude_tables(struct unixctl_conn *conn,
                                      int argc OVS_UNUSED,
@@ -1357,7 +1389,8 @@  ovsdb_server_set_sync_exclude_tables(struct unixctl_conn *conn,
             const struct uuid *server_uuid;
             server_uuid = ovsdb_jsonrpc_server_get_uuid(config->jsonrpc);
             ovsdb_replication_init(*config->sync_from, *config->sync_exclude,
-                                   config->all_dbs, server_uuid);
+                                   config->all_dbs, server_uuid,
+                                   *config->replication_probe_interval);
         }
         err = set_blacklist_tables(argv[1], false);
     }
@@ -1568,7 +1601,8 @@  ovsdb_server_add_database(struct unixctl_conn *conn, int argc OVS_UNUSED,
             const struct uuid *server_uuid;
             server_uuid = ovsdb_jsonrpc_server_get_uuid(config->jsonrpc);
             ovsdb_replication_init(*config->sync_from, *config->sync_exclude,
-                                   config->all_dbs, server_uuid);
+                                   config->all_dbs, server_uuid,
+                                   *config->replication_probe_interval);
         }
         unixctl_command_reply(conn, NULL);
     } else {
@@ -1590,7 +1624,8 @@  remove_db(struct server_config *config, struct shash_node *node, char *comment)
         const struct uuid *server_uuid;
         server_uuid = ovsdb_jsonrpc_server_get_uuid(config->jsonrpc);
         ovsdb_replication_init(*config->sync_from, *config->sync_exclude,
-                               config->all_dbs, server_uuid);
+                               config->all_dbs, server_uuid,
+                               *config->replication_probe_interval);
     }
 }
 
diff --git a/ovsdb/replication.c b/ovsdb/replication.c
index 42e27cba0..cbbce64df 100644
--- a/ovsdb/replication.c
+++ b/ovsdb/replication.c
@@ -125,7 +125,7 @@  static struct replication_db *find_db(const char *db_name);
 
 void
 replication_init(const char *sync_from_, const char *exclude_tables,
-                 const struct uuid *server)
+                 const struct uuid *server, int probe_interval)
 {
     free(sync_from);
     sync_from = xstrdup(sync_from_);
@@ -143,6 +143,8 @@  replication_init(const char *sync_from_, const char *exclude_tables,
     session = jsonrpc_session_open(sync_from, true);
     session_seqno = UINT_MAX;
 
+    jsonrpc_session_set_probe_interval(session, probe_interval);
+
     /* Keep a copy of local server uuid.  */
     server_uuid = *server;
 
@@ -979,6 +981,14 @@  is_replication_possible(struct ovsdb_schema *local_db_schema,
     return true;
 }
 
+void
+replication_set_probe_interval(int probe_interval)
+{
+    if (session) {
+        jsonrpc_session_set_probe_interval(session, probe_interval);
+    }
+}
+
 void
 replication_usage(void)
 {
diff --git a/ovsdb/replication.h b/ovsdb/replication.h
index 1f9c32fa7..c45f33e26 100644
--- a/ovsdb/replication.h
+++ b/ovsdb/replication.h
@@ -44,8 +44,10 @@  struct ovsdb;
  *    used mainly by uinxctl commands.
  */
 
+#define REPLICATION_DEFAULT_PROBE_INTERVAL 60000
+
 void replication_init(const char *sync_from, const char *exclude_tables,
-                      const struct uuid *server);
+                      const struct uuid *server, int probe_interval);
 void replication_run(void);
 void replication_wait(void);
 void replication_destroy(void);
@@ -54,6 +56,7 @@  void replication_add_local_db(const char *databse, struct ovsdb *db);
 bool replication_is_alive(void);
 int replication_get_last_error(void);
 char *replication_status(void);
+void replication_set_probe_interval(int);
 
 char *set_blacklist_tables(const char *blacklist, bool dryrun)
     OVS_WARN_UNUSED_RESULT;