@@ -19,6 +19,8 @@ ovsdb\-server \- Open vSwitch database server
[\fB\-\-sync\-from=\fIserver\fR]
[\fB\-\-sync\-exclude-tables=\fIdb\fB:\fItable\fR[\fB,\fIdb\fB:\fItable\fR]...\fR]
[\fB\-\-active\fR]
+.IP "Clustered mode options:"
+[\fB\-\-leader\-timeout=\fItimeout\fR]
.so lib/ssl-syn.man
.so lib/ssl-bootstrap-syn.man
.so lib/ssl-peer-ca-cert-syn.man
@@ -171,6 +173,16 @@ allow the syncing options to be specified using command line options,
yet start the server, as the default, active server. To switch the
running server to backup mode, use \fBovs-appctl(1)\fR to execute the
\fBovsdb\-server/connect\-active\-ovsdb\-server\fR command.
+.SS "Clustered Mode Options"
+These options apply only to databases working in clustered mode:
+.TP
+\fB\-\-leader\-timeout=\fIserver\fR
+Set the cluster leader election timeout value in miniseconds. This option
+should be set to the same value across all nodes in the cluster. Leader
+re-election will be triggered if a follower haven't heard from current leader
+within this interval. The default timeout is one second. Increasing this
+value reduces the chance of leader re-election during transient overload
+situations but increases the delay of reacting to real failures, too.
.SS "Public Key Infrastructure Options"
The options described below for configuring the SSL public key
infrastructure accept a special syntax for obtaining their
@@ -97,6 +97,7 @@ struct server_config {
char **sync_from;
char **sync_exclude;
bool *is_backup;
+ unsigned int *leader_timeout;
struct ovsdb_jsonrpc_server *jsonrpc;
};
static unixctl_cb_func ovsdb_server_add_remote;
@@ -119,7 +120,7 @@ static void parse_options(int argc, char *argvp[],
struct sset *db_filenames, struct sset *remotes,
char **unixctl_pathp, char **run_command,
char **sync_from, char **sync_exclude,
- bool *is_backup);
+ bool *is_backup, unsigned int *leader_timeout);
OVS_NO_RETURN static void usage(void);
static char *reconfigure_remotes(struct ovsdb_jsonrpc_server *,
@@ -312,8 +313,10 @@ main(int argc, char *argv[])
process_init();
bool active = false;
+ unsigned int leader_timeout = 0;
parse_options(argc, argv, &db_filenames, &remotes, &unixctl_path,
- &run_command, &sync_from, &sync_exclude, &active);
+ &run_command, &sync_from, &sync_exclude, &active,
+ &leader_timeout);
is_backup = sync_from && !active;
daemon_become_new_user(false);
@@ -351,6 +354,7 @@ main(int argc, char *argv[])
server_config.sync_from = &sync_from;
server_config.sync_exclude = &sync_exclude;
server_config.is_backup = &is_backup;
+ server_config.leader_timeout = &leader_timeout;
perf_counters_init();
@@ -639,7 +643,8 @@ open_db(struct server_config *config, const char *filename)
struct ovsdb_storage *storage;
struct ovsdb_error *error;
- error = ovsdb_storage_open(filename, true, &storage);
+ error = ovsdb_storage_open(filename, true, *config->leader_timeout,
+ &storage);
if (error) {
return error;
}
@@ -1664,7 +1669,8 @@ static void
parse_options(int argc, char *argv[],
struct sset *db_filenames, struct sset *remotes,
char **unixctl_pathp, char **run_command,
- char **sync_from, char **sync_exclude, bool *active)
+ char **sync_from, char **sync_exclude, bool *active,
+ unsigned int *leader_timeout)
{
enum {
OPT_REMOTE = UCHAR_MAX + 1,
@@ -1675,6 +1681,7 @@ parse_options(int argc, char *argv[],
OPT_SYNC_FROM,
OPT_SYNC_EXCLUDE,
OPT_ACTIVE,
+ OPT_LEADER_TIMEOUT,
OPT_NO_DBS,
VLOG_OPTION_ENUMS,
DAEMON_OPTION_ENUMS,
@@ -1697,6 +1704,7 @@ parse_options(int argc, char *argv[],
{"sync-from", required_argument, NULL, OPT_SYNC_FROM},
{"sync-exclude-tables", required_argument, NULL, OPT_SYNC_EXCLUDE},
{"active", no_argument, NULL, OPT_ACTIVE},
+ {"leader-timeout", required_argument, NULL, OPT_LEADER_TIMEOUT},
{"no-dbs", no_argument, NULL, OPT_NO_DBS},
{NULL, 0, NULL, 0},
};
@@ -1784,6 +1792,12 @@ parse_options(int argc, char *argv[],
*active = true;
break;
+ case OPT_LEADER_TIMEOUT:
+ if (!str_to_uint(optarg, 10, leader_timeout)) {
+ ovs_fatal(0, "leader-timeout must be a non-negative integer.");
+ }
+ break;
+
case OPT_NO_DBS:
add_default_db = false;
break;
@@ -1822,6 +1836,7 @@ usage(void)
daemon_usage();
vlog_usage();
replication_usage();
+ storage_usage();
printf("\nOther options:\n"
" --run COMMAND run COMMAND as subprocess then exit\n"
" --unixctl=SOCKET override default control socket name\n"
@@ -238,8 +238,11 @@ struct raft {
uint64_t last_applied; /* Max log index applied to state machine. */
struct uuid leader_sid; /* Server ID of leader (zero, if unknown). */
+#define ELECTION_DEFAULT 1024 /* Default value of election_interval. */
+ unsigned int election_interval; /* election_interval +
+ random(ELECTION_RANGE_MSEC) is the time
+ to wait before starting an election. */
/* Followers and candidates only. */
-#define ELECTION_BASE_MSEC 1024
#define ELECTION_RANGE_MSEC 1024
long long int election_base; /* Time of last heartbeat from leader. */
long long int election_timeout; /* Time at which we start an election. */
@@ -269,7 +272,6 @@ struct raft {
struct hmap add_servers; /* Contains "struct raft_server"s to add. */
struct raft_server *remove_server; /* Server being removed. */
struct hmap commands; /* Contains "struct raft_command"s. */
-#define PING_TIME_MSEC (ELECTION_BASE_MSEC / 3)
long long int ping_timeout; /* Time at which to send a heartbeat */
/* Candidates only. Reinitialized at start of election. */
@@ -360,7 +362,7 @@ raft_make_address_passive(const char *address_)
}
static struct raft *
-raft_alloc(void)
+raft_alloc(unsigned int election_interval)
{
raft_init();
@@ -377,6 +379,8 @@ raft_alloc(void)
hmap_init(&raft->add_servers);
hmap_init(&raft->commands);
+ raft->election_interval = election_interval ? election_interval :
+ ELECTION_DEFAULT;
raft_reset_ping_timer(raft);
raft_reset_election_timer(raft);
@@ -541,7 +545,7 @@ raft_join_cluster(const char *file_name,
struct ovsdb_error * OVS_WARN_UNUSED_RESULT
raft_read_metadata(struct ovsdb_log *log, struct raft_metadata *md)
{
- struct raft *raft = raft_alloc();
+ struct raft *raft = raft_alloc(0);
raft->log = log;
struct ovsdb_error *error = raft_read_header(raft);
@@ -868,7 +872,7 @@ raft_read_log(struct raft *raft)
static void
raft_reset_election_timer(struct raft *raft)
{
- unsigned int duration = (ELECTION_BASE_MSEC
+ unsigned int duration = (raft->election_interval
+ random_range(ELECTION_RANGE_MSEC));
raft->election_base = time_msec();
raft->election_timeout = raft->election_base + duration;
@@ -877,7 +881,7 @@ raft_reset_election_timer(struct raft *raft)
static void
raft_reset_ping_timer(struct raft *raft)
{
- raft->ping_timeout = time_msec() + PING_TIME_MSEC;
+ raft->ping_timeout = time_msec() + raft->election_interval / 3;
}
static void
@@ -900,9 +904,10 @@ raft_add_conn(struct raft *raft, struct jsonrpc_session *js,
* the cluster's log in 'file_name'. Takes ownership of 'log', whether
* successful or not. */
struct ovsdb_error * OVS_WARN_UNUSED_RESULT
-raft_open(struct ovsdb_log *log, struct raft **raftp)
+raft_open(struct ovsdb_log *log, unsigned int election_interval,
+ struct raft **raftp)
{
- struct raft *raft = raft_alloc();
+ struct raft *raft = raft_alloc(election_interval);
raft->log = log;
struct ovsdb_error *error = raft_read_header(raft);
@@ -1113,7 +1118,7 @@ raft_send_remove_server_requests(struct raft *raft)
}
}
- raft->leave_timeout = time_msec() + ELECTION_BASE_MSEC;
+ raft->leave_timeout = time_msec() + raft->election_interval;
}
/* Attempts to start 'raft' leaving its cluster. The caller can check progress
@@ -1130,7 +1135,7 @@ raft_leave(struct raft *raft)
raft_transfer_leadership(raft, "this server is leaving the cluster");
raft_become_follower(raft);
raft_send_remove_server_requests(raft);
- raft->leave_timeout = time_msec() + ELECTION_BASE_MSEC;
+ raft->leave_timeout = time_msec() + raft->election_interval;
}
/* Returns true if 'raft' is currently attempting to leave its cluster. */
@@ -1785,7 +1790,7 @@ raft_run(struct raft *raft)
struct raft_command *cmd, *next_cmd;
HMAP_FOR_EACH_SAFE (cmd, next_cmd, hmap_node, &raft->commands) {
if (cmd->timestamp
- && now - cmd->timestamp > ELECTION_BASE_MSEC) {
+ && now - cmd->timestamp > raft->election_interval) {
raft_command_complete(raft, cmd, RAFT_CMD_TIMEOUT);
}
}
@@ -3231,10 +3236,10 @@ raft_should_suppress_disruptive_server(struct raft *raft,
return true;
case RAFT_FOLLOWER:
- if (now < raft->election_base + ELECTION_BASE_MSEC) {
+ if (now < raft->election_base + raft->election_interval) {
VLOG_WARN_RL(&rl, "ignoring vote request received after only "
"%lld ms (minimum election time is %d ms)",
- now - raft->election_base, ELECTION_BASE_MSEC);
+ now - raft->election_base, raft->election_interval);
return true;
}
return false;
@@ -100,7 +100,8 @@ struct ovsdb_error *raft_read_metadata(struct ovsdb_log *,
void raft_metadata_destroy(struct raft_metadata *);
/* Starting up or shutting down a server within a cluster. */
-struct ovsdb_error *raft_open(struct ovsdb_log *, struct raft **)
+struct ovsdb_error *raft_open(struct ovsdb_log *,
+ unsigned int election_interval, struct raft **)
OVS_WARN_UNUSED_RESULT;
void raft_close(struct raft *);
@@ -58,6 +58,7 @@ static void schedule_next_snapshot(struct ovsdb_storage *, bool quick);
static struct ovsdb_error * OVS_WARN_UNUSED_RESULT
ovsdb_storage_open__(const char *filename, bool rw, bool allow_clustered,
+ unsigned int leader_timeout,
struct ovsdb_storage **storagep)
{
*storagep = NULL;
@@ -78,7 +79,7 @@ ovsdb_storage_open__(const char *filename, bool rw, bool allow_clustered,
return ovsdb_error(NULL, "%s: cannot apply this operation to "
"clustered database file", filename);
}
- error = raft_open(log, &raft);
+ error = raft_open(log, leader_timeout, &raft);
log = NULL;
if (error) {
return error;
@@ -101,10 +102,10 @@ ovsdb_storage_open__(const char *filename, bool rw, bool allow_clustered,
* The returned storage might be clustered or standalone, depending on what the
* disk file contains. */
struct ovsdb_error * OVS_WARN_UNUSED_RESULT
-ovsdb_storage_open(const char *filename, bool rw,
+ovsdb_storage_open(const char *filename, bool rw, unsigned int leader_timeout,
struct ovsdb_storage **storagep)
{
- return ovsdb_storage_open__(filename, rw, true, storagep);
+ return ovsdb_storage_open__(filename, rw, true, leader_timeout, storagep);
}
struct ovsdb_storage *
@@ -112,7 +113,7 @@ ovsdb_storage_open_standalone(const char *filename, bool rw)
{
struct ovsdb_storage *storage;
struct ovsdb_error *error = ovsdb_storage_open__(filename, rw, false,
- &storage);
+ 0, &storage);
if (error) {
ovs_fatal(0, "%s", ovsdb_error_to_string_free(error));
}
@@ -610,3 +611,12 @@ ovsdb_storage_peek_last_eid(struct ovsdb_storage *storage)
}
return raft_current_eid(storage->raft);
}
+
+void
+storage_usage(void)
+{
+ printf("\n\
+Clustered mode options:\n\
+ --leader-timeout=TIMEOUT\n\
+ set cluster leader election timeout in ms.\n");
+}
@@ -26,6 +26,7 @@ struct ovsdb_storage;
struct uuid;
struct ovsdb_error *ovsdb_storage_open(const char *filename, bool rw,
+ unsigned int leader_timeout,
struct ovsdb_storage **)
OVS_WARN_UNUSED_RESULT;
struct ovsdb_storage *ovsdb_storage_create_unbacked(void);
@@ -93,4 +94,5 @@ struct ovsdb_schema *ovsdb_storage_read_schema(struct ovsdb_storage *);
const struct uuid *ovsdb_storage_peek_last_eid(struct ovsdb_storage *);
+void storage_usage(void);
#endif /* ovsdb/storage.h */