@@ -8,6 +8,7 @@ Post-v2.14.0
after every DB compaction back to OS. Disabled by default.
* Maximum backlog on RAFT connections limited to 500 messages or 4GB.
Once threshold reached, connection is dropped (and re-established).
+ Use the 'cluster/set-backlog-threshold' command to change limits.
- DPDK:
* Removed support for vhost-user dequeue zero-copy.
- The environment variable OVS_UNBOUND_CONF, if set, is now used
@@ -381,6 +381,11 @@ This command must be executed on the leader. It initiates the change to the
cluster. To see if the change takes effect (committed), use
\fBcluster/status\fR to show the current setting. Once a change is committed,
it persists at server restarts.
+.IP "\fBcluster/set\-backlog\-threshold \fIdb\fR \fIn_msgs\fR \fIn_bytes\fR"
+Sets the backlog limits for \fIdb\fR's RAFT connections to a maximum of
+\fIn_msgs\fR messages or \fIn_bytes\fR bytes. If the backlog on one of the
+connections reaches the limit, that connection is dropped (and re-established).
+Values are checked only if the backlog contains more than 50 messages.
.
.so lib/vlog-unixctl.man
.so lib/memory-unixctl.man
@@ -305,6 +305,12 @@ struct raft {
bool ever_had_leader; /* There has been leader elected since the raft
is initialized, meaning it is ever
connected. */
+
+ /* Connection backlog limits. */
+#define DEFAULT_MAX_BACKLOG_N_MSGS 500
+#define DEFAULT_MAX_BACKLOG_N_BYTES UINT32_MAX
+ size_t conn_backlog_max_n_msgs; /* Number of messages. */
+ size_t conn_backlog_max_n_bytes; /* Number of bytes. */
};
/* All Raft structures. */
@@ -412,6 +418,9 @@ raft_alloc(void)
raft->election_timer = ELECTION_BASE_MSEC;
+ raft->conn_backlog_max_n_msgs = DEFAULT_MAX_BACKLOG_N_MSGS;
+ raft->conn_backlog_max_n_bytes = DEFAULT_MAX_BACKLOG_N_BYTES;
+
return raft;
}
@@ -925,9 +934,6 @@ raft_reset_ping_timer(struct raft *raft)
raft->ping_timeout = time_msec() + raft->election_timer / 3;
}
-#define RAFT_MAX_BACKLOG_N_MSGS 500
-#define RAFT_MAX_BACKLOG_BYTES UINT32_MAX
-
static void
raft_add_conn(struct raft *raft, struct jsonrpc_session *js,
const struct uuid *sid, bool incoming)
@@ -943,8 +949,8 @@ raft_add_conn(struct raft *raft, struct jsonrpc_session *js,
conn->incoming = incoming;
conn->js_seqno = jsonrpc_session_get_seqno(conn->js);
jsonrpc_session_set_probe_interval(js, 0);
- jsonrpc_session_set_backlog_threshold(js, RAFT_MAX_BACKLOG_N_MSGS,
- RAFT_MAX_BACKLOG_BYTES);
+ jsonrpc_session_set_backlog_threshold(js, raft->conn_backlog_max_n_msgs,
+ raft->conn_backlog_max_n_bytes);
}
/* Starts the local server in an existing Raft cluster, using the local copy of
@@ -4727,6 +4733,42 @@ raft_unixctl_change_election_timer(struct unixctl_conn *conn,
unixctl_command_reply(conn, "change of election timer initiated.");
}
+static void
+raft_unixctl_set_backlog_threshold(struct unixctl_conn *conn,
+ int argc OVS_UNUSED, const char *argv[],
+ void *aux OVS_UNUSED)
+{
+ const char *cluster_name = argv[1];
+ unsigned long long n_msgs, n_bytes;
+ struct raft_conn *r_conn;
+ /* unixctl handler: argv[1] names the cluster, argv[2]/argv[3] the limits. */
+ struct raft *raft = raft_lookup_by_name(cluster_name);
+ if (!raft) {
+ unixctl_command_reply_error(conn, "unknown cluster");
+ return;
+ }
+ /* Both limits must be accepted by str_to_ullong(); 10 is presumably the base. */
+ if (!str_to_ullong(argv[2], 10, &n_msgs)
+ || !str_to_ullong(argv[3], 10, &n_bytes)) {
+ unixctl_command_reply_error(conn, "invalid argument");
+ return;
+ }
+ /* n_msgs below 50 is refused; both values must also fit in size_t fields. */
+ if (n_msgs < 50 || n_msgs > SIZE_MAX || n_bytes > SIZE_MAX) {
+ unixctl_command_reply_error(conn, "values out of range");
+ return;
+ }
+ /* Store in 'raft': raft_add_conn() reads these fields for new connections. */
+ raft->conn_backlog_max_n_msgs = n_msgs;
+ raft->conn_backlog_max_n_bytes = n_bytes;
+ /* Update the sessions of connections already on raft->conns. */
+ LIST_FOR_EACH (r_conn, list_node, &raft->conns) {
+ jsonrpc_session_set_backlog_threshold(r_conn->js, n_msgs, n_bytes);
+ }
+
+ unixctl_command_reply(conn, NULL);
+}
+
static void
raft_unixctl_failure_test(struct unixctl_conn *conn OVS_UNUSED,
int argc OVS_UNUSED, const char *argv[],
@@ -4787,6 +4829,9 @@ raft_init(void)
raft_unixctl_kick, NULL);
unixctl_command_register("cluster/change-election-timer", "DB TIME", 2, 2,
raft_unixctl_change_election_timer, NULL);
+ unixctl_command_register("cluster/set-backlog-threshold",
+ "DB N_MSGS N_BYTES", 3, 3,
+ raft_unixctl_set_backlog_threshold, NULL);
unixctl_command_register("cluster/failure-test", "FAILURE SCENARIO", 1, 1,
raft_unixctl_failure_test, NULL);
ovsthread_once_done(&once);
New appctl command 'cluster/set-backlog-threshold' to configure thresholds on the backlog of raft jsonrpc connections. It could be used, for example, in extreme conditions where the size of a database is expected to be very large, i.e. comparable with the default 4GB threshold. Signed-off-by: Ilya Maximets <i.maximets@ovn.org> --- NEWS | 1 + ovsdb/ovsdb-server.1.in | 5 ++++ ovsdb/raft.c | 55 +++++++++++++++++++++++++++++++++++++---- 3 files changed, 56 insertions(+), 5 deletions(-)