diff mbox series

[ovs-dev,4/7] raft: Avoid busy loop during leader election.

Message ID 1582942030-31096-4-git-send-email-hzhou@ovn.org
State Accepted
Commit bb66a0a6eb7971556504a294f5cf796d1d72db25
Headers show
Series [ovs-dev,1/7] raft-rpc.c: Fix message format. | expand

Commit Message

Han Zhou Feb. 29, 2020, 2:07 a.m. UTC
When a server doesn't see a leader yet, e.g. during leader re-election,
and a transaction comes from a client, it causes a 100% CPU busy loop.
With debug logging enabled, the log looks like:

2020-02-28T04:04:35.631Z|00059|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
2020-02-28T04:04:35.631Z|00062|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
2020-02-28T04:04:35.631Z|00065|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
2020-02-28T04:04:35.631Z|00068|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
2020-02-28T04:04:35.631Z|00071|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
2020-02-28T04:04:35.631Z|00074|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
2020-02-28T04:04:35.631Z|00077|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164
...

The problem is that in ovsdb_trigger_try(), all cluster errors are treated
as temporary errors and retried immediately. This patch fixes it by introducing
'run_triggers_now', which tells whether an immediate retry is needed. When the
cluster error has the detail 'not leader', we don't retry immediately, but
wait for the next poll event to trigger the retry. When the 'not leader'
status changes, there must be an event, i.e. a raft RPC that changes the
status, so the trigger is guaranteed to run, without a busy loop.

Signed-off-by: Han Zhou <hzhou@ovn.org>
---
 ovsdb/ovsdb.c       |  2 +-
 ovsdb/ovsdb.h       |  1 +
 ovsdb/transaction.c |  2 +-
 ovsdb/trigger.c     | 11 +++++++++--
 4 files changed, 12 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/ovsdb/ovsdb.c b/ovsdb/ovsdb.c
index cfc96b3..7e683e6 100644
--- a/ovsdb/ovsdb.c
+++ b/ovsdb/ovsdb.c
@@ -414,7 +414,7 @@  ovsdb_create(struct ovsdb_schema *schema, struct ovsdb_storage *storage)
     db->storage = storage;
     ovs_list_init(&db->monitors);
     ovs_list_init(&db->triggers);
-    db->run_triggers = false;
+    db->run_triggers_now = db->run_triggers = false;
 
     shash_init(&db->tables);
     if (schema) {
diff --git a/ovsdb/ovsdb.h b/ovsdb/ovsdb.h
index 32e5333..5c30a83 100644
--- a/ovsdb/ovsdb.h
+++ b/ovsdb/ovsdb.h
@@ -83,6 +83,7 @@  struct ovsdb {
     /* Triggers. */
     struct ovs_list triggers;   /* Contains "struct ovsdb_trigger"s. */
     bool run_triggers;
+    bool run_triggers_now;
 
     struct ovsdb_table *rbac_role;
 
diff --git a/ovsdb/transaction.c b/ovsdb/transaction.c
index 369436b..8ffefcf 100644
--- a/ovsdb/transaction.c
+++ b/ovsdb/transaction.c
@@ -967,7 +967,7 @@  ovsdb_txn_complete(struct ovsdb_txn *txn)
 {
     if (!ovsdb_txn_is_empty(txn)) {
 
-        txn->db->run_triggers = true;
+        txn->db->run_triggers_now = txn->db->run_triggers = true;
         ovsdb_monitors_commit(txn->db, txn);
         ovsdb_error_assert(for_each_txn_row(txn, ovsdb_txn_update_weak_refs));
         ovsdb_error_assert(for_each_txn_row(txn, ovsdb_txn_row_commit));
diff --git a/ovsdb/trigger.c b/ovsdb/trigger.c
index 7e62e90..0372302 100644
--- a/ovsdb/trigger.c
+++ b/ovsdb/trigger.c
@@ -141,7 +141,7 @@  ovsdb_trigger_run(struct ovsdb *db, long long int now)
     struct ovsdb_trigger *t, *next;
 
     bool run_triggers = db->run_triggers;
-    db->run_triggers = false;
+    db->run_triggers_now = db->run_triggers = false;
 
     bool disconnect_all = false;
 
@@ -160,7 +160,7 @@  ovsdb_trigger_run(struct ovsdb *db, long long int now)
 void
 ovsdb_trigger_wait(struct ovsdb *db, long long int now)
 {
-    if (db->run_triggers) {
+    if (db->run_triggers_now) {
         poll_immediate_wake();
     } else {
         long long int deadline = LLONG_MAX;
@@ -319,9 +319,16 @@  ovsdb_trigger_try(struct ovsdb_trigger *t, long long int now)
             if (!strcmp(ovsdb_error_get_tag(error), "cluster error")) {
                 /* Temporary error.  Transition back to "initialized" state to
                  * try again. */
+                char *err_s = ovsdb_error_to_string(error);
+                VLOG_DBG("cluster error %s", err_s);
+
                 jsonrpc_msg_destroy(t->reply);
                 t->reply = NULL;
                 t->db->run_triggers = true;
+                if (!strstr(err_s, "not leader")) {
+                    t->db->run_triggers_now = true;
+                }
+                free(err_s);
                 ovsdb_error_destroy(error);
             } else {
                 /* Permanent error.  Transition to "completed" state to report