diff mbox series

[ovs-dev,3/4] raft.c: Set candidate_retrying if no leader elected since last election.

Message ID 1565713402-5458-3-git-send-email-hzhou8@ebay.com
State Superseded
Headers show
Series [ovs-dev,1/4] raft.c: Move raft_reset_ping_timer() out of the loop. | expand

Commit Message

Han Zhou Aug. 13, 2019, 4:23 p.m. UTC
From: Han Zhou <hzhou8@ebay.com>

candiate_retrying is used to determine if the current node is disconnected
from the cluster when the node is in candiate role. However, a node
can flap between candidate and follower role before a leader is elected
when majority of the cluster is down, so is_connected() will flap, too, which
confuses clients.

This patch avoids the flapping with the help of a new member had_leader,
so that if no leader was elected since last election, we know we are
still retrying, and keep as disconnected from the cluster.

Signed-off-by: Han Zhou <hzhou8@ebay.com>
---
 ovsdb/raft.c | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

Comments

Han Zhou Aug. 14, 2019, 10:40 p.m. UTC | #1
On Tue, Aug 13, 2019 at 9:23 AM Han Zhou <zhouhan@gmail.com> wrote:
>
> From: Han Zhou <hzhou8@ebay.com>
>
> candiate_retrying is used to determine if the current node is disconnected
> from the cluster when the node is in candiate role. However, a node
> can flap between candidate and follower role before a leader is elected
> when majority of the cluster is down, so is_connected() will flap, too,
which
> confuses clients.
>
> This patch avoids the flapping with the help of a new member had_leader,
> so that if no leader was elected since last election, we know we are
> still retrying, and keep as disconnected from the cluster.
>
> Signed-off-by: Han Zhou <hzhou8@ebay.com>
> ---
>  ovsdb/raft.c | 25 ++++++++++++++++++++-----
>  1 file changed, 20 insertions(+), 5 deletions(-)
>
> diff --git a/ovsdb/raft.c b/ovsdb/raft.c
> index 1c38b3b..63c3081 100644
> --- a/ovsdb/raft.c
> +++ b/ovsdb/raft.c
> @@ -286,8 +286,11 @@ struct raft {
>
>      /* Candidates only.  Reinitialized at start of election. */
>      int n_votes;                /* Number of votes for me. */
> -    bool candidate_retrying;    /* The first round of election timed-out
and it
> -                                   is now retrying. */
> +    bool candidate_retrying;    /* The earlier election timed-out and we
are
> +                                   now retrying. */
> +    bool had_leader;            /* There has been leader elected since
last
> +                                   election initiated. This is to help
setting
> +                                   candidate_retrying. */
>  };
>
>  /* All Raft structures. */
> @@ -345,6 +348,7 @@ static bool raft_handle_write_error(struct raft *,
struct ovsdb_error *);
>
>  static void raft_run_reconfigure(struct raft *);
>
> +static void raft_set_leader(struct raft *, const struct uuid *sid);
>  static struct raft_server *
>  raft_find_server(const struct raft *raft, const struct uuid *sid)
>  {
> @@ -1616,8 +1620,11 @@ raft_start_election(struct raft *raft, bool
leadership_transfer)
>      }
>
>      ovs_assert(raft->role != RAFT_LEADER);
> -    raft->candidate_retrying = (raft->role == RAFT_CANDIDATE);
>      raft->role = RAFT_CANDIDATE;
> +    /* If there was no leader elected since last election, we know we are
> +     * retrying now. */
> +    raft->candidate_retrying = !raft->had_leader;
> +    raft->had_leader = false;
>
>      raft->n_votes = 0;
>
> @@ -2450,6 +2457,14 @@ raft_server_init_leader(struct raft *raft, struct
raft_server *s)
>  }
>
>  static void
> +raft_set_leader(struct raft *raft, const struct uuid *sid)
> +{
> +    raft->leader_sid = *sid;
> +    raft->had_leader = true;
> +    raft->candidate_retrying = false;
> +}
> +
> +static void
>  raft_become_leader(struct raft *raft)
>  {
>      log_all_commands(raft);
> @@ -2461,7 +2476,7 @@ raft_become_leader(struct raft *raft)
>
>      ovs_assert(raft->role != RAFT_LEADER);
>      raft->role = RAFT_LEADER;
> -    raft->leader_sid = raft->sid;
> +    raft_set_leader(raft, &raft->sid);
>      raft->election_timeout = LLONG_MAX;
>      raft_reset_ping_timer(raft);
>
> @@ -2855,7 +2870,7 @@ raft_update_leader(struct raft *raft, const struct
uuid *sid)
>                        raft_get_nickname(raft, sid, buf, sizeof buf),
>                        raft->term);
>          }
> -        raft->leader_sid = *sid;
> +        raft_set_leader(raft, sid);
>
>          /* Record the leader to the log.  This is not used by the
algorithm
>           * (although it could be, for quick restart), but it is used for
> --
> 2.1.0
>

Sorry that I forgot to update the condition in raft_is_connected(). I fixed
it and added a test case in v2:
https://patchwork.ozlabs.org/patch/1147277/
diff mbox series

Patch

diff --git a/ovsdb/raft.c b/ovsdb/raft.c
index 1c38b3b..63c3081 100644
--- a/ovsdb/raft.c
+++ b/ovsdb/raft.c
@@ -286,8 +286,11 @@  struct raft {
 
     /* Candidates only.  Reinitialized at start of election. */
     int n_votes;                /* Number of votes for me. */
-    bool candidate_retrying;    /* The first round of election timed-out and it
-                                   is now retrying. */
+    bool candidate_retrying;    /* The earlier election timed-out and we are
+                                   now retrying. */
+    bool had_leader;            /* There has been leader elected since last
+                                   election initiated. This is to help setting
+                                   candidate_retrying. */
 };
 
 /* All Raft structures. */
@@ -345,6 +348,7 @@  static bool raft_handle_write_error(struct raft *, struct ovsdb_error *);
 
 static void raft_run_reconfigure(struct raft *);
 
+static void raft_set_leader(struct raft *, const struct uuid *sid);
 static struct raft_server *
 raft_find_server(const struct raft *raft, const struct uuid *sid)
 {
@@ -1616,8 +1620,11 @@  raft_start_election(struct raft *raft, bool leadership_transfer)
     }
 
     ovs_assert(raft->role != RAFT_LEADER);
-    raft->candidate_retrying = (raft->role == RAFT_CANDIDATE);
     raft->role = RAFT_CANDIDATE;
+    /* If there was no leader elected since last election, we know we are
+     * retrying now. */
+    raft->candidate_retrying = !raft->had_leader;
+    raft->had_leader = false;
 
     raft->n_votes = 0;
 
@@ -2450,6 +2457,14 @@  raft_server_init_leader(struct raft *raft, struct raft_server *s)
 }
 
 static void
+raft_set_leader(struct raft *raft, const struct uuid *sid)
+{
+    raft->leader_sid = *sid;
+    raft->had_leader = true;
+    raft->candidate_retrying = false;
+}
+
+static void
 raft_become_leader(struct raft *raft)
 {
     log_all_commands(raft);
@@ -2461,7 +2476,7 @@  raft_become_leader(struct raft *raft)
 
     ovs_assert(raft->role != RAFT_LEADER);
     raft->role = RAFT_LEADER;
-    raft->leader_sid = raft->sid;
+    raft_set_leader(raft, &raft->sid);
     raft->election_timeout = LLONG_MAX;
     raft_reset_ping_timer(raft);
 
@@ -2855,7 +2870,7 @@  raft_update_leader(struct raft *raft, const struct uuid *sid)
                       raft_get_nickname(raft, sid, buf, sizeof buf),
                       raft->term);
         }
-        raft->leader_sid = *sid;
+        raft_set_leader(raft, sid);
 
         /* Record the leader to the log.  This is not used by the algorithm
          * (although it could be, for quick restart), but it is used for