
[ovs-dev] raft: Avoid sending equal snapshots.

Message ID 20200523173412.477681-1-i.maximets@ovn.org
State Accepted
Series [ovs-dev] raft: Avoid sending equal snapshots.

Commit Message

Ilya Maximets May 23, 2020, 5:34 p.m. UTC
Snapshots are huge.  In some cases we could receive several outdated
append replies from the remote server.  This could happen at high scale
if the remote server is overloaded and not able to process all the raft
requests in time.  In reaction to each outdated append reply we send a
full database snapshot.  While the remote server is already overloaded,
those snapshots will be stuck in the jsonrpc backlog for a long time,
making it grow to a few GB.  Since the remote server wasn't able to
process incoming messages in a timely manner, it will likely not be
able to process the snapshots either, leading to the same situation
with low chances to recover.  The remote server will likely be stuck in
the 'candidate' state, and the other servers will grow their memory
consumption due to growing jsonrpc backlogs:

jsonrpc|INFO|excessive sending backlog, jsonrpc: ssl:192.16.0.3:6644,
             num of msgs: 3795, backlog: 8838994624.

This patch avoids that situation by not sending identical snapshot
install requests.  This helps maintain reasonable memory consumption
and allows the cluster to recover at a larger scale.

Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
---

I'm not an expert in this code, so there might be a better way to track
equal snapshot installation requests.  Suggestions are welcome.

 ovsdb/raft-private.c |  1 +
 ovsdb/raft-private.h |  4 ++++
 ovsdb/raft.c         | 39 ++++++++++++++++++++++++++++++++++++++-
 3 files changed, 43 insertions(+), 1 deletion(-)

Comments

Han Zhou May 23, 2020, 6:36 p.m. UTC | #1
On Sat, May 23, 2020 at 10:34 AM Ilya Maximets <i.maximets@ovn.org> wrote:
>
> Snapshots are huge.  In some cases we could receive several outdated
> append replies from the remote server.  This could happen in high
> scale cases if the remote server is overloaded and not able to process
> all the raft requests in time.  As an action to each outdated append
> reply we're sending full database snapshot.  While remote server is
> already overloaded those snapshots will stuck in jsonrpc backlog for
> a long time making it grow up to few GB.  Since remote server wasn't
> able to timely process incoming messages it will likely not able to
> process snapshots leading to the same situation with low chances to
> recover.  Remote server will likely stuck in 'candidate' state, other
> servers will grow their memory consumption due to growing jsonrpc
> backlogs:

Hi Ilya, this patch LGTM. I'm just not clear about the last part of the
commit message. Why would the remote server be stuck in 'candidate' state if
there are pending messages from the leader for it to handle? If the follower
was busy processing older messages, it wouldn't have had a chance to see the
election timer expire without receiving a heartbeat from the leader, so it
shouldn't try to start voting, right? Otherwise:

Acked-by: Han Zhou <hzhou@ovn.org>

>
> jsonrpc|INFO|excessive sending backlog, jsonrpc: ssl:192.16.0.3:6644,
>              num of msgs: 3795, backlog: 8838994624.
>
> This patch is trying to avoid that situation by avoiding sending of
> equal snapshot install requests.  This helps maintain reasonable memory
> consumption and allows the cluster to recover on a larger scale.
>
> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
> ---
>
> I'm not an expert in this code, so there might be better way to track
> equal snapshot installation requests.  Suggestions are welcome.
>
>  ovsdb/raft-private.c |  1 +
>  ovsdb/raft-private.h |  4 ++++
>  ovsdb/raft.c         | 39 ++++++++++++++++++++++++++++++++++++++-
>  3 files changed, 43 insertions(+), 1 deletion(-)
>
> diff --git a/ovsdb/raft-private.c b/ovsdb/raft-private.c
> index 26d39a087..9468fdaf4 100644
> --- a/ovsdb/raft-private.c
> +++ b/ovsdb/raft-private.c
> @@ -137,6 +137,7 @@ raft_server_destroy(struct raft_server *s)
>      if (s) {
>          free(s->address);
>          free(s->nickname);
> +        free(s->last_install_snapshot_request);
>          free(s);
>      }
>  }
> diff --git a/ovsdb/raft-private.h b/ovsdb/raft-private.h
> index ac8656d42..1f366b4ab 100644
> --- a/ovsdb/raft-private.h
> +++ b/ovsdb/raft-private.h
> @@ -27,6 +27,7 @@
>
>  struct ds;
>  struct ovsdb_parser;
> +struct raft_install_snapshot_request;
>
>  /* Formatting server IDs and cluster IDs for use in human-readable logs.  Do
>   * not use these in cases where the whole server or cluster ID is needed; use
> @@ -83,6 +84,9 @@ struct raft_server {
>      bool replied;            /* Reply to append_request was received from this
>                                  node during current election_timeout interval.
>                                  */
> +    /* Copy of the last install_snapshot_request sent to this server. */
> +    struct raft_install_snapshot_request *last_install_snapshot_request;
> +
>      /* For use in adding and removing servers: */
>      struct uuid requester_sid;  /* Nonzero if requested via RPC. */
>      struct unixctl_conn *requester_conn; /* Only if requested via unixctl. */
> diff --git a/ovsdb/raft.c b/ovsdb/raft.c
> index 515eadab3..708b0624c 100644
> --- a/ovsdb/raft.c
> +++ b/ovsdb/raft.c
> @@ -1421,8 +1421,20 @@ raft_conn_run(struct raft *raft, struct raft_conn *conn)
>      jsonrpc_session_run(conn->js);
>
>      unsigned int new_seqno = jsonrpc_session_get_seqno(conn->js);
> -    bool just_connected = (new_seqno != conn->js_seqno
> +    bool reconnected = new_seqno != conn->js_seqno;
> +    bool just_connected = (reconnected
>                             && jsonrpc_session_is_connected(conn->js));
> +
> +    if (reconnected) {
> +        /* Clear 'last_install_snapshot_request' since it might not reach the
> +         * destination or server was restarted. */
> +        struct raft_server *server = raft_find_server(raft, &conn->sid);
> +        if (server) {
> +            free(server->last_install_snapshot_request);
> +            server->last_install_snapshot_request = NULL;
> +        }
> +    }
> +
>      conn->js_seqno = new_seqno;
>      if (just_connected) {
>          if (raft->joining) {
> @@ -3296,6 +3308,31 @@ raft_send_install_snapshot_request(struct raft *raft,
>              .election_timer = raft->election_timer, /* use latest value */
>          }
>      };
> +
> +    if (s->last_install_snapshot_request) {
> +        struct raft_install_snapshot_request *old, *new;
> +
> +        old = s->last_install_snapshot_request;
> +        new = &rpc.install_snapshot_request;
> +        if (   old->term           == new->term
> +            && old->last_index     == new->last_index
> +            && old->last_term      == new->last_term
> +            && old->last_servers   == new->last_servers
> +            && old->data           == new->data
> +            && old->election_timer == new->election_timer
> +            && uuid_equals(&old->last_eid, &new->last_eid)) {
> +            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
> +
> +            VLOG_WARN_RL(&rl, "not sending exact same install_snapshot_request"
> +                              " to server %s again", s->nickname);
> +            return;
> +        }
> +    }
> +    free(s->last_install_snapshot_request);
> +    CONST_CAST(struct raft_server *, s)->last_install_snapshot_request
> +        = xmemdup(&rpc.install_snapshot_request,
> +                  sizeof rpc.install_snapshot_request);
> +
>      raft_send(raft, &rpc);
>  }
>
> --
> 2.25.4
>
Ilya Maximets May 25, 2020, 1:11 p.m. UTC | #2
On 5/23/20 8:36 PM, Han Zhou wrote:
> 
> 
> On Sat, May 23, 2020 at 10:34 AM Ilya Maximets <i.maximets@ovn.org <mailto:i.maximets@ovn.org>> wrote:
>>
>> Snapshots are huge.  In some cases we could receive several outdated
>> append replies from the remote server.  This could happen in high
>> scale cases if the remote server is overloaded and not able to process
>> all the raft requests in time.  As an action to each outdated append
>> reply we're sending full database snapshot.  While remote server is
>> already overloaded those snapshots will stuck in jsonrpc backlog for
>> a long time making it grow up to few GB.  Since remote server wasn't
>> able to timely process incoming messages it will likely not able to
>> process snapshots leading to the same situation with low chances to
>> recover.  Remote server will likely stuck in 'candidate' state, other
>> servers will grow their memory consumption due to growing jsonrpc
>> backlogs:
> 
> Hi Ilya, this patch LGTM. Just not not clear about this last part of the commit message. Why would remote server stuck in 'candidate' state if there are pending messages from leader for it to handle? If the follower was busy processing older messages, it wouldn't have had a chance to see election timer timeout without receiving heartbeat from leader, so it shouldn't try to start voting, right?

I'm not sure what exactly happens, but that is what I see in my setup.
The overloaded server sends vote requests almost every second with the term
increased by 1 each time.  I think it doesn't see heartbeats since it
processes only a few messages at a time, and processing a single message,
such as applying the snapshot, could lead to election timer expiration.


> Otherwise:
> 
> Acked-by: Han Zhou <hzhou@ovn.org <mailto:hzhou@ovn.org>>
> 
>>
>> jsonrpc|INFO|excessive sending backlog, jsonrpc: ssl:192.16.0.3:6644 <http://192.16.0.3:6644>,
>>              num of msgs: 3795, backlog: 8838994624.
>>
>> This patch is trying to avoid that situation by avoiding sending of
>> equal snapshot install requests.  This helps maintain reasonable memory
>> consumption and allows the cluster to recover on a larger scale.
>>
>> Signed-off-by: Ilya Maximets <i.maximets@ovn.org <mailto:i.maximets@ovn.org>>
Han Zhou May 25, 2020, 11:54 p.m. UTC | #3
On Mon, May 25, 2020 at 6:11 AM Ilya Maximets <i.maximets@ovn.org> wrote:
>
> On 5/23/20 8:36 PM, Han Zhou wrote:
> >
> >
> > On Sat, May 23, 2020 at 10:34 AM Ilya Maximets <i.maximets@ovn.org <mailto:i.maximets@ovn.org>> wrote:
> >>
> >> Snapshots are huge.  In some cases we could receive several outdated
> >> append replies from the remote server.  This could happen in high
> >> scale cases if the remote server is overloaded and not able to process
> >> all the raft requests in time.  As an action to each outdated append
> >> reply we're sending full database snapshot.  While remote server is
> >> already overloaded those snapshots will stuck in jsonrpc backlog for
> >> a long time making it grow up to few GB.  Since remote server wasn't
> >> able to timely process incoming messages it will likely not able to
> >> process snapshots leading to the same situation with low chances to
> >> recover.  Remote server will likely stuck in 'candidate' state, other
> >> servers will grow their memory consumption due to growing jsonrpc
> >> backlogs:
> >
> > Hi Ilya, this patch LGTM. Just not not clear about this last part of the commit message. Why would remote server stuck in 'candidate' state if there are pending messages from leader for it to handle? If the follower was busy processing older messages, it wouldn't have had a chance to see election timer timeout without receiving heartbeat from leader, so it shouldn't try to start voting, right?
>
> I'm not sure what exactly happens, but that is what I see in my setup.
> Overloaded server sends vote requests almost each second with the term
> increased by 1 each time.  I think it doesn't see heartbeats since it
> processes only few messages at a time and a single message processing
> like applying the snapshot could lead to election timer expiration.
>
It processes at most 50 messages at a time for each connection in
raft_conn_run(), which should guarantee append_request (heartbeat) is seen.
However, it is possible that the connection is lost due to the inactivity
probe, and then an append_request could be missed, causing re-election. Did
you see such a case after applying the patch that disables the inactivity
probe for raft connections?
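
For context, the receive side of raft_conn_run() is roughly the loop below.
This is a simplified sketch written from memory, not a verbatim copy of
ovsdb/raft.c, so the details may differ:

    /* Simplified sketch of the per-connection receive loop. */
    for (size_t i = 0; i < 50; i++) {
        struct jsonrpc_msg *msg = jsonrpc_session_recv(conn->js);
        if (!msg) {
            break;              /* Nothing more has arrived on the stream. */
        }

        union raft_rpc rpc;
        struct ovsdb_error *error = raft_rpc_from_jsonrpc(&raft->cid,
                                                          &raft->sid,
                                                          msg, &rpc);
        jsonrpc_msg_destroy(msg);
        if (!error) {
            raft_handle_rpc(raft, &rpc);
            raft_rpc_uninit(&rpc);
        } else {
            ovsdb_error_destroy(error);
        }
    }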

>
> > Otherwise:
> >
> > Acked-by: Han Zhou <hzhou@ovn.org <mailto:hzhou@ovn.org>>
> >
> >>
> >> jsonrpc|INFO|excessive sending backlog, jsonrpc: ssl:192.16.0.3:6644 <http://192.16.0.3:6644>,
> >>              num of msgs: 3795, backlog: 8838994624.
> >>
> >> This patch is trying to avoid that situation by avoiding sending of
> >> equal snapshot install requests.  This helps maintain reasonable memory
> >> consumption and allows the cluster to recover on a larger scale.
> >>
> >> Signed-off-by: Ilya Maximets <i.maximets@ovn.org <mailto:i.maximets@ovn.org>>
Ilya Maximets May 26, 2020, 5:50 p.m. UTC | #4
On 5/26/20 1:54 AM, Han Zhou wrote:
> 
> 
> On Mon, May 25, 2020 at 6:11 AM Ilya Maximets <i.maximets@ovn.org <mailto:i.maximets@ovn.org>> wrote:
>>
>> On 5/23/20 8:36 PM, Han Zhou wrote:
>> >
>> >
>> > On Sat, May 23, 2020 at 10:34 AM Ilya Maximets <i.maximets@ovn.org <mailto:i.maximets@ovn.org> <mailto:i.maximets@ovn.org <mailto:i.maximets@ovn.org>>> wrote:
>> >>
>> >> Snapshots are huge.  In some cases we could receive several outdated
>> >> append replies from the remote server.  This could happen in high
>> >> scale cases if the remote server is overloaded and not able to process
>> >> all the raft requests in time.  As an action to each outdated append
>> >> reply we're sending full database snapshot.  While remote server is
>> >> already overloaded those snapshots will stuck in jsonrpc backlog for
>> >> a long time making it grow up to few GB.  Since remote server wasn't
>> >> able to timely process incoming messages it will likely not able to
>> >> process snapshots leading to the same situation with low chances to
>> >> recover.  Remote server will likely stuck in 'candidate' state, other
>> >> servers will grow their memory consumption due to growing jsonrpc
>> >> backlogs:
>> >
>> > Hi Ilya, this patch LGTM. Just not not clear about this last part of the commit message. Why would remote server stuck in 'candidate' state if there are pending messages from leader for it to handle? If the follower was busy processing older messages, it wouldn't have had a chance to see election timer timeout without receiving heartbeat from leader, so it shouldn't try to start voting, right?
>>
>> I'm not sure what exactly happens, but that is what I see in my setup.
>> Overloaded server sends vote requests almost each second with the term
>> increased by 1 each time.  I think it doesn't see heartbeats since it
>> processes only few messages at a time and a single message processing
>> like applying the snapshot could lead to election timer expiration.
>>
> It processes at most 50 messages at a time for each connection in raft_conn_run(), which should guarantee append_request (heartbeat) is seen. However, it is possible that the connection is lost due to inactivity probe, then append_request could be missed, causing re-election. Did you see such case after applying the patch that disables inactivity probe for raft connections?

Yes, I tested with inactivity probe disabled.

An excessive send backlog doesn't mean that we always have something to receive
on the other side.  The jsonrpc backlog is stored on the sender side, and each
time the sender calls jsonrpc_run() one message from that backlog is pushed to
stream_send().  In our case stream-ssl buffers this one message, or even only
part of it, for sending.  And only that one chunk of data can be received on
the other side without additional action from the sender.  To receive more data
on the receiver side, the sender has to call jsonrpc_run() --> stream_ssl_run()
again.  So, we're not always receiving 50 messages during a single
raft_conn_run().  In practice, we're receiving only a few of them, i.e. we
might easily miss some appends or heartbeats and start voting.
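
To illustrate the pacing, here is a toy model (not OVS code).  It optimistically
assumes stream-ssl ships one whole message per jsonrpc_run() call, and it takes
the message count from the log line in the commit message:

    /* toy_backlog.c: one jsonrpc message leaves the sender-side backlog per
     * poll iteration, while the receiver may read up to 50 messages per
     * raft_conn_run() but only sees what has actually arrived. */
    #include <stdio.h>

    int main(void)
    {
        int backlog = 3795;   /* messages queued on the sender (from the log) */
        int in_flight = 0;    /* delivered to the peer but not yet read */
        int iterations = 0;

        while (backlog > 0 || in_flight > 0) {
            if (backlog > 0) {    /* One jsonrpc_run() per poll loop pushes  */
                backlog--;        /* at most one message from the backlog    */
                in_flight++;      /* into stream_send().                     */
            }
            int batch = in_flight < 50 ? in_flight : 50;    /* receive cap */
            in_flight -= batch;
            iterations++;
        }
        printf("drained 3795 messages in %d poll iterations\n", iterations);
        return 0;
    }

So even with the 50-message cap, the receiver effectively sees about one
message per poll iteration here.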

I'm not 100% sure that this is what really happens, but it seems possible.

Also, old messages with a stale term don't reset the election timeout.  This
might contribute to the issue as well.

What do you think?

> 
>>
>> > Otherwise:
>> >
>> > Acked-by: Han Zhou <hzhou@ovn.org <mailto:hzhou@ovn.org> <mailto:hzhou@ovn.org <mailto:hzhou@ovn.org>>>
>> >
>> >>
>> >> jsonrpc|INFO|excessive sending backlog, jsonrpc: ssl:192.16.0.3:6644 <http://192.16.0.3:6644> <http://192.16.0.3:6644>,
>> >>              num of msgs: 3795, backlog: 8838994624.
>> >>
>> >> This patch is trying to avoid that situation by avoiding sending of
>> >> equal snapshot install requests.  This helps maintain reasonable memory
>> >> consumption and allows the cluster to recover on a larger scale.
>> >>
>> >> Signed-off-by: Ilya Maximets <i.maximets@ovn.org <mailto:i.maximets@ovn.org> <mailto:i.maximets@ovn.org <mailto:i.maximets@ovn.org>>>
Han Zhou May 26, 2020, 9:38 p.m. UTC | #5
On Tue, May 26, 2020 at 10:50 AM Ilya Maximets <i.maximets@ovn.org> wrote:
>
> On 5/26/20 1:54 AM, Han Zhou wrote:
> >
> >
> > On Mon, May 25, 2020 at 6:11 AM Ilya Maximets <i.maximets@ovn.org <mailto:i.maximets@ovn.org>> wrote:
> >>
> >> On 5/23/20 8:36 PM, Han Zhou wrote:
> >> >
> >> >
> >> > On Sat, May 23, 2020 at 10:34 AM Ilya Maximets <i.maximets@ovn.org <mailto:i.maximets@ovn.org> <mailto:i.maximets@ovn.org <mailto:i.maximets@ovn.org>>> wrote:
> >> >>
> >> >> Snapshots are huge.  In some cases we could receive several outdated
> >> >> append replies from the remote server.  This could happen in high
> >> >> scale cases if the remote server is overloaded and not able to process
> >> >> all the raft requests in time.  As an action to each outdated append
> >> >> reply we're sending full database snapshot.  While remote server is
> >> >> already overloaded those snapshots will stuck in jsonrpc backlog for
> >> >> a long time making it grow up to few GB.  Since remote server wasn't
> >> >> able to timely process incoming messages it will likely not able to
> >> >> process snapshots leading to the same situation with low chances to
> >> >> recover.  Remote server will likely stuck in 'candidate' state, other
> >> >> servers will grow their memory consumption due to growing jsonrpc
> >> >> backlogs:
> >> >
> >> > Hi Ilya, this patch LGTM. Just not not clear about this last part of the commit message. Why would remote server stuck in 'candidate' state if there are pending messages from leader for it to handle? If the follower was busy processing older messages, it wouldn't have had a chance to see election timer timeout without receiving heartbeat from leader, so it shouldn't try to start voting, right?
> >>
> >> I'm not sure what exactly happens, but that is what I see in my setup.
> >> Overloaded server sends vote requests almost each second with the term
> >> increased by 1 each time.  I think it doesn't see heartbeats since it
> >> processes only few messages at a time and a single message processing
> >> like applying the snapshot could lead to election timer expiration.
> >>
> > It processes at most 50 messages at a time for each connection in raft_conn_run(), which should guarantee append_request (heartbeat) is seen. However, it is possible that the connection is lost due to inactivity probe, then append_request could be missed, causing re-election. Did you see such case after applying the patch that disables inactivity probe for raft connections?
>
> Yes, I tested with inactivity probe disabled.
>
> Excessive send backlog doesn't mean that we always have something to receive
> on the other side.  jsonrpc backlog is stored on a sender side and each time
> sender calls jsonrpc_run() one message from that backlog pushed to stream_send().
> In our case steam-ssl buffers this one or even part of this one message for
> sending.  And only that one chunk of data could be continuously received on
> the other side without additional actions from the sender.  To receive more
> data on receiver side, sender should call jsonrpc_run() --> stream_ssl_run()
> again.  So, we're not always receiving 50 messages during a single
> raft_conn_run().  In practice, we're receiving only few of them, i.e. might
> easily skip some appends or heartbeats and start voting.
>
> I'm not 100% sure that this is what really happens, but it seems possible.
>
> Also, old messages with stale term doesn't reset election timeout.  This might
> contribute to the issue as well.
>
> What do you think?
>

Thanks for the explanation. I wasn't aware of that stream_send() behavior. It
makes sense!

> >
> >>
> >> > Otherwise:
> >> >
> >> > Acked-by: Han Zhou <hzhou@ovn.org <mailto:hzhou@ovn.org> <mailto:hzhou@ovn.org <mailto:hzhou@ovn.org>>>
> >> >
> >> >>
> >> >> jsonrpc|INFO|excessive sending backlog, jsonrpc: ssl:192.16.0.3:6644 <http://192.16.0.3:6644> <http://192.16.0.3:6644>,
> >> >>              num of msgs: 3795, backlog: 8838994624.
> >> >>
> >> >> This patch is trying to avoid that situation by avoiding sending of
> >> >> equal snapshot install requests.  This helps maintain reasonable
memory
> >> >> consumption and allows the cluster to recover on a larger scale.
> >> >>
> >> >> Signed-off-by: Ilya Maximets <i.maximets@ovn.org <mailto:i.maximets@ovn.org> <mailto:i.maximets@ovn.org <mailto:i.maximets@ovn.org>>>
>
Ilya Maximets May 28, 2020, 5:06 p.m. UTC | #6
On 5/23/20 8:36 PM, Han Zhou wrote:
> 
> 
> On Sat, May 23, 2020 at 10:34 AM Ilya Maximets <i.maximets@ovn.org <mailto:i.maximets@ovn.org>> wrote:
>>
>> Snapshots are huge.  In some cases we could receive several outdated
>> append replies from the remote server.  This could happen in high
>> scale cases if the remote server is overloaded and not able to process
>> all the raft requests in time.  As an action to each outdated append
>> reply we're sending full database snapshot.  While remote server is
>> already overloaded those snapshots will stuck in jsonrpc backlog for
>> a long time making it grow up to few GB.  Since remote server wasn't
>> able to timely process incoming messages it will likely not able to
>> process snapshots leading to the same situation with low chances to
>> recover.  Remote server will likely stuck in 'candidate' state, other
>> servers will grow their memory consumption due to growing jsonrpc
>> backlogs:
> 
> Hi Ilya, this patch LGTM. Just not not clear about this last part of the commit message. Why would remote server stuck in 'candidate' state if there are pending messages from leader for it to handle? If the follower was busy processing older messages, it wouldn't have had a chance to see election timer timeout without receiving heartbeat from leader, so it shouldn't try to start voting, right? Otherwise:
> 
> Acked-by: Han Zhou <hzhou@ovn.org <mailto:hzhou@ovn.org>>

Thanks!  Applied to master.

Best regards, Ilya Maximets.
Ilya Maximets June 5, 2020, 2:54 p.m. UTC | #7
On 5/28/20 7:06 PM, Ilya Maximets wrote:
> On 5/23/20 8:36 PM, Han Zhou wrote:
>>
>>
>> On Sat, May 23, 2020 at 10:34 AM Ilya Maximets <i.maximets@ovn.org <mailto:i.maximets@ovn.org>> wrote:
>>>
>>> Snapshots are huge.  In some cases we could receive several outdated
>>> append replies from the remote server.  This could happen in high
>>> scale cases if the remote server is overloaded and not able to process
>>> all the raft requests in time.  As an action to each outdated append
>>> reply we're sending full database snapshot.  While remote server is
>>> already overloaded those snapshots will stuck in jsonrpc backlog for
>>> a long time making it grow up to few GB.  Since remote server wasn't
>>> able to timely process incoming messages it will likely not able to
>>> process snapshots leading to the same situation with low chances to
>>> recover.  Remote server will likely stuck in 'candidate' state, other
>>> servers will grow their memory consumption due to growing jsonrpc
>>> backlogs:
>>
>> Hi Ilya, this patch LGTM. Just not not clear about this last part of the commit message. Why would remote server stuck in 'candidate' state if there are pending messages from leader for it to handle? If the follower was busy processing older messages, it wouldn't have had a chance to see election timer timeout without receiving heartbeat from leader, so it shouldn't try to start voting, right? Otherwise:
>>
>> Acked-by: Han Zhou <hzhou@ovn.org <mailto:hzhou@ovn.org>>
> 
> Thanks!  Applied to master.

As agreed during the OVN weekly IRC meeting, I also backported this fix
to branch-2.13.

Best regards, Ilya Maximets.

Patch

diff --git a/ovsdb/raft-private.c b/ovsdb/raft-private.c
index 26d39a087..9468fdaf4 100644
--- a/ovsdb/raft-private.c
+++ b/ovsdb/raft-private.c
@@ -137,6 +137,7 @@  raft_server_destroy(struct raft_server *s)
     if (s) {
         free(s->address);
         free(s->nickname);
+        free(s->last_install_snapshot_request);
         free(s);
     }
 }
diff --git a/ovsdb/raft-private.h b/ovsdb/raft-private.h
index ac8656d42..1f366b4ab 100644
--- a/ovsdb/raft-private.h
+++ b/ovsdb/raft-private.h
@@ -27,6 +27,7 @@ 
 
 struct ds;
 struct ovsdb_parser;
+struct raft_install_snapshot_request;
 
 /* Formatting server IDs and cluster IDs for use in human-readable logs.  Do
  * not use these in cases where the whole server or cluster ID is needed; use
@@ -83,6 +84,9 @@  struct raft_server {
     bool replied;            /* Reply to append_request was received from this
                                 node during current election_timeout interval.
                                 */
+    /* Copy of the last install_snapshot_request sent to this server. */
+    struct raft_install_snapshot_request *last_install_snapshot_request;
+
     /* For use in adding and removing servers: */
     struct uuid requester_sid;  /* Nonzero if requested via RPC. */
     struct unixctl_conn *requester_conn; /* Only if requested via unixctl. */
diff --git a/ovsdb/raft.c b/ovsdb/raft.c
index 515eadab3..708b0624c 100644
--- a/ovsdb/raft.c
+++ b/ovsdb/raft.c
@@ -1421,8 +1421,20 @@  raft_conn_run(struct raft *raft, struct raft_conn *conn)
     jsonrpc_session_run(conn->js);
 
     unsigned int new_seqno = jsonrpc_session_get_seqno(conn->js);
-    bool just_connected = (new_seqno != conn->js_seqno
+    bool reconnected = new_seqno != conn->js_seqno;
+    bool just_connected = (reconnected
                            && jsonrpc_session_is_connected(conn->js));
+
+    if (reconnected) {
+        /* Clear 'last_install_snapshot_request' since it might not reach the
+         * destination or server was restarted. */
+        struct raft_server *server = raft_find_server(raft, &conn->sid);
+        if (server) {
+            free(server->last_install_snapshot_request);
+            server->last_install_snapshot_request = NULL;
+        }
+    }
+
     conn->js_seqno = new_seqno;
     if (just_connected) {
         if (raft->joining) {
@@ -3296,6 +3308,31 @@  raft_send_install_snapshot_request(struct raft *raft,
             .election_timer = raft->election_timer, /* use latest value */
         }
     };
+
+    if (s->last_install_snapshot_request) {
+        struct raft_install_snapshot_request *old, *new;
+
+        old = s->last_install_snapshot_request;
+        new = &rpc.install_snapshot_request;
+        if (   old->term           == new->term
+            && old->last_index     == new->last_index
+            && old->last_term      == new->last_term
+            && old->last_servers   == new->last_servers
+            && old->data           == new->data
+            && old->election_timer == new->election_timer
+            && uuid_equals(&old->last_eid, &new->last_eid)) {
+            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
+
+            VLOG_WARN_RL(&rl, "not sending exact same install_snapshot_request"
+                              " to server %s again", s->nickname);
+            return;
+        }
+    }
+    free(s->last_install_snapshot_request);
+    CONST_CAST(struct raft_server *, s)->last_install_snapshot_request
+        = xmemdup(&rpc.install_snapshot_request,
+                  sizeof rpc.install_snapshot_request);
+
     raft_send(raft, &rpc);
 }