diff mbox series

[v2,1/3] Replication: Ignore requests after failover

Message ID 20190815200823.3de1bd14@luklap
State New
Headers show
Series colo: Add support for continuous replication | expand

Commit Message

Lukas Straub Aug. 15, 2019, 6:08 p.m. UTC
After failover the Secondary side of replication shouldn't change state, because
it now functions as our primary disk.

In replication_start, replication_do_checkpoint, replication_stop, ignore
the request if current state is BLOCK_REPLICATION_DONE (successful failover) or
BLOCK_REPLICATION_FAILOVER (failover in progress, i.e. currently merging active
and hidden images into the base image).

Signed-off-by: Lukas Straub <lukasstraub2@web.de>
---
 block/replication.c | 38 +++++++++++++++++++++++++++++++++++---
 1 file changed, 35 insertions(+), 3 deletions(-)

--
2.20.1

Comments

Dr. David Alan Gilbert Aug. 15, 2019, 6:51 p.m. UTC | #1
* Lukas Straub (lukasstraub2@web.de) wrote:
> After failover the Secondary side of replication shouldn't change state, because
> it now functions as our primary disk.
> 
> In replication_start, replication_do_checkpoint, replication_stop, ignore
> the request if current state is BLOCK_REPLICATION_DONE (successful failover) or
> BLOCK_REPLICATION_FAILOVER (failover in progress, i.e. currently merging active
> and hidden images into the base image).
> 
> Signed-off-by: Lukas Straub <lukasstraub2@web.de>

We should add some block people to this one to review it; cc'ing in
Kevin and Max.

Dave

> ---
>  block/replication.c | 38 +++++++++++++++++++++++++++++++++++---
>  1 file changed, 35 insertions(+), 3 deletions(-)
> 
> diff --git a/block/replication.c b/block/replication.c
> index 3d4dedddfc..97cc65c0cf 100644
> --- a/block/replication.c
> +++ b/block/replication.c
> @@ -454,6 +454,17 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
>      aio_context_acquire(aio_context);
>      s = bs->opaque;
> 
> +    if (s->stage == BLOCK_REPLICATION_DONE ||
> +        s->stage == BLOCK_REPLICATION_FAILOVER) {
> +        /*
> +         * This case happens when a secondary is promoted to primary.
> +         * Ignore the request because the secondary side of replication
> +         * doesn't have to do anything anymore.
> +         */
> +        aio_context_release(aio_context);
> +        return;
> +    }
> +
>      if (s->stage != BLOCK_REPLICATION_NONE) {
>          error_setg(errp, "Block replication is running or done");
>          aio_context_release(aio_context);
> @@ -529,8 +540,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
>                     "Block device is in use by internal backup job");
> 
>          top_bs = bdrv_lookup_bs(s->top_id, s->top_id, NULL);
> -        if (!top_bs || !bdrv_is_root_node(top_bs) ||
> -            !check_top_bs(top_bs, bs)) {
> +        if (!top_bs || !check_top_bs(top_bs, bs)) {
>              error_setg(errp, "No top_bs or it is invalid");
>              reopen_backing_file(bs, false, NULL);
>              aio_context_release(aio_context);
> @@ -577,6 +587,17 @@ static void replication_do_checkpoint(ReplicationState *rs, Error **errp)
>      aio_context_acquire(aio_context);
>      s = bs->opaque;
> 
> +    if (s->stage == BLOCK_REPLICATION_DONE ||
> +        s->stage == BLOCK_REPLICATION_FAILOVER) {
> +        /*
> +         * This case happens when a secondary was promoted to primary.
> +         * Ignore the request because the secondary side of replication
> +         * doesn't have to do anything anymore.
> +         */
> +        aio_context_release(aio_context);
> +        return;
> +    }
> +
>      if (s->mode == REPLICATION_MODE_SECONDARY) {
>          secondary_do_checkpoint(s, errp);
>      }
> @@ -593,7 +614,7 @@ static void replication_get_error(ReplicationState *rs, Error **errp)
>      aio_context_acquire(aio_context);
>      s = bs->opaque;
> 
> -    if (s->stage != BLOCK_REPLICATION_RUNNING) {
> +    if (s->stage == BLOCK_REPLICATION_NONE) {
>          error_setg(errp, "Block replication is not running");
>          aio_context_release(aio_context);
>          return;
> @@ -635,6 +656,17 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
>      aio_context_acquire(aio_context);
>      s = bs->opaque;
> 
> +    if (s->stage == BLOCK_REPLICATION_DONE ||
> +        s->stage == BLOCK_REPLICATION_FAILOVER) {
> +        /*
> +         * This case happens when a secondary was promoted to primary.
> +         * Ignore the request because the secondary side of replication
> +         * doesn't have to do anything anymore.
> +         */
> +        aio_context_release(aio_context);
> +        return;
> +    }
> +
>      if (s->stage != BLOCK_REPLICATION_RUNNING) {
>          error_setg(errp, "Block replication is not running");
>          aio_context_release(aio_context);
> --
> 2.20.1
> 
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
diff mbox series

Patch

diff --git a/block/replication.c b/block/replication.c
index 3d4dedddfc..97cc65c0cf 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -454,6 +454,17 @@  static void replication_start(ReplicationState *rs, ReplicationMode mode,
     aio_context_acquire(aio_context);
     s = bs->opaque;

+    if (s->stage == BLOCK_REPLICATION_DONE ||
+        s->stage == BLOCK_REPLICATION_FAILOVER) {
+        /*
+         * This case happens when a secondary is promoted to primary.
+         * Ignore the request because the secondary side of replication
+         * doesn't have to do anything anymore.
+         */
+        aio_context_release(aio_context);
+        return;
+    }
+
     if (s->stage != BLOCK_REPLICATION_NONE) {
         error_setg(errp, "Block replication is running or done");
         aio_context_release(aio_context);
@@ -529,8 +540,7 @@  static void replication_start(ReplicationState *rs, ReplicationMode mode,
                    "Block device is in use by internal backup job");

         top_bs = bdrv_lookup_bs(s->top_id, s->top_id, NULL);
-        if (!top_bs || !bdrv_is_root_node(top_bs) ||
-            !check_top_bs(top_bs, bs)) {
+        if (!top_bs || !check_top_bs(top_bs, bs)) {
             error_setg(errp, "No top_bs or it is invalid");
             reopen_backing_file(bs, false, NULL);
             aio_context_release(aio_context);
@@ -577,6 +587,17 @@  static void replication_do_checkpoint(ReplicationState *rs, Error **errp)
     aio_context_acquire(aio_context);
     s = bs->opaque;

+    if (s->stage == BLOCK_REPLICATION_DONE ||
+        s->stage == BLOCK_REPLICATION_FAILOVER) {
+        /*
+         * This case happens when a secondary was promoted to primary.
+         * Ignore the request because the secondary side of replication
+         * doesn't have to do anything anymore.
+         */
+        aio_context_release(aio_context);
+        return;
+    }
+
     if (s->mode == REPLICATION_MODE_SECONDARY) {
         secondary_do_checkpoint(s, errp);
     }
@@ -593,7 +614,7 @@  static void replication_get_error(ReplicationState *rs, Error **errp)
     aio_context_acquire(aio_context);
     s = bs->opaque;

-    if (s->stage != BLOCK_REPLICATION_RUNNING) {
+    if (s->stage == BLOCK_REPLICATION_NONE) {
         error_setg(errp, "Block replication is not running");
         aio_context_release(aio_context);
         return;
@@ -635,6 +656,17 @@  static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
     aio_context_acquire(aio_context);
     s = bs->opaque;

+    if (s->stage == BLOCK_REPLICATION_DONE ||
+        s->stage == BLOCK_REPLICATION_FAILOVER) {
+        /*
+         * This case happens when a secondary was promoted to primary.
+         * Ignore the request because the secondary side of replication
+         * doesn't have to do anything anymore.
+         */
+        aio_context_release(aio_context);
+        return;
+    }
+
     if (s->stage != BLOCK_REPLICATION_RUNNING) {
         error_setg(errp, "Block replication is not running");
         aio_context_release(aio_context);