diff mbox series

[5/5] migration: disable RDMA WRITE after postcopy started.

Message ID 1523089594-1422-6-git-send-email-lidongchen@tencent.com
State New
Headers show
Series Enable postcopy RDMA live migration | expand

Commit Message

858585 jemmy April 7, 2018, 8:26 a.m. UTC
RDMA write operations are performed with no notification to the destination
qemu, so the destination qemu cannot wake up. Therefore, disable RDMA WRITE
after postcopy has started.

Signed-off-by: Lidong Chen <lidongchen@tencent.com>
---
 migration/qemu-file.c |  3 ++-
 migration/rdma.c      | 12 ++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

Comments

Dr. David Alan Gilbert April 11, 2018, 3:56 p.m. UTC | #1
* Lidong Chen (jemmy858585@gmail.com) wrote:
> RDMA write operations are performed with no notification to the destination
> qemu, then the destination qemu can not wakeup. So disable RDMA WRITE after
> postcopy started.
> 
> Signed-off-by: Lidong Chen <lidongchen@tencent.com>

This patch needs to be near the beginning of the series; at the moment a
bisect would lead you to the middle of the series which had return
paths, but then would fail to work properly because it would try and use
the RDMA code.

> ---
>  migration/qemu-file.c |  3 ++-
>  migration/rdma.c      | 12 ++++++++++++
>  2 files changed, 14 insertions(+), 1 deletion(-)
> 
> diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> index 8acb574..a64ac3a 100644
> --- a/migration/qemu-file.c
> +++ b/migration/qemu-file.c
> @@ -260,7 +260,8 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
>          int ret = f->hooks->save_page(f, f->opaque, block_offset,
>                                        offset, size, bytes_sent);
>          f->bytes_xfer += size;
> -        if (ret != RAM_SAVE_CONTROL_DELAYED) {
> +        if (ret != RAM_SAVE_CONTROL_DELAYED &&
> +            ret != RAM_SAVE_CONTROL_NOT_SUPP) {

What about f->bytes_xfer in this case?

Is there anything we have to do at the switchover into postcopy to make
sure that all pages have been received?

Dave

>              if (bytes_sent && *bytes_sent > 0) {
>                  qemu_update_position(f, *bytes_sent);
>              } else if (ret < 0) {
> diff --git a/migration/rdma.c b/migration/rdma.c
> index 81be482..8529ddd 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -2964,6 +2964,10 @@ static size_t qemu_rdma_save_page(QEMUFile *f, void *opaque,
>  
>      CHECK_ERROR_STATE();
>  
> +    if (migrate_get_current()->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
> +        return RAM_SAVE_CONTROL_NOT_SUPP;
> +    }
> +
>      qemu_fflush(f);
>  
>      if (size > 0) {
> @@ -3528,6 +3532,10 @@ static int qemu_rdma_registration_start(QEMUFile *f, void *opaque,
>  
>      CHECK_ERROR_STATE();
>  
> +    if (migrate_get_current()->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
> +        return 0;
> +    }
> +
>      trace_qemu_rdma_registration_start(flags);
>      qemu_put_be64(f, RAM_SAVE_FLAG_HOOK);
>      qemu_fflush(f);
> @@ -3550,6 +3558,10 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
>  
>      CHECK_ERROR_STATE();
>  
> +    if (migrate_get_current()->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
> +        return 0;
> +    }
> +
>      qemu_fflush(f);
>      ret = qemu_rdma_drain_cq(f, rdma);
>  
> -- 
> 1.8.3.1
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
858585 jemmy April 12, 2018, 6:50 a.m. UTC | #2
On Wed, Apr 11, 2018 at 11:56 PM, Dr. David Alan Gilbert
<dgilbert@redhat.com> wrote:
> * Lidong Chen (jemmy858585@gmail.com) wrote:
>> RDMA write operations are performed with no notification to the destination
>> qemu, then the destination qemu can not wakeup. So disable RDMA WRITE after
>> postcopy started.
>>
>> Signed-off-by: Lidong Chen <lidongchen@tencent.com>
>
> This patch needs to be near the beginning of the series; at the moment a
> bisect would lead you to the middle of the series which had return
> paths, but then would fail to work properly because it would try and use
> the RDMA code.

I will fix this problem in the next version.

>
>> ---
>>  migration/qemu-file.c |  3 ++-
>>  migration/rdma.c      | 12 ++++++++++++
>>  2 files changed, 14 insertions(+), 1 deletion(-)
>>
>> diff --git a/migration/qemu-file.c b/migration/qemu-file.c
>> index 8acb574..a64ac3a 100644
>> --- a/migration/qemu-file.c
>> +++ b/migration/qemu-file.c
>> @@ -260,7 +260,8 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
>>          int ret = f->hooks->save_page(f, f->opaque, block_offset,
>>                                        offset, size, bytes_sent);
>>          f->bytes_xfer += size;
>> -        if (ret != RAM_SAVE_CONTROL_DELAYED) {
>> +        if (ret != RAM_SAVE_CONTROL_DELAYED &&
>> +            ret != RAM_SAVE_CONTROL_NOT_SUPP) {
>
> What about f->bytes_xfer in this case?

f->bytes_xfer should not be updated when RAM_SAVE_CONTROL_NOT_SUPP is returned.
I will fix this problem in the next version.

>
> Is there anything we have to do at the switchover into postcopy to make
> sure that all pages have been received?

ram_save_iterate invokes ram_control_after_iterate(f, RAM_CONTROL_ROUND),
so before the next iteration, which switches over into postcopy, all the pages
sent by the previous iteration have been received.

>
> Dave
>
>>              if (bytes_sent && *bytes_sent > 0) {
>>                  qemu_update_position(f, *bytes_sent);
>>              } else if (ret < 0) {
>> diff --git a/migration/rdma.c b/migration/rdma.c
>> index 81be482..8529ddd 100644
>> --- a/migration/rdma.c
>> +++ b/migration/rdma.c
>> @@ -2964,6 +2964,10 @@ static size_t qemu_rdma_save_page(QEMUFile *f, void *opaque,
>>
>>      CHECK_ERROR_STATE();
>>
>> +    if (migrate_get_current()->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
>> +        return RAM_SAVE_CONTROL_NOT_SUPP;
>> +    }
>> +
>>      qemu_fflush(f);
>>
>>      if (size > 0) {
>> @@ -3528,6 +3532,10 @@ static int qemu_rdma_registration_start(QEMUFile *f, void *opaque,
>>
>>      CHECK_ERROR_STATE();
>>
>> +    if (migrate_get_current()->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
>> +        return 0;
>> +    }
>> +
>>      trace_qemu_rdma_registration_start(flags);
>>      qemu_put_be64(f, RAM_SAVE_FLAG_HOOK);
>>      qemu_fflush(f);
>> @@ -3550,6 +3558,10 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
>>
>>      CHECK_ERROR_STATE();
>>
>> +    if (migrate_get_current()->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
>> +        return 0;
>> +    }
>> +
>>      qemu_fflush(f);
>>      ret = qemu_rdma_drain_cq(f, rdma);
>>
>> --
>> 1.8.3.1
>>
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Dr. David Alan Gilbert April 12, 2018, 6:55 p.m. UTC | #3
* 858585 jemmy (jemmy858585@gmail.com) wrote:
> On Wed, Apr 11, 2018 at 11:56 PM, Dr. David Alan Gilbert
> <dgilbert@redhat.com> wrote:
> > * Lidong Chen (jemmy858585@gmail.com) wrote:
> >> RDMA write operations are performed with no notification to the destination
> >> qemu, then the destination qemu can not wakeup. So disable RDMA WRITE after
> >> postcopy started.
> >>
> >> Signed-off-by: Lidong Chen <lidongchen@tencent.com>
> >
> > This patch needs to be near the beginning of the series; at the moment a
> > bisect would lead you to the middle of the series which had return
> > paths, but then would fail to work properly because it would try and use
> > the RDMA code.
> 
> I will fix this problem in next version.
> 
> >
> >> ---
> >>  migration/qemu-file.c |  3 ++-
> >>  migration/rdma.c      | 12 ++++++++++++
> >>  2 files changed, 14 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> >> index 8acb574..a64ac3a 100644
> >> --- a/migration/qemu-file.c
> >> +++ b/migration/qemu-file.c
> >> @@ -260,7 +260,8 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
> >>          int ret = f->hooks->save_page(f, f->opaque, block_offset,
> >>                                        offset, size, bytes_sent);
> >>          f->bytes_xfer += size;
> >> -        if (ret != RAM_SAVE_CONTROL_DELAYED) {
> >> +        if (ret != RAM_SAVE_CONTROL_DELAYED &&
> >> +            ret != RAM_SAVE_CONTROL_NOT_SUPP) {
> >
> > What about f->bytes_xfer in this case?
> 
> f->bytes_xfer should not update when RAM_SAVE_CONTROL_NOT_SUPP.
> I will fix this problem in next version.
> 
> >
> > Is there anything we have to do at the switchover into postcopy to make
> > sure that all pages have been received?
> 
> ram_save_iterate invoke ram_control_after_iterate(f, RAM_CONTROL_ROUND),
> so before next iteration which switchover into postcopy, all the pages
> sent by previous
> iteration have been received.

OK, great.

Dave

> >
> > Dave
> >
> >>              if (bytes_sent && *bytes_sent > 0) {
> >>                  qemu_update_position(f, *bytes_sent);
> >>              } else if (ret < 0) {
> >> diff --git a/migration/rdma.c b/migration/rdma.c
> >> index 81be482..8529ddd 100644
> >> --- a/migration/rdma.c
> >> +++ b/migration/rdma.c
> >> @@ -2964,6 +2964,10 @@ static size_t qemu_rdma_save_page(QEMUFile *f, void *opaque,
> >>
> >>      CHECK_ERROR_STATE();
> >>
> >> +    if (migrate_get_current()->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
> >> +        return RAM_SAVE_CONTROL_NOT_SUPP;
> >> +    }
> >> +
> >>      qemu_fflush(f);
> >>
> >>      if (size > 0) {
> >> @@ -3528,6 +3532,10 @@ static int qemu_rdma_registration_start(QEMUFile *f, void *opaque,
> >>
> >>      CHECK_ERROR_STATE();
> >>
> >> +    if (migrate_get_current()->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
> >> +        return 0;
> >> +    }
> >> +
> >>      trace_qemu_rdma_registration_start(flags);
> >>      qemu_put_be64(f, RAM_SAVE_FLAG_HOOK);
> >>      qemu_fflush(f);
> >> @@ -3550,6 +3558,10 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
> >>
> >>      CHECK_ERROR_STATE();
> >>
> >> +    if (migrate_get_current()->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
> >> +        return 0;
> >> +    }
> >> +
> >>      qemu_fflush(f);
> >>      ret = qemu_rdma_drain_cq(f, rdma);
> >>
> >> --
> >> 1.8.3.1
> >>
> > --
> > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
diff mbox series

Patch

diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 8acb574..a64ac3a 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -260,7 +260,8 @@  size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
         int ret = f->hooks->save_page(f, f->opaque, block_offset,
                                       offset, size, bytes_sent);
         f->bytes_xfer += size;
-        if (ret != RAM_SAVE_CONTROL_DELAYED) {
+        if (ret != RAM_SAVE_CONTROL_DELAYED &&
+            ret != RAM_SAVE_CONTROL_NOT_SUPP) {
             if (bytes_sent && *bytes_sent > 0) {
                 qemu_update_position(f, *bytes_sent);
             } else if (ret < 0) {
diff --git a/migration/rdma.c b/migration/rdma.c
index 81be482..8529ddd 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -2964,6 +2964,10 @@  static size_t qemu_rdma_save_page(QEMUFile *f, void *opaque,
 
     CHECK_ERROR_STATE();
 
+    if (migrate_get_current()->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
+        return RAM_SAVE_CONTROL_NOT_SUPP;
+    }
+
     qemu_fflush(f);
 
     if (size > 0) {
@@ -3528,6 +3532,10 @@  static int qemu_rdma_registration_start(QEMUFile *f, void *opaque,
 
     CHECK_ERROR_STATE();
 
+    if (migrate_get_current()->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
+        return 0;
+    }
+
     trace_qemu_rdma_registration_start(flags);
     qemu_put_be64(f, RAM_SAVE_FLAG_HOOK);
     qemu_fflush(f);
@@ -3550,6 +3558,10 @@  static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
 
     CHECK_ERROR_STATE();
 
+    if (migrate_get_current()->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
+        return 0;
+    }
+
     qemu_fflush(f);
     ret = qemu_rdma_drain_cq(f, rdma);