diff mbox series

[v2,15/16] migration/rdma: Simplify the function that saves a page

Message ID 20230515195709.63843-16-quintela@redhat.com
State New
Headers show
Series Migration: More migration atomic counters | expand

Commit Message

Juan Quintela May 15, 2023, 7:57 p.m. UTC
When we send a page through QEMUFile hooks (RDMA) there are three
possibilities:
- We are not using RDMA.  It returns RAM_SAVE_CONTROL_NOT_SUPP and
  control_save_page() returns false to let anything else proceed.
- There is an error but we are using RDMA.  Then we return a negative
  value, and control_save_page() needs to return true.
- Everything goes well and RDMA starts sending the page
  asynchronously.  It returns RAM_SAVE_CONTROL_DELAYED and we need to
  return 1 for ram_save_page_legacy.

Clear?

I know, I know, the interface is as bad as it gets.  I think that now
it is a bit clearer, but this needs to be done some other way.

Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 migration/qemu-file.h | 14 ++++++--------
 migration/qemu-file.c | 12 ++++++------
 migration/ram.c       | 10 +++-------
 migration/rdma.c      | 19 +++----------------
 4 files changed, 18 insertions(+), 37 deletions(-)

Comments

Leonardo Bras May 25, 2023, 8:10 a.m. UTC | #1
On Mon, 2023-05-15 at 21:57 +0200, Juan Quintela wrote:
> When we sent a page through QEMUFile hooks (RDMA) there are three
> posiblities:
> - We are not using RDMA. return RAM_SAVE_CONTROL_DELAYED and
>   control_save_page() returns false to let anything else to proceed.
> - There is one error but we are using RDMA.  Then we return a negative
>   value, control_save_page() needs to return true.
> - Everything goes well and RDMA start the sent of the page
>   asynchronously.  It returns RAM_SAVE_CONTROL_DELAYED and we need to
>   return 1 for ram_save_page_legacy.
> 
> Clear?
> 
> I know, I know, the interfaz is as bad as it gets.  I think that now
> it is a bit clearer, but this needs to be done some other way.

interface?

> 
> Signed-off-by: Juan Quintela <quintela@redhat.com>
> ---
>  migration/qemu-file.h | 14 ++++++--------
>  migration/qemu-file.c | 12 ++++++------
>  migration/ram.c       | 10 +++-------
>  migration/rdma.c      | 19 +++----------------
>  4 files changed, 18 insertions(+), 37 deletions(-)
> 
> diff --git a/migration/qemu-file.h b/migration/qemu-file.h
> index 37f42315c7..ed77996201 100644
> --- a/migration/qemu-file.h
> +++ b/migration/qemu-file.h
> @@ -49,11 +49,10 @@ typedef int (QEMURamHookFunc)(QEMUFile *f, uint64_t flags, void *data);
>   * This function allows override of where the RAM page
>   * is saved (such as RDMA, for example.)
>   */
> -typedef size_t (QEMURamSaveFunc)(QEMUFile *f,
> -                                 ram_addr_t block_offset,
> -                                 ram_addr_t offset,
> -                                 size_t size,
> -                                 uint64_t *bytes_sent);
> +typedef int (QEMURamSaveFunc)(QEMUFile *f,
> +                              ram_addr_t block_offset,
> +                              ram_addr_t offset,
> +                              size_t size);
>  
>  typedef struct QEMUFileHooks {
>      QEMURamHookFunc *before_ram_iterate;
> @@ -146,9 +145,8 @@ void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data);
>  #define RAM_SAVE_CONTROL_NOT_SUPP -1000
>  #define RAM_SAVE_CONTROL_DELAYED  -2000
>  
> -size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
> -                             ram_addr_t offset, size_t size,
> -                             uint64_t *bytes_sent);
> +int ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
> +                          ram_addr_t offset, size_t size);
>  QIOChannel *qemu_file_get_ioc(QEMUFile *file);
>  
>  #endif
> diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> index 72e130631d..32ef5e9651 100644
> --- a/migration/qemu-file.c
> +++ b/migration/qemu-file.c
> @@ -336,14 +336,14 @@ void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data)
>      }
>  }
>  
> -size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
> -                             ram_addr_t offset, size_t size,
> -                             uint64_t *bytes_sent)
> +int ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
> +                          ram_addr_t offset, size_t size)
>  {
>      if (f->hooks && f->hooks->save_page) {
> -        int ret = f->hooks->save_page(f, block_offset,
> -                                      offset, size, bytes_sent);
> -
> +        int ret = f->hooks->save_page(f, block_offset, offset, size);
> +        /*
> +         * RAM_SAVE_CONTROL_* are negative values
> +         */
>          if (ret != RAM_SAVE_CONTROL_DELAYED &&
>              ret != RAM_SAVE_CONTROL_NOT_SUPP) {
>              if (ret < 0) {
> diff --git a/migration/ram.c b/migration/ram.c
> index 2d3927a15f..f9fcbb3bb8 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -1173,23 +1173,19 @@ static int save_zero_page(PageSearchStatus *pss, QEMUFile *f, RAMBlock *block,
>  static bool control_save_page(PageSearchStatus *pss, RAMBlock *block,
>                                ram_addr_t offset, int *pages)
>  {
> -    uint64_t bytes_xmit = 0;
>      int ret;
>  
> -    *pages = -1;
>      ret = ram_control_save_page(pss->pss_channel, block->offset, offset,
> -                                TARGET_PAGE_SIZE, &bytes_xmit);
> +                                TARGET_PAGE_SIZE);
>      if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
>          return false;
>      }
>  
> -    if (bytes_xmit) {
> -        *pages = 1;
> -    }
> -
>      if (ret == RAM_SAVE_CONTROL_DELAYED) {
> +        *pages = 1;
>          return true;
>      }
> +    *pages = ret;
>      return true;
>  }
>  
> diff --git a/migration/rdma.c b/migration/rdma.c
> index 416dec00a2..12d3c23fdc 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -3239,13 +3239,12 @@ qio_channel_rdma_shutdown(QIOChannel *ioc,
>   *
>   *    @size : Number of bytes to transfer
>   *
> - *    @bytes_sent : User-specificed pointer to indicate how many bytes were
> + *    @pages_sent : User-specificed pointer to indicate how many pages were
>   *                  sent. Usually, this will not be more than a few bytes of
>   *                  the protocol because most transfers are sent asynchronously.
>   */

There is new documentation for pages_sent, but the parameter is not added to
the signature below. Am I missing something?

> -static size_t qemu_rdma_save_page(QEMUFile *f,
> -                                  ram_addr_t block_offset, ram_addr_t offset,
> -                                  size_t size, uint64_t *bytes_sent)
> +static int qemu_rdma_save_page(QEMUFile *f, ram_addr_t block_offset,
> +                               ram_addr_t offset, size_t size)
>  {
>      QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(qemu_file_get_ioc(f));
>      RDMAContext *rdma;
> @@ -3277,18 +3276,6 @@ static size_t qemu_rdma_save_page(QEMUFile *f,
>          goto err;
>      }
>  
> -    /*
> -     * We always return 1 bytes because the RDMA
> -     * protocol is completely asynchronous. We do not yet know
> -     * whether an  identified chunk is zero or not because we're
> -     * waiting for other pages to potentially be merged with
> -     * the current chunk. So, we have to call qemu_update_position()
> -     * later on when the actual write occurs.
> -     */
> -    if (bytes_sent) {
> -        *bytes_sent = 1;
> -    }
> -
>      /*
>       * Drain the Completion Queue if possible, but do not block,
>       * just poll.

Oh, so this one complements 13/16.
Since it doesn't do imaginary transfers anymore, there is no need to use
the bytes_sent pointer to keep track of them.

Other than the pages_sent above that I couldn't understand:
Reviewed-by: Leonardo Bras <leobras@redhat.com>
Juan Quintela May 26, 2023, 8:21 a.m. UTC | #2
Leonardo Brás <leobras@redhat.com> wrote:
> On Mon, 2023-05-15 at 21:57 +0200, Juan Quintela wrote:
>> When we sent a page through QEMUFile hooks (RDMA) there are three
>> posiblities:
>> - We are not using RDMA. return RAM_SAVE_CONTROL_DELAYED and
>>   control_save_page() returns false to let anything else to proceed.
>> - There is one error but we are using RDMA.  Then we return a negative
>>   value, control_save_page() needs to return true.
>> - Everything goes well and RDMA start the sent of the page
>>   asynchronously.  It returns RAM_SAVE_CONTROL_DELAYED and we need to
>>   return 1 for ram_save_page_legacy.
>> 
>> Clear?
>> 
>> I know, I know, the interfaz is as bad as it gets.  I think that now
>> it is a bit clearer, but this needs to be done some other way.
>
> interface?

Yeap.  I used the Spanish spelling, that, you know, in English is wrong O:-)

Thanks.
>> diff --git a/migration/rdma.c b/migration/rdma.c
>> index 416dec00a2..12d3c23fdc 100644
>> --- a/migration/rdma.c
>> +++ b/migration/rdma.c
>> @@ -3239,13 +3239,12 @@ qio_channel_rdma_shutdown(QIOChannel *ioc,
>>   *
>>   *    @size : Number of bytes to transfer
>>   *
>> - *    @bytes_sent : User-specificed pointer to indicate how many bytes were
>> + *    @pages_sent : User-specificed pointer to indicate how many pages were
>>   *                  sent. Usually, this will not be more than a few bytes of
>>   *                  the protocol because most transfers are sent asynchronously.
>>   */
>
> There is new doc to pages_sent but the parameter is not added to the signature
> bellow. Am I missing something?

Good catch.

I redid this patch several times, and it appears that I left some leftovers behind.

>
>> -static size_t qemu_rdma_save_page(QEMUFile *f,
>> -                                  ram_addr_t block_offset, ram_addr_t offset,
>> -                                  size_t size, uint64_t *bytes_sent)
>> +static int qemu_rdma_save_page(QEMUFile *f, ram_addr_t block_offset,
>> +                               ram_addr_t offset, size_t size)
>>  {
>>      QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(qemu_file_get_ioc(f));
>>      RDMAContext *rdma;
>> @@ -3277,18 +3276,6 @@ static size_t qemu_rdma_save_page(QEMUFile *f,
>>          goto err;
>>      }
>>  
>> -    /*
>> -     * We always return 1 bytes because the RDMA
>> -     * protocol is completely asynchronous. We do not yet know
>> -     * whether an  identified chunk is zero or not because we're
>> -     * waiting for other pages to potentially be merged with
>> -     * the current chunk. So, we have to call qemu_update_position()
>> -     * later on when the actual write occurs.
>> -     */
>> -    if (bytes_sent) {
>> -        *bytes_sent = 1;
>> -    }
>> -
>>      /*
>>       * Drain the Completion Queue if possible, but do not block,
>>       * just poll.
>
> Oh, so this one complements 13/16. 
> Since it doesn't do imaginary transfers anymore, there is no need to use
> bytes_sent pointer to keep track of them anymore.
>
> Other than the pages_sent above that I couldn't understand:
> Reviewed-by: Leonardo Bras <leobras@redhat.com>

Dropping that bit.

Thanks.
Leonardo Bras May 26, 2023, 7:03 p.m. UTC | #3
On Fri, May 26, 2023 at 5:21 AM Juan Quintela <quintela@redhat.com> wrote:
>
> Leonardo Brás <leobras@redhat.com> wrote:
> > On Mon, 2023-05-15 at 21:57 +0200, Juan Quintela wrote:
> >> When we sent a page through QEMUFile hooks (RDMA) there are three
> >> posiblities:
> >> - We are not using RDMA. return RAM_SAVE_CONTROL_DELAYED and
> >>   control_save_page() returns false to let anything else to proceed.
> >> - There is one error but we are using RDMA.  Then we return a negative
> >>   value, control_save_page() needs to return true.
> >> - Everything goes well and RDMA start the sent of the page
> >>   asynchronously.  It returns RAM_SAVE_CONTROL_DELAYED and we need to
> >>   return 1 for ram_save_page_legacy.
> >>
> >> Clear?
> >>
> >> I know, I know, the interfaz is as bad as it gets.  I think that now
> >> it is a bit clearer, but this needs to be done some other way.
> >
> > interface?
>
> Yeap.  I used the Spanish spelling, that, you know, in English is wrong O:-)

Happens to me all the time :)

>
> Thanks.
> >> diff --git a/migration/rdma.c b/migration/rdma.c
> >> index 416dec00a2..12d3c23fdc 100644
> >> --- a/migration/rdma.c
> >> +++ b/migration/rdma.c
> >> @@ -3239,13 +3239,12 @@ qio_channel_rdma_shutdown(QIOChannel *ioc,
> >>   *
> >>   *    @size : Number of bytes to transfer
> >>   *
> >> - *    @bytes_sent : User-specificed pointer to indicate how many bytes were
> >> + *    @pages_sent : User-specificed pointer to indicate how many pages were
> >>   *                  sent. Usually, this will not be more than a few bytes of
> >>   *                  the protocol because most transfers are sent asynchronously.
> >>   */
> >
> > There is new doc to pages_sent but the parameter is not added to the signature
> > bellow. Am I missing something?
>
> Good catch.

:)

>
> I redid this patch several times.  And it appears that I forgot some leftovers.
>
> >
> >> -static size_t qemu_rdma_save_page(QEMUFile *f,
> >> -                                  ram_addr_t block_offset, ram_addr_t offset,
> >> -                                  size_t size, uint64_t *bytes_sent)
> >> +static int qemu_rdma_save_page(QEMUFile *f, ram_addr_t block_offset,
> >> +                               ram_addr_t offset, size_t size)
> >>  {
> >>      QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(qemu_file_get_ioc(f));
> >>      RDMAContext *rdma;
> >> @@ -3277,18 +3276,6 @@ static size_t qemu_rdma_save_page(QEMUFile *f,
> >>          goto err;
> >>      }
> >>
> >> -    /*
> >> -     * We always return 1 bytes because the RDMA
> >> -     * protocol is completely asynchronous. We do not yet know
> >> -     * whether an  identified chunk is zero or not because we're
> >> -     * waiting for other pages to potentially be merged with
> >> -     * the current chunk. So, we have to call qemu_update_position()
> >> -     * later on when the actual write occurs.
> >> -     */
> >> -    if (bytes_sent) {
> >> -        *bytes_sent = 1;
> >> -    }
> >> -
> >>      /*
> >>       * Drain the Completion Queue if possible, but do not block,
> >>       * just poll.
> >
> > Oh, so this one complements 13/16.
> > Since it doesn't do imaginary transfers anymore, there is no need to use
> > bytes_sent pointer to keep track of them anymore.
> >
> > Other than the pages_sent above that I couldn't understand:
> > Reviewed-by: Leonardo Bras <leobras@redhat.com>
>
> Dropping that bit.
>
> Thanks.
>
diff mbox series

Patch

diff --git a/migration/qemu-file.h b/migration/qemu-file.h
index 37f42315c7..ed77996201 100644
--- a/migration/qemu-file.h
+++ b/migration/qemu-file.h
@@ -49,11 +49,10 @@  typedef int (QEMURamHookFunc)(QEMUFile *f, uint64_t flags, void *data);
  * This function allows override of where the RAM page
  * is saved (such as RDMA, for example.)
  */
-typedef size_t (QEMURamSaveFunc)(QEMUFile *f,
-                                 ram_addr_t block_offset,
-                                 ram_addr_t offset,
-                                 size_t size,
-                                 uint64_t *bytes_sent);
+typedef int (QEMURamSaveFunc)(QEMUFile *f,
+                              ram_addr_t block_offset,
+                              ram_addr_t offset,
+                              size_t size);
 
 typedef struct QEMUFileHooks {
     QEMURamHookFunc *before_ram_iterate;
@@ -146,9 +145,8 @@  void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data);
 #define RAM_SAVE_CONTROL_NOT_SUPP -1000
 #define RAM_SAVE_CONTROL_DELAYED  -2000
 
-size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
-                             ram_addr_t offset, size_t size,
-                             uint64_t *bytes_sent);
+int ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
+                          ram_addr_t offset, size_t size);
 QIOChannel *qemu_file_get_ioc(QEMUFile *file);
 
 #endif
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 72e130631d..32ef5e9651 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -336,14 +336,14 @@  void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data)
     }
 }
 
-size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
-                             ram_addr_t offset, size_t size,
-                             uint64_t *bytes_sent)
+int ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
+                          ram_addr_t offset, size_t size)
 {
     if (f->hooks && f->hooks->save_page) {
-        int ret = f->hooks->save_page(f, block_offset,
-                                      offset, size, bytes_sent);
-
+        int ret = f->hooks->save_page(f, block_offset, offset, size);
+        /*
+         * RAM_SAVE_CONTROL_* are negative values
+         */
         if (ret != RAM_SAVE_CONTROL_DELAYED &&
             ret != RAM_SAVE_CONTROL_NOT_SUPP) {
             if (ret < 0) {
diff --git a/migration/ram.c b/migration/ram.c
index 2d3927a15f..f9fcbb3bb8 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1173,23 +1173,19 @@  static int save_zero_page(PageSearchStatus *pss, QEMUFile *f, RAMBlock *block,
 static bool control_save_page(PageSearchStatus *pss, RAMBlock *block,
                               ram_addr_t offset, int *pages)
 {
-    uint64_t bytes_xmit = 0;
     int ret;
 
-    *pages = -1;
     ret = ram_control_save_page(pss->pss_channel, block->offset, offset,
-                                TARGET_PAGE_SIZE, &bytes_xmit);
+                                TARGET_PAGE_SIZE);
     if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
         return false;
     }
 
-    if (bytes_xmit) {
-        *pages = 1;
-    }
-
     if (ret == RAM_SAVE_CONTROL_DELAYED) {
+        *pages = 1;
         return true;
     }
+    *pages = ret;
     return true;
 }
 
diff --git a/migration/rdma.c b/migration/rdma.c
index 416dec00a2..12d3c23fdc 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -3239,13 +3239,12 @@  qio_channel_rdma_shutdown(QIOChannel *ioc,
  *
  *    @size : Number of bytes to transfer
  *
- *    @bytes_sent : User-specificed pointer to indicate how many bytes were
+ *    @pages_sent : User-specificed pointer to indicate how many pages were
  *                  sent. Usually, this will not be more than a few bytes of
  *                  the protocol because most transfers are sent asynchronously.
  */
-static size_t qemu_rdma_save_page(QEMUFile *f,
-                                  ram_addr_t block_offset, ram_addr_t offset,
-                                  size_t size, uint64_t *bytes_sent)
+static int qemu_rdma_save_page(QEMUFile *f, ram_addr_t block_offset,
+                               ram_addr_t offset, size_t size)
 {
     QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(qemu_file_get_ioc(f));
     RDMAContext *rdma;
@@ -3277,18 +3276,6 @@  static size_t qemu_rdma_save_page(QEMUFile *f,
         goto err;
     }
 
-    /*
-     * We always return 1 bytes because the RDMA
-     * protocol is completely asynchronous. We do not yet know
-     * whether an  identified chunk is zero or not because we're
-     * waiting for other pages to potentially be merged with
-     * the current chunk. So, we have to call qemu_update_position()
-     * later on when the actual write occurs.
-     */
-    if (bytes_sent) {
-        *bytes_sent = 1;
-    }
-
     /*
      * Drain the Completion Queue if possible, but do not block,
      * just poll.