diff mbox series

[v4,14/25] memory: Add Error** argument to the global_dirty_log routines

Message ID 20240306133441.2351700-15-clg@redhat.com
State New
Headers show
Series migration: Improve error reporting | expand

Commit Message

Cédric Le Goater March 6, 2024, 1:34 p.m. UTC
Now that the log_global*() handlers take an Error** parameter and
return a bool, do the same for memory_global_dirty_log_start() and
memory_global_dirty_log_stop(). The error is reported in the callers
for now and it will be propagated in the call stack in the next
changes.

To be noted a functional change in ram_init_bitmaps(), if the dirty
pages logger fails to start, there is no need to synchronize the dirty
pages bitmaps. colo_incoming_start_dirty_log() could be modified in a
similar way.

Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Anthony Perard <anthony.perard@citrix.com>
Cc: Paul Durrant <paul@xen.org>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hyman Huang <yong.huang@smartx.com>
Reviewed-by: Hyman Huang <yong.huang@smartx.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---

 Changes in v4:

 - Dropped log_global_stop() and log_global_sync() changes
 
 include/exec/memory.h |  5 ++++-
 hw/i386/xen/xen-hvm.c |  2 +-
 migration/dirtyrate.c | 13 +++++++++++--
 migration/ram.c       | 22 ++++++++++++++++++++--
 system/memory.c       | 11 +++++------
 5 files changed, 41 insertions(+), 12 deletions(-)

Comments

Peter Xu March 15, 2024, 11:34 a.m. UTC | #1
On Wed, Mar 06, 2024 at 02:34:29PM +0100, Cédric Le Goater wrote:
> Now that the log_global*() handlers take an Error** parameter and
> return a bool, do the same for memory_global_dirty_log_start() and
> memory_global_dirty_log_stop(). The error is reported in the callers
> for now and it will be propagated in the call stack in the next
> changes.
> 
> To be noted a functional change in ram_init_bitmaps(), if the dirty
> pages logger fails to start, there is no need to synchronize the dirty
> pages bitmaps. colo_incoming_start_dirty_log() could be modified in a
> similar way.
> 
> Cc: Stefano Stabellini <sstabellini@kernel.org>
> Cc: Anthony Perard <anthony.perard@citrix.com>
> Cc: Paul Durrant <paul@xen.org>
> Cc: "Michael S. Tsirkin" <mst@redhat.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: David Hildenbrand <david@redhat.com>
> Cc: Hyman Huang <yong.huang@smartx.com>
> Reviewed-by: Hyman Huang <yong.huang@smartx.com>
> Signed-off-by: Cédric Le Goater <clg@redhat.com>
> ---
> 
>  Changes in v4:
> 
>  - Dropped log_global_stop() and log_global_sync() changes
>  
>  include/exec/memory.h |  5 ++++-
>  hw/i386/xen/xen-hvm.c |  2 +-
>  migration/dirtyrate.c | 13 +++++++++++--
>  migration/ram.c       | 22 ++++++++++++++++++++--
>  system/memory.c       | 11 +++++------
>  5 files changed, 41 insertions(+), 12 deletions(-)
> 
> diff --git a/include/exec/memory.h b/include/exec/memory.h
> index 5555567bc4c9fdb53e8f63487f1400980275687d..c129ee6db7162504bd72d4cfc69b5affb2cd87e8 100644
> --- a/include/exec/memory.h
> +++ b/include/exec/memory.h
> @@ -2570,8 +2570,11 @@ void memory_listener_unregister(MemoryListener *listener);
>   * memory_global_dirty_log_start: begin dirty logging for all regions
>   *
>   * @flags: purpose of starting dirty log, migration or dirty rate
> + * @errp: pointer to Error*, to store an error if it happens.
> + *
> + * Return: true on success, else false setting @errp with error.
>   */
> -void memory_global_dirty_log_start(unsigned int flags);
> +bool memory_global_dirty_log_start(unsigned int flags, Error **errp);
>  
>  /**
>   * memory_global_dirty_log_stop: end dirty logging for all regions
> diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c
> index 0608ca99f5166fd6379ee674442484e805eff9c0..57cb7df50788a6c31eff68c95e8eaa856fdebede 100644
> --- a/hw/i386/xen/xen-hvm.c
> +++ b/hw/i386/xen/xen-hvm.c
> @@ -654,7 +654,7 @@ void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length)
>  void qmp_xen_set_global_dirty_log(bool enable, Error **errp)
>  {
>      if (enable) {
> -        memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
> +        memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION, errp);
>      } else {
>          memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION);
>      }
> diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
> index 1d2e85746fb7b10eb7f149976970f9a92125af8a..d02d70b7b4b86a29d4d5540ded416543536d8f98 100644
> --- a/migration/dirtyrate.c
> +++ b/migration/dirtyrate.c
> @@ -90,9 +90,15 @@ static int64_t do_calculate_dirtyrate(DirtyPageRecord dirty_pages,
>  
>  void global_dirty_log_change(unsigned int flag, bool start)
>  {
> +    Error *local_err = NULL;
> +    bool ret;
> +
>      bql_lock();
>      if (start) {
> -        memory_global_dirty_log_start(flag);
> +        ret = memory_global_dirty_log_start(flag, &local_err);
> +        if (!ret) {
> +            error_report_err(local_err);
> +        }
>      } else {
>          memory_global_dirty_log_stop(flag);
>      }
> @@ -608,9 +614,12 @@ static void calculate_dirtyrate_dirty_bitmap(struct DirtyRateConfig config)
>  {
>      int64_t start_time;
>      DirtyPageRecord dirty_pages;
> +    Error *local_err = NULL;
>  
>      bql_lock();
> -    memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE);
> +    if (!memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE, &local_err)) {
> +        error_report_err(local_err);
> +    }
>  
>      /*
>       * 1'round of log sync may return all 1 bits with
> diff --git a/migration/ram.c b/migration/ram.c
> index c5149b7d717aefad7f590422af0ea4a40e7507be..397b4c0f218a66d194e44f9c5f9fe8e9885c48b6 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -2836,18 +2836,31 @@ static void migration_bitmap_clear_discarded_pages(RAMState *rs)
>  
>  static void ram_init_bitmaps(RAMState *rs)
>  {
> +    Error *local_err = NULL;
> +    bool ret = true;
> +
>      qemu_mutex_lock_ramlist();
>  
>      WITH_RCU_READ_LOCK_GUARD() {
>          ram_list_init_bitmaps();
>          /* We don't use dirty log with background snapshots */
>          if (!migrate_background_snapshot()) {
> -            memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
> +            ret = memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION,
> +                                                &local_err);
> +            if (!ret) {
> +                error_report_err(local_err);
> +                goto out_unlock;

Here we may need to free the bitmaps created in ram_list_init_bitmaps().

We can have a helper ram_bitmaps_destroy() for that.

One thing be careful is the new file_bmap can be created but missing in the
ram_save_cleanup(), it's because it's freed earlier.  IMHO if we will have
a new ram_bitmaps_destroy() we can unconditionally free file_bmap there
too, as if it's freed early g_free() is noop.

> +            }
>              migration_bitmap_sync_precopy(rs, false);
>          }
>      }
> +out_unlock:
>      qemu_mutex_unlock_ramlist();
>  
> +    if (!ret) {
> +        return;
> +    }
> +
>      /*
>       * After an eventual first bitmap sync, fixup the initial bitmap
>       * containing all 1s to exclude any discarded pages from migration.
> @@ -3631,6 +3644,8 @@ int colo_init_ram_cache(void)
>  void colo_incoming_start_dirty_log(void)
>  {
>      RAMBlock *block = NULL;
> +    Error *local_err = NULL;
> +
>      /* For memory_global_dirty_log_start below. */
>      bql_lock();
>      qemu_mutex_lock_ramlist();
> @@ -3642,7 +3657,10 @@ void colo_incoming_start_dirty_log(void)
>              /* Discard this dirty bitmap record */
>              bitmap_zero(block->bmap, block->max_length >> TARGET_PAGE_BITS);
>          }
> -        memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
> +        if (!memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION,
> +                                           &local_err)) {
> +            error_report_err(local_err);
> +        }
>      }
>      ram_state->migration_dirty_pages = 0;
>      qemu_mutex_unlock_ramlist();
> diff --git a/system/memory.c b/system/memory.c
> index 3600e716149407c10a1f6bf8f0a81c2611cf15ba..cbc098216b789f50460f1d1bc7ec122030693d9e 100644
> --- a/system/memory.c
> +++ b/system/memory.c
> @@ -2931,10 +2931,9 @@ static void memory_global_dirty_log_rollback(MemoryListener *listener,
>      }
>  }
>  
> -void memory_global_dirty_log_start(unsigned int flags)
> +bool memory_global_dirty_log_start(unsigned int flags, Error **errp)
>  {
>      unsigned int old_flags;
> -    Error *local_err = NULL;
>  
>      assert(flags && !(flags & (~GLOBAL_DIRTY_MASK)));
>  
> @@ -2946,7 +2945,7 @@ void memory_global_dirty_log_start(unsigned int flags)
>  
>      flags &= ~global_dirty_tracking;
>      if (!flags) {
> -        return;
> +        return true;
>      }
>  
>      old_flags = global_dirty_tracking;
> @@ -2959,7 +2958,7 @@ void memory_global_dirty_log_start(unsigned int flags)
>  
>          QTAILQ_FOREACH(listener, &memory_listeners, link) {
>              if (listener->log_global_start) {
> -                ret = listener->log_global_start(listener, &local_err);
> +                ret = listener->log_global_start(listener, errp);
>                  if (!ret) {
>                      break;
>                  }
> @@ -2969,14 +2968,14 @@ void memory_global_dirty_log_start(unsigned int flags)
>          if (!ret) {
>              memory_global_dirty_log_rollback(QTAILQ_PREV(listener, link),
>                                               flags);
> -            error_report_err(local_err);
> -            return;
> +            return false;
>          }
>  
>          memory_region_transaction_begin();
>          memory_region_update_pending = true;
>          memory_region_transaction_commit();
>      }
> +    return true;
>  }
>  
>  static void memory_global_dirty_log_do_stop(unsigned int flags)
> -- 
> 2.44.0
>
Yong Huang March 16, 2024, 2:41 a.m. UTC | #2
On Wed, Mar 6, 2024 at 9:35 PM Cédric Le Goater <clg@redhat.com> wrote:

> Now that the log_global*() handlers take an Error** parameter and
> return a bool, do the same for memory_global_dirty_log_start() and
> memory_global_dirty_log_stop(). The error is reported in the callers
> for now and it will be propagated in the call stack in the next
> changes.


> To be noted a functional change in ram_init_bitmaps(), if the dirty
>
Hi, Cédric Le Goater. Could the functional modification be made
separately from the patch? And my "Reviewed-by" is attached
to the first patch that refines memory_global_dirty_log_start's
function declaration.


> pages logger fails to start, there is no need to synchronize the dirty
> pages bitmaps. colo_incoming_start_dirty_log() could be modified in a
> similar way.
>
> Cc: Stefano Stabellini <sstabellini@kernel.org>
> Cc: Anthony Perard <anthony.perard@citrix.com>
> Cc: Paul Durrant <paul@xen.org>
> Cc: "Michael S. Tsirkin" <mst@redhat.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: David Hildenbrand <david@redhat.com>
> Cc: Hyman Huang <yong.huang@smartx.com>
> Reviewed-by: Hyman Huang <yong.huang@smartx.com>
> Signed-off-by: Cédric Le Goater <clg@redhat.com>
> ---
>
>  Changes in v4:
>
>  - Dropped log_global_stop() and log_global_sync() changes
>
>  include/exec/memory.h |  5 ++++-
>  hw/i386/xen/xen-hvm.c |  2 +-
>  migration/dirtyrate.c | 13 +++++++++++--
>  migration/ram.c       | 22 ++++++++++++++++++++--
>  system/memory.c       | 11 +++++------
>  5 files changed, 41 insertions(+), 12 deletions(-)
>
> diff --git a/include/exec/memory.h b/include/exec/memory.h
> index
> 5555567bc4c9fdb53e8f63487f1400980275687d..c129ee6db7162504bd72d4cfc69b5affb2cd87e8
> 100644
> --- a/include/exec/memory.h
> +++ b/include/exec/memory.h
> @@ -2570,8 +2570,11 @@ void memory_listener_unregister(MemoryListener
> *listener);
>   * memory_global_dirty_log_start: begin dirty logging for all regions
>   *
>   * @flags: purpose of starting dirty log, migration or dirty rate
> + * @errp: pointer to Error*, to store an error if it happens.
> + *
> + * Return: true on success, else false setting @errp with error.
>   */
> -void memory_global_dirty_log_start(unsigned int flags);
> +bool memory_global_dirty_log_start(unsigned int flags, Error **errp);
>
>  /**
>   * memory_global_dirty_log_stop: end dirty logging for all regions
> diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c
> index
> 0608ca99f5166fd6379ee674442484e805eff9c0..57cb7df50788a6c31eff68c95e8eaa856fdebede
> 100644
> --- a/hw/i386/xen/xen-hvm.c
> +++ b/hw/i386/xen/xen-hvm.c
> @@ -654,7 +654,7 @@ void xen_hvm_modified_memory(ram_addr_t start,
> ram_addr_t length)
>  void qmp_xen_set_global_dirty_log(bool enable, Error **errp)
>  {
>      if (enable) {
> -        memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
> +        memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION, errp);
>      } else {
>          memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION);
>      }
> diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
> index
> 1d2e85746fb7b10eb7f149976970f9a92125af8a..d02d70b7b4b86a29d4d5540ded416543536d8f98
> 100644
> --- a/migration/dirtyrate.c
> +++ b/migration/dirtyrate.c
> @@ -90,9 +90,15 @@ static int64_t do_calculate_dirtyrate(DirtyPageRecord
> dirty_pages,
>
>  void global_dirty_log_change(unsigned int flag, bool start)
>  {
> +    Error *local_err = NULL;
> +    bool ret;
> +
>      bql_lock();
>      if (start) {
> -        memory_global_dirty_log_start(flag);
> +        ret = memory_global_dirty_log_start(flag, &local_err);
> +        if (!ret) {
> +            error_report_err(local_err);
> +        }
>      } else {
>          memory_global_dirty_log_stop(flag);
>      }
> @@ -608,9 +614,12 @@ static void calculate_dirtyrate_dirty_bitmap(struct
> DirtyRateConfig config)
>  {
>      int64_t start_time;
>      DirtyPageRecord dirty_pages;
> +    Error *local_err = NULL;
>
>      bql_lock();
> -    memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE);
> +    if (!memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE,
> &local_err)) {
> +        error_report_err(local_err);
> +    }
>
>      /*
>       * 1'round of log sync may return all 1 bits with
> diff --git a/migration/ram.c b/migration/ram.c
> index
> c5149b7d717aefad7f590422af0ea4a40e7507be..397b4c0f218a66d194e44f9c5f9fe8e9885c48b6
> 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -2836,18 +2836,31 @@ static void
> migration_bitmap_clear_discarded_pages(RAMState *rs)
>
>  static void ram_init_bitmaps(RAMState *rs)
>  {
> +    Error *local_err = NULL;
> +    bool ret = true;
> +
>      qemu_mutex_lock_ramlist();
>
>      WITH_RCU_READ_LOCK_GUARD() {
>          ram_list_init_bitmaps();
>          /* We don't use dirty log with background snapshots */
>          if (!migrate_background_snapshot()) {
> -            memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
> +            ret = memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION,
> +                                                &local_err);
> +            if (!ret) {
> +                error_report_err(local_err);
> +                goto out_unlock;
> +            }
>              migration_bitmap_sync_precopy(rs, false);
>          }
>      }
> +out_unlock:
>      qemu_mutex_unlock_ramlist();
>
> +    if (!ret) {
> +        return;
> +    }
> +
>      /*
>       * After an eventual first bitmap sync, fixup the initial bitmap
>       * containing all 1s to exclude any discarded pages from migration.
> @@ -3631,6 +3644,8 @@ int colo_init_ram_cache(void)
>  void colo_incoming_start_dirty_log(void)
>  {
>      RAMBlock *block = NULL;
> +    Error *local_err = NULL;
> +
>      /* For memory_global_dirty_log_start below. */
>      bql_lock();
>      qemu_mutex_lock_ramlist();
> @@ -3642,7 +3657,10 @@ void colo_incoming_start_dirty_log(void)
>              /* Discard this dirty bitmap record */
>              bitmap_zero(block->bmap, block->max_length >>
> TARGET_PAGE_BITS);
>          }
> -        memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
> +        if (!memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION,
> +                                           &local_err)) {
> +            error_report_err(local_err);
> +        }
>      }
>      ram_state->migration_dirty_pages = 0;
>      qemu_mutex_unlock_ramlist();
> diff --git a/system/memory.c b/system/memory.c
> index
> 3600e716149407c10a1f6bf8f0a81c2611cf15ba..cbc098216b789f50460f1d1bc7ec122030693d9e
> 100644
> --- a/system/memory.c
> +++ b/system/memory.c
> @@ -2931,10 +2931,9 @@ static void
> memory_global_dirty_log_rollback(MemoryListener *listener,
>      }
>  }
>
> -void memory_global_dirty_log_start(unsigned int flags)
> +bool memory_global_dirty_log_start(unsigned int flags, Error **errp)
>  {
>      unsigned int old_flags;
> -    Error *local_err = NULL;
>
>      assert(flags && !(flags & (~GLOBAL_DIRTY_MASK)));
>
> @@ -2946,7 +2945,7 @@ void memory_global_dirty_log_start(unsigned int
> flags)
>
>      flags &= ~global_dirty_tracking;
>      if (!flags) {
> -        return;
> +        return true;
>      }
>
>      old_flags = global_dirty_tracking;
> @@ -2959,7 +2958,7 @@ void memory_global_dirty_log_start(unsigned int
> flags)
>
>          QTAILQ_FOREACH(listener, &memory_listeners, link) {
>              if (listener->log_global_start) {
> -                ret = listener->log_global_start(listener, &local_err);
> +                ret = listener->log_global_start(listener, errp);
>                  if (!ret) {
>                      break;
>                  }
> @@ -2969,14 +2968,14 @@ void memory_global_dirty_log_start(unsigned int
> flags)
>          if (!ret) {
>              memory_global_dirty_log_rollback(QTAILQ_PREV(listener, link),
>                                               flags);
> -            error_report_err(local_err);
> -            return;
> +            return false;
>          }
>
>          memory_region_transaction_begin();
>          memory_region_update_pending = true;
>          memory_region_transaction_commit();
>      }
> +    return true;
>  }
>
>  static void memory_global_dirty_log_do_stop(unsigned int flags)
> --
> 2.44.0
>
>
Cédric Le Goater March 18, 2024, 10:43 a.m. UTC | #3
>> --- a/migration/ram.c
>> +++ b/migration/ram.c
>> @@ -2836,18 +2836,31 @@ static void migration_bitmap_clear_discarded_pages(RAMState *rs)
>>   
>>   static void ram_init_bitmaps(RAMState *rs)
>>   {
>> +    Error *local_err = NULL;
>> +    bool ret = true;
>> +
>>       qemu_mutex_lock_ramlist();
>>   
>>       WITH_RCU_READ_LOCK_GUARD() {
>>           ram_list_init_bitmaps();
>>           /* We don't use dirty log with background snapshots */
>>           if (!migrate_background_snapshot()) {
>> -            memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
>> +            ret = memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION,
>> +                                                &local_err);
>> +            if (!ret) {
>> +                error_report_err(local_err);
>> +                goto out_unlock;
> 
> Here we may need to free the bitmaps created in ram_list_init_bitmaps().
> 
> We can have a helper ram_bitmaps_destroy() for that.
> 
> One thing be careful is the new file_bmap can be created but missing in the
> ram_save_cleanup(), it's because it's freed earlier.  IMHO if we will have
> a new ram_bitmaps_destroy() we can unconditionally free file_bmap there
> too, as if it's freed early g_free() is noop.

OK. Let's do that in a new prereq patch. I will change ram_state_init()
and xbzrle_init() to take an Error ** argument while at it.


Thanks,

C.
Cédric Le Goater March 18, 2024, 4:03 p.m. UTC | #4
On 3/15/24 12:34, Peter Xu wrote:
> On Wed, Mar 06, 2024 at 02:34:29PM +0100, Cédric Le Goater wrote:
>> Now that the log_global*() handlers take an Error** parameter and
>> return a bool, do the same for memory_global_dirty_log_start() and
>> memory_global_dirty_log_stop(). The error is reported in the callers
>> for now and it will be propagated in the call stack in the next
>> changes.
>>
>> To be noted a functional change in ram_init_bitmaps(), if the dirty
>> pages logger fails to start, there is no need to synchronize the dirty
>> pages bitmaps. colo_incoming_start_dirty_log() could be modified in a
>> similar way.
>>
>> Cc: Stefano Stabellini <sstabellini@kernel.org>
>> Cc: Anthony Perard <anthony.perard@citrix.com>
>> Cc: Paul Durrant <paul@xen.org>
>> Cc: "Michael S. Tsirkin" <mst@redhat.com>
>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>> Cc: David Hildenbrand <david@redhat.com>
>> Cc: Hyman Huang <yong.huang@smartx.com>
>> Reviewed-by: Hyman Huang <yong.huang@smartx.com>
>> Signed-off-by: Cédric Le Goater <clg@redhat.com>
>> ---
>>
>>   Changes in v4:
>>
>>   - Dropped log_global_stop() and log_global_sync() changes
>>   
>>   include/exec/memory.h |  5 ++++-
>>   hw/i386/xen/xen-hvm.c |  2 +-
>>   migration/dirtyrate.c | 13 +++++++++++--
>>   migration/ram.c       | 22 ++++++++++++++++++++--
>>   system/memory.c       | 11 +++++------
>>   5 files changed, 41 insertions(+), 12 deletions(-)
>>
>> diff --git a/include/exec/memory.h b/include/exec/memory.h
>> index 5555567bc4c9fdb53e8f63487f1400980275687d..c129ee6db7162504bd72d4cfc69b5affb2cd87e8 100644
>> --- a/include/exec/memory.h
>> +++ b/include/exec/memory.h
>> @@ -2570,8 +2570,11 @@ void memory_listener_unregister(MemoryListener *listener);
>>    * memory_global_dirty_log_start: begin dirty logging for all regions
>>    *
>>    * @flags: purpose of starting dirty log, migration or dirty rate
>> + * @errp: pointer to Error*, to store an error if it happens.
>> + *
>> + * Return: true on success, else false setting @errp with error.
>>    */
>> -void memory_global_dirty_log_start(unsigned int flags);
>> +bool memory_global_dirty_log_start(unsigned int flags, Error **errp);
>>   
>>   /**
>>    * memory_global_dirty_log_stop: end dirty logging for all regions
>> diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c
>> index 0608ca99f5166fd6379ee674442484e805eff9c0..57cb7df50788a6c31eff68c95e8eaa856fdebede 100644
>> --- a/hw/i386/xen/xen-hvm.c
>> +++ b/hw/i386/xen/xen-hvm.c
>> @@ -654,7 +654,7 @@ void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length)
>>   void qmp_xen_set_global_dirty_log(bool enable, Error **errp)
>>   {
>>       if (enable) {
>> -        memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
>> +        memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION, errp);
>>       } else {
>>           memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION);
>>       }
>> diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
>> index 1d2e85746fb7b10eb7f149976970f9a92125af8a..d02d70b7b4b86a29d4d5540ded416543536d8f98 100644
>> --- a/migration/dirtyrate.c
>> +++ b/migration/dirtyrate.c
>> @@ -90,9 +90,15 @@ static int64_t do_calculate_dirtyrate(DirtyPageRecord dirty_pages,
>>   
>>   void global_dirty_log_change(unsigned int flag, bool start)
>>   {
>> +    Error *local_err = NULL;
>> +    bool ret;
>> +
>>       bql_lock();
>>       if (start) {
>> -        memory_global_dirty_log_start(flag);
>> +        ret = memory_global_dirty_log_start(flag, &local_err);
>> +        if (!ret) {
>> +            error_report_err(local_err);
>> +        }
>>       } else {
>>           memory_global_dirty_log_stop(flag);
>>       }
>> @@ -608,9 +614,12 @@ static void calculate_dirtyrate_dirty_bitmap(struct DirtyRateConfig config)
>>   {
>>       int64_t start_time;
>>       DirtyPageRecord dirty_pages;
>> +    Error *local_err = NULL;
>>   
>>       bql_lock();
>> -    memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE);
>> +    if (!memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE, &local_err)) {
>> +        error_report_err(local_err);
>> +    }
>>   
>>       /*
>>        * 1'round of log sync may return all 1 bits with
>> diff --git a/migration/ram.c b/migration/ram.c
>> index c5149b7d717aefad7f590422af0ea4a40e7507be..397b4c0f218a66d194e44f9c5f9fe8e9885c48b6 100644
>> --- a/migration/ram.c
>> +++ b/migration/ram.c
>> @@ -2836,18 +2836,31 @@ static void migration_bitmap_clear_discarded_pages(RAMState *rs)
>>   
>>   static void ram_init_bitmaps(RAMState *rs)
>>   {
>> +    Error *local_err = NULL;
>> +    bool ret = true;
>> +
>>       qemu_mutex_lock_ramlist();
>>   
>>       WITH_RCU_READ_LOCK_GUARD() {
>>           ram_list_init_bitmaps();
>>           /* We don't use dirty log with background snapshots */
>>           if (!migrate_background_snapshot()) {
>> -            memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
>> +            ret = memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION,
>> +                                                &local_err);
>> +            if (!ret) {
>> +                error_report_err(local_err);
>> +                goto out_unlock;
> 
> Here we may need to free the bitmaps created in ram_list_init_bitmaps().
> 
> We can have a helper ram_bitmaps_destroy() for that.
> 
> One thing be careful is the new file_bmap can be created but missing in the
> ram_save_cleanup(), it's because it's freed earlier.  IMHO if we will have
> a new ram_bitmaps_destroy() we can unconditionally free file_bmap there
> too, as if it's freed early g_free() is noop.

ok. will do.


Thanks,

C.
Cédric Le Goater March 18, 2024, 4:08 p.m. UTC | #5
>> --- a/migration/ram.c
>> +++ b/migration/ram.c
>> @@ -2836,18 +2836,31 @@ static void migration_bitmap_clear_discarded_pages(RAMState *rs)
>>   
>>   static void ram_init_bitmaps(RAMState *rs)
>>   {
>> +    Error *local_err = NULL;
>> +    bool ret = true;
>> +
>>       qemu_mutex_lock_ramlist();
>>   
>>       WITH_RCU_READ_LOCK_GUARD() {
>>           ram_list_init_bitmaps();

btw, should we use bitmap_try_new() to create the bitmaps instead of
bitmap_new() which can abort() ?


>>           /* We don't use dirty log with background snapshots */
>>           if (!migrate_background_snapshot()) {
>> -            memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
>> +            ret = memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION,
>> +                                                &local_err);
>> +            if (!ret) {
>> +                error_report_err(local_err);
>> +                goto out_unlock;
> 
> Here we may need to free the bitmaps created in ram_list_init_bitmaps().


C.
Cédric Le Goater March 18, 2024, 4:19 p.m. UTC | #6
On 3/16/24 03:41, Yong Huang wrote:
> 
> 
> On Wed, Mar 6, 2024 at 9:35 PM Cédric Le Goater <clg@redhat.com <mailto:clg@redhat.com>> wrote:
> 
>     Now that the log_global*() handlers take an Error** parameter and
>     return a bool, do the same for memory_global_dirty_log_start() and
>     memory_global_dirty_log_stop(). The error is reported in the callers
>     for now and it will be propagated in the call stack in the next
>     changes. 
> 
> 
>     To be noted a functional change in ram_init_bitmaps(), if the dirty
> 
> Hi, Cédric Le Goater. Could the functional modification be made
> separately from the patch? 

Are you suggesting one patch to add the Error ** parameter and a second
to report the error if there is a failure ? From the moment the prototype
is modified, all handlers need to take the change into account to avoid
a build break. Looks difficult.

> And my "Reviewed-by" is attached
> to the first patch that refines memory_global_dirty_log_start's
> function declaration.

OK. I should resend a v5 anyhow, I will remove your R-b and you can
reconsider.

Thanks,

C.


> 
>     pages logger fails to start, there is no need to synchronize the dirty
>     pages bitmaps. colo_incoming_start_dirty_log() could be modified in a
>     similar way.
> 
>     Cc: Stefano Stabellini <sstabellini@kernel.org <mailto:sstabellini@kernel.org>>
>     Cc: Anthony Perard <anthony.perard@citrix.com <mailto:anthony.perard@citrix.com>>
>     Cc: Paul Durrant <paul@xen.org <mailto:paul@xen.org>>
>     Cc: "Michael S. Tsirkin" <mst@redhat.com <mailto:mst@redhat.com>>
>     Cc: Paolo Bonzini <pbonzini@redhat.com <mailto:pbonzini@redhat.com>>
>     Cc: David Hildenbrand <david@redhat.com <mailto:david@redhat.com>>
>     Cc: Hyman Huang <yong.huang@smartx.com <mailto:yong.huang@smartx.com>>
>     Reviewed-by: Hyman Huang <yong.huang@smartx.com <mailto:yong.huang@smartx.com>>
>     Signed-off-by: Cédric Le Goater <clg@redhat.com <mailto:clg@redhat.com>>
>     ---
> 
>       Changes in v4:
> 
>       - Dropped log_global_stop() and log_global_sync() changes
> 
>       include/exec/memory.h |  5 ++++-
>       hw/i386/xen/xen-hvm.c |  2 +-
>       migration/dirtyrate.c | 13 +++++++++++--
>       migration/ram.c       | 22 ++++++++++++++++++++--
>       system/memory.c       | 11 +++++------
>       5 files changed, 41 insertions(+), 12 deletions(-)
> 
>     diff --git a/include/exec/memory.h b/include/exec/memory.h
>     index 5555567bc4c9fdb53e8f63487f1400980275687d..c129ee6db7162504bd72d4cfc69b5affb2cd87e8 100644
>     --- a/include/exec/memory.h
>     +++ b/include/exec/memory.h
>     @@ -2570,8 +2570,11 @@ void memory_listener_unregister(MemoryListener *listener);
>        * memory_global_dirty_log_start: begin dirty logging for all regions
>        *
>        * @flags: purpose of starting dirty log, migration or dirty rate
>     + * @errp: pointer to Error*, to store an error if it happens.
>     + *
>     + * Return: true on success, else false setting @errp with error.
>        */
>     -void memory_global_dirty_log_start(unsigned int flags);
>     +bool memory_global_dirty_log_start(unsigned int flags, Error **errp);
> 
>       /**
>        * memory_global_dirty_log_stop: end dirty logging for all regions
>     diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c
>     index 0608ca99f5166fd6379ee674442484e805eff9c0..57cb7df50788a6c31eff68c95e8eaa856fdebede 100644
>     --- a/hw/i386/xen/xen-hvm.c
>     +++ b/hw/i386/xen/xen-hvm.c
>     @@ -654,7 +654,7 @@ void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length)
>       void qmp_xen_set_global_dirty_log(bool enable, Error **errp)
>       {
>           if (enable) {
>     -        memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
>     +        memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION, errp);
>           } else {
>               memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION);
>           }
>     diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
>     index 1d2e85746fb7b10eb7f149976970f9a92125af8a..d02d70b7b4b86a29d4d5540ded416543536d8f98 100644
>     --- a/migration/dirtyrate.c
>     +++ b/migration/dirtyrate.c
>     @@ -90,9 +90,15 @@ static int64_t do_calculate_dirtyrate(DirtyPageRecord dirty_pages,
> 
>       void global_dirty_log_change(unsigned int flag, bool start)
>       {
>     +    Error *local_err = NULL;
>     +    bool ret;
>     +
>           bql_lock();
>           if (start) {
>     -        memory_global_dirty_log_start(flag);
>     +        ret = memory_global_dirty_log_start(flag, &local_err);
>     +        if (!ret) {
>     +            error_report_err(local_err);
>     +        }
>           } else {
>               memory_global_dirty_log_stop(flag);
>           }
>     @@ -608,9 +614,12 @@ static void calculate_dirtyrate_dirty_bitmap(struct DirtyRateConfig config)
>       {
>           int64_t start_time;
>           DirtyPageRecord dirty_pages;
>     +    Error *local_err = NULL;
> 
>           bql_lock();
>     -    memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE);
>     +    if (!memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE, &local_err)) {
>     +        error_report_err(local_err);
>     +    }
> 
>           /*
>            * 1'round of log sync may return all 1 bits with
>     diff --git a/migration/ram.c b/migration/ram.c
>     index c5149b7d717aefad7f590422af0ea4a40e7507be..397b4c0f218a66d194e44f9c5f9fe8e9885c48b6 100644
>     --- a/migration/ram.c
>     +++ b/migration/ram.c
>     @@ -2836,18 +2836,31 @@ static void migration_bitmap_clear_discarded_pages(RAMState *rs)
> 
>       static void ram_init_bitmaps(RAMState *rs)
>       {
>     +    Error *local_err = NULL;
>     +    bool ret = true;
>     +
>           qemu_mutex_lock_ramlist();
> 
>           WITH_RCU_READ_LOCK_GUARD() {
>               ram_list_init_bitmaps();
>               /* We don't use dirty log with background snapshots */
>               if (!migrate_background_snapshot()) {
>     -            memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
>     +            ret = memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION,
>     +                                                &local_err);
>     +            if (!ret) {
>     +                error_report_err(local_err);
>     +                goto out_unlock;
>     +            }
>                   migration_bitmap_sync_precopy(rs, false);
>               }
>           }
>     +out_unlock:
>           qemu_mutex_unlock_ramlist();
> 
>     +    if (!ret) {
>     +        return;
>     +    }
>     +
>           /*
>            * After an eventual first bitmap sync, fixup the initial bitmap
>            * containing all 1s to exclude any discarded pages from migration.
>     @@ -3631,6 +3644,8 @@ int colo_init_ram_cache(void)
>       void colo_incoming_start_dirty_log(void)
>       {
>           RAMBlock *block = NULL;
>     +    Error *local_err = NULL;
>     +
>           /* For memory_global_dirty_log_start below. */
>           bql_lock();
>           qemu_mutex_lock_ramlist();
>     @@ -3642,7 +3657,10 @@ void colo_incoming_start_dirty_log(void)
>                   /* Discard this dirty bitmap record */
>                   bitmap_zero(block->bmap, block->max_length >> TARGET_PAGE_BITS);
>               }
>     -        memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
>     +        if (!memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION,
>     +                                           &local_err)) {
>     +            error_report_err(local_err);
>     +        }
>           }
>           ram_state->migration_dirty_pages = 0;
>           qemu_mutex_unlock_ramlist();
>     diff --git a/system/memory.c b/system/memory.c
>     index 3600e716149407c10a1f6bf8f0a81c2611cf15ba..cbc098216b789f50460f1d1bc7ec122030693d9e 100644
>     --- a/system/memory.c
>     +++ b/system/memory.c
>     @@ -2931,10 +2931,9 @@ static void memory_global_dirty_log_rollback(MemoryListener *listener,
>           }
>       }
> 
>     -void memory_global_dirty_log_start(unsigned int flags)
>     +bool memory_global_dirty_log_start(unsigned int flags, Error **errp)
>       {
>           unsigned int old_flags;
>     -    Error *local_err = NULL;
> 
>           assert(flags && !(flags & (~GLOBAL_DIRTY_MASK)));
> 
>     @@ -2946,7 +2945,7 @@ void memory_global_dirty_log_start(unsigned int flags)
> 
>           flags &= ~global_dirty_tracking;
>           if (!flags) {
>     -        return;
>     +        return true;
>           }
> 
>           old_flags = global_dirty_tracking;
>     @@ -2959,7 +2958,7 @@ void memory_global_dirty_log_start(unsigned int flags)
> 
>               QTAILQ_FOREACH(listener, &memory_listeners, link) {
>                   if (listener->log_global_start) {
>     -                ret = listener->log_global_start(listener, &local_err);
>     +                ret = listener->log_global_start(listener, errp);
>                       if (!ret) {
>                           break;
>                       }
>     @@ -2969,14 +2968,14 @@ void memory_global_dirty_log_start(unsigned int flags)
>               if (!ret) {
>                   memory_global_dirty_log_rollback(QTAILQ_PREV(listener, link),
>                                                    flags);
>     -            error_report_err(local_err);
>     -            return;
>     +            return false;
>               }
> 
>               memory_region_transaction_begin();
>               memory_region_update_pending = true;
>               memory_region_transaction_commit();
>           }
>     +    return true;
>       }
> 
>       static void memory_global_dirty_log_do_stop(unsigned int flags)
>     -- 
>     2.44.0
> 
> 
> 
> -- 
> Best regards
Peter Xu March 18, 2024, 4:31 p.m. UTC | #7
On Mon, Mar 18, 2024 at 05:08:13PM +0100, Cédric Le Goater wrote:
> > > --- a/migration/ram.c
> > > +++ b/migration/ram.c
> > > @@ -2836,18 +2836,31 @@ static void migration_bitmap_clear_discarded_pages(RAMState *rs)
> > >   static void ram_init_bitmaps(RAMState *rs)
> > >   {
> > > +    Error *local_err = NULL;
> > > +    bool ret = true;
> > > +
> > >       qemu_mutex_lock_ramlist();
> > >       WITH_RCU_READ_LOCK_GUARD() {
> > >           ram_list_init_bitmaps();
> 
> btw, should we use bitmap_try_new() to create the bitmaps instead of
> bitmap_new() which can abort() ?

I'm not sure how much it'll help in reality; if allocation can fail here I
would expect qemu crash sooner or later.. but I agree the try_new() seems
reasonable too to be used here if this can fail now, after all migration is
extra feature on top of VM's emulation functions, so it's optional.

Thanks,
diff mbox series

Patch

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 5555567bc4c9fdb53e8f63487f1400980275687d..c129ee6db7162504bd72d4cfc69b5affb2cd87e8 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -2570,8 +2570,11 @@  void memory_listener_unregister(MemoryListener *listener);
  * memory_global_dirty_log_start: begin dirty logging for all regions
  *
  * @flags: purpose of starting dirty log, migration or dirty rate
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Return: true on success, else false setting @errp with error.
  */
-void memory_global_dirty_log_start(unsigned int flags);
+bool memory_global_dirty_log_start(unsigned int flags, Error **errp);
 
 /**
  * memory_global_dirty_log_stop: end dirty logging for all regions
diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c
index 0608ca99f5166fd6379ee674442484e805eff9c0..57cb7df50788a6c31eff68c95e8eaa856fdebede 100644
--- a/hw/i386/xen/xen-hvm.c
+++ b/hw/i386/xen/xen-hvm.c
@@ -654,7 +654,7 @@  void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t length)
 void qmp_xen_set_global_dirty_log(bool enable, Error **errp)
 {
     if (enable) {
-        memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
+        memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION, errp);
     } else {
         memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION);
     }
diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
index 1d2e85746fb7b10eb7f149976970f9a92125af8a..d02d70b7b4b86a29d4d5540ded416543536d8f98 100644
--- a/migration/dirtyrate.c
+++ b/migration/dirtyrate.c
@@ -90,9 +90,15 @@  static int64_t do_calculate_dirtyrate(DirtyPageRecord dirty_pages,
 
 void global_dirty_log_change(unsigned int flag, bool start)
 {
+    Error *local_err = NULL;
+    bool ret;
+
     bql_lock();
     if (start) {
-        memory_global_dirty_log_start(flag);
+        ret = memory_global_dirty_log_start(flag, &local_err);
+        if (!ret) {
+            error_report_err(local_err);
+        }
     } else {
         memory_global_dirty_log_stop(flag);
     }
@@ -608,9 +614,12 @@  static void calculate_dirtyrate_dirty_bitmap(struct DirtyRateConfig config)
 {
     int64_t start_time;
     DirtyPageRecord dirty_pages;
+    Error *local_err = NULL;
 
     bql_lock();
-    memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE);
+    if (!memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE, &local_err)) {
+        error_report_err(local_err);
+    }
 
     /*
      * 1'round of log sync may return all 1 bits with
diff --git a/migration/ram.c b/migration/ram.c
index c5149b7d717aefad7f590422af0ea4a40e7507be..397b4c0f218a66d194e44f9c5f9fe8e9885c48b6 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2836,18 +2836,31 @@  static void migration_bitmap_clear_discarded_pages(RAMState *rs)
 
 static void ram_init_bitmaps(RAMState *rs)
 {
+    Error *local_err = NULL;
+    bool ret = true;
+
     qemu_mutex_lock_ramlist();
 
     WITH_RCU_READ_LOCK_GUARD() {
         ram_list_init_bitmaps();
         /* We don't use dirty log with background snapshots */
         if (!migrate_background_snapshot()) {
-            memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
+            ret = memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION,
+                                                &local_err);
+            if (!ret) {
+                error_report_err(local_err);
+                goto out_unlock;
+            }
             migration_bitmap_sync_precopy(rs, false);
         }
     }
+out_unlock:
     qemu_mutex_unlock_ramlist();
 
+    if (!ret) {
+        return;
+    }
+
     /*
      * After an eventual first bitmap sync, fixup the initial bitmap
      * containing all 1s to exclude any discarded pages from migration.
@@ -3631,6 +3644,8 @@  int colo_init_ram_cache(void)
 void colo_incoming_start_dirty_log(void)
 {
     RAMBlock *block = NULL;
+    Error *local_err = NULL;
+
     /* For memory_global_dirty_log_start below. */
     bql_lock();
     qemu_mutex_lock_ramlist();
@@ -3642,7 +3657,10 @@  void colo_incoming_start_dirty_log(void)
             /* Discard this dirty bitmap record */
             bitmap_zero(block->bmap, block->max_length >> TARGET_PAGE_BITS);
         }
-        memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
+        if (!memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION,
+                                           &local_err)) {
+            error_report_err(local_err);
+        }
     }
     ram_state->migration_dirty_pages = 0;
     qemu_mutex_unlock_ramlist();
diff --git a/system/memory.c b/system/memory.c
index 3600e716149407c10a1f6bf8f0a81c2611cf15ba..cbc098216b789f50460f1d1bc7ec122030693d9e 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -2931,10 +2931,9 @@  static void memory_global_dirty_log_rollback(MemoryListener *listener,
     }
 }
 
-void memory_global_dirty_log_start(unsigned int flags)
+bool memory_global_dirty_log_start(unsigned int flags, Error **errp)
 {
     unsigned int old_flags;
-    Error *local_err = NULL;
 
     assert(flags && !(flags & (~GLOBAL_DIRTY_MASK)));
 
@@ -2946,7 +2945,7 @@  void memory_global_dirty_log_start(unsigned int flags)
 
     flags &= ~global_dirty_tracking;
     if (!flags) {
-        return;
+        return true;
     }
 
     old_flags = global_dirty_tracking;
@@ -2959,7 +2958,7 @@  void memory_global_dirty_log_start(unsigned int flags)
 
         QTAILQ_FOREACH(listener, &memory_listeners, link) {
             if (listener->log_global_start) {
-                ret = listener->log_global_start(listener, &local_err);
+                ret = listener->log_global_start(listener, errp);
                 if (!ret) {
                     break;
                 }
@@ -2969,14 +2968,14 @@  void memory_global_dirty_log_start(unsigned int flags)
         if (!ret) {
             memory_global_dirty_log_rollback(QTAILQ_PREV(listener, link),
                                              flags);
-            error_report_err(local_err);
-            return;
+            return false;
         }
 
         memory_region_transaction_begin();
         memory_region_update_pending = true;
         memory_region_transaction_commit();
     }
+    return true;
 }
 
 static void memory_global_dirty_log_do_stop(unsigned int flags)