diff mbox

[v2] migrate: Introduce zero RAM checks to skip RAM migration

Message ID 1486118198-8965-1-git-send-email-ashijeetacharya@gmail.com
State New
Headers show

Commit Message

Ashijeet Acharya Feb. 3, 2017, 10:36 a.m. UTC
Migration of a "none" machine with no RAM crashes abruptly as
bitmap_new() fails and thus aborts. Instead place zero RAM checks at
appropriate places to skip migration of RAM in this case and complete
migration successfully for devices only.

Signed-off-by: Ashijeet Acharya <ashijeetacharya@gmail.com>
---
Changes in v2:
- try to migrate successfully by skipping RAM (Paolo, Greg)
- drop the idea of erroring out and failing nicely
 migration/ram.c | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

Comments

Paolo Bonzini Feb. 3, 2017, 5:14 p.m. UTC | #1
On 03/02/2017 02:36, Ashijeet Acharya wrote:
> Migration of a "none" machine with no RAM crashes abruptly as
> bitmap_new() fails and thus aborts. Instead place zero RAM checks at
> appropriate places to skip migration of RAM in this case and complete
> migration successfully for devices only.
> 
> Signed-off-by: Ashijeet Acharya <ashijeetacharya@gmail.com>
> ---
> Changes in v2:
> - try to migrate successfully by skipping RAM (Paolo, Greg)
> - drop the idea of erroring out and failing nicely
>  migration/ram.c | 22 +++++++++++++++-------
>  1 file changed, 15 insertions(+), 7 deletions(-)
> 
> diff --git a/migration/ram.c b/migration/ram.c
> index ef8fadf..2f19566 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -1325,6 +1325,11 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
>      ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
>                                   ram_addr_t space */
>  
> +    /* No dirty page as there is zero RAM */
> +    if (!ram_bytes_total()) {
> +        return pages;
> +    }
> +
>      pss.block = last_seen_block;
>      pss.offset = last_offset;
>      pss.complete_round = false;
> @@ -1912,14 +1917,17 @@ static int ram_save_init_globals(void)
>      bytes_transferred = 0;
>      reset_ram_globals();
>  
> -    ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
> -    migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
> -    migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
> -    bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
> +    /* Skip setting bitmap if there is no RAM */
> +    if (ram_bytes_total()) {
> +        ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
> +        migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
> +        migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
> +        bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
>  
> -    if (migrate_postcopy_ram()) {
> -        migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages);
> -        bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages);
> +        if (migrate_postcopy_ram()) {
> +            migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages);
> +            bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages);
> +        }
>      }
>  
>      /*
> 

I didn't test it, but it looks good.

Paolo
Ashijeet Acharya Feb. 3, 2017, 5:17 p.m. UTC | #2
On Fri, Feb 3, 2017 at 10:44 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>
>
> On 03/02/2017 02:36, Ashijeet Acharya wrote:
>> Migration of a "none" machine with no RAM crashes abruptly as
>> bitmap_new() fails and thus aborts. Instead place zero RAM checks at
>> appropriate places to skip migration of RAM in this case and complete
>> migration successfully for devices only.
>>
>> Signed-off-by: Ashijeet Acharya <ashijeetacharya@gmail.com>
>> ---
>> Changes in v2:
>> - try to migrate successfully by skipping RAM (Paolo, Greg)
>> - drop the idea of erroring out and failing nicely
>>  migration/ram.c | 22 +++++++++++++++-------
>>  1 file changed, 15 insertions(+), 7 deletions(-)
>>
>> diff --git a/migration/ram.c b/migration/ram.c
>> index ef8fadf..2f19566 100644
>> --- a/migration/ram.c
>> +++ b/migration/ram.c
>> @@ -1325,6 +1325,11 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
>>      ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
>>                                   ram_addr_t space */
>>
>> +    /* No dirty page as there is zero RAM */
>> +    if (!ram_bytes_total()) {
>> +        return pages;
>> +    }
>> +
>>      pss.block = last_seen_block;
>>      pss.offset = last_offset;
>>      pss.complete_round = false;
>> @@ -1912,14 +1917,17 @@ static int ram_save_init_globals(void)
>>      bytes_transferred = 0;
>>      reset_ram_globals();
>>
>> -    ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
>> -    migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
>> -    migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
>> -    bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
>> +    /* Skip setting bitmap if there is no RAM */
>> +    if (ram_bytes_total()) {
>> +        ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
>> +        migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
>> +        migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
>> +        bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
>>
>> -    if (migrate_postcopy_ram()) {
>> -        migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages);
>> -        bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages);
>> +        if (migrate_postcopy_ram()) {
>> +            migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages);
>> +            bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages);
>> +        }
>>      }
>>
>>      /*
>>
>
> I didn't test it, but it looks good.

I did test it and it looks to be fine. Migration status shows
'complete' and transfers zero RAM as expected (I checked this with
'info migrate' on HMP) and Dave assured me that 'complete' means that
it completed successfully.

Ashijeet
>
> Paolo
Dr. David Alan Gilbert Feb. 8, 2017, 11:25 a.m. UTC | #3
* Ashijeet Acharya (ashijeetacharya@gmail.com) wrote:
> Migration of a "none" machine with no RAM crashes abruptly as
> bitmap_new() fails and thus aborts. Instead place zero RAM checks at
> appropriate places to skip migration of RAM in this case and complete
> migration successfully for devices only.
> 
> Signed-off-by: Ashijeet Acharya <ashijeetacharya@gmail.com>

Yes it seems to work, but there is a small bug,
if we look at:

static void ram_migration_cleanup(void *opaque)
{
    /* caller have hold iothread lock or is in a bh, so there is
     * no writing race against this migration_bitmap
     */
    struct BitmapRcu *bitmap = migration_bitmap_rcu;
    atomic_rcu_set(&migration_bitmap_rcu, NULL);
    if (bitmap) {
        memory_global_dirty_log_stop();
        call_rcu(bitmap, migration_bitmap_free, rcu);
    }


we see it doesn't cause memory_global_dirty_log_stop
if migration_bitmap_rcu is NULL; so we end up
still calling dirty_log_start in ram_save_init but
never call stop.

I suggest you change it to:

    migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
    if (ram_bytes_total()) {
        ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
        migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
        bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);

The cleanup routine still causes the dirty_log_stop() then.

Dave

> ---
> Changes in v2:
> - try to migrate successfully by skipping RAM (Paolo, Greg)
> - drop the idea of erroring out and failing nicely
>  migration/ram.c | 22 +++++++++++++++-------
>  1 file changed, 15 insertions(+), 7 deletions(-)
> 
> diff --git a/migration/ram.c b/migration/ram.c
> index ef8fadf..2f19566 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -1325,6 +1325,11 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
>      ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
>                                   ram_addr_t space */
>  
> +    /* No dirty page as there is zero RAM */
> +    if (!ram_bytes_total()) {
> +        return pages;
> +    }
> +
>      pss.block = last_seen_block;
>      pss.offset = last_offset;
>      pss.complete_round = false;
> @@ -1912,14 +1917,17 @@ static int ram_save_init_globals(void)
>      bytes_transferred = 0;
>      reset_ram_globals();
>  
> -    ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
> -    migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
> -    migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
> -    bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
> +    /* Skip setting bitmap if there is no RAM */
> +    if (ram_bytes_total()) {
> +        ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
> +        migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
> +        migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
> +        bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
>  
> -    if (migrate_postcopy_ram()) {
> -        migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages);
> -        bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages);
> +        if (migrate_postcopy_ram()) {
> +            migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages);
> +            bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages);
> +        }
>      }
>  
>      /*
> -- 
> 2.6.2
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Thomas Huth Feb. 8, 2017, 11:49 a.m. UTC | #4
On 03.02.2017 11:36, Ashijeet Acharya wrote:
> Migration of a "none" machine with no RAM crashes abruptly as
> bitmap_new() fails and thus aborts. Instead place zero RAM checks at
> appropriate places to skip migration of RAM in this case and complete
> migration successfully for devices only.
> 
> Signed-off-by: Ashijeet Acharya <ashijeetacharya@gmail.com>
> ---
> Changes in v2:
> - try to migrate successfully by skipping RAM (Paolo, Greg)
> - drop the idea of erroring out and failing nicely
>  migration/ram.c | 22 +++++++++++++++-------
>  1 file changed, 15 insertions(+), 7 deletions(-)
> 
> diff --git a/migration/ram.c b/migration/ram.c
> index ef8fadf..2f19566 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -1325,6 +1325,11 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
>      ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
>                                   ram_addr_t space */
>  
> +    /* No dirty page as there is zero RAM */
> +    if (!ram_bytes_total()) {
> +        return pages;
> +    }
> +
>      pss.block = last_seen_block;
>      pss.offset = last_offset;
>      pss.complete_round = false;
> @@ -1912,14 +1917,17 @@ static int ram_save_init_globals(void)
>      bytes_transferred = 0;
>      reset_ram_globals();
>  
> -    ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
> -    migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
> -    migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
> -    bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
> +    /* Skip setting bitmap if there is no RAM */
> +    if (ram_bytes_total()) {
> +        ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
> +        migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
> +        migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
> +        bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);

I guess the problem was here that ram_bitmap_pages was 0 ? So instead of
checking "if (ram_bytes_total())", would it maybe be nicer to check for
"if (ram_bitmap_pages)" instead?

 Thomas
Ashijeet Acharya Feb. 8, 2017, 1:07 p.m. UTC | #5
On Wed, Feb 8, 2017 at 4:55 PM, Dr. David Alan Gilbert
<dgilbert@redhat.com> wrote:
> * Ashijeet Acharya (ashijeetacharya@gmail.com) wrote:
>> Migration of a "none" machine with no RAM crashes abruptly as
>> bitmap_new() fails and thus aborts. Instead place zero RAM checks at
>> appropriate places to skip migration of RAM in this case and complete
>> migration successfully for devices only.
>>
>> Signed-off-by: Ashijeet Acharya <ashijeetacharya@gmail.com>
>
> Yes it seems to work, but there is a small bug,
> if we look at:
>
> static void ram_migration_cleanup(void *opaque)
> {
>     /* caller have hold iothread lock or is in a bh, so there is
>      * no writing race against this migration_bitmap
>      */
>     struct BitmapRcu *bitmap = migration_bitmap_rcu;
>     atomic_rcu_set(&migration_bitmap_rcu, NULL);
>     if (bitmap) {
>         memory_global_dirty_log_stop();
>         call_rcu(bitmap, migration_bitmap_free, rcu);
>     }
>
>
> we see it doesn't cause memory_global_dirty_log_stop
> if migration_bitmap_rcu is NULL; so we end up
> still calling dirty_log_start in ram_save_init but
> never call stop.
>

Exactly. This is what kept bugging me too, which is why I asked you on
the IRC the other day (if you vaguely remember) about whether I will
have to handle cleanup separately...Maybe I didn't frame it well that
day.

> I suggest you change it to:
>
>     migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
>     if (ram_bytes_total()) {
>         ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
>         migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
>         bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
>
> The cleanup routine still causes the dirty_log_stop() then.

Right, I will do that.

Ashijeet

> Dave
>
>> ---
>> Changes in v2:
>> - try to migrate successfully by skipping RAM (Paolo, Greg)
>> - drop the idea of erroring out and failing nicely
>>  migration/ram.c | 22 +++++++++++++++-------
>>  1 file changed, 15 insertions(+), 7 deletions(-)
>>
>> diff --git a/migration/ram.c b/migration/ram.c
>> index ef8fadf..2f19566 100644
>> --- a/migration/ram.c
>> +++ b/migration/ram.c
>> @@ -1325,6 +1325,11 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
>>      ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
>>                                   ram_addr_t space */
>>
>> +    /* No dirty page as there is zero RAM */
>> +    if (!ram_bytes_total()) {
>> +        return pages;
>> +    }
>> +
>>      pss.block = last_seen_block;
>>      pss.offset = last_offset;
>>      pss.complete_round = false;
>> @@ -1912,14 +1917,17 @@ static int ram_save_init_globals(void)
>>      bytes_transferred = 0;
>>      reset_ram_globals();
>>
>> -    ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
>> -    migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
>> -    migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
>> -    bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
>> +    /* Skip setting bitmap if there is no RAM */
>> +    if (ram_bytes_total()) {
>> +        ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
>> +        migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
>> +        migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
>> +        bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
>>
>> -    if (migrate_postcopy_ram()) {
>> -        migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages);
>> -        bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages);
>> +        if (migrate_postcopy_ram()) {
>> +            migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages);
>> +            bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages);
>> +        }
>>      }
>>
>>      /*
>> --
>> 2.6.2
>>
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Ashijeet Acharya Feb. 8, 2017, 1:48 p.m. UTC | #6
On Wed, Feb 8, 2017 at 5:19 PM, Thomas Huth <thuth@redhat.com> wrote:
> On 03.02.2017 11:36, Ashijeet Acharya wrote:
>> Migration of a "none" machine with no RAM crashes abruptly as
>> bitmap_new() fails and thus aborts. Instead place zero RAM checks at
>> appropriate places to skip migration of RAM in this case and complete
>> migration successfully for devices only.
>>
>> Signed-off-by: Ashijeet Acharya <ashijeetacharya@gmail.com>
>> ---
>> Changes in v2:
>> - try to migrate successfully by skipping RAM (Paolo, Greg)
>> - drop the idea of erroring out and failing nicely
>>  migration/ram.c | 22 +++++++++++++++-------
>>  1 file changed, 15 insertions(+), 7 deletions(-)
>>
>> diff --git a/migration/ram.c b/migration/ram.c
>> index ef8fadf..2f19566 100644
>> --- a/migration/ram.c
>> +++ b/migration/ram.c
>> @@ -1325,6 +1325,11 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
>>      ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
>>                                   ram_addr_t space */
>>
>> +    /* No dirty page as there is zero RAM */
>> +    if (!ram_bytes_total()) {
>> +        return pages;
>> +    }
>> +
>>      pss.block = last_seen_block;
>>      pss.offset = last_offset;
>>      pss.complete_round = false;
>> @@ -1912,14 +1917,17 @@ static int ram_save_init_globals(void)
>>      bytes_transferred = 0;
>>      reset_ram_globals();
>>
>> -    ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
>> -    migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
>> -    migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
>> -    bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
>> +    /* Skip setting bitmap if there is no RAM */
>> +    if (ram_bytes_total()) {
>> +        ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
>> +        migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
>> +        migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
>> +        bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
>
> I guess the problem was here that ram_bitmap_pages was 0 ? So instead of
> checking "if (ram_bytes_total())", would it maybe be nicer to check for
> "if (ram_bitmap_pages)" instead?
>
I thought of using that first, but ram_bytes_total() works for both
the places? No?

Ashijeet

>  Thomas
>
diff mbox

Patch

diff --git a/migration/ram.c b/migration/ram.c
index ef8fadf..2f19566 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1325,6 +1325,11 @@  static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
     ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
                                  ram_addr_t space */
 
+    /* No dirty page as there is zero RAM */
+    if (!ram_bytes_total()) {
+        return pages;
+    }
+
     pss.block = last_seen_block;
     pss.offset = last_offset;
     pss.complete_round = false;
@@ -1912,14 +1917,17 @@  static int ram_save_init_globals(void)
     bytes_transferred = 0;
     reset_ram_globals();
 
-    ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
-    migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
-    migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
-    bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
+    /* Skip setting bitmap if there is no RAM */
+    if (ram_bytes_total()) {
+        ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
+        migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
+        migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
+        bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
 
-    if (migrate_postcopy_ram()) {
-        migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages);
-        bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages);
+        if (migrate_postcopy_ram()) {
+            migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages);
+            bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages);
+        }
     }
 
     /*