[v4,10/14] migration: add support to migrate shared regions list

Message ID 9236f522e48b67fe7136de7620276f7dc193be37.1628076205.git.ashish.kalra@amd.com
State New
Series Add SEV guest live migration support

Commit Message

Ashish Kalra Aug. 4, 2021, 11:57 a.m. UTC
From: Brijesh Singh <brijesh.singh@amd.com>

When memory encryption is enabled, the hypervisor maintains a shared
regions list, which it consults during migration to check whether a
page is private or shared. This list is built during VM bootup and must
be migrated to the target host so that the hypervisor on the target
host can use it for future migrations.

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Co-developed-by: Ashish Kalra <ashish.kalra@amd.com>
Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
---
 include/sysemu/sev.h |  2 ++
 target/i386/sev.c    | 43 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)
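
For readers, the stream layout produced by the two new callbacks is a flat
sequence of 32-bit big-endian words (a sketch derived from the patch at the
bottom of this page, not part of the submission itself):

    /*
     * Written by sev_save_outgoing_shared_regions_list(), parsed by
     * sev_load_incoming_shared_regions_list(); each field is one be32:
     *
     *   SHARED_REGION_LIST_CONT, gfn_start, gfn_end   <- one triple per region
     *   ...                                              (repeated)
     *   SHARED_REGION_LIST_END                        <- terminator
     */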

Comments

Wang, Wei W Sept. 10, 2021, 7:54 a.m. UTC | #1
> From: Brijesh Singh <brijesh.singh@amd.com>
> 
> When memory encryption is enabled, the hypervisor maintains a shared
> regions list, which it consults during migration to check whether a page is
> private or shared. This list is built during VM bootup and must be migrated
> to the target host so that the hypervisor on the target host can use it for
> future migrations.
> 
> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> Co-developed-by: Ashish Kalra <ashish.kalra@amd.com>
> Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> ---
>  include/sysemu/sev.h |  2 ++
>  target/i386/sev.c    | 43 +++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 45 insertions(+)
> 
> diff --git a/include/sysemu/sev.h b/include/sysemu/sev.h
> index 3b913518c0..118ee66406 100644
> --- a/include/sysemu/sev.h
> +++ b/include/sysemu/sev.h
> @@ -32,5 +32,7 @@ void sev_es_set_reset_vector(CPUState *cpu);
>  int sev_remove_shared_regions_list(unsigned long gfn_start,
>                                     unsigned long gfn_end);
>  int sev_add_shared_regions_list(unsigned long gfn_start, unsigned long gfn_end);
> +int sev_save_outgoing_shared_regions_list(QEMUFile *f);
> +int sev_load_incoming_shared_regions_list(QEMUFile *f);
> 
>  #endif
> diff --git a/target/i386/sev.c b/target/i386/sev.c
> index 6d44b7ad21..789051f7b4 100644
> --- a/target/i386/sev.c
> +++ b/target/i386/sev.c
> @@ -135,10 +135,15 @@ static const char *const sev_fw_errlist[] = {
> 
>  #define SEV_FW_BLOB_MAX_SIZE            0x4000          /* 16KB */
> 
> +#define SHARED_REGION_LIST_CONT     0x1
> +#define SHARED_REGION_LIST_END      0x2
> +
>  static struct ConfidentialGuestMemoryEncryptionOps sev_memory_encryption_ops = {
>      .save_setup = sev_save_setup,
>      .save_outgoing_page = sev_save_outgoing_page,
>      .load_incoming_page = sev_load_incoming_page,
> +    .save_outgoing_shared_regions_list = sev_save_outgoing_shared_regions_list,
> +    .load_incoming_shared_regions_list = sev_load_incoming_shared_regions_list,

Hi Ashish,
I have some questions about the callbacks:

1) Why use a list of shared regions instead of a bitmap to record private/shared state?
I saw that the KVM-side implementation used a bitmap at first and switched to shared
regions in v10, but I couldn't find the reason.

2) Why is the save/load of the shared regions list (or bitmap) made vendor specific?
I think it could be a common interface and data structure, e.g. KVM could maintain a
per-memory-slot bitmap to be obtained by QEMU.

Best,
Wei
Ashish Kalra Sept. 10, 2021, 8:47 a.m. UTC | #2
Hello Wang, 

On Fri, Sep 10, 2021 at 07:54:10AM +0000, Wang, Wei W wrote:
> > From: Brijesh Singh <brijesh.singh@amd.com>
> > [full patch quoted above, snipped]
> 
> Hi Ashish,
> I have some questions about the callbacks:
> 
> 1) Why use a list of shared regions instead of a bitmap to record private/shared state?
> I saw that the KVM-side implementation used a bitmap at first and switched to shared
> regions in v10, but I couldn't find the reason.
> 

There was a long discussion of this implementation on the KVM mailing
list. Tracking shared memory via a list of ranges instead of a bitmap
is more efficient: most of the guest memory will be private, and the
unencrypted/shared regions are basically ranges/intervals, so they are
easy to implement and maintain using lists.

A list will also consume much less memory than a bitmap.

A bitmap would consume more memory because it must be sized according
to the guest RAM size, yet it would remain sparsely populated due to
the limited amount of shared/unencrypted guest memory.
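
As a back-of-the-envelope illustration (a minimal sketch with assumed
numbers -- 4 KiB pages and a 16 GiB guest -- not figures from the patch):

    #include <stdio.h>

    /* Compare the fixed cost of a page-granular bitmap with the per-range
     * cost of a shared-regions list, for a 16 GiB guest with 4 KiB pages. */
    int main(void)
    {
        unsigned long long pages = (16ULL << 30) / 4096;  /* 4194304 pages  */
        unsigned long long bitmap_bytes = pages / 8;      /* 512 KiB, fixed */
        unsigned long long node_bytes = 32;   /* two gfns + QTAILQ linkage  */

        printf("bitmap: %llu KiB\n", bitmap_bytes >> 10);
        /* The list stays cheaper while the guest has fewer discontiguous
         * shared ranges than this break-even count (~16384 here). */
        printf("list break-even: %llu ranges\n", bitmap_bytes / node_bytes);
        return 0;
    }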

> 2) Why is the save/load of the shared regions list (or bitmap) made vendor specific?
> I think it could be a common interface and data structure, e.g. KVM could maintain a
> per-memory-slot bitmap to be obtained by QEMU.
> 

As the current shared regions list implementation is vendor specific,
its migration is also vendor specific.

But you are right, this could be implemented as a common interface and
data structure in QEMU. It is, however, a separate data structure; it
is not maintained as a per-memory-slot bitmap in KVM, nor obtained as
such by QEMU.

Thanks,
Ashish
Wang, Wei W Sept. 10, 2021, 9:11 a.m. UTC | #3
On Friday, September 10, 2021 4:48 PM, Ashish Kalra wrote:
> On Fri, Sep 10, 2021 at 07:54:10AM +0000, Wang, Wei W wrote:
> There was a long discussion of this implementation on the KVM mailing list.
> Tracking shared memory via a list of ranges instead of a bitmap is more
> efficient: most of the guest memory will be private, and the unencrypted/shared
> regions are basically ranges/intervals, so they are easy to implement and
> maintain using lists.

OK. At which version did you discuss this, or do you have a link? (I didn't find it in the v9 KVM patches.)

> A list will also consume much less memory than a bitmap.
> 
> A bitmap would consume more memory because it must be sized according to the
> guest RAM size, yet it would remain sparsely populated due to the limited
> amount of shared/unencrypted guest memory.

I also thought about this; it depends on the guest.
I think "a list will consume much less memory" holds when we assume that most guest pages are private.
From a design perspective, what if the guest chooses to have most of its pages shared?
Lists might then consume much more memory than bitmaps, I think.
(Probably I should check your previous discussions first.)

Best,
Wei
Ashish Kalra Sept. 10, 2021, 9:42 a.m. UTC | #4
On Fri, Sep 10, 2021 at 09:11:09AM +0000, Wang, Wei W wrote:
> On Friday, September 10, 2021 4:48 PM, Ashish Kalra wrote:
> > On Fri, Sep 10, 2021 at 07:54:10AM +0000, Wang, Wei W wrote:
> > There was a long discussion of this implementation on the KVM mailing list.
> > Tracking shared memory via a list of ranges instead of a bitmap is more
> > efficient: most of the guest memory will be private, and the unencrypted/shared
> > regions are basically ranges/intervals, so they are easy to implement and
> > maintain using lists.
> 
> OK. At which version did you discuss this, or do you have a link? (I didn't find it in the v9 KVM patches.)
> 

You can follow this email thread:
https://lore.kernel.org/kvm/20201211225542.GA30409@ashkalra_ubuntu_server/

> > A list will also consume much less memory than a bitmap.
> > 
> > A bitmap would consume more memory because it must be sized according to the
> > guest RAM size, yet it would remain sparsely populated due to the limited
> > amount of shared/unencrypted guest memory.
> 
> I also thought about this; it depends on the guest.
> I think "a list will consume much less memory" holds when we assume that most guest pages are private.
> From a design perspective, what if the guest chooses to have most of its pages shared?
> Lists might then consume much more memory than bitmaps, I think.
> (Probably I should check your previous discussions first.)
> 

This will probably depend on the most common use case and scenario; I
think the most common use case will be a mostly encrypted guest.

Thanks,
Ashish

Patch

diff --git a/include/sysemu/sev.h b/include/sysemu/sev.h
index 3b913518c0..118ee66406 100644
--- a/include/sysemu/sev.h
+++ b/include/sysemu/sev.h
@@ -32,5 +32,7 @@  void sev_es_set_reset_vector(CPUState *cpu);
 int sev_remove_shared_regions_list(unsigned long gfn_start,
                                    unsigned long gfn_end);
 int sev_add_shared_regions_list(unsigned long gfn_start, unsigned long gfn_end);
+int sev_save_outgoing_shared_regions_list(QEMUFile *f);
+int sev_load_incoming_shared_regions_list(QEMUFile *f);
 
 #endif
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 6d44b7ad21..789051f7b4 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -135,10 +135,15 @@  static const char *const sev_fw_errlist[] = {
 
 #define SEV_FW_BLOB_MAX_SIZE            0x4000          /* 16KB */
 
+#define SHARED_REGION_LIST_CONT     0x1
+#define SHARED_REGION_LIST_END      0x2
+
 static struct ConfidentialGuestMemoryEncryptionOps sev_memory_encryption_ops = {
     .save_setup = sev_save_setup,
     .save_outgoing_page = sev_save_outgoing_page,
     .load_incoming_page = sev_load_incoming_page,
+    .save_outgoing_shared_regions_list = sev_save_outgoing_shared_regions_list,
+    .load_incoming_shared_regions_list = sev_load_incoming_shared_regions_list,
 };
 
 static int
@@ -1604,6 +1609,44 @@  int sev_add_shared_regions_list(unsigned long start, unsigned long end)
     return 1;
 }
 
+int sev_save_outgoing_shared_regions_list(QEMUFile *f)
+{
+    SevGuestState *s = sev_guest;
+    struct shared_region *pos;
+
+    QTAILQ_FOREACH(pos, &s->shared_regions_list, list) {
+        qemu_put_be32(f, SHARED_REGION_LIST_CONT);
+        qemu_put_be32(f, pos->gfn_start);
+        qemu_put_be32(f, pos->gfn_end);
+    }
+
+    qemu_put_be32(f, SHARED_REGION_LIST_END);
+    return 0;
+}
+
+int sev_load_incoming_shared_regions_list(QEMUFile *f)
+{
+    SevGuestState *s = sev_guest;
+    struct shared_region *shrd_region;
+    int status;
+
+    status = qemu_get_be32(f);
+    while (status == SHARED_REGION_LIST_CONT) {
+
+        shrd_region = g_malloc0(sizeof(*shrd_region));
+        if (!shrd_region) {
+            return 0;
+        }
+        shrd_region->gfn_start = qemu_get_be32(f);
+        shrd_region->gfn_end = qemu_get_be32(f);
+
+        QTAILQ_INSERT_TAIL(&s->shared_regions_list, shrd_region, list);
+
+        status = qemu_get_be32(f);
+    }
+    return 0;
+}
+
 static void
 sev_register_types(void)
 {
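
A possible hardened variant of the load side (an editor's sketch under
assumptions, not part of the submission): g_malloc0() aborts on allocation
failure rather than returning NULL, so the NULL check in the patch above is
dead code, and the loop otherwise trusts the incoming stream to terminate.
SEV_MAX_SHARED_REGIONS below is a hypothetical bound, not an existing QEMU
constant:

    /* Hypothetical cap on incoming records to bound memory usage. */
    #define SEV_MAX_SHARED_REGIONS  (64 * 1024)

    int sev_load_incoming_shared_regions_list(QEMUFile *f)
    {
        SevGuestState *s = sev_guest;
        struct shared_region *shrd_region;
        int nr_regions = 0;
        uint32_t status;

        for (status = qemu_get_be32(f); status == SHARED_REGION_LIST_CONT;
             status = qemu_get_be32(f)) {
            /* Reject obviously bogus streams instead of growing unbounded. */
            if (++nr_regions > SEV_MAX_SHARED_REGIONS) {
                return -EINVAL;
            }
            /* g_new0() aborts on failure; no NULL check is needed. */
            shrd_region = g_new0(struct shared_region, 1);
            shrd_region->gfn_start = qemu_get_be32(f);
            shrd_region->gfn_end = qemu_get_be32(f);
            QTAILQ_INSERT_TAIL(&s->shared_regions_list, shrd_region, list);
        }
        /* Anything other than the END marker indicates a corrupt stream. */
        return status == SHARED_REGION_LIST_END ? 0 : -EINVAL;
    }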