diff mbox series

[RFC,v2,02/21] RAMBlock: Add support of KVM private gmem

Message ID 20230914035117.3285885-3-xiaoyao.li@intel.com
State New
Headers show
Series QEMU gmem implemention | expand

Commit Message

Xiaoyao Li Sept. 14, 2023, 3:50 a.m. UTC
From: Chao Peng <chao.p.peng@linux.intel.com>

Add KVM gmem support to RAMBlock so both normal hva based memory
and kvm gmem fd based private memory can be associated in one RAMBlock.

Introduce new flag RAM_KVM_GMEM. It calls KVM ioctl to create private
gmem for the RAMBlock when it's set.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
 accel/kvm/kvm-all.c     | 17 +++++++++++++++++
 include/exec/memory.h   |  3 +++
 include/exec/ramblock.h |  1 +
 include/sysemu/kvm.h    |  2 ++
 softmmu/physmem.c       | 18 +++++++++++++++---
 5 files changed, 38 insertions(+), 3 deletions(-)

Comments

Lei Wang Sept. 15, 2023, 2:04 a.m. UTC | #1
On 9/14/2023 11:50, Xiaoyao Li wrote:
> From: Chao Peng <chao.p.peng@linux.intel.com>
> 
> Add KVM gmem support to RAMBlock so both normal hva based memory
> and kvm gmem fd based private memory can be associated in one RAMBlock.
> 
> Introduce new flag RAM_KVM_GMEM. It calls KVM ioctl to create private
> gmem for the RAMBlock when it's set.
> 
> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>

Kindly reminding the author's Signed-off-by is missing.

> ---
>  accel/kvm/kvm-all.c     | 17 +++++++++++++++++
>  include/exec/memory.h   |  3 +++
>  include/exec/ramblock.h |  1 +
>  include/sysemu/kvm.h    |  2 ++
>  softmmu/physmem.c       | 18 +++++++++++++++---
>  5 files changed, 38 insertions(+), 3 deletions(-)
> 
> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> index 60aacd925393..185ae16d9620 100644
> --- a/accel/kvm/kvm-all.c
> +++ b/accel/kvm/kvm-all.c
> @@ -4225,3 +4225,20 @@ void query_stats_schemas_cb(StatsSchemaList **result, Error **errp)
>          query_stats_schema_vcpu(first_cpu, &stats_args);
>      }
>  }
> +
> +int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp)
> +{
> +    int fd;
> +    struct kvm_create_guest_memfd gmem = {
> +        .size = size,
> +        /* TODO: to decide whether KVM_GUEST_MEMFD_ALLOW_HUGEPAGE is supported */
> +        .flags = flags,
> +    };
> +
> +    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_GUEST_MEMFD, &gmem);
> +    if (fd < 0) {
> +        error_setg_errno(errp, errno, "%s: error creating kvm gmem\n", __func__);
> +    }
> +
> +    return fd;
> +}
> diff --git a/include/exec/memory.h b/include/exec/memory.h
> index 68284428f87c..227cb2578e95 100644
> --- a/include/exec/memory.h
> +++ b/include/exec/memory.h
> @@ -235,6 +235,9 @@ typedef struct IOMMUTLBEvent {
>  /* RAM is an mmap-ed named file */
>  #define RAM_NAMED_FILE (1 << 9)
>  
> +/* RAM can be private that has kvm gmem backend */
> +#define RAM_KVM_GMEM    (1 << 10)
> +
>  static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
>                                         IOMMUNotifierFlag flags,
>                                         hwaddr start, hwaddr end,
> diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h
> index 69c6a5390293..0d158b3909c9 100644
> --- a/include/exec/ramblock.h
> +++ b/include/exec/ramblock.h
> @@ -41,6 +41,7 @@ struct RAMBlock {
>      QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
>      int fd;
>      uint64_t fd_offset;
> +    int gmem_fd;
>      size_t page_size;
>      /* dirty bitmap used during migration */
>      unsigned long *bmap;
> diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
> index 115f0cca79d1..f5b74c8dd8c5 100644
> --- a/include/sysemu/kvm.h
> +++ b/include/sysemu/kvm.h
> @@ -580,4 +580,6 @@ bool kvm_arch_cpu_check_are_resettable(void);
>  bool kvm_dirty_ring_enabled(void);
>  
>  uint32_t kvm_dirty_ring_size(void);
> +
> +int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp);
>  #endif
> diff --git a/softmmu/physmem.c b/softmmu/physmem.c
> index 3df73542e1fe..2d98a88f41f0 100644
> --- a/softmmu/physmem.c
> +++ b/softmmu/physmem.c
> @@ -1824,6 +1824,16 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
>          }
>      }
>  
> +    if (kvm_enabled() && new_block->flags & RAM_KVM_GMEM &&
> +        new_block->gmem_fd < 0) {
> +        new_block->gmem_fd = kvm_create_guest_memfd(new_block->max_length,
> +                                                    0, errp);
> +        if (new_block->gmem_fd < 0) {
> +            qemu_mutex_unlock_ramlist();
> +            return;
> +        }
> +    }
> +
>      new_ram_size = MAX(old_ram_size,
>                (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
>      if (new_ram_size > old_ram_size) {
> @@ -1885,7 +1895,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
>  
>      /* Just support these ram flags by now. */
>      assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE |
> -                          RAM_PROTECTED | RAM_NAMED_FILE)) == 0);
> +                          RAM_PROTECTED | RAM_NAMED_FILE | RAM_KVM_GMEM)) == 0);
>  
>      if (xen_enabled()) {
>          error_setg(errp, "-mem-path not supported with Xen");
> @@ -1920,6 +1930,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
>      new_block->used_length = size;
>      new_block->max_length = size;
>      new_block->flags = ram_flags;
> +    new_block->gmem_fd = -1;
>      new_block->host = file_ram_alloc(new_block, size, fd, readonly,
>                                       !file_size, offset, errp);
>      if (!new_block->host) {
> @@ -1978,7 +1989,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
>      Error *local_err = NULL;
>  
>      assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE | RAM_PREALLOC |
> -                          RAM_NORESERVE)) == 0);
> +                          RAM_NORESERVE| RAM_KVM_GMEM)) == 0);
>      assert(!host ^ (ram_flags & RAM_PREALLOC));
>  
>      size = HOST_PAGE_ALIGN(size);
> @@ -1990,6 +2001,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
>      new_block->max_length = max_size;
>      assert(max_size >= size);
>      new_block->fd = -1;
> +    new_block->gmem_fd = -1;
>      new_block->page_size = qemu_real_host_page_size();
>      new_block->host = host;
>      new_block->flags = ram_flags;
> @@ -2012,7 +2024,7 @@ RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
>  RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags,
>                           MemoryRegion *mr, Error **errp)
>  {
> -    assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE)) == 0);
> +    assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE | RAM_KVM_GMEM)) == 0);
>      return qemu_ram_alloc_internal(size, size, NULL, NULL, ram_flags, mr, errp);
>  }
>
Xiaoyao Li Sept. 15, 2023, 3:45 a.m. UTC | #2
On 9/15/2023 10:04 AM, Wang, Lei wrote:
> On 9/14/2023 11:50, Xiaoyao Li wrote:
>> From: Chao Peng<chao.p.peng@linux.intel.com>
>>
>> Add KVM gmem support to RAMBlock so both normal hva based memory
>> and kvm gmem fd based private memory can be associated in one RAMBlock.
>>
>> Introduce new flag RAM_KVM_GMEM. It calls KVM ioctl to create private
>> gmem for the RAMBlock when it's set.
>>
>> Signed-off-by: Xiaoyao Li<xiaoyao.li@intel.com>
> Kindly reminding the author's Signed-off-by is missing.

I will fix it.
Thanks!
David Hildenbrand Sept. 21, 2023, 8:55 a.m. UTC | #3
On 14.09.23 05:50, Xiaoyao Li wrote:
> From: Chao Peng <chao.p.peng@linux.intel.com>
> 
> Add KVM gmem support to RAMBlock so both normal hva based memory
> and kvm gmem fd based private memory can be associated in one RAMBlock.
> 
> Introduce new flag RAM_KVM_GMEM. It calls KVM ioctl to create private
> gmem for the RAMBlock when it's set.


But who sets RAM_KVM_GMEM and when? Don't we simply allocate it for all 
RAMBlocks under such special VMs? What's the downside of doing that?
Xiaoyao Li Sept. 22, 2023, 12:22 a.m. UTC | #4
On 9/21/2023 4:55 PM, David Hildenbrand wrote:
> On 14.09.23 05:50, Xiaoyao Li wrote:
>> From: Chao Peng <chao.p.peng@linux.intel.com>
>>
>> Add KVM gmem support to RAMBlock so both normal hva based memory
>> and kvm gmem fd based private memory can be associated in one RAMBlock.
>>
>> Introduce new flag RAM_KVM_GMEM. It calls KVM ioctl to create private
>> gmem for the RAMBlock when it's set.
> 
> 
> But who sets RAM_KVM_GMEM and when? 

The answer is in the next patch. When `private` property of memory 
backend is set to true, it will pass RAM_KVM_GMEM flag to 
memory_region_init_ram_*()

> Don't we simply allocate it for all 
> RAMBlocks under such special VMs? 

yes, this is the direction after your comments.

I'll try to figure out how to achieve it.

> What's the downside of doing that?

As far as I see, for TDX, no downside.
David Hildenbrand Sept. 22, 2023, 7:08 a.m. UTC | #5
On 22.09.23 02:22, Xiaoyao Li wrote:
> On 9/21/2023 4:55 PM, David Hildenbrand wrote:
>> On 14.09.23 05:50, Xiaoyao Li wrote:
>>> From: Chao Peng <chao.p.peng@linux.intel.com>
>>>
>>> Add KVM gmem support to RAMBlock so both normal hva based memory
>>> and kvm gmem fd based private memory can be associated in one RAMBlock.
>>>
>>> Introduce new flag RAM_KVM_GMEM. It calls KVM ioctl to create private
>>> gmem for the RAMBlock when it's set.
>>
>>
>> But who sets RAM_KVM_GMEM and when?
> 
> The answer is in the next patch. When `private` property of memory
> backend is set to true, it will pass RAM_KVM_GMEM flag to
> memory_region_init_ram_*()

Okay, assuming that patch (and property) will go away, I assume this 
flag can also go away, right?
Daniel P. Berrangé Oct. 6, 2023, 11:07 a.m. UTC | #6
On Wed, Sep 13, 2023 at 11:50:58PM -0400, Xiaoyao Li wrote:
> From: Chao Peng <chao.p.peng@linux.intel.com>
> 
> Add KVM gmem support to RAMBlock so both normal hva based memory
> and kvm gmem fd based private memory can be associated in one RAMBlock.
> 
> Introduce new flag RAM_KVM_GMEM. It calls KVM ioctl to create private
> gmem for the RAMBlock when it's set.
> 
> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
> ---
>  accel/kvm/kvm-all.c     | 17 +++++++++++++++++
>  include/exec/memory.h   |  3 +++
>  include/exec/ramblock.h |  1 +
>  include/sysemu/kvm.h    |  2 ++
>  softmmu/physmem.c       | 18 +++++++++++++++---
>  5 files changed, 38 insertions(+), 3 deletions(-)
> 
> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> index 60aacd925393..185ae16d9620 100644
> --- a/accel/kvm/kvm-all.c
> +++ b/accel/kvm/kvm-all.c
> @@ -4225,3 +4225,20 @@ void query_stats_schemas_cb(StatsSchemaList **result, Error **errp)
>          query_stats_schema_vcpu(first_cpu, &stats_args);
>      }
>  }
> +
> +int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp)
> +{
> +    int fd;
> +    struct kvm_create_guest_memfd gmem = {
> +        .size = size,
> +        /* TODO: to decide whether KVM_GUEST_MEMFD_ALLOW_HUGEPAGE is supported */
> +        .flags = flags,
> +    };
> +
> +    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_GUEST_MEMFD, &gmem);
> +    if (fd < 0) {
> +        error_setg_errno(errp, errno, "%s: error creating kvm gmem\n", __func__);
> +    }
> +
> +    return fd;
> +}
> diff --git a/include/exec/memory.h b/include/exec/memory.h
> index 68284428f87c..227cb2578e95 100644
> --- a/include/exec/memory.h
> +++ b/include/exec/memory.h
> @@ -235,6 +235,9 @@ typedef struct IOMMUTLBEvent {
>  /* RAM is an mmap-ed named file */
>  #define RAM_NAMED_FILE (1 << 9)
>  
> +/* RAM can be private that has kvm gmem backend */
> +#define RAM_KVM_GMEM    (1 << 10)
> +
>  static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
>                                         IOMMUNotifierFlag flags,
>                                         hwaddr start, hwaddr end,
> diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h
> index 69c6a5390293..0d158b3909c9 100644
> --- a/include/exec/ramblock.h
> +++ b/include/exec/ramblock.h
> @@ -41,6 +41,7 @@ struct RAMBlock {
>      QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
>      int fd;
>      uint64_t fd_offset;
> +    int gmem_fd;
>      size_t page_size;
>      /* dirty bitmap used during migration */
>      unsigned long *bmap;

You're adding a file descriptor to RAMBlock, but I don't see
anything in this patch that ever calls close(gmem_fd) when the
RAMBlock is released. Presuambly reclaim_ramblock() needs to
deal with this ?


With regards,
Daniel
Xiaoyao Li Oct. 8, 2023, 2:59 a.m. UTC | #7
On 9/22/2023 3:08 PM, David Hildenbrand wrote:
> On 22.09.23 02:22, Xiaoyao Li wrote:
>> On 9/21/2023 4:55 PM, David Hildenbrand wrote:
>>> On 14.09.23 05:50, Xiaoyao Li wrote:
>>>> From: Chao Peng <chao.p.peng@linux.intel.com>
>>>>
>>>> Add KVM gmem support to RAMBlock so both normal hva based memory
>>>> and kvm gmem fd based private memory can be associated in one RAMBlock.
>>>>
>>>> Introduce new flag RAM_KVM_GMEM. It calls KVM ioctl to create private
>>>> gmem for the RAMBlock when it's set.
>>>
>>>
>>> But who sets RAM_KVM_GMEM and when?
>>
>> The answer is in the next patch. When `private` property of memory
>> backend is set to true, it will pass RAM_KVM_GMEM flag to
>> memory_region_init_ram_*()
> 
> Okay, assuming that patch (and property) will go away, I assume this 
> flag can also go away, right?
> 

If dropping the flag RAM_KVM_GMEM, it seems we need go back to the 
approach of rfc v1[1][2], that allocating gmem inside .region_add() 
callback. Is it accepted by you?

Another option is allocating gmem inside ram_block_add() by checking the 
vm_type (it looks hacky for me). What's your opinion on this option?

One more option is, we keep the RAM_KVM_GMEM as this patch, and change 
"private" property of memory backend into "need_kvm_gmem" field (make it 
not user settable) and "need_kvm_gmem" field will be set to true in 
tdx_kvm_init() specific cgs initialization function.


[1] 
https://lore.kernel.org/qemu-devel/a154c33d-b24d-b713-0dc0-027d54f2340f@redhat.com/
[2] 
https://lore.kernel.org/qemu-devel/20230731162201.271114-10-xiaoyao.li@intel.com/
diff mbox series

Patch

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 60aacd925393..185ae16d9620 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -4225,3 +4225,20 @@  void query_stats_schemas_cb(StatsSchemaList **result, Error **errp)
         query_stats_schema_vcpu(first_cpu, &stats_args);
     }
 }
+
+int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp)
+{
+    int fd;
+    struct kvm_create_guest_memfd gmem = {
+        .size = size,
+        /* TODO: to decide whether KVM_GUEST_MEMFD_ALLOW_HUGEPAGE is supported */
+        .flags = flags,
+    };
+
+    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_GUEST_MEMFD, &gmem);
+    if (fd < 0) {
+        error_setg_errno(errp, errno, "%s: error creating kvm gmem\n", __func__);
+    }
+
+    return fd;
+}
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 68284428f87c..227cb2578e95 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -235,6 +235,9 @@  typedef struct IOMMUTLBEvent {
 /* RAM is an mmap-ed named file */
 #define RAM_NAMED_FILE (1 << 9)
 
+/* RAM can be private that has kvm gmem backend */
+#define RAM_KVM_GMEM    (1 << 10)
+
 static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
                                        IOMMUNotifierFlag flags,
                                        hwaddr start, hwaddr end,
diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h
index 69c6a5390293..0d158b3909c9 100644
--- a/include/exec/ramblock.h
+++ b/include/exec/ramblock.h
@@ -41,6 +41,7 @@  struct RAMBlock {
     QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
     int fd;
     uint64_t fd_offset;
+    int gmem_fd;
     size_t page_size;
     /* dirty bitmap used during migration */
     unsigned long *bmap;
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 115f0cca79d1..f5b74c8dd8c5 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -580,4 +580,6 @@  bool kvm_arch_cpu_check_are_resettable(void);
 bool kvm_dirty_ring_enabled(void);
 
 uint32_t kvm_dirty_ring_size(void);
+
+int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp);
 #endif
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index 3df73542e1fe..2d98a88f41f0 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -1824,6 +1824,16 @@  static void ram_block_add(RAMBlock *new_block, Error **errp)
         }
     }
 
+    if (kvm_enabled() && new_block->flags & RAM_KVM_GMEM &&
+        new_block->gmem_fd < 0) {
+        new_block->gmem_fd = kvm_create_guest_memfd(new_block->max_length,
+                                                    0, errp);
+        if (new_block->gmem_fd < 0) {
+            qemu_mutex_unlock_ramlist();
+            return;
+        }
+    }
+
     new_ram_size = MAX(old_ram_size,
               (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
     if (new_ram_size > old_ram_size) {
@@ -1885,7 +1895,7 @@  RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
 
     /* Just support these ram flags by now. */
     assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE |
-                          RAM_PROTECTED | RAM_NAMED_FILE)) == 0);
+                          RAM_PROTECTED | RAM_NAMED_FILE | RAM_KVM_GMEM)) == 0);
 
     if (xen_enabled()) {
         error_setg(errp, "-mem-path not supported with Xen");
@@ -1920,6 +1930,7 @@  RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
     new_block->used_length = size;
     new_block->max_length = size;
     new_block->flags = ram_flags;
+    new_block->gmem_fd = -1;
     new_block->host = file_ram_alloc(new_block, size, fd, readonly,
                                      !file_size, offset, errp);
     if (!new_block->host) {
@@ -1978,7 +1989,7 @@  RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
     Error *local_err = NULL;
 
     assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE | RAM_PREALLOC |
-                          RAM_NORESERVE)) == 0);
+                          RAM_NORESERVE| RAM_KVM_GMEM)) == 0);
     assert(!host ^ (ram_flags & RAM_PREALLOC));
 
     size = HOST_PAGE_ALIGN(size);
@@ -1990,6 +2001,7 @@  RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
     new_block->max_length = max_size;
     assert(max_size >= size);
     new_block->fd = -1;
+    new_block->gmem_fd = -1;
     new_block->page_size = qemu_real_host_page_size();
     new_block->host = host;
     new_block->flags = ram_flags;
@@ -2012,7 +2024,7 @@  RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
 RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags,
                          MemoryRegion *mr, Error **errp)
 {
-    assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE)) == 0);
+    assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE | RAM_KVM_GMEM)) == 0);
     return qemu_ram_alloc_internal(size, size, NULL, NULL, ram_flags, mr, errp);
 }