diff mbox series

[V5,07/25] machine: memfd-alloc option

Message ID 1625678434-240960-8-git-send-email-steven.sistare@oracle.com
State New
Headers show
Series Live Update | expand

Commit Message

Steven Sistare July 7, 2021, 5:20 p.m. UTC
Allocate anonymous memory using memfd_create if the memfd-alloc machine
option is set.

Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
---
 hw/core/machine.c   | 19 +++++++++++++++++++
 include/hw/boards.h |  1 +
 qemu-options.hx     |  5 +++++
 softmmu/physmem.c   | 42 +++++++++++++++++++++++++++++++++---------
 trace-events        |  1 +
 util/qemu-config.c  |  4 ++++
 6 files changed, 63 insertions(+), 9 deletions(-)

Comments

Marc-André Lureau July 8, 2021, 2:20 p.m. UTC | #1
Hi

On Wed, Jul 7, 2021 at 9:39 PM Steve Sistare <steven.sistare@oracle.com>
wrote:

> Allocate anonymous memory using memfd_create if the memfd-alloc machine
> option is set.
>

Nice. I'd suggest you send this patch separately. (We have had discussions about
an option like this several times.)


> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
> ---
>  hw/core/machine.c   | 19 +++++++++++++++++++
>  include/hw/boards.h |  1 +
>  qemu-options.hx     |  5 +++++
>  softmmu/physmem.c   | 42 +++++++++++++++++++++++++++++++++---------
>  trace-events        |  1 +
>  util/qemu-config.c  |  4 ++++
>  6 files changed, 63 insertions(+), 9 deletions(-)
>
> diff --git a/hw/core/machine.c b/hw/core/machine.c
> index 57c18f9..f0656a8 100644
> --- a/hw/core/machine.c
> +++ b/hw/core/machine.c
> @@ -383,6 +383,20 @@ static void machine_set_mem_merge(Object *obj, bool
> value, Error **errp)
>      ms->mem_merge = value;
>  }
>
> +static bool machine_get_memfd_alloc(Object *obj, Error **errp)
> +{
> +    MachineState *ms = MACHINE(obj);
> +
> +    return ms->memfd_alloc;
> +}
> +
> +static void machine_set_memfd_alloc(Object *obj, bool value, Error **errp)
> +{
> +    MachineState *ms = MACHINE(obj);
> +
> +    ms->memfd_alloc = value;
> +}
> +
>  static bool machine_get_usb(Object *obj, Error **errp)
>  {
>      MachineState *ms = MACHINE(obj);
> @@ -917,6 +931,11 @@ static void machine_class_init(ObjectClass *oc, void
> *data)
>      object_class_property_set_description(oc, "mem-merge",
>          "Enable/disable memory merge support");
>
> +    object_class_property_add_bool(oc, "memfd-alloc",
> +        machine_get_memfd_alloc, machine_set_memfd_alloc);
> +    object_class_property_set_description(oc, "memfd-alloc",
> +        "Enable/disable allocating anonymous memory using memfd_create");
> +
>      object_class_property_add_bool(oc, "usb",
>          machine_get_usb, machine_set_usb);
>      object_class_property_set_description(oc, "usb",
> diff --git a/include/hw/boards.h b/include/hw/boards.h
> index accd6ef..299e1ca 100644
> --- a/include/hw/boards.h
> +++ b/include/hw/boards.h
> @@ -305,6 +305,7 @@ struct MachineState {
>      char *dt_compatible;
>      bool dump_guest_core;
>      bool mem_merge;
> +    bool memfd_alloc;
>      bool usb;
>      bool usb_disabled;
>      char *firmware;
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 8965dab..fa53734 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -30,6 +30,7 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \
>      "                vmport=on|off|auto controls emulation of vmport
> (default: auto)\n"
>      "                dump-guest-core=on|off include guest memory in a
> core dump (default=on)\n"
>      "                mem-merge=on|off controls memory merge support
> (default: on)\n"
> +    "                memfd-alloc=on|off controls allocating anonymous
> memory using memfd_create (default: off)\n"
>      "                aes-key-wrap=on|off controls support for AES key
> wrapping (default=on)\n"
>      "                dea-key-wrap=on|off controls support for DEA key
> wrapping (default=on)\n"
>      "                suppress-vmdesc=on|off disables self-describing
> migration (default=off)\n"
> @@ -76,6 +77,10 @@ SRST
>          supported by the host, de-duplicates identical memory pages
>          among VMs instances (enabled by default).
>
> +    ``memfd-alloc=on|off``
> +        Enables or disables allocation of anonymous memory using
> memfd_create.
> +        (disabled by default).
> +
>      ``aes-key-wrap=on|off``
>          Enables or disables AES key wrapping support on s390-ccw hosts.
>          This feature controls whether AES wrapping keys will be created
> diff --git a/softmmu/physmem.c b/softmmu/physmem.c
> index 9b171c9..b149250 100644
> --- a/softmmu/physmem.c
> +++ b/softmmu/physmem.c
> @@ -64,6 +64,7 @@
>
>  #include "qemu/pmem.h"
>
> +#include "qemu/memfd.h"
>  #include "migration/vmstate.h"
>
>  #include "qemu/range.h"
> @@ -1960,35 +1961,58 @@ static void ram_block_add(RAMBlock *new_block,
> Error **errp)
>      const bool shared = qemu_ram_is_shared(new_block);
>      RAMBlock *block;
>      RAMBlock *last_block = NULL;
> +    struct MemoryRegion *mr = new_block->mr;
>      ram_addr_t old_ram_size, new_ram_size;
>      Error *err = NULL;
> +    const char *name;
> +    void *addr = 0;
> +    size_t maxlen;
> +    MachineState *ms = MACHINE(qdev_get_machine());
>
>      old_ram_size = last_ram_page();
>
>      qemu_mutex_lock_ramlist();
> -    new_block->offset = find_ram_offset(new_block->max_length);
> +    maxlen = new_block->max_length;
> +    new_block->offset = find_ram_offset(maxlen);
>
>      if (!new_block->host) {
>          if (xen_enabled()) {
> -            xen_ram_alloc(new_block->offset, new_block->max_length,
> -                          new_block->mr, &err);
> +            xen_ram_alloc(new_block->offset, maxlen, new_block->mr, &err);
>              if (err) {
>                  error_propagate(errp, err);
>                  qemu_mutex_unlock_ramlist();
>                  return;
>              }
>          } else {
> -            new_block->host = qemu_anon_ram_alloc(new_block->max_length,
> -                                                  &new_block->mr->align,
> -                                                  shared, noreserve);
> -            if (!new_block->host) {
> +            name = memory_region_name(new_block->mr);
> +            if (ms->memfd_alloc) {
> +                int mfd = -1;          /* placeholder until next patch */
> +                mr->align = QEMU_VMALLOC_ALIGN;
> +                if (mfd < 0) {
> +                    mfd = qemu_memfd_create(name, maxlen + mr->align,
> +                                            0, 0, 0, &err);
> +                    if (mfd < 0) {
> +                        return;
> +                    }
> +                }
> +                new_block->flags |= RAM_SHARED;
>

I wonder if ram_backend_memory_alloc() shouldn't be updated to reflect that
the memory backend is "share" = true. And I would say so in the doc as well.


> +                addr = file_ram_alloc(new_block, maxlen, mfd,
> +                                      false, false, 0, errp);
> +                trace_anon_memfd_alloc(name, maxlen, addr, mfd);
> +            } else {
> +                addr = qemu_anon_ram_alloc(maxlen, &mr->align,
> +                                           shared, noreserve);
> +            }
> +
> +            if (!addr) {
>                  error_setg_errno(errp, errno,
>                                   "cannot set up guest memory '%s'",
> -                                 memory_region_name(new_block->mr));
> +                                 name);
>                  qemu_mutex_unlock_ramlist();
>                  return;
>              }
> -            memory_try_enable_merging(new_block->host,
> new_block->max_length);
> +            memory_try_enable_merging(addr, maxlen);
> +            new_block->host = addr;
>          }
>      }
>
> diff --git a/trace-events b/trace-events
> index 765fe25..6dbcd0e 100644
> --- a/trace-events
> +++ b/trace-events
> @@ -40,6 +40,7 @@ ram_block_discard_range(const char *rbname, void *hva,
> size_t length, bool need_
>  # accel/tcg/cputlb.c
>  memory_notdirty_write_access(uint64_t vaddr, uint64_t ram_addr, unsigned
> size) "0x%" PRIx64 " ram_addr 0x%" PRIx64 " size %u"
>  memory_notdirty_set_dirty(uint64_t vaddr) "0x%" PRIx64
> +anon_memfd_alloc(const char *name, size_t size, void *ptr, int fd) "%s
> size %zu ptr %p fd %d"
>
>  # gdbstub.c
>  gdbstub_op_start(const char *device) "Starting gdbstub using device %s"
> diff --git a/util/qemu-config.c b/util/qemu-config.c
> index 84ee6dc..6162b4d 100644
> --- a/util/qemu-config.c
> +++ b/util/qemu-config.c
> @@ -207,6 +207,10 @@ static QemuOptsList machine_opts = {
>              .type = QEMU_OPT_BOOL,
>              .help = "enable/disable memory merge support",
>          },{
> +            .name = "memfd-alloc",
> +            .type = QEMU_OPT_BOOL,
> +            .help = "enable/disable memfd_create for anonymous memory",
> +        },{
>              .name = "usb",
>              .type = QEMU_OPT_BOOL,
>              .help = "Set on/off to enable/disable usb",
> --
> 1.8.3.1
>
>
>
Steven Sistare July 12, 2021, 5:07 p.m. UTC | #2
On 7/8/2021 10:20 AM, Marc-André Lureau wrote:
> Hi
> 
> On Wed, Jul 7, 2021 at 9:39 PM Steve Sistare <steven.sistare@oracle.com <mailto:steven.sistare@oracle.com>> wrote:
> 
>     Allocate anonymous memory using memfd_create if the memfd-alloc machine
>     option is set.
> 
> 
> Nice, I'd suggest you send this patch separately. (we had discussions about an option like this several times)

I would like to keep it with this series to make sure it meets our needs as the patches are
reviewed and evolve.  We can always push it solo later if the series stalls.

>     Signed-off-by: Steve Sistare <steven.sistare@oracle.com <mailto:steven.sistare@oracle.com>>
>     ---
>      hw/core/machine.c   | 19 +++++++++++++++++++
>      include/hw/boards.h |  1 +
>      qemu-options.hx     |  5 +++++
>      softmmu/physmem.c   | 42 +++++++++++++++++++++++++++++++++---------
>      trace-events        |  1 +
>      util/qemu-config.c  |  4 ++++
>      6 files changed, 63 insertions(+), 9 deletions(-)
> 
>     diff --git a/hw/core/machine.c b/hw/core/machine.c
>     index 57c18f9..f0656a8 100644
>     --- a/hw/core/machine.c
>     +++ b/hw/core/machine.c
>     @@ -383,6 +383,20 @@ static void machine_set_mem_merge(Object *obj, bool value, Error **errp)
>          ms->mem_merge = value;
>      }
> 
>     +static bool machine_get_memfd_alloc(Object *obj, Error **errp)
>     +{
>     +    MachineState *ms = MACHINE(obj);
>     +
>     +    return ms->memfd_alloc;
>     +}
>     +
>     +static void machine_set_memfd_alloc(Object *obj, bool value, Error **errp)
>     +{
>     +    MachineState *ms = MACHINE(obj);
>     +
>     +    ms->memfd_alloc = value;
>     +}
>     +
>      static bool machine_get_usb(Object *obj, Error **errp)
>      {
>          MachineState *ms = MACHINE(obj);
>     @@ -917,6 +931,11 @@ static void machine_class_init(ObjectClass *oc, void *data)
>          object_class_property_set_description(oc, "mem-merge",
>              "Enable/disable memory merge support");
> 
>     +    object_class_property_add_bool(oc, "memfd-alloc",
>     +        machine_get_memfd_alloc, machine_set_memfd_alloc);
>     +    object_class_property_set_description(oc, "memfd-alloc",
>     +        "Enable/disable allocating anonymous memory using memfd_create");
>     +
>          object_class_property_add_bool(oc, "usb",
>              machine_get_usb, machine_set_usb);
>          object_class_property_set_description(oc, "usb",
>     diff --git a/include/hw/boards.h b/include/hw/boards.h
>     index accd6ef..299e1ca 100644
>     --- a/include/hw/boards.h
>     +++ b/include/hw/boards.h
>     @@ -305,6 +305,7 @@ struct MachineState {
>          char *dt_compatible;
>          bool dump_guest_core;
>          bool mem_merge;
>     +    bool memfd_alloc;
>          bool usb;
>          bool usb_disabled;
>          char *firmware;
>     diff --git a/qemu-options.hx b/qemu-options.hx
>     index 8965dab..fa53734 100644
>     --- a/qemu-options.hx
>     +++ b/qemu-options.hx
>     @@ -30,6 +30,7 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \
>          "                vmport=on|off|auto controls emulation of vmport (default: auto)\n"
>          "                dump-guest-core=on|off include guest memory in a core dump (default=on)\n"
>          "                mem-merge=on|off controls memory merge support (default: on)\n"
>     +    "                memfd-alloc=on|off controls allocating anonymous memory using memfd_create (default: off)\n"
>          "                aes-key-wrap=on|off controls support for AES key wrapping (default=on)\n"
>          "                dea-key-wrap=on|off controls support for DEA key wrapping (default=on)\n"
>          "                suppress-vmdesc=on|off disables self-describing migration (default=off)\n"
>     @@ -76,6 +77,10 @@ SRST
>              supported by the host, de-duplicates identical memory pages
>              among VMs instances (enabled by default).
> 
>     +    ``memfd-alloc=on|off``
>     +        Enables or disables allocation of anonymous memory using memfd_create.
>     +        (disabled by default).
>     +
>          ``aes-key-wrap=on|off``
>              Enables or disables AES key wrapping support on s390-ccw hosts.
>              This feature controls whether AES wrapping keys will be created
>     diff --git a/softmmu/physmem.c b/softmmu/physmem.c
>     index 9b171c9..b149250 100644
>     --- a/softmmu/physmem.c
>     +++ b/softmmu/physmem.c
>     @@ -64,6 +64,7 @@
> 
>      #include "qemu/pmem.h"
> 
>     +#include "qemu/memfd.h"
>      #include "migration/vmstate.h"
> 
>      #include "qemu/range.h"
>     @@ -1960,35 +1961,58 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
>          const bool shared = qemu_ram_is_shared(new_block);
>          RAMBlock *block;
>          RAMBlock *last_block = NULL;
>     +    struct MemoryRegion *mr = new_block->mr;
>          ram_addr_t old_ram_size, new_ram_size;
>          Error *err = NULL;
>     +    const char *name;
>     +    void *addr = 0;
>     +    size_t maxlen;
>     +    MachineState *ms = MACHINE(qdev_get_machine());
> 
>          old_ram_size = last_ram_page();
> 
>          qemu_mutex_lock_ramlist();
>     -    new_block->offset = find_ram_offset(new_block->max_length);
>     +    maxlen = new_block->max_length;
>     +    new_block->offset = find_ram_offset(maxlen);
> 
>          if (!new_block->host) {
>              if (xen_enabled()) {
>     -            xen_ram_alloc(new_block->offset, new_block->max_length,
>     -                          new_block->mr, &err);
>     +            xen_ram_alloc(new_block->offset, maxlen, new_block->mr, &err);
>                  if (err) {
>                      error_propagate(errp, err);
>                      qemu_mutex_unlock_ramlist();
>                      return;
>                  }
>              } else {
>     -            new_block->host = qemu_anon_ram_alloc(new_block->max_length,
>     -                                                  &new_block->mr->align,
>     -                                                  shared, noreserve);
>     -            if (!new_block->host) {
>     +            name = memory_region_name(new_block->mr);
>     +            if (ms->memfd_alloc) {
>     +                int mfd = -1;          /* placeholder until next patch */
>     +                mr->align = QEMU_VMALLOC_ALIGN;
>     +                if (mfd < 0) {
>     +                    mfd = qemu_memfd_create(name, maxlen + mr->align,
>     +                                            0, 0, 0, &err);
>     +                    if (mfd < 0) {
>     +                        return;
>     +                    }
>     +                }
>     +                new_block->flags |= RAM_SHARED;
> 
> 
> I wonder if ram_backend_memory_alloc() shouldn't be updated to reflect that the memory backend is "share" = true. 

It already does this:
  ram_flags = backend->share ? RAM_SHARED : 0;
Did you have something else in mind?

> And I would say so in the doc as well.

Will do.

- Steve

>     +                addr = file_ram_alloc(new_block, maxlen, mfd,
>     +                                      false, false, 0, errp);
>     +                trace_anon_memfd_alloc(name, maxlen, addr, mfd);
>     +            } else {
>     +                addr = qemu_anon_ram_alloc(maxlen, &mr->align,
>     +                                           shared, noreserve);
>     +            }
>     +
>     +            if (!addr) {
>                      error_setg_errno(errp, errno,
>                                       "cannot set up guest memory '%s'",
>     -                                 memory_region_name(new_block->mr));
>     +                                 name);
>                      qemu_mutex_unlock_ramlist();
>                      return;
>                  }
>     -            memory_try_enable_merging(new_block->host, new_block->max_length);
>     +            memory_try_enable_merging(addr, maxlen);
>     +            new_block->host = addr;
>              }
>          }
> 
>     diff --git a/trace-events b/trace-events
>     index 765fe25..6dbcd0e 100644
>     --- a/trace-events
>     +++ b/trace-events
>     @@ -40,6 +40,7 @@ ram_block_discard_range(const char *rbname, void *hva, size_t length, bool need_
>      # accel/tcg/cputlb.c
>      memory_notdirty_write_access(uint64_t vaddr, uint64_t ram_addr, unsigned size) "0x%" PRIx64 " ram_addr 0x%" PRIx64 " size %u"
>      memory_notdirty_set_dirty(uint64_t vaddr) "0x%" PRIx64
>     +anon_memfd_alloc(const char *name, size_t size, void *ptr, int fd) "%s size %zu ptr %p fd %d"
> 
>      # gdbstub.c
>      gdbstub_op_start(const char *device) "Starting gdbstub using device %s"
>     diff --git a/util/qemu-config.c b/util/qemu-config.c
>     index 84ee6dc..6162b4d 100644
>     --- a/util/qemu-config.c
>     +++ b/util/qemu-config.c
>     @@ -207,6 +207,10 @@ static QemuOptsList machine_opts = {
>                  .type = QEMU_OPT_BOOL,
>                  .help = "enable/disable memory merge support",
>              },{
>     +            .name = "memfd-alloc",
>     +            .type = QEMU_OPT_BOOL,
>     +            .help = "enable/disable memfd_create for anonymous memory",
>     +        },{
>                  .name = "usb",
>                  .type = QEMU_OPT_BOOL,
>                  .help = "Set on/off to enable/disable usb",
>     -- 
>     1.8.3.1
> 
> 
> 
> 
> -- 
> Marc-André Lureau
Marc-André Lureau July 12, 2021, 5:45 p.m. UTC | #3
Hi

On Mon, Jul 12, 2021 at 9:07 PM Steven Sistare <steven.sistare@oracle.com>
wrote:

> On 7/8/2021 10:20 AM, Marc-André Lureau wrote:
> > Hi
> >
> > On Wed, Jul 7, 2021 at 9:39 PM Steve Sistare <steven.sistare@oracle.com
> <mailto:steven.sistare@oracle.com>> wrote:
> >
> >     Allocate anonymous memory using memfd_create if the memfd-alloc
> machine
> >     option is set.
> >
> >
> > Nice, I'd suggest you send this patch separately. (we had discussions
> about an option like this several times)
>
> I would like to keep it with this series to make sure it meets our needs
> as the patches are
> reviewed and evolve.  We can always push it solo later if the series
> stalls.
>
> >     Signed-off-by: Steve Sistare <steven.sistare@oracle.com <mailto:
> steven.sistare@oracle.com>>
> >     ---
> >      hw/core/machine.c   | 19 +++++++++++++++++++
> >      include/hw/boards.h |  1 +
> >      qemu-options.hx     |  5 +++++
> >      softmmu/physmem.c   | 42 +++++++++++++++++++++++++++++++++---------
> >      trace-events        |  1 +
> >      util/qemu-config.c  |  4 ++++
> >      6 files changed, 63 insertions(+), 9 deletions(-)
> >
> >     diff --git a/hw/core/machine.c b/hw/core/machine.c
> >     index 57c18f9..f0656a8 100644
> >     --- a/hw/core/machine.c
> >     +++ b/hw/core/machine.c
> >     @@ -383,6 +383,20 @@ static void machine_set_mem_merge(Object *obj,
> bool value, Error **errp)
> >          ms->mem_merge = value;
> >      }
> >
> >     +static bool machine_get_memfd_alloc(Object *obj, Error **errp)
> >     +{
> >     +    MachineState *ms = MACHINE(obj);
> >     +
> >     +    return ms->memfd_alloc;
> >     +}
> >     +
> >     +static void machine_set_memfd_alloc(Object *obj, bool value, Error
> **errp)
> >     +{
> >     +    MachineState *ms = MACHINE(obj);
> >     +
> >     +    ms->memfd_alloc = value;
> >     +}
> >     +
> >      static bool machine_get_usb(Object *obj, Error **errp)
> >      {
> >          MachineState *ms = MACHINE(obj);
> >     @@ -917,6 +931,11 @@ static void machine_class_init(ObjectClass *oc,
> void *data)
> >          object_class_property_set_description(oc, "mem-merge",
> >              "Enable/disable memory merge support");
> >
> >     +    object_class_property_add_bool(oc, "memfd-alloc",
> >     +        machine_get_memfd_alloc, machine_set_memfd_alloc);
> >     +    object_class_property_set_description(oc, "memfd-alloc",
> >     +        "Enable/disable allocating anonymous memory using
> memfd_create");
> >     +
> >          object_class_property_add_bool(oc, "usb",
> >              machine_get_usb, machine_set_usb);
> >          object_class_property_set_description(oc, "usb",
> >     diff --git a/include/hw/boards.h b/include/hw/boards.h
> >     index accd6ef..299e1ca 100644
> >     --- a/include/hw/boards.h
> >     +++ b/include/hw/boards.h
> >     @@ -305,6 +305,7 @@ struct MachineState {
> >          char *dt_compatible;
> >          bool dump_guest_core;
> >          bool mem_merge;
> >     +    bool memfd_alloc;
> >          bool usb;
> >          bool usb_disabled;
> >          char *firmware;
> >     diff --git a/qemu-options.hx b/qemu-options.hx
> >     index 8965dab..fa53734 100644
> >     --- a/qemu-options.hx
> >     +++ b/qemu-options.hx
> >     @@ -30,6 +30,7 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \
> >          "                vmport=on|off|auto controls emulation of
> vmport (default: auto)\n"
> >          "                dump-guest-core=on|off include guest memory in
> a core dump (default=on)\n"
> >          "                mem-merge=on|off controls memory merge support
> (default: on)\n"
> >     +    "                memfd-alloc=on|off controls allocating
> anonymous memory using memfd_create (default: off)\n"
> >          "                aes-key-wrap=on|off controls support for AES
> key wrapping (default=on)\n"
> >          "                dea-key-wrap=on|off controls support for DEA
> key wrapping (default=on)\n"
> >          "                suppress-vmdesc=on|off disables
> self-describing migration (default=off)\n"
> >     @@ -76,6 +77,10 @@ SRST
> >              supported by the host, de-duplicates identical memory pages
> >              among VMs instances (enabled by default).
> >
> >     +    ``memfd-alloc=on|off``
> >     +        Enables or disables allocation of anonymous memory using
> memfd_create.
> >     +        (disabled by default).
> >     +
> >          ``aes-key-wrap=on|off``
> >              Enables or disables AES key wrapping support on s390-ccw
> hosts.
> >              This feature controls whether AES wrapping keys will be
> created
> >     diff --git a/softmmu/physmem.c b/softmmu/physmem.c
> >     index 9b171c9..b149250 100644
> >     --- a/softmmu/physmem.c
> >     +++ b/softmmu/physmem.c
> >     @@ -64,6 +64,7 @@
> >
> >      #include "qemu/pmem.h"
> >
> >     +#include "qemu/memfd.h"
> >      #include "migration/vmstate.h"
> >
> >      #include "qemu/range.h"
> >     @@ -1960,35 +1961,58 @@ static void ram_block_add(RAMBlock
> *new_block, Error **errp)
> >          const bool shared = qemu_ram_is_shared(new_block);
> >          RAMBlock *block;
> >          RAMBlock *last_block = NULL;
> >     +    struct MemoryRegion *mr = new_block->mr;
> >          ram_addr_t old_ram_size, new_ram_size;
> >          Error *err = NULL;
> >     +    const char *name;
> >     +    void *addr = 0;
> >     +    size_t maxlen;
> >     +    MachineState *ms = MACHINE(qdev_get_machine());
> >
> >          old_ram_size = last_ram_page();
> >
> >          qemu_mutex_lock_ramlist();
> >     -    new_block->offset = find_ram_offset(new_block->max_length);
> >     +    maxlen = new_block->max_length;
> >     +    new_block->offset = find_ram_offset(maxlen);
> >
> >          if (!new_block->host) {
> >              if (xen_enabled()) {
> >     -            xen_ram_alloc(new_block->offset, new_block->max_length,
> >     -                          new_block->mr, &err);
> >     +            xen_ram_alloc(new_block->offset, maxlen, new_block->mr,
> &err);
> >                  if (err) {
> >                      error_propagate(errp, err);
> >                      qemu_mutex_unlock_ramlist();
> >                      return;
> >                  }
> >              } else {
> >     -            new_block->host =
> qemu_anon_ram_alloc(new_block->max_length,
> >     -
> &new_block->mr->align,
> >     -                                                  shared,
> noreserve);
> >     -            if (!new_block->host) {
> >     +            name = memory_region_name(new_block->mr);
> >     +            if (ms->memfd_alloc) {
> >     +                int mfd = -1;          /* placeholder until next
> patch */
> >     +                mr->align = QEMU_VMALLOC_ALIGN;
> >     +                if (mfd < 0) {
> >     +                    mfd = qemu_memfd_create(name, maxlen +
> mr->align,
> >     +                                            0, 0, 0, &err);
> >     +                    if (mfd < 0) {
> >     +                        return;
> >     +                    }
> >     +                }
> >     +                new_block->flags |= RAM_SHARED;
> >
> >
> > I wonder if ram_backend_memory_alloc() shouldn't be updated to reflect
> that the memory backend is "share" = true.
>
> It already does this:
>   ram_flags = backend->share ? RAM_SHARED : 0;
> Did you have something else in mind?
>

I mean the backend->share value should be updated, as it's always
RAM_SHARED.


> > And I would say so in the doc as well.
>
> Will do.
>
> - Steve
>
> >     +                addr = file_ram_alloc(new_block, maxlen, mfd,
> >     +                                      false, false, 0, errp);
> >     +                trace_anon_memfd_alloc(name, maxlen, addr, mfd);
> >     +            } else {
> >     +                addr = qemu_anon_ram_alloc(maxlen, &mr->align,
> >     +                                           shared, noreserve);
> >     +            }
> >     +
> >     +            if (!addr) {
> >                      error_setg_errno(errp, errno,
> >                                       "cannot set up guest memory '%s'",
> >     -                                 memory_region_name(new_block->mr));
> >     +                                 name);
> >                      qemu_mutex_unlock_ramlist();
> >                      return;
> >                  }
> >     -            memory_try_enable_merging(new_block->host,
> new_block->max_length);
> >     +            memory_try_enable_merging(addr, maxlen);
> >     +            new_block->host = addr;
> >              }
> >          }
> >
> >     diff --git a/trace-events b/trace-events
> >     index 765fe25..6dbcd0e 100644
> >     --- a/trace-events
> >     +++ b/trace-events
> >     @@ -40,6 +40,7 @@ ram_block_discard_range(const char *rbname, void
> *hva, size_t length, bool need_
> >      # accel/tcg/cputlb.c
> >      memory_notdirty_write_access(uint64_t vaddr, uint64_t ram_addr,
> unsigned size) "0x%" PRIx64 " ram_addr 0x%" PRIx64 " size %u"
> >      memory_notdirty_set_dirty(uint64_t vaddr) "0x%" PRIx64
> >     +anon_memfd_alloc(const char *name, size_t size, void *ptr, int fd)
> "%s size %zu ptr %p fd %d"
> >
> >      # gdbstub.c
> >      gdbstub_op_start(const char *device) "Starting gdbstub using device
> %s"
> >     diff --git a/util/qemu-config.c b/util/qemu-config.c
> >     index 84ee6dc..6162b4d 100644
> >     --- a/util/qemu-config.c
> >     +++ b/util/qemu-config.c
> >     @@ -207,6 +207,10 @@ static QemuOptsList machine_opts = {
> >                  .type = QEMU_OPT_BOOL,
> >                  .help = "enable/disable memory merge support",
> >              },{
> >     +            .name = "memfd-alloc",
> >     +            .type = QEMU_OPT_BOOL,
> >     +            .help = "enable/disable memfd_create for anonymous
> memory",
> >     +        },{
> >                  .name = "usb",
> >                  .type = QEMU_OPT_BOOL,
> >                  .help = "Set on/off to enable/disable usb",
> >     --
> >     1.8.3.1
> >
> >
> >
> >
> > --
> > Marc-André Lureau
>
diff mbox series

Patch

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 57c18f9..f0656a8 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -383,6 +383,20 @@  static void machine_set_mem_merge(Object *obj, bool value, Error **errp)
     ms->mem_merge = value;
 }
 
+static bool machine_get_memfd_alloc(Object *obj, Error **errp)
+{
+    MachineState *ms = MACHINE(obj);
+
+    return ms->memfd_alloc;
+}
+
+static void machine_set_memfd_alloc(Object *obj, bool value, Error **errp)
+{
+    MachineState *ms = MACHINE(obj);
+
+    ms->memfd_alloc = value;
+}
+
 static bool machine_get_usb(Object *obj, Error **errp)
 {
     MachineState *ms = MACHINE(obj);
@@ -917,6 +931,11 @@  static void machine_class_init(ObjectClass *oc, void *data)
     object_class_property_set_description(oc, "mem-merge",
         "Enable/disable memory merge support");
 
+    object_class_property_add_bool(oc, "memfd-alloc",
+        machine_get_memfd_alloc, machine_set_memfd_alloc);
+    object_class_property_set_description(oc, "memfd-alloc",
+        "Enable/disable allocating anonymous memory using memfd_create");
+
     object_class_property_add_bool(oc, "usb",
         machine_get_usb, machine_set_usb);
     object_class_property_set_description(oc, "usb",
diff --git a/include/hw/boards.h b/include/hw/boards.h
index accd6ef..299e1ca 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -305,6 +305,7 @@  struct MachineState {
     char *dt_compatible;
     bool dump_guest_core;
     bool mem_merge;
+    bool memfd_alloc;
     bool usb;
     bool usb_disabled;
     char *firmware;
diff --git a/qemu-options.hx b/qemu-options.hx
index 8965dab..fa53734 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -30,6 +30,7 @@  DEF("machine", HAS_ARG, QEMU_OPTION_machine, \
     "                vmport=on|off|auto controls emulation of vmport (default: auto)\n"
     "                dump-guest-core=on|off include guest memory in a core dump (default=on)\n"
     "                mem-merge=on|off controls memory merge support (default: on)\n"
+    "                memfd-alloc=on|off controls allocating anonymous memory using memfd_create (default: off)\n"
     "                aes-key-wrap=on|off controls support for AES key wrapping (default=on)\n"
     "                dea-key-wrap=on|off controls support for DEA key wrapping (default=on)\n"
     "                suppress-vmdesc=on|off disables self-describing migration (default=off)\n"
@@ -76,6 +77,10 @@  SRST
         supported by the host, de-duplicates identical memory pages
         among VMs instances (enabled by default).
 
+    ``memfd-alloc=on|off``
+        Enables or disables allocation of anonymous memory using memfd_create
+        (disabled by default).
+
     ``aes-key-wrap=on|off``
         Enables or disables AES key wrapping support on s390-ccw hosts.
         This feature controls whether AES wrapping keys will be created
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index 9b171c9..b149250 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -64,6 +64,7 @@ 
 
 #include "qemu/pmem.h"
 
+#include "qemu/memfd.h"
 #include "migration/vmstate.h"
 
 #include "qemu/range.h"
@@ -1960,35 +1961,58 @@  static void ram_block_add(RAMBlock *new_block, Error **errp)
     const bool shared = qemu_ram_is_shared(new_block);
     RAMBlock *block;
     RAMBlock *last_block = NULL;
+    struct MemoryRegion *mr = new_block->mr;
     ram_addr_t old_ram_size, new_ram_size;
     Error *err = NULL;
+    const char *name;
+    void *addr = 0;
+    size_t maxlen;
+    MachineState *ms = MACHINE(qdev_get_machine());
 
     old_ram_size = last_ram_page();
 
     qemu_mutex_lock_ramlist();
-    new_block->offset = find_ram_offset(new_block->max_length);
+    maxlen = new_block->max_length;
+    new_block->offset = find_ram_offset(maxlen);
 
     if (!new_block->host) {
         if (xen_enabled()) {
-            xen_ram_alloc(new_block->offset, new_block->max_length,
-                          new_block->mr, &err);
+            xen_ram_alloc(new_block->offset, maxlen, new_block->mr, &err);
             if (err) {
                 error_propagate(errp, err);
                 qemu_mutex_unlock_ramlist();
                 return;
             }
         } else {
-            new_block->host = qemu_anon_ram_alloc(new_block->max_length,
-                                                  &new_block->mr->align,
-                                                  shared, noreserve);
-            if (!new_block->host) {
+            name = memory_region_name(new_block->mr);
+            if (ms->memfd_alloc) {
+                int mfd = -1;          /* placeholder until next patch */
+                mr->align = QEMU_VMALLOC_ALIGN;
+                if (mfd < 0) {
+                    mfd = qemu_memfd_create(name, maxlen + mr->align,
+                                            0, 0, 0, &err);
+                    if (mfd < 0) {
+                        return;
+                    }
+                }
+                new_block->flags |= RAM_SHARED;
+                addr = file_ram_alloc(new_block, maxlen, mfd,
+                                      false, false, 0, errp);
+                trace_anon_memfd_alloc(name, maxlen, addr, mfd);
+            } else {
+                addr = qemu_anon_ram_alloc(maxlen, &mr->align,
+                                           shared, noreserve);
+            }
+
+            if (!addr) {
                 error_setg_errno(errp, errno,
                                  "cannot set up guest memory '%s'",
-                                 memory_region_name(new_block->mr));
+                                 name);
                 qemu_mutex_unlock_ramlist();
                 return;
             }
-            memory_try_enable_merging(new_block->host, new_block->max_length);
+            memory_try_enable_merging(addr, maxlen);
+            new_block->host = addr;
         }
     }
 
diff --git a/trace-events b/trace-events
index 765fe25..6dbcd0e 100644
--- a/trace-events
+++ b/trace-events
@@ -40,6 +40,7 @@  ram_block_discard_range(const char *rbname, void *hva, size_t length, bool need_
 # accel/tcg/cputlb.c
 memory_notdirty_write_access(uint64_t vaddr, uint64_t ram_addr, unsigned size) "0x%" PRIx64 " ram_addr 0x%" PRIx64 " size %u"
 memory_notdirty_set_dirty(uint64_t vaddr) "0x%" PRIx64
+anon_memfd_alloc(const char *name, size_t size, void *ptr, int fd) "%s size %zu ptr %p fd %d"
 
 # gdbstub.c
 gdbstub_op_start(const char *device) "Starting gdbstub using device %s"
diff --git a/util/qemu-config.c b/util/qemu-config.c
index 84ee6dc..6162b4d 100644
--- a/util/qemu-config.c
+++ b/util/qemu-config.c
@@ -207,6 +207,10 @@  static QemuOptsList machine_opts = {
             .type = QEMU_OPT_BOOL,
             .help = "enable/disable memory merge support",
         },{
+            .name = "memfd-alloc",
+            .type = QEMU_OPT_BOOL,
+            .help = "enable/disable memfd_create for anonymous memory",
+        },{
             .name = "usb",
             .type = QEMU_OPT_BOOL,
             .help = "Set on/off to enable/disable usb",