diff mbox

[RESEND,2/2] hostmem-file: add an attribute 'align' to set its alignment

Message ID 20170526023213.18741-2-haozhong.zhang@intel.com
State New
Headers show

Commit Message

Haozhong Zhang May 26, 2017, 2:32 a.m. UTC
file_ram_alloc() currently maps the backend file via mmap to a virtual
address aligned to the value returned by qemu_fd_getpagesize(). When a
DAX device (e.g. /dev/dax0.0) is used as the backend file, its kernel
mmap implementation may require an alignment larger than what
qemu_fd_get_pagesize() returns (e.g. 2MB vs. 4KB), and mmap may fail.

This commit adds an attribute 'align' to hostmem-file, so that users
can specify a proper alignment that satisfies the kernel requirement.

If 'align' is not specified or is 0, the value returned by
qemu_fd_get_pagesize() will be used as before.

Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
---
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Igor Mammedov <imammedo@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Crosthwaite <crosthwaite.peter@gmail.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Xiao Guangrong <guangrong.xiao@gmail.com>
Cc: Stefan Hajnoczi <stefanha@gmail.com>
Cc: Dan Williams <dan.j.williams@intel.com>
---
Resend because the wrong maintainer email address was used.
---
 backends/hostmem-file.c | 41 ++++++++++++++++++++++++++++++++++++++++-
 exec.c                  |  8 +++++++-
 include/exec/memory.h   |  2 ++
 memory.c                |  2 ++
 numa.c                  |  2 +-
 5 files changed, 52 insertions(+), 3 deletions(-)

Comments

Marc-André Lureau May 26, 2017, 6:39 a.m. UTC | #1
Hi

On Fri, May 26, 2017 at 6:34 AM Haozhong Zhang <haozhong.zhang@intel.com>
wrote:

> file_ram_alloc() currently maps the backend file via mmap to a virtual
> address aligned to the value returned by qemu_fd_getpagesize(). When a
> DAX device (e.g. /dev/dax0.0) is used as the backend file, its kernel
> mmap implementation may require an alignment larger than what
> qemu_fd_get_pagesize() returns (e.g. 2MB vs. 4KB), and mmap may fail.
>

How is the accepted value queried? Any chance to configure it
automatically?



> This commit adds an attribute 'align' to hostmem-file, so that users
> can specify a proper alignment that satisfies the kernel requirement.
>
> If 'align' is not specified or is 0, the value returned by
> qemu_fd_get_pagesize() will be used as before.
>
> Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
> ---
> Cc: Eduardo Habkost <ehabkost@redhat.com>
> Cc: Igor Mammedov <imammedo@redhat.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Peter Crosthwaite <crosthwaite.peter@gmail.com>
> Cc: Richard Henderson <rth@twiddle.net>
> Cc: Xiao Guangrong <guangrong.xiao@gmail.com>
> Cc: Stefan Hajnoczi <stefanha@gmail.com>
> Cc: Dan Williams <dan.j.williams@intel.com>
> ---
> Resend because the wrong maintainer email address was used.
> ---
>  backends/hostmem-file.c | 41 ++++++++++++++++++++++++++++++++++++++++-
>  exec.c                  |  8 +++++++-
>  include/exec/memory.h   |  2 ++
>  memory.c                |  2 ++
>  numa.c                  |  2 +-
>  5 files changed, 52 insertions(+), 3 deletions(-)
>
> diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c
> index fc4ef46d11..d44fb41b55 100644
> --- a/backends/hostmem-file.c
> +++ b/backends/hostmem-file.c
> @@ -33,6 +33,7 @@ struct HostMemoryBackendFile {
>
>      bool share;
>      char *mem_path;
> +    uint64_t align;
>  };
>
>  static void
> @@ -57,7 +58,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend,
> Error **errp)
>          path = object_get_canonical_path(OBJECT(backend));
>          memory_region_init_ram_from_file(&backend->mr, OBJECT(backend),
>                                   path,
> -                                 backend->size, fb->share,
> +                                 backend->size, fb->align, fb->share,
>                                   fb->mem_path, errp);
>          g_free(path);
>      }
> @@ -104,6 +105,40 @@ static void file_memory_backend_set_share(Object *o,
> bool value, Error **errp)
>  }
>
>  static void
> +file_memory_backend_get_align(Object *o, Visitor *v, const char *name,
> +                              void *opaque, Error **errp)
> +{
> +    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);
> +    uint64_t val = fb->align;
> +
> +    visit_type_size(v, name, &val, errp);
> +}
> +
> +static void
> +file_memory_backend_set_align(Object *o, Visitor *v, const char *name,
> +                              void *opaque, Error **errp)
> +{
> +    HostMemoryBackend *backend = MEMORY_BACKEND(o);
> +    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);
> +    Error *local_err = NULL;
> +    uint64_t val;
> +
> +    if (host_memory_backend_mr_inited(backend)) {
> +        error_setg(&local_err, "cannot change property value");
> +        goto out;
> +    }
> +
> +    visit_type_size(v, name, &val, &local_err);
> +    if (local_err) {
> +        goto out;
> +    }
> +    fb->align = val;
> +
> + out:
> +    error_propagate(errp, local_err);
> +}
> +
> +static void
>  file_backend_class_init(ObjectClass *oc, void *data)
>  {
>      HostMemoryBackendClass *bc = MEMORY_BACKEND_CLASS(oc);
> @@ -116,6 +151,10 @@ file_backend_class_init(ObjectClass *oc, void *data)
>      object_class_property_add_str(oc, "mem-path",
>          get_mem_path, set_mem_path,
>          &error_abort);
> +    object_class_property_add(oc, "align", "int",
>

It would make sense to make it "size" instead of "int"


> +        file_memory_backend_get_align,
> +        file_memory_backend_set_align,
> +        NULL, NULL, &error_abort);
>  }
>
>  static void file_backend_instance_finalize(Object *o)
> diff --git a/exec.c b/exec.c
> index ff16f04f2b..5bb62e2e98 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -1549,7 +1549,13 @@ static void *file_ram_alloc(RAMBlock *block,
>      }
>
>      block->page_size = qemu_fd_getpagesize(fd);
> -    block->mr->align = block->page_size;
> +    if (block->mr->align % block->page_size) {
> +        error_setg(errp, "alignment 0x%" PRIx64 " must be "
> +                   "multiple of page size 0x%" PRIx64,
> +                   block->mr->align, block->page_size);
> +        goto error;
> +    }
> +    block->mr->align = MAX(block->page_size, block->mr->align);
>  #if defined(__s390x__)
>      if (kvm_enabled()) {
>          block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN);
> diff --git a/include/exec/memory.h b/include/exec/memory.h
> index 99e0f54d86..05d3d0da3b 100644
> --- a/include/exec/memory.h
> +++ b/include/exec/memory.h
> @@ -441,6 +441,7 @@ void memory_region_init_resizeable_ram(MemoryRegion
> *mr,
>   * @name: Region name, becomes part of RAMBlock name used in migration
> stream
>   *        must be unique within any device
>   * @size: size of the region.
> + * @align: alignment of the region.
>   * @share: %true if memory must be mmaped with the MAP_SHARED flag
>   * @path: the path in which to allocate the RAM.
>   * @errp: pointer to Error*, to store an error if it happens.
> @@ -449,6 +450,7 @@ void memory_region_init_ram_from_file(MemoryRegion *mr,
>                                        struct Object *owner,
>                                        const char *name,
>                                        uint64_t size,
> +                                      uint64_t align,
>                                        bool share,
>                                        const char *path,
>                                        Error **errp);
> diff --git a/memory.c b/memory.c
> index b727f5ec0e..5165b9aa08 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -1386,6 +1386,7 @@ void memory_region_init_ram_from_file(MemoryRegion
> *mr,
>                                        struct Object *owner,
>                                        const char *name,
>                                        uint64_t size,
> +                                      uint64_t align,
>                                        bool share,
>                                        const char *path,
>                                        Error **errp)
> @@ -1394,6 +1395,7 @@ void memory_region_init_ram_from_file(MemoryRegion
> *mr,
>      mr->ram = true;
>      mr->terminates = true;
>      mr->destructor = memory_region_destructor_ram;
> +    mr->align = align;
>      mr->ram_block = qemu_ram_alloc_from_file(size, mr, share, path, errp);
>      mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0;
>  }
> diff --git a/numa.c b/numa.c
> index ca731455e9..39a25aa1d2 100644
> --- a/numa.c
> +++ b/numa.c
> @@ -541,7 +541,7 @@ static void
> allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner,
>      if (mem_path) {
>  #ifdef __linux__
>          Error *err = NULL;
> -        memory_region_init_ram_from_file(mr, owner, name, ram_size, false,
> +        memory_region_init_ram_from_file(mr, owner, name, ram_size, 0,
> false,
>                                           mem_path, &err);
>          if (err) {
>              error_report_err(err);
> --
> 2.11.0
>
>
otherwise, looks good to me
Marc-André Lureau May 26, 2017, 7:05 a.m. UTC | #2
Hi

On Fri, May 26, 2017 at 10:51 AM Haozhong Zhang <haozhong.zhang@intel.com>
wrote:

> On 05/26/17 06:39 +0000, Marc-André Lureau wrote:
> > Hi
> >
> > On Fri, May 26, 2017 at 6:34 AM Haozhong Zhang <haozhong.zhang@intel.com
> >
> > wrote:
> >
> > > file_ram_alloc() currently maps the backend file via mmap to a virtual
> > > address aligned to the value returned by qemu_fd_getpagesize(). When a
> > > DAX device (e.g. /dev/dax0.0) is used as the backend file, its kernel
> > > mmap implementation may require an alignment larger than what
> > > qemu_fd_get_pagesize() returns (e.g. 2MB vs. 4KB), and mmap may fail.
> > >
> >
> > How is the accepted value queried? Any chance to configure it
> > automatically?
>
> Take /dev/dax0.0 for example. The value can be read from
> /sys/class/dax/dax0.0/device/dax_region/align.
>

Should this work be left to management layer, or could qemu figure it out
by itself (using udev?)


> [..]
> > > +static void
> > >  file_backend_class_init(ObjectClass *oc, void *data)
> > >  {
> > >      HostMemoryBackendClass *bc = MEMORY_BACKEND_CLASS(oc);
> > > @@ -116,6 +151,10 @@ file_backend_class_init(ObjectClass *oc, void
> *data)
> > >      object_class_property_add_str(oc, "mem-path",
> > >          get_mem_path, set_mem_path,
> > >          &error_abort);
> > > +    object_class_property_add(oc, "align", "int",
> > >
> >
> > It would make sense to make it "size" instead of "int"
> >
>
> will change in the next version
>

thanks
Dan Williams May 26, 2017, 2:24 p.m. UTC | #3
On Fri, May 26, 2017 at 12:16 AM, Haozhong Zhang
<haozhong.zhang@intel.com> wrote:
> On 05/26/17 07:05 +0000, Marc-André Lureau wrote:
>> Hi
>>
>> On Fri, May 26, 2017 at 10:51 AM Haozhong Zhang <haozhong.zhang@intel.com>
>> wrote:
>>
>> > On 05/26/17 06:39 +0000, Marc-André Lureau wrote:
>> > > Hi
>> > >
>> > > On Fri, May 26, 2017 at 6:34 AM Haozhong Zhang <haozhong.zhang@intel.com
>> > >
>> > > wrote:
>> > >
>> > > > file_ram_alloc() currently maps the backend file via mmap to a virtual
>> > > > address aligned to the value returned by qemu_fd_getpagesize(). When a
>> > > > DAX device (e.g. /dev/dax0.0) is used as the backend file, its kernel
>> > > > mmap implementation may require an alignment larger than what
>> > > > qemu_fd_get_pagesize() returns (e.g. 2MB vs. 4KB), and mmap may fail.
>> > > >
>> > >
>> > > How is the accepted value queried? Any chance to configure it
>> > > automatically?
>> >
>> > Take /dev/dax0.0 for example. The value can be read from
>> > /sys/class/dax/dax0.0/device/dax_region/align.
>> >
>>
>> Should this work be left to management layer, or could qemu figure it out
>> by itself (using udev?)
>>
>
> For DAX device only, QEMU can figure out the proper alignment by
> itself. However, I'm not sure whether there are other non-DAX cases
> requiring non-default alignment, so I think it's better to just add an
> interface (i.e. align attribute) in QEMU and let other management
> tools (e.g. libvirt?) fill a proper value.

I can't imagine any cases where you would want to specify an
alignment. If it's regular file mmap any alignment is fine, and if
it's device-dax only the configured alignment of the device instance
is allowed. So, I don't think this should be a configurable option,
just read it from the device instance and you're done.
Eduardo Habkost May 26, 2017, 6:55 p.m. UTC | #4
On Fri, May 26, 2017 at 07:24:26AM -0700, Dan Williams wrote:
> On Fri, May 26, 2017 at 12:16 AM, Haozhong Zhang
> <haozhong.zhang@intel.com> wrote:
> > On 05/26/17 07:05 +0000, Marc-André Lureau wrote:
> >> Hi
> >>
> >> On Fri, May 26, 2017 at 10:51 AM Haozhong Zhang <haozhong.zhang@intel.com>
> >> wrote:
> >>
> >> > On 05/26/17 06:39 +0000, Marc-André Lureau wrote:
> >> > > Hi
> >> > >
> >> > > On Fri, May 26, 2017 at 6:34 AM Haozhong Zhang <haozhong.zhang@intel.com
> >> > >
> >> > > wrote:
> >> > >
> >> > > > file_ram_alloc() currently maps the backend file via mmap to a virtual
> >> > > > address aligned to the value returned by qemu_fd_getpagesize(). When a
> >> > > > DAX device (e.g. /dev/dax0.0) is used as the backend file, its kernel
> >> > > > mmap implementation may require an alignment larger than what
> >> > > > qemu_fd_get_pagesize() returns (e.g. 2MB vs. 4KB), and mmap may fail.
> >> > > >
> >> > >
> >> > > How is the accepted value queried? Any chance to configure it
> >> > > automatically?
> >> >
> >> > Take /dev/dax0.0 for example. The value can be read from
> >> > /sys/class/dax/dax0.0/device/dax_region/align.
> >> >
> >>
> >> Should this work be left to management layer, or could qemu figure it out
> >> by itself (using udev?)
> >>
> >
> > For DAX device only, QEMU can figure out the proper alignment by
> > itself. However, I'm not sure whether there are other non-DAX cases
> > requiring non-default alignment, so I think it's better to just add an
> > interface (i.e. align attribute) in QEMU and let other management
> > tools (e.g. libvirt?) fill a proper value.
> 
> I can't imagine any cases where you would want to specify an
> alignment. If it's regular file mmap any alignment is fine, and if
> it's device-dax only the configured alignment of the device instance
> is allowed. So, I don't think this should be a configurable option,
> just read it from the device instance and you're done.

Agreed.

BTW, there's no generic interface to ask the kernel what's the
required mmap() alignment for a file?
Dan Williams May 26, 2017, 8:50 p.m. UTC | #5
On Fri, May 26, 2017 at 11:55 AM, Eduardo Habkost <ehabkost@redhat.com> wrote:
> On Fri, May 26, 2017 at 07:24:26AM -0700, Dan Williams wrote:
>> On Fri, May 26, 2017 at 12:16 AM, Haozhong Zhang
>> <haozhong.zhang@intel.com> wrote:
>> > On 05/26/17 07:05 +0000, Marc-André Lureau wrote:
>> >> Hi
>> >>
>> >> On Fri, May 26, 2017 at 10:51 AM Haozhong Zhang <haozhong.zhang@intel.com>
>> >> wrote:
>> >>
>> >> > On 05/26/17 06:39 +0000, Marc-André Lureau wrote:
>> >> > > Hi
>> >> > >
>> >> > > On Fri, May 26, 2017 at 6:34 AM Haozhong Zhang <haozhong.zhang@intel.com
>> >> > >
>> >> > > wrote:
>> >> > >
>> >> > > > file_ram_alloc() currently maps the backend file via mmap to a virtual
>> >> > > > address aligned to the value returned by qemu_fd_getpagesize(). When a
>> >> > > > DAX device (e.g. /dev/dax0.0) is used as the backend file, its kernel
>> >> > > > mmap implementation may require an alignment larger than what
>> >> > > > qemu_fd_get_pagesize() returns (e.g. 2MB vs. 4KB), and mmap may fail.
>> >> > > >
>> >> > >
>> >> > > How is the accepted value queried? Any chance to configure it
>> >> > > automatically?
>> >> >
>> >> > Take /dev/dax0.0 for example. The value can be read from
>> >> > /sys/class/dax/dax0.0/device/dax_region/align.
>> >> >
>> >>
>> >> Should this work be left to management layer, or could qemu figure it out
>> >> by itself (using udev?)
>> >>
>> >
>> > For DAX device only, QEMU can figure out the proper alignment by
>> > itself. However, I'm not sure whether there are other non-DAX cases
>> > requiring non-default alignment, so I think it's better to just add an
>> > interface (i.e. align attribute) in QEMU and let other management
>> > tools (e.g. libvirt?) fill a proper value.
>>
>> I can't imagine any cases where you would want to specify an
>> alignment. If it's regular file mmap any alignment is fine, and if
>> it's device-dax only the configured alignment of the device instance
>> is allowed. So, I don't think this should be a configurable option,
>> just read it from the device instance and you're done.
>
> Agreed.
>
> BTW, there's no generic interface to ask the kernel what's the
> required mmap() alignment for a file?

It's only this /dev/dax device-file case where the alignment is
strictly enforced. The required alignment is advertised in sysfs, for
example:

# ls -l /dev/dax0.0
crw------- 1 root root 253, 0 May 26 06:41 /dev/dax0.0
# cat /sys/dev/char/253\:0/device/align
2097152
Haozhong Zhang May 27, 2017, 1:04 a.m. UTC | #6
On 05/26/17 15:55 -0300, Eduardo Habkost wrote:
> On Fri, May 26, 2017 at 07:24:26AM -0700, Dan Williams wrote:
> > On Fri, May 26, 2017 at 12:16 AM, Haozhong Zhang
> > <haozhong.zhang@intel.com> wrote:
> > > On 05/26/17 07:05 +0000, Marc-André Lureau wrote:
> > >> Hi
> > >>
> > >> On Fri, May 26, 2017 at 10:51 AM Haozhong Zhang <haozhong.zhang@intel.com>
> > >> wrote:
> > >>
> > >> > On 05/26/17 06:39 +0000, Marc-André Lureau wrote:
> > >> > > Hi
> > >> > >
> > >> > > On Fri, May 26, 2017 at 6:34 AM Haozhong Zhang <haozhong.zhang@intel.com
> > >> > >
> > >> > > wrote:
> > >> > >
> > >> > > > file_ram_alloc() currently maps the backend file via mmap to a virtual
> > >> > > > address aligned to the value returned by qemu_fd_getpagesize(). When a
> > >> > > > DAX device (e.g. /dev/dax0.0) is used as the backend file, its kernel
> > >> > > > mmap implementation may require an alignment larger than what
> > >> > > > qemu_fd_get_pagesize() returns (e.g. 2MB vs. 4KB), and mmap may fail.
> > >> > > >
> > >> > >
> > >> > > How is the accepted value queried? Any chance to configure it
> > >> > > automatically?
> > >> >
> > >> > Take /dev/dax0.0 for example. The value can be read from
> > >> > /sys/class/dax/dax0.0/device/dax_region/align.
> > >> >
> > >>
> > >> Should this work be left to management layer, or could qemu figure it out
> > >> by itself (using udev?)
> > >>
> > >
> > > For DAX device only, QEMU can figure out the proper alignment by
> > > itself. However, I'm not sure whether there are other non-DAX cases
> > > requiring non-default alignment, so I think it's better to just add an
> > > interface (i.e. align attribute) in QEMU and let other management
> > > tools (e.g. libvirt?) fill a proper value.
> > 
> > I can't imagine any cases where you would want to specify an
> > alignment. If it's regular file mmap any alignment is fine, and if
> > it's device-dax only the configured alignment of the device instance
> > is allowed. So, I don't think this should be a configurable option,
> > just read it from the device instance and you're done.
> 
> Agreed.
>

Ok, I'll drop this attribute and let QEMU figure out the alignment.

Thanks,
Haozhong

> BTW, there's no generic interface to ask the kernel what's the
> required mmap() alignment for a file?
> 
> -- 
> Eduardo
>
Paolo Bonzini May 30, 2017, 12:17 p.m. UTC | #7
On 26/05/2017 16:24, Dan Williams wrote:
>> For DAX device only, QEMU can figure out the proper alignment by
>> itself. However, I'm not sure whether there are other non-DAX cases
>> requiring non-default alignment, so I think it's better to just add an
>> interface (i.e. align attribute) in QEMU and let other management
>> tools (e.g. libvirt?) fill a proper value.
> I can't imagine any cases where you would want to specify an
> alignment. If it's regular file mmap any alignment is fine, and if
> it's device-dax only the configured alignment of the device instance
> is allowed. So, I don't think this should be a configurable option,
> just read it from the device instance and you're done.

A 2M or 1G alignment lets KVM use EPT hugepages if the host physical
addresses are contiguous and 2M- or 1G-aligned.

QEMU only does this for hugetlbfs currently, where the requirement on
the host physical addresses is always satisfied.  Would the same apply
to NVDIMM device DAX?

Thanks,

Paolo
Dan Williams May 30, 2017, 7:16 p.m. UTC | #8
On Tue, May 30, 2017 at 5:17 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>
>
> On 26/05/2017 16:24, Dan Williams wrote:
>>> For DAX device only, QEMU can figure out the proper alignment by
>>> itself. However, I'm not sure whether there are other non-DAX cases
>>> requiring non-default alignment, so I think it's better to just add an
>>> interface (i.e. align attribute) in QEMU and let other management
>>> tools (e.g. libvirt?) fill a proper value.
>> I can't imagine any cases where you would want to specify an
>> alignment. If it's regular file mmap any alignment is fine, and if
>> it's device-dax only the configured alignment of the device instance
>> is allowed. So, I don't think this should be a configurable option,
>> just read it from the device instance and you're done.
>
> A 2M or 1G alignment lets KVM use EPT hugepages if the host physical
> addresses are contiguous and 2M- or 1G-aligned.
>
> QEMU only does this for hugetlbfs currently, where the requirement on
> the host physical addresses is always satisfied.  Would the same apply
> to NVDIMM device DAX?
>

Yes, I believe it is similar. Device-dax guarantees and enforces (mmap
area must be aligned) a given fault granularity.
diff mbox

Patch

diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c
index fc4ef46d11..d44fb41b55 100644
--- a/backends/hostmem-file.c
+++ b/backends/hostmem-file.c
@@ -33,6 +33,7 @@  struct HostMemoryBackendFile {
 
     bool share;
     char *mem_path;
+    uint64_t align;
 };
 
 static void
@@ -57,7 +58,7 @@  file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
         path = object_get_canonical_path(OBJECT(backend));
         memory_region_init_ram_from_file(&backend->mr, OBJECT(backend),
                                  path,
-                                 backend->size, fb->share,
+                                 backend->size, fb->align, fb->share,
                                  fb->mem_path, errp);
         g_free(path);
     }
@@ -104,6 +105,40 @@  static void file_memory_backend_set_share(Object *o, bool value, Error **errp)
 }
 
 static void
+file_memory_backend_get_align(Object *o, Visitor *v, const char *name,
+                              void *opaque, Error **errp)
+{
+    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);
+    uint64_t val = fb->align;
+
+    visit_type_size(v, name, &val, errp);
+}
+
+static void
+file_memory_backend_set_align(Object *o, Visitor *v, const char *name,
+                              void *opaque, Error **errp)
+{
+    HostMemoryBackend *backend = MEMORY_BACKEND(o);
+    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);
+    Error *local_err = NULL;
+    uint64_t val;
+
+    if (host_memory_backend_mr_inited(backend)) {
+        error_setg(&local_err, "cannot change property value");
+        goto out;
+    }
+
+    visit_type_size(v, name, &val, &local_err);
+    if (local_err) {
+        goto out;
+    }
+    fb->align = val;
+
+ out:
+    error_propagate(errp, local_err);
+}
+
+static void
 file_backend_class_init(ObjectClass *oc, void *data)
 {
     HostMemoryBackendClass *bc = MEMORY_BACKEND_CLASS(oc);
@@ -116,6 +151,10 @@  file_backend_class_init(ObjectClass *oc, void *data)
     object_class_property_add_str(oc, "mem-path",
         get_mem_path, set_mem_path,
         &error_abort);
+    object_class_property_add(oc, "align", "int",
+        file_memory_backend_get_align,
+        file_memory_backend_set_align,
+        NULL, NULL, &error_abort);
 }
 
 static void file_backend_instance_finalize(Object *o)
diff --git a/exec.c b/exec.c
index ff16f04f2b..5bb62e2e98 100644
--- a/exec.c
+++ b/exec.c
@@ -1549,7 +1549,13 @@  static void *file_ram_alloc(RAMBlock *block,
     }
 
     block->page_size = qemu_fd_getpagesize(fd);
-    block->mr->align = block->page_size;
+    if (block->mr->align % block->page_size) {
+        error_setg(errp, "alignment 0x%" PRIx64 " must be "
+                   "multiple of page size 0x%" PRIx64,
+                   block->mr->align, block->page_size);
+        goto error;
+    }
+    block->mr->align = MAX(block->page_size, block->mr->align);
 #if defined(__s390x__)
     if (kvm_enabled()) {
         block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN);
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 99e0f54d86..05d3d0da3b 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -441,6 +441,7 @@  void memory_region_init_resizeable_ram(MemoryRegion *mr,
  * @name: Region name, becomes part of RAMBlock name used in migration stream
  *        must be unique within any device
  * @size: size of the region.
+ * @align: alignment of the region.
  * @share: %true if memory must be mmaped with the MAP_SHARED flag
  * @path: the path in which to allocate the RAM.
  * @errp: pointer to Error*, to store an error if it happens.
@@ -449,6 +450,7 @@  void memory_region_init_ram_from_file(MemoryRegion *mr,
                                       struct Object *owner,
                                       const char *name,
                                       uint64_t size,
+                                      uint64_t align,
                                       bool share,
                                       const char *path,
                                       Error **errp);
diff --git a/memory.c b/memory.c
index b727f5ec0e..5165b9aa08 100644
--- a/memory.c
+++ b/memory.c
@@ -1386,6 +1386,7 @@  void memory_region_init_ram_from_file(MemoryRegion *mr,
                                       struct Object *owner,
                                       const char *name,
                                       uint64_t size,
+                                      uint64_t align,
                                       bool share,
                                       const char *path,
                                       Error **errp)
@@ -1394,6 +1395,7 @@  void memory_region_init_ram_from_file(MemoryRegion *mr,
     mr->ram = true;
     mr->terminates = true;
     mr->destructor = memory_region_destructor_ram;
+    mr->align = align;
     mr->ram_block = qemu_ram_alloc_from_file(size, mr, share, path, errp);
     mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0;
 }
diff --git a/numa.c b/numa.c
index ca731455e9..39a25aa1d2 100644
--- a/numa.c
+++ b/numa.c
@@ -541,7 +541,7 @@  static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner,
     if (mem_path) {
 #ifdef __linux__
         Error *err = NULL;
-        memory_region_init_ram_from_file(mr, owner, name, ram_size, false,
+        memory_region_init_ram_from_file(mr, owner, name, ram_size, 0, false,
                                          mem_path, &err);
         if (err) {
             error_report_err(err);