diff mbox

[v2,04/16] exec: ram_block_discard_range

Message ID 20170206173306.20603-5-dgilbert@redhat.com
State New
Headers show

Commit Message

Dr. David Alan Gilbert Feb. 6, 2017, 5:32 p.m. UTC
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>

Create ram_block_discard_range in exec.c to replace
postcopy_ram_discard_range and most of ram_discard_range.

Those two routines are a bit of a weird combination, and
ram_discard_range is about to get more complex for hugepages.
It's OS dependent code (so shouldn't be in migration/ram.c) but
it needs quite a bit of the innards of RAMBlock so doesn't belong in
the os*.c.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
 exec.c                    | 59 +++++++++++++++++++++++++++++++++++++++++++++++
 include/exec/cpu-common.h |  1 +
 2 files changed, 60 insertions(+)

Comments

Juan Quintela Feb. 24, 2017, 1:14 p.m. UTC | #1
"Dr. David Alan Gilbert (git)" <dgilbert@redhat.com> wrote:
> From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
>
> Create ram_block_discard_range in exec.c to replace
> postcopy_ram_discard_range and most of ram_discard_range.
>
> Those two routines are a bit of a weird combination, and
> ram_discard_range is about to get more complex for hugepages.
> It's OS dependent code (so shouldn't be in migration/ram.c) but
> it needs quite a bit of the innards of RAMBlock so doesn't belong in
> the os*.c.
>
> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

Reviewed-by: Juan Quintela <quintela@redhat.com>
Laurent Vivier Feb. 24, 2017, 2:04 p.m. UTC | #2
On 06/02/2017 18:32, Dr. David Alan Gilbert (git) wrote:
> From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> 
> Create ram_block_discard_range in exec.c to replace
> postcopy_ram_discard_range and most of ram_discard_range.
> 
> Those two routines are a bit of a weird combination, and
> ram_discard_range is about to get more complex for hugepages.
> It's OS dependent code (so shouldn't be in migration/ram.c) but
> it needs quite a bit of the innards of RAMBlock so doesn't belong in
> the os*.c.
> 
> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> ---
>  exec.c                    | 59 +++++++++++++++++++++++++++++++++++++++++++++++
>  include/exec/cpu-common.h |  1 +
>  2 files changed, 60 insertions(+)
> 
> diff --git a/exec.c b/exec.c
> index 8b9ed73..e040cdf 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -45,6 +45,12 @@
>  #include "exec/address-spaces.h"
>  #include "sysemu/xen-mapcache.h"
>  #include "trace-root.h"
> +
> +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
> +#include <fcntl.h>
> +#include <linux/falloc.h>
> +#endif
> +
>  #endif
>  #include "exec/cpu-all.h"
>  #include "qemu/rcu_queue.h"
> @@ -3286,4 +3292,57 @@ int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
>      rcu_read_unlock();
>      return ret;
>  }
> +
> +/*
> + * Unmap pages of memory from start to start+length such that
> + * they a) read as 0, b) Trigger whatever fault mechanism
> + * the OS provides for postcopy.
> + * The pages must be unmapped by the end of the function.
> + * Returns: 0 on success, none-0 on failure
> + *
> + */
> +int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
> +{
> +    int ret = -1;
> +
> +    rcu_read_lock();
> +    uint8_t *host_startaddr = rb->host + start;
> +
> +    if ((uintptr_t)host_startaddr & (rb->page_size - 1)) {
> +        error_report("ram_block_discard_range: Unaligned start address: %p",
> +                     host_startaddr);
> +        goto err;
> +    }
> +
> +    if ((start + length) <= rb->used_length) {
> +        uint8_t *host_endaddr = host_startaddr + length;
> +        if ((uintptr_t)host_endaddr & (rb->page_size - 1)) {
> +            error_report("ram_block_discard_range: Unaligned end address: %p",
> +                         host_endaddr);
> +            goto err;
> +        }
> +
> +        errno = ENOTSUP; /* If we are missing MADVISE etc */
> +
> +#if defined(CONFIG_MADVISE)
> +        ret = qemu_madvise(host_startaddr, length, QEMU_MADV_DONTNEED);
> +#endif
> +        if (ret) {
> +            ret = -errno;
> +            error_report("ram_block_discard_range: Failed to discard range "
> +                         "%s:%" PRIx64 " +%zx (%d)",
> +                         rb->idstr, start, length, ret);
> +        }
> +    } else {
> +        error_report("ram_block_discard_range: Overrun block '%s' (%" PRIu64
> +                     "/%zx/" RAM_ADDR_FMT")",
> +                     rb->idstr, start, length, rb->used_length);
> +    }
> +
> +err:
> +    rcu_read_unlock();
> +
> +    return ret;
> +}

I really looks like a copy'n'paste from ram_discard_range(). It could be
clearer if you remove the code from ram_discard_range() and call this
function instead.

I think you don't need the "#if defined(CONFIG_MADVISE)" as you use
qemu_madvise() (or you should use madvise() directly if you want to
avoid the posix_madvise()).
[perhaps qemu_madvise() should set errno to ENOTSUP instead of EINVAL]

Laurent
Laurent Vivier Feb. 24, 2017, 2:08 p.m. UTC | #3
On 06/02/2017 18:32, Dr. David Alan Gilbert (git) wrote:
> From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> 
> Create ram_block_discard_range in exec.c to replace
> postcopy_ram_discard_range and most of ram_discard_range.
> 
> Those two routines are a bit of a weird combination, and
> ram_discard_range is about to get more complex for hugepages.
> It's OS dependent code (so shouldn't be in migration/ram.c) but
> it needs quite a bit of the innards of RAMBlock so doesn't belong in
> the os*.c.
> 
> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> ---
>  exec.c                    | 59 +++++++++++++++++++++++++++++++++++++++++++++++
>  include/exec/cpu-common.h |  1 +
>  2 files changed, 60 insertions(+)
> 
> diff --git a/exec.c b/exec.c
> index 8b9ed73..e040cdf 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -45,6 +45,12 @@
>  #include "exec/address-spaces.h"
>  #include "sysemu/xen-mapcache.h"
>  #include "trace-root.h"
> +
> +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
> +#include <fcntl.h>
> +#include <linux/falloc.h>
> +#endif

Should it be in PATCH 05/16 instead?

Laurent
Dr. David Alan Gilbert Feb. 24, 2017, 3:35 p.m. UTC | #4
* Laurent Vivier (lvivier@redhat.com) wrote:
> On 06/02/2017 18:32, Dr. David Alan Gilbert (git) wrote:
> > From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> > 
> > Create ram_block_discard_range in exec.c to replace
> > postcopy_ram_discard_range and most of ram_discard_range.
> > 
> > Those two routines are a bit of a weird combination, and
> > ram_discard_range is about to get more complex for hugepages.
> > It's OS dependent code (so shouldn't be in migration/ram.c) but
> > it needs quite a bit of the innards of RAMBlock so doesn't belong in
> > the os*.c.
> > 
> > Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > ---
> >  exec.c                    | 59 +++++++++++++++++++++++++++++++++++++++++++++++
> >  include/exec/cpu-common.h |  1 +
> >  2 files changed, 60 insertions(+)
> > 
> > diff --git a/exec.c b/exec.c
> > index 8b9ed73..e040cdf 100644
> > --- a/exec.c
> > +++ b/exec.c
> > @@ -45,6 +45,12 @@
> >  #include "exec/address-spaces.h"
> >  #include "sysemu/xen-mapcache.h"
> >  #include "trace-root.h"
> > +
> > +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
> > +#include <fcntl.h>
> > +#include <linux/falloc.h>
> > +#endif
> 
> Should it be in PATCH 05/16 instead?

Ah, yes, it should.

Dave

> Laurent
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Dr. David Alan Gilbert Feb. 24, 2017, 4:50 p.m. UTC | #5
* Laurent Vivier (lvivier@redhat.com) wrote:
> On 06/02/2017 18:32, Dr. David Alan Gilbert (git) wrote:
> > From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> > 
> > Create ram_block_discard_range in exec.c to replace
> > postcopy_ram_discard_range and most of ram_discard_range.
> > 
> > Those two routines are a bit of a weird combination, and
> > ram_discard_range is about to get more complex for hugepages.
> > It's OS dependent code (so shouldn't be in migration/ram.c) but
> > it needs quite a bit of the innards of RAMBlock so doesn't belong in
> > the os*.c.
> > 
> > Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > ---
> >  exec.c                    | 59 +++++++++++++++++++++++++++++++++++++++++++++++
> >  include/exec/cpu-common.h |  1 +
> >  2 files changed, 60 insertions(+)
> > 
> > diff --git a/exec.c b/exec.c
> > index 8b9ed73..e040cdf 100644
> > --- a/exec.c
> > +++ b/exec.c
> > @@ -45,6 +45,12 @@
> >  #include "exec/address-spaces.h"
> >  #include "sysemu/xen-mapcache.h"
> >  #include "trace-root.h"
> > +
> > +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
> > +#include <fcntl.h>
> > +#include <linux/falloc.h>
> > +#endif
> > +
> >  #endif
> >  #include "exec/cpu-all.h"
> >  #include "qemu/rcu_queue.h"
> > @@ -3286,4 +3292,57 @@ int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
> >      rcu_read_unlock();
> >      return ret;
> >  }
> > +
> > +/*
> > + * Unmap pages of memory from start to start+length such that
> > + * they a) read as 0, b) Trigger whatever fault mechanism
> > + * the OS provides for postcopy.
> > + * The pages must be unmapped by the end of the function.
> > + * Returns: 0 on success, none-0 on failure
> > + *
> > + */
> > +int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
> > +{
> > +    int ret = -1;
> > +
> > +    rcu_read_lock();
> > +    uint8_t *host_startaddr = rb->host + start;
> > +
> > +    if ((uintptr_t)host_startaddr & (rb->page_size - 1)) {
> > +        error_report("ram_block_discard_range: Unaligned start address: %p",
> > +                     host_startaddr);
> > +        goto err;
> > +    }
> > +
> > +    if ((start + length) <= rb->used_length) {
> > +        uint8_t *host_endaddr = host_startaddr + length;
> > +        if ((uintptr_t)host_endaddr & (rb->page_size - 1)) {
> > +            error_report("ram_block_discard_range: Unaligned end address: %p",
> > +                         host_endaddr);
> > +            goto err;
> > +        }
> > +
> > +        errno = ENOTSUP; /* If we are missing MADVISE etc */
> > +
> > +#if defined(CONFIG_MADVISE)
> > +        ret = qemu_madvise(host_startaddr, length, QEMU_MADV_DONTNEED);
> > +#endif
> > +        if (ret) {
> > +            ret = -errno;
> > +            error_report("ram_block_discard_range: Failed to discard range "
> > +                         "%s:%" PRIx64 " +%zx (%d)",
> > +                         rb->idstr, start, length, ret);
> > +        }
> > +    } else {
> > +        error_report("ram_block_discard_range: Overrun block '%s' (%" PRIu64
> > +                     "/%zx/" RAM_ADDR_FMT")",
> > +                     rb->idstr, start, length, rb->used_length);
> > +    }
> > +
> > +err:
> > +    rcu_read_unlock();
> > +
> > +    return ret;
> > +}
> 
> I really looks like a copy'n'paste from ram_discard_range(). It could be
> clearer if you remove the code from ram_discard_range() and call this
> function instead.

Yes, flattened into the latter commit.

> I think you don't need the "#if defined(CONFIG_MADVISE)" as you use
> qemu_madvise() (or you should use madvise() directly if you want to
> avoid the posix_madvise()).

Yes, changed to CONFIG_MADVISE + madvise()

I need to avoid posix_madvise because it doesn't do the same thing.

> [perhaps qemu_madvise() should set errno to ENOTSUP instead of EINVAL]

The difficulty is with how it fiddles with it's QEMU_MADV_* macros,
when it finds one that doesn't exist it gets defined as -1 or the like
which then fails that way.

Dave

> 
> Laurent
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
diff mbox

Patch

diff --git a/exec.c b/exec.c
index 8b9ed73..e040cdf 100644
--- a/exec.c
+++ b/exec.c
@@ -45,6 +45,12 @@ 
 #include "exec/address-spaces.h"
 #include "sysemu/xen-mapcache.h"
 #include "trace-root.h"
+
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+#include <fcntl.h>
+#include <linux/falloc.h>
+#endif
+
 #endif
 #include "exec/cpu-all.h"
 #include "qemu/rcu_queue.h"
@@ -3286,4 +3292,57 @@  int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
     rcu_read_unlock();
     return ret;
 }
+
+/*
+ * Unmap pages of memory from start to start+length such that
+ * they a) read as 0, b) Trigger whatever fault mechanism
+ * the OS provides for postcopy.
+ * The pages must be unmapped by the end of the function.
+ * Returns: 0 on success, none-0 on failure
+ *
+ */
+int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
+{
+    int ret = -1;
+
+    rcu_read_lock();
+    uint8_t *host_startaddr = rb->host + start;
+
+    if ((uintptr_t)host_startaddr & (rb->page_size - 1)) {
+        error_report("ram_block_discard_range: Unaligned start address: %p",
+                     host_startaddr);
+        goto err;
+    }
+
+    if ((start + length) <= rb->used_length) {
+        uint8_t *host_endaddr = host_startaddr + length;
+        if ((uintptr_t)host_endaddr & (rb->page_size - 1)) {
+            error_report("ram_block_discard_range: Unaligned end address: %p",
+                         host_endaddr);
+            goto err;
+        }
+
+        errno = ENOTSUP; /* If we are missing MADVISE etc */
+
+#if defined(CONFIG_MADVISE)
+        ret = qemu_madvise(host_startaddr, length, QEMU_MADV_DONTNEED);
+#endif
+        if (ret) {
+            ret = -errno;
+            error_report("ram_block_discard_range: Failed to discard range "
+                         "%s:%" PRIx64 " +%zx (%d)",
+                         rb->idstr, start, length, ret);
+        }
+    } else {
+        error_report("ram_block_discard_range: Overrun block '%s' (%" PRIu64
+                     "/%zx/" RAM_ADDR_FMT")",
+                     rb->idstr, start, length, rb->used_length);
+    }
+
+err:
+    rcu_read_unlock();
+
+    return ret;
+}
+
 #endif
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index bd15853..1350c2e 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -105,6 +105,7 @@  typedef int (RAMBlockIterFunc)(const char *block_name, void *host_addr,
     ram_addr_t offset, ram_addr_t length, void *opaque);
 
 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
+int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length);
 
 #endif