Patchwork memory: add cmd line option to omit guest memory from qmeu core dump

login
register
mail settings
Submitter Jason Baron
Date July 31, 2012, 8:27 p.m.
Message ID <201207312027.q6VKR8YE023009@int-mx02.intmail.prod.int.phx2.redhat.com>
Download mbox | patch
Permalink /patch/174328/
State New
Headers show

Comments

Jason Baron - July 31, 2012, 8:27 p.m.
Add a new '[,dump=on|off]' option to the '-m' memory option. When 'dump=off'
is specified, guest memory is omitted from the core dump. The default behavior
continues to be to include guest memory when a core dump is triggered. In my
testing, this brought the core dump size down from 384MB to 6 MB on a 2GB guest.
Thanks to Avi Kivity for the command line syntax.

Signed-off-by: Jason Baron <jbaron@redhat.com>
---
 exec.c          |   13 +++++++++++++
 osdep.h         |    7 +++++++
 qemu-options.hx |   11 +++++++----
 sysemu.h        |    1 +
 vl.c            |   51 +++++++++++++++++++++++++++++++++++----------------
 5 files changed, 63 insertions(+), 20 deletions(-)
Anthony Liguori - Aug. 1, 2012, 12:07 a.m.
Jason Baron <jbaron@redhat.com> writes:

> Add a new '[,dump=on|off]' option to the '-m' memory option. When 'dump=off'
> is specified, guest memory is omitted from the core dump. The default behavior
> continues to be to include guest memory when a core dump is triggered. In my
> testing, this brought the core dump size down from 384MB to 6 MB on a 2GB guest.
> Thanks to Avi Kivity for the command line syntax.
>
> Signed-off-by: Jason Baron <jbaron@redhat.com>
> ---
>  exec.c          |   13 +++++++++++++
>  osdep.h         |    7 +++++++
>  qemu-options.hx |   11 +++++++----
>  sysemu.h        |    1 +
>  vl.c            |   51 +++++++++++++++++++++++++++++++++++----------------
>  5 files changed, 63 insertions(+), 20 deletions(-)
>
> diff --git a/exec.c b/exec.c
> index feb4795..75b8e23 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -35,6 +35,7 @@
>  #include "qemu-timer.h"
>  #include "memory.h"
>  #include "exec-memory.h"
> +#include "sysemu.h"
>  #if defined(CONFIG_USER_ONLY)
>  #include <qemu.h>
>  #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
> @@ -2514,6 +2515,7 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
>                                     MemoryRegion *mr)
>  {
>      RAMBlock *new_block;
> +    int ret;
>  
>      size = TARGET_PAGE_ALIGN(size);
>      new_block = g_malloc0(sizeof(*new_block));
> @@ -2555,6 +2557,17 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
>                                         last_ram_offset() >> TARGET_PAGE_BITS);
>      cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
>  
> +
> +    /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
> +    if (dont_dump_guest) {
> +        ret = qemu_madvise(new_block->host, size, QEMU_MADV_DONTDUMP);
> +        if (ret) {
> +            perror("qemu_madvise");
> +            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
> +                            "but dump=off specified\n");
> +        }
> +    }
> +
>      if (kvm_enabled())
>          kvm_setup_guest_memory(new_block->host, size);
>  
> diff --git a/osdep.h b/osdep.h
> index 1e15a4b..e2d0f57 100644
> --- a/osdep.h
> +++ b/osdep.h
> @@ -102,6 +102,11 @@ void qemu_vfree(void *ptr);
>  #else
>  #define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID
>  #endif
> +#ifdef MADV_DONTDUMP
> +#define QEMU_MADV_DONTDUMP MADV_DONTDUMP
> +#else
> +#define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
> +#endif
>  
>  #elif defined(CONFIG_POSIX_MADVISE)
>  
> @@ -109,6 +114,7 @@ void qemu_vfree(void *ptr);
>  #define QEMU_MADV_DONTNEED  POSIX_MADV_DONTNEED
>  #define QEMU_MADV_DONTFORK  QEMU_MADV_INVALID
>  #define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID
> +#define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
>  
>  #else /* no-op */
>  
> @@ -116,6 +122,7 @@ void qemu_vfree(void *ptr);
>  #define QEMU_MADV_DONTNEED  QEMU_MADV_INVALID
>  #define QEMU_MADV_DONTFORK  QEMU_MADV_INVALID
>  #define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID
> +#define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
>  
>  #endif
>  
> diff --git a/qemu-options.hx b/qemu-options.hx
> index dc68e15..9c21b99 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -382,14 +382,17 @@ the write back by pressing @key{C-a s} (@pxref{disk_images}).
>  ETEXI
>  
>  DEF("m", HAS_ARG, QEMU_OPTION_m,
> -    "-m megs         set virtual RAM size to megs MB [default="
> -    stringify(DEFAULT_RAM_SIZE) "]\n", QEMU_ARCH_ALL)
> +    "-m megs[,dump=on|off]\n"
> +    "                set virtual RAM size to megs MB [default=" stringify(DEFAULT_RAM_SIZE) "]\n"
> +    "                use 'dump=off' to omit guest memory from a core dump [default=on]\n",
> +     QEMU_ARCH_ALL)
>  STEXI
> -@item -m @var{megs}
> +@item -m @var{megs}[,dump=on|off]
>  @findex -m
>  Set virtual RAM size to @var{megs} megabytes. Default is 128 MiB.  Optionally,
>  a suffix of ``M'' or ``G'' can be used to signify a value in megabytes or
> -gigabytes respectively.
> +gigabytes respectively. When 'dump=off' is specified, the guest memory is omitted
> +from a qemu core dump. The default is 'dump=on'.
>  ETEXI
>  
>  DEF("mem-path", HAS_ARG, QEMU_OPTION_mempath,
> diff --git a/sysemu.h b/sysemu.h
> index 6540c79..f1a6f08 100644
> --- a/sysemu.h
> +++ b/sysemu.h
> @@ -132,6 +132,7 @@ extern uint8_t *boot_splash_filedata;
>  extern int boot_splash_filedata_size;
>  extern uint8_t qemu_extra_params_fw[2];
>  extern QEMUClock *rtc_clock;
> +extern int dont_dump_guest;
>  
>  #define MAX_NODES 64
>  extern int nb_numa_nodes;
> diff --git a/vl.c b/vl.c
> index c18bb80..54ff314 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -226,6 +226,7 @@ int boot_menu;
>  uint8_t *boot_splash_filedata;
>  int boot_splash_filedata_size;
>  uint8_t qemu_extra_params_fw[2];
> +int dont_dump_guest;
>  
>  typedef struct FWBootEntry FWBootEntry;
>  
> @@ -1041,6 +1042,38 @@ static void smp_parse(const char *optarg)
>          max_cpus = smp_cpus;
>  }
>  
> +static void memory_parse(const char *optarg)
> +{
> +    char option[128];
> +    int64_t value;
> +    char *end;
> +
> +    optarg = get_opt_name(option, 128, optarg, ',');
> +    value = strtosz(option, &end);
> +    if (value < 0 || (*end && (*end != ','))) {
> +        fprintf(stderr, "qemu: invalid ram size: %s\n", option);
> +        exit(1);
> +    }

I'll say it before Markus does--we shouldn't open code option parsing
code.  This is what QemuOpts is for.

That said, I think this ought to be a -machine option.  ram-size is
destined to become a machine option which would make -m N short hand for
-machine ram-size=n anyway.

Otherwise, the patch is nice.  I really like MADV_DONTDUMP.  We've had
similar requirements in the past and resorted to a really nasty core
dump parser.  This is much more elegant.

Regards,

Anthony Liguori

> +    if (value != (uint64_t)(ram_addr_t)value) {
> +        fprintf(stderr, "qemu: ram size too large\n");
> +        exit(1);
> +    }
> +    ram_size = value;
> +    if (*optarg == ',') {
> +        optarg++;
> +    }
> +    if (get_param_value(option, 128, "dump", optarg) != 0) {
> +        if (!strcmp(option, "on")) {
> +            dont_dump_guest = 0;
> +        } else if (!strcmp(option, "off")) {
> +            dont_dump_guest = 1;
> +        } else {
> +            fprintf(stderr, "qemu: invalid option value '%s'\n", option);
> +            exit(1);
> +        }
> +    }
> +}
> +
>  /***********************************************************/
>  /* USB devices */
>  
> @@ -2650,23 +2683,9 @@ int main(int argc, char **argv, char **envp)
>                  version();
>                  exit(0);
>                  break;
> -            case QEMU_OPTION_m: {
> -                int64_t value;
> -                char *end;
> -
> -                value = strtosz(optarg, &end);
> -                if (value < 0 || *end) {
> -                    fprintf(stderr, "qemu: invalid ram size: %s\n", optarg);
> -                    exit(1);
> -                }
> -
> -                if (value != (uint64_t)(ram_addr_t)value) {
> -                    fprintf(stderr, "qemu: ram size too large\n");
> -                    exit(1);
> -                }
> -                ram_size = value;
> +            case QEMU_OPTION_m:
> +                memory_parse(optarg);
>                  break;
> -            }
>              case QEMU_OPTION_mempath:
>                  mem_path = optarg;
>                  break;
> -- 
> 1.7.1
Eric Blake - Aug. 1, 2012, 12:14 a.m.
On 07/31/2012 06:07 PM, Anthony Liguori wrote:
> Jason Baron <jbaron@redhat.com> writes:
> 
>> Add a new '[,dump=on|off]' option to the '-m' memory option. When 'dump=off'
>> is specified, guest memory is omitted from the core dump. The default behavior
>> continues to be to include guest memory when a core dump is triggered. In my
>> testing, this brought the core dump size down from 384MB to 6 MB on a 2GB guest.
>> Thanks to Avi Kivity for the command line syntax.

>>  
>>  DEF("m", HAS_ARG, QEMU_OPTION_m,
>> -    "-m megs         set virtual RAM size to megs MB [default="
>> -    stringify(DEFAULT_RAM_SIZE) "]\n", QEMU_ARCH_ALL)
>> +    "-m megs[,dump=on|off]\n"
>> +    "                set virtual RAM size to megs MB [default=" stringify(DEFAULT_RAM_SIZE) "]\n"
>> +    "                use 'dump=off' to omit guest memory from a core dump [default=on]\n",
>> +     QEMU_ARCH_ALL)

> That said, I think this ought to be a -machine option.  ram-size is
> destined to become a machine option which would make -m N short hand for
> -machine ram-size=n anyway.

And hopefully whatever you decide on in 'qemu -help' won't break
existing libvirt, while we move towards newer libvirt avoiding
sensitivities to -help output.

> 
> Otherwise, the patch is nice.  I really like MADV_DONTDUMP.  We've had
> similar requirements in the past and resorted to a really nasty core
> dump parser.  This is much more elegant.

I'm now trying to figure out how beset to expose this functionality in
libvirt XML.  Tying it to -m might imply adding to the //domain/memory
XPath element, while tying it to -machine might imply adding to
//domain/on_crash or even to /domain/devices/emulator.  Thinking a bit
more, //domain/memory is a guest-visible attribute, but this new option
is not (it's really more of a tweak on what to do at the emulator level,
for when qemu crashes, with no impact to the guest itself).  So I'm also
in favor of tying it to -machine rather than -m, if possible (but
libvirt will be able to adapt to whatever you settle on).
Anthony Liguori - Aug. 1, 2012, 2:08 a.m.
Eric Blake <eblake@redhat.com> writes:

> On 07/31/2012 06:07 PM, Anthony Liguori wrote:
>> Jason Baron <jbaron@redhat.com> writes:
>> 
>>> Add a new '[,dump=on|off]' option to the '-m' memory option. When 'dump=off'
>>> is specified, guest memory is omitted from the core dump. The default behavior
>>> continues to be to include guest memory when a core dump is triggered. In my
>>> testing, this brought the core dump size down from 384MB to 6 MB on a 2GB guest.
>>> Thanks to Avi Kivity for the command line syntax.
>
>>>  
>>>  DEF("m", HAS_ARG, QEMU_OPTION_m,
>>> -    "-m megs         set virtual RAM size to megs MB [default="
>>> -    stringify(DEFAULT_RAM_SIZE) "]\n", QEMU_ARCH_ALL)
>>> +    "-m megs[,dump=on|off]\n"
>>> +    "                set virtual RAM size to megs MB [default=" stringify(DEFAULT_RAM_SIZE) "]\n"
>>> +    "                use 'dump=off' to omit guest memory from a core dump [default=on]\n",
>>> +     QEMU_ARCH_ALL)
>
>> That said, I think this ought to be a -machine option.  ram-size is
>> destined to become a machine option which would make -m N short hand for
>> -machine ram-size=n anyway.
>
> And hopefully whatever you decide on in 'qemu -help' won't break
> existing libvirt, while we move towards newer libvirt avoiding
> sensitivities to -help output.

Ack.

>> Otherwise, the patch is nice.  I really like MADV_DONTDUMP.  We've had
>> similar requirements in the past and resorted to a really nasty core
>> dump parser.  This is much more elegant.
>
> I'm now trying to figure out how beset to expose this functionality in
> libvirt XML.  Tying it to -m might imply adding to the //domain/memory
> XPath element, while tying it to -machine might imply adding to
> //domain/on_crash or even to /domain/devices/emulator.  Thinking a bit
> more, //domain/memory is a guest-visible attribute, but this new option
> is not (it's really more of a tweak on what to do at the emulator level,
> for when qemu crashes, with no impact to the guest itself).  So I'm also
> in favor of tying it to -machine rather than -m, if possible (but
> libvirt will be able to adapt to whatever you settle on).

From a QEMU perspective, I'd eventually like to see memory modeled as
just another type of device.  I would expect host properties (mergable
and don't dump) to be a property of the device.  I don't think forcing a
split between guest/host properties makes a ton of sense here although I
guess it's technically possible.

As a libvirt user, the users that don't want memory to be dumped want it
for all guests.  It's exactly the same as KSM.  Either you want it
everywhere or you don't want it at all.

Regards,

Anthony Liguori

>
> -- 
> Eric Blake   eblake@redhat.com    +1-919-301-3266
> Libvirt virtualization library http://libvirt.org

Patch

diff --git a/exec.c b/exec.c
index feb4795..75b8e23 100644
--- a/exec.c
+++ b/exec.c
@@ -35,6 +35,7 @@ 
 #include "qemu-timer.h"
 #include "memory.h"
 #include "exec-memory.h"
+#include "sysemu.h"
 #if defined(CONFIG_USER_ONLY)
 #include <qemu.h>
 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
@@ -2514,6 +2515,7 @@  ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                    MemoryRegion *mr)
 {
     RAMBlock *new_block;
+    int ret;
 
     size = TARGET_PAGE_ALIGN(size);
     new_block = g_malloc0(sizeof(*new_block));
@@ -2555,6 +2557,17 @@  ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                        last_ram_offset() >> TARGET_PAGE_BITS);
     cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
 
+
+    /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
+    if (dont_dump_guest) {
+        ret = qemu_madvise(new_block->host, size, QEMU_MADV_DONTDUMP);
+        if (ret) {
+            perror("qemu_madvise");
+            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
+                            "but dump=off specified\n");
+        }
+    }
+
     if (kvm_enabled())
         kvm_setup_guest_memory(new_block->host, size);
 
diff --git a/osdep.h b/osdep.h
index 1e15a4b..e2d0f57 100644
--- a/osdep.h
+++ b/osdep.h
@@ -102,6 +102,11 @@  void qemu_vfree(void *ptr);
 #else
 #define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID
 #endif
+#ifdef MADV_DONTDUMP
+#define QEMU_MADV_DONTDUMP MADV_DONTDUMP
+#else
+#define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
+#endif
 
 #elif defined(CONFIG_POSIX_MADVISE)
 
@@ -109,6 +114,7 @@  void qemu_vfree(void *ptr);
 #define QEMU_MADV_DONTNEED  POSIX_MADV_DONTNEED
 #define QEMU_MADV_DONTFORK  QEMU_MADV_INVALID
 #define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID
+#define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
 
 #else /* no-op */
 
@@ -116,6 +122,7 @@  void qemu_vfree(void *ptr);
 #define QEMU_MADV_DONTNEED  QEMU_MADV_INVALID
 #define QEMU_MADV_DONTFORK  QEMU_MADV_INVALID
 #define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID
+#define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
 
 #endif
 
diff --git a/qemu-options.hx b/qemu-options.hx
index dc68e15..9c21b99 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -382,14 +382,17 @@  the write back by pressing @key{C-a s} (@pxref{disk_images}).
 ETEXI
 
 DEF("m", HAS_ARG, QEMU_OPTION_m,
-    "-m megs         set virtual RAM size to megs MB [default="
-    stringify(DEFAULT_RAM_SIZE) "]\n", QEMU_ARCH_ALL)
+    "-m megs[,dump=on|off]\n"
+    "                set virtual RAM size to megs MB [default=" stringify(DEFAULT_RAM_SIZE) "]\n"
+    "                use 'dump=off' to omit guest memory from a core dump [default=on]\n",
+     QEMU_ARCH_ALL)
 STEXI
-@item -m @var{megs}
+@item -m @var{megs}[,dump=on|off]
 @findex -m
 Set virtual RAM size to @var{megs} megabytes. Default is 128 MiB.  Optionally,
 a suffix of ``M'' or ``G'' can be used to signify a value in megabytes or
-gigabytes respectively.
+gigabytes respectively. When 'dump=off' is specified, the guest memory is omitted
+from a qemu core dump. The default is 'dump=on'.
 ETEXI
 
 DEF("mem-path", HAS_ARG, QEMU_OPTION_mempath,
diff --git a/sysemu.h b/sysemu.h
index 6540c79..f1a6f08 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -132,6 +132,7 @@  extern uint8_t *boot_splash_filedata;
 extern int boot_splash_filedata_size;
 extern uint8_t qemu_extra_params_fw[2];
 extern QEMUClock *rtc_clock;
+extern int dont_dump_guest;
 
 #define MAX_NODES 64
 extern int nb_numa_nodes;
diff --git a/vl.c b/vl.c
index c18bb80..54ff314 100644
--- a/vl.c
+++ b/vl.c
@@ -226,6 +226,7 @@  int boot_menu;
 uint8_t *boot_splash_filedata;
 int boot_splash_filedata_size;
 uint8_t qemu_extra_params_fw[2];
+int dont_dump_guest;
 
 typedef struct FWBootEntry FWBootEntry;
 
@@ -1041,6 +1042,38 @@  static void smp_parse(const char *optarg)
         max_cpus = smp_cpus;
 }
 
+static void memory_parse(const char *optarg)
+{
+    char option[128];
+    int64_t value;
+    char *end;
+
+    optarg = get_opt_name(option, 128, optarg, ',');
+    value = strtosz(option, &end);
+    if (value < 0 || (*end && (*end != ','))) {
+        fprintf(stderr, "qemu: invalid ram size: %s\n", option);
+        exit(1);
+    }
+    if (value != (uint64_t)(ram_addr_t)value) {
+        fprintf(stderr, "qemu: ram size too large\n");
+        exit(1);
+    }
+    ram_size = value;
+    if (*optarg == ',') {
+        optarg++;
+    }
+    if (get_param_value(option, 128, "dump", optarg) != 0) {
+        if (!strcmp(option, "on")) {
+            dont_dump_guest = 0;
+        } else if (!strcmp(option, "off")) {
+            dont_dump_guest = 1;
+        } else {
+            fprintf(stderr, "qemu: invalid option value '%s'\n", option);
+            exit(1);
+        }
+    }
+}
+
 /***********************************************************/
 /* USB devices */
 
@@ -2650,23 +2683,9 @@  int main(int argc, char **argv, char **envp)
                 version();
                 exit(0);
                 break;
-            case QEMU_OPTION_m: {
-                int64_t value;
-                char *end;
-
-                value = strtosz(optarg, &end);
-                if (value < 0 || *end) {
-                    fprintf(stderr, "qemu: invalid ram size: %s\n", optarg);
-                    exit(1);
-                }
-
-                if (value != (uint64_t)(ram_addr_t)value) {
-                    fprintf(stderr, "qemu: ram size too large\n");
-                    exit(1);
-                }
-                ram_size = value;
+            case QEMU_OPTION_m:
+                memory_parse(optarg);
                 break;
-            }
             case QEMU_OPTION_mempath:
                 mem_path = optarg;
                 break;