Patchwork [V2,17/23] kvm tools: Add ability to map guest RAM from hugetlbfs

login
register
mail settings
Submitter Matt Evans
Date Dec. 9, 2011, 6:55 a.m.
Message ID <4EE1B0DD.6030004@ozlabs.org>
Download mbox | patch
Permalink /patch/130300/
State New
Headers show

Comments

Matt Evans - Dec. 9, 2011, 6:55 a.m.
Add a --hugetlbfs commandline option to give a path to hugetlbfs-map guest
memory (down in kvm__arch_init()).  For x86, guest memory is a normal
ANON mmap() if this option is not provided, otherwise a hugetlbfs mmap.

Signed-off-by: Matt Evans <matt@ozlabs.org>
---
 tools/kvm/builtin-run.c      |    4 ++-
 tools/kvm/include/kvm/kvm.h  |    4 +-
 tools/kvm/include/kvm/util.h |    4 +++
 tools/kvm/kvm.c              |    4 +-
 tools/kvm/util.c             |   45 ++++++++++++++++++++++++++++++++++++++++++
 tools/kvm/x86/kvm.c          |   20 +++++++++++++++--
 6 files changed, 73 insertions(+), 8 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sasha Levin - Dec. 9, 2011, 7:39 a.m.
On Fri, 2011-12-09 at 17:55 +1100, Matt Evans wrote:
> Add a --hugetlbfs commandline option to give a path to hugetlbfs-map guest
> memory (down in kvm__arch_init()).  For x86, guest memory is a normal
> ANON mmap() if this option is not provided, otherwise a hugetlbfs mmap.
> 
> Signed-off-by: Matt Evans <matt@ozlabs.org>
> ---
>  tools/kvm/builtin-run.c      |    4 ++-
>  tools/kvm/include/kvm/kvm.h  |    4 +-
>  tools/kvm/include/kvm/util.h |    4 +++
>  tools/kvm/kvm.c              |    4 +-
>  tools/kvm/util.c             |   45 ++++++++++++++++++++++++++++++++++++++++++
>  tools/kvm/x86/kvm.c          |   20 +++++++++++++++--
>  6 files changed, 73 insertions(+), 8 deletions(-)
> 
> diff --git a/tools/kvm/builtin-run.c b/tools/kvm/builtin-run.c
> index 7969901..0acfe81 100644
> --- a/tools/kvm/builtin-run.c
> +++ b/tools/kvm/builtin-run.c
> @@ -82,6 +82,7 @@ static const char *guest_mac;
>  static const char *host_mac;
>  static const char *script;
>  static const char *guest_name;
> +static const char *hugetlbfs_path;
>  static struct virtio_net_params *net_params;
>  static bool single_step;
>  static bool readonly_image[MAX_DISK_IMAGES];
> @@ -422,6 +423,7 @@ static const struct option options[] = {
>  	OPT_CALLBACK('\0', "tty", NULL, "tty id",
>  		     "Remap guest TTY into a pty on the host",
>  		     tty_parser),
> +	OPT_STRING('\0', "hugetlbfs", &hugetlbfs_path, "path", "Hugetlbfs path"),
>  
>  	OPT_GROUP("Kernel options:"),
>  	OPT_STRING('k', "kernel", &kernel_filename, "kernel",
> @@ -807,7 +809,7 @@ int kvm_cmd_run(int argc, const char **argv, const char *prefix)
>  		guest_name = default_name;
>  	}
>  
> -	kvm = kvm__init(dev, ram_size, guest_name);
> +	kvm = kvm__init(dev, hugetlbfs_path, ram_size, guest_name);
>  
>  	kvm->single_step = single_step;
>  
> diff --git a/tools/kvm/include/kvm/kvm.h b/tools/kvm/include/kvm/kvm.h
> index 5fe6e75..7159952 100644
> --- a/tools/kvm/include/kvm/kvm.h
> +++ b/tools/kvm/include/kvm/kvm.h
> @@ -30,7 +30,7 @@ struct kvm_ext {
>  void kvm__set_dir(const char *fmt, ...);
>  const char *kvm__get_dir(void);
>  
> -struct kvm *kvm__init(const char *kvm_dev, u64 ram_size, const char *name);
> +struct kvm *kvm__init(const char *kvm_dev, const char *hugetlbfs_path, u64 ram_size, const char *name);
>  int kvm__recommended_cpus(struct kvm *kvm);
>  int kvm__max_cpus(struct kvm *kvm);
>  void kvm__init_ram(struct kvm *kvm);
> @@ -54,7 +54,7 @@ int kvm__enumerate_instances(int (*callback)(const char *name, int pid));
>  void kvm__remove_socket(const char *name);
>  
>  void kvm__arch_set_cmdline(char *cmdline, bool video);
> -void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, u64 ram_size, const char *name);
> +void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, const char *hugetlbfs_path, u64 ram_size, const char *name);
>  void kvm__arch_setup_firmware(struct kvm *kvm);
>  bool kvm__arch_cpu_supports_vm(void);
>  void kvm__arch_periodic_poll(struct kvm *kvm);
> diff --git a/tools/kvm/include/kvm/util.h b/tools/kvm/include/kvm/util.h
> index dc2e0b9..1f6fbbd 100644
> --- a/tools/kvm/include/kvm/util.h
> +++ b/tools/kvm/include/kvm/util.h
> @@ -20,6 +20,7 @@
>  #include <limits.h>
>  #include <sys/param.h>
>  #include <sys/types.h>
> +#include <linux/types.h>
>  
>  #ifdef __GNUC__
>  #define NORETURN __attribute__((__noreturn__))
> @@ -75,4 +76,7 @@ static inline void msleep(unsigned int msecs)
>  {
>  	usleep(MSECS_TO_USECS(msecs));
>  }
> +
> +void *mmap_hugetlbfs(const char *htlbfs_path, u64 size);
> +
>  #endif /* KVM__UTIL_H */
> diff --git a/tools/kvm/kvm.c b/tools/kvm/kvm.c
> index c54f886..35ca2c5 100644
> --- a/tools/kvm/kvm.c
> +++ b/tools/kvm/kvm.c
> @@ -306,7 +306,7 @@ int kvm__max_cpus(struct kvm *kvm)
>  	return ret;
>  }
>  
> -struct kvm *kvm__init(const char *kvm_dev, u64 ram_size, const char *name)
> +struct kvm *kvm__init(const char *kvm_dev, const char *hugetlbfs_path, u64 ram_size, const char *name)
>  {
>  	struct kvm *kvm;
>  	int ret;
> @@ -339,7 +339,7 @@ struct kvm *kvm__init(const char *kvm_dev, u64 ram_size, const char *name)
>  	if (kvm__check_extensions(kvm))
>  		die("A required KVM extention is not supported by OS");
>  
> -	kvm__arch_init(kvm, kvm_dev, ram_size, name);
> +	kvm__arch_init(kvm, kvm_dev, hugetlbfs_path, ram_size, name);
>  
>  	kvm->name = name;
>  
> diff --git a/tools/kvm/util.c b/tools/kvm/util.c
> index 4efbce9..90b6a3b 100644
> --- a/tools/kvm/util.c
> +++ b/tools/kvm/util.c
> @@ -4,6 +4,11 @@
>  
>  #include "kvm/util.h"
>  
> +#include <linux/magic.h>	/* For HUGETLBFS_MAGIC */
> +#include <sys/mman.h>
> +#include <sys/stat.h>
> +#include <sys/statfs.h>
> +
>  static void report(const char *prefix, const char *err, va_list params)
>  {
>  	char msg[1024];
> @@ -99,3 +104,43 @@ size_t strlcat(char *dest, const char *src, size_t count)
>  
>  	return res;
>  }
> +
> +void *mmap_hugetlbfs(const char *htlbfs_path, u64 size)
> +{
> +	char mpath[PATH_MAX];
> +	int fd;
> +	int r;
> +	struct statfs sfs;
> +	void *addr;
> +
> +	do {
> +		/*
> +		 * QEMU seems to work around this returning EINTR...  Let's do
> +		 * that too.
> +		 */
> +		r = statfs(htlbfs_path, &sfs);
> +	} while (r && errno == EINTR);
> +
> +	if (r)
> +		die("Can't stat %s\n", htlbfs_path);
> +
> +	if (sfs.f_type != HUGETLBFS_MAGIC) {
> +		die("%s is not hugetlbfs!\n", htlbfs_path);
> +	}
> +
> +	if (sfs.f_bsize == 0 || (unsigned long)sfs.f_bsize > size) {
> +		die("Can't use hugetlbfs pagesize %ld for mem size %lld\n",
> +		    sfs.f_bsize, size);
> +	}
> +
> +	snprintf(mpath, PATH_MAX, "%s/kvmtoolXXXXXX", htlbfs_path);
> +	fd = mkstemp(mpath);
> +	if (fd < 0)
> +		die("Can't open %s for hugetlbfs map\n", mpath);
> +	unlink(mpath);
> +	ftruncate(fd, size);

Getting this thing here:

cc1: warnings being treated as errors
util.c: In function 'mmap_hugetlbfs':
util.c:141:11: error: ignoring return value of 'ftruncate', declared with attribute warn_unused_result
make: *** [util.o] Error 1
Pekka Enberg - Dec. 9, 2011, 8:38 a.m.
On Fri, Dec 9, 2011 at 8:55 AM, Matt Evans <matt@ozlabs.org> wrote:
> Add a --hugetlbfs commandline option to give a path to hugetlbfs-map guest
> memory (down in kvm__arch_init()).  For x86, guest memory is a normal
> ANON mmap() if this option is not provided, otherwise a hugetlbfs mmap.
>
> Signed-off-by: Matt Evans <matt@ozlabs.org>

> +void *mmap_hugetlbfs(const char *htlbfs_path, u64 size)
> +{
> +       char mpath[PATH_MAX];
> +       int fd;
> +       int r;
> +       struct statfs sfs;
> +       void *addr;
> +
> +       do {
> +               /*
> +                * QEMU seems to work around this returning EINTR...  Let's do
> +                * that too.
> +                */
> +               r = statfs(htlbfs_path, &sfs);
> +       } while (r && errno == EINTR);

Can this really happen? What about EAGAIN? The retry logic really
wants to live in tools/kvm/read-write.c as an xstatfs() wrapper if we
do need this.

                            Pekka
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pekka Enberg - Dec. 9, 2011, 8:42 a.m.
On Fri, Dec 9, 2011 at 8:55 AM, Matt Evans <matt@ozlabs.org> wrote:
> Add a --hugetlbfs commandline option to give a path to hugetlbfs-map guest
> memory (down in kvm__arch_init()).  For x86, guest memory is a normal
> ANON mmap() if this option is not provided, otherwise a hugetlbfs mmap.
>
> Signed-off-by: Matt Evans <matt@ozlabs.org>

Btw, why don't you want to use MADV_HUGEPAGE for this? You could just
do it unconditionally, no?
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Matt Evans - Dec. 12, 2011, 5:05 a.m.
On 09/12/11 18:39, Sasha Levin wrote:
> On Fri, 2011-12-09 at 17:55 +1100, Matt Evans wrote:
>> Add a --hugetlbfs commandline option to give a path to hugetlbfs-map guest
>> memory (down in kvm__arch_init()).  For x86, guest memory is a normal
>> ANON mmap() if this option is not provided, otherwise a hugetlbfs mmap.
>>
>> Signed-off-by: Matt Evans <matt@ozlabs.org>
>> ---
>>  tools/kvm/builtin-run.c      |    4 ++-
>>  tools/kvm/include/kvm/kvm.h  |    4 +-
>>  tools/kvm/include/kvm/util.h |    4 +++
>>  tools/kvm/kvm.c              |    4 +-
>>  tools/kvm/util.c             |   45 ++++++++++++++++++++++++++++++++++++++++++
>>  tools/kvm/x86/kvm.c          |   20 +++++++++++++++--
>>  6 files changed, 73 insertions(+), 8 deletions(-)
>>
>> diff --git a/tools/kvm/builtin-run.c b/tools/kvm/builtin-run.c
>> index 7969901..0acfe81 100644
>> --- a/tools/kvm/builtin-run.c
>> +++ b/tools/kvm/builtin-run.c
>> @@ -82,6 +82,7 @@ static const char *guest_mac;
>>  static const char *host_mac;
>>  static const char *script;
>>  static const char *guest_name;
>> +static const char *hugetlbfs_path;
>>  static struct virtio_net_params *net_params;
>>  static bool single_step;
>>  static bool readonly_image[MAX_DISK_IMAGES];
>> @@ -422,6 +423,7 @@ static const struct option options[] = {
>>  	OPT_CALLBACK('\0', "tty", NULL, "tty id",
>>  		     "Remap guest TTY into a pty on the host",
>>  		     tty_parser),
>> +	OPT_STRING('\0', "hugetlbfs", &hugetlbfs_path, "path", "Hugetlbfs path"),
>>  
>>  	OPT_GROUP("Kernel options:"),
>>  	OPT_STRING('k', "kernel", &kernel_filename, "kernel",
>> @@ -807,7 +809,7 @@ int kvm_cmd_run(int argc, const char **argv, const char *prefix)
>>  		guest_name = default_name;
>>  	}
>>  
>> -	kvm = kvm__init(dev, ram_size, guest_name);
>> +	kvm = kvm__init(dev, hugetlbfs_path, ram_size, guest_name);
>>  
>>  	kvm->single_step = single_step;
>>  
>> diff --git a/tools/kvm/include/kvm/kvm.h b/tools/kvm/include/kvm/kvm.h
>> index 5fe6e75..7159952 100644
>> --- a/tools/kvm/include/kvm/kvm.h
>> +++ b/tools/kvm/include/kvm/kvm.h
>> @@ -30,7 +30,7 @@ struct kvm_ext {
>>  void kvm__set_dir(const char *fmt, ...);
>>  const char *kvm__get_dir(void);
>>  
>> -struct kvm *kvm__init(const char *kvm_dev, u64 ram_size, const char *name);
>> +struct kvm *kvm__init(const char *kvm_dev, const char *hugetlbfs_path, u64 ram_size, const char *name);
>>  int kvm__recommended_cpus(struct kvm *kvm);
>>  int kvm__max_cpus(struct kvm *kvm);
>>  void kvm__init_ram(struct kvm *kvm);
>> @@ -54,7 +54,7 @@ int kvm__enumerate_instances(int (*callback)(const char *name, int pid));
>>  void kvm__remove_socket(const char *name);
>>  
>>  void kvm__arch_set_cmdline(char *cmdline, bool video);
>> -void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, u64 ram_size, const char *name);
>> +void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, const char *hugetlbfs_path, u64 ram_size, const char *name);
>>  void kvm__arch_setup_firmware(struct kvm *kvm);
>>  bool kvm__arch_cpu_supports_vm(void);
>>  void kvm__arch_periodic_poll(struct kvm *kvm);
>> diff --git a/tools/kvm/include/kvm/util.h b/tools/kvm/include/kvm/util.h
>> index dc2e0b9..1f6fbbd 100644
>> --- a/tools/kvm/include/kvm/util.h
>> +++ b/tools/kvm/include/kvm/util.h
>> @@ -20,6 +20,7 @@
>>  #include <limits.h>
>>  #include <sys/param.h>
>>  #include <sys/types.h>
>> +#include <linux/types.h>
>>  
>>  #ifdef __GNUC__
>>  #define NORETURN __attribute__((__noreturn__))
>> @@ -75,4 +76,7 @@ static inline void msleep(unsigned int msecs)
>>  {
>>  	usleep(MSECS_TO_USECS(msecs));
>>  }
>> +
>> +void *mmap_hugetlbfs(const char *htlbfs_path, u64 size);
>> +
>>  #endif /* KVM__UTIL_H */
>> diff --git a/tools/kvm/kvm.c b/tools/kvm/kvm.c
>> index c54f886..35ca2c5 100644
>> --- a/tools/kvm/kvm.c
>> +++ b/tools/kvm/kvm.c
>> @@ -306,7 +306,7 @@ int kvm__max_cpus(struct kvm *kvm)
>>  	return ret;
>>  }
>>  
>> -struct kvm *kvm__init(const char *kvm_dev, u64 ram_size, const char *name)
>> +struct kvm *kvm__init(const char *kvm_dev, const char *hugetlbfs_path, u64 ram_size, const char *name)
>>  {
>>  	struct kvm *kvm;
>>  	int ret;
>> @@ -339,7 +339,7 @@ struct kvm *kvm__init(const char *kvm_dev, u64 ram_size, const char *name)
>>  	if (kvm__check_extensions(kvm))
>>  		die("A required KVM extention is not supported by OS");
>>  
>> -	kvm__arch_init(kvm, kvm_dev, ram_size, name);
>> +	kvm__arch_init(kvm, kvm_dev, hugetlbfs_path, ram_size, name);
>>  
>>  	kvm->name = name;
>>  
>> diff --git a/tools/kvm/util.c b/tools/kvm/util.c
>> index 4efbce9..90b6a3b 100644
>> --- a/tools/kvm/util.c
>> +++ b/tools/kvm/util.c
>> @@ -4,6 +4,11 @@
>>  
>>  #include "kvm/util.h"
>>  
>> +#include <linux/magic.h>	/* For HUGETLBFS_MAGIC */
>> +#include <sys/mman.h>
>> +#include <sys/stat.h>
>> +#include <sys/statfs.h>
>> +
>>  static void report(const char *prefix, const char *err, va_list params)
>>  {
>>  	char msg[1024];
>> @@ -99,3 +104,43 @@ size_t strlcat(char *dest, const char *src, size_t count)
>>  
>>  	return res;
>>  }
>> +
>> +void *mmap_hugetlbfs(const char *htlbfs_path, u64 size)
>> +{
>> +	char mpath[PATH_MAX];
>> +	int fd;
>> +	int r;
>> +	struct statfs sfs;
>> +	void *addr;
>> +
>> +	do {
>> +		/*
>> +		 * QEMU seems to work around this returning EINTR...  Let's do
>> +		 * that too.
>> +		 */
>> +		r = statfs(htlbfs_path, &sfs);
>> +	} while (r && errno == EINTR);
>> +
>> +	if (r)
>> +		die("Can't stat %s\n", htlbfs_path);
>> +
>> +	if (sfs.f_type != HUGETLBFS_MAGIC) {
>> +		die("%s is not hugetlbfs!\n", htlbfs_path);
>> +	}
>> +
>> +	if (sfs.f_bsize == 0 || (unsigned long)sfs.f_bsize > size) {
>> +		die("Can't use hugetlbfs pagesize %ld for mem size %lld\n",
>> +		    sfs.f_bsize, size);
>> +	}
>> +
>> +	snprintf(mpath, PATH_MAX, "%s/kvmtoolXXXXXX", htlbfs_path);
>> +	fd = mkstemp(mpath);
>> +	if (fd < 0)
>> +		die("Can't open %s for hugetlbfs map\n", mpath);
>> +	unlink(mpath);
>> +	ftruncate(fd, size);
> 
> Getting this thing here:
> 
> cc1: warnings being treated as errors
> util.c: In function 'mmap_hugetlbfs':
> util.c:141:11: error: ignoring return value of 'ftruncate', declared with attribute warn_unused_result
> make: *** [util.o] Error 1
> 

Ooookay, I swear I'm not seeing that here, GCC 4.6.2 & no extra -W flags added.

I've added a check of ftruncate()'s return value, anyway.

Thanks,


Matt

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Matt Evans - Dec. 12, 2011, 5:17 a.m.
On 09/12/11 19:42, Pekka Enberg wrote:
> On Fri, Dec 9, 2011 at 8:55 AM, Matt Evans <matt@ozlabs.org> wrote:
>> Add a --hugetlbfs commandline option to give a path to hugetlbfs-map guest
>> memory (down in kvm__arch_init()).  For x86, guest memory is a normal
>> ANON mmap() if this option is not provided, otherwise a hugetlbfs mmap.
>>
>> Signed-off-by: Matt Evans <matt@ozlabs.org>
> 
> Btw, why don't you want to use MADV_HUGEPAGE for this? You could just
> do it unconditionally, no?

Well, I'm manually mapping from hugetlbfs as currently* PPC KVM requires
hugepages to back guest RAM and MADV_HUGEPAGE is just a hint, no?  I also wanted
things to work on kernels without transparent hugepages enabled.  I think it's
safer to do things explicitly, as if the user requests hugepages it's more
transparent (I'm thinking benchmarking, etc.) to be definitely using hugepages.


Cheers,


Matt


*: I know Paul's posted patches to implement smallpage support... so this will
   change in time.
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pekka Enberg - Dec. 12, 2011, 6:06 a.m.
On Mon, Dec 12, 2011 at 7:17 AM, Matt Evans <matt@ozlabs.org> wrote:
> Well, I'm manually mapping from hugetlbfs as currently* PPC KVM requires
> hugepages to back guest RAM and MADV_HUGEPAGE is just a hint, no?  I also wanted
> things to work on kernels without transparent hugepages enabled.  I think it's
> safer to do things explicitly, as if the user requests hugepages it's more
> transparent (I'm thinking benchmarking, etc.) to be definitely using hugepages.

OK, makes sense. You should probably mention that in the changelog.
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Matt Evans - Dec. 12, 2011, 6:19 a.m.
On 09/12/11 19:38, Pekka Enberg wrote:
> On Fri, Dec 9, 2011 at 8:55 AM, Matt Evans <matt@ozlabs.org> wrote:
>> Add a --hugetlbfs commandline option to give a path to hugetlbfs-map guest
>> memory (down in kvm__arch_init()).  For x86, guest memory is a normal
>> ANON mmap() if this option is not provided, otherwise a hugetlbfs mmap.
>>
>> Signed-off-by: Matt Evans <matt@ozlabs.org>
> 
>> +void *mmap_hugetlbfs(const char *htlbfs_path, u64 size)
>> +{
>> +       char mpath[PATH_MAX];
>> +       int fd;
>> +       int r;
>> +       struct statfs sfs;
>> +       void *addr;
>> +
>> +       do {
>> +               /*
>> +                * QEMU seems to work around this returning EINTR...  Let's do
>> +                * that too.
>> +                */
>> +               r = statfs(htlbfs_path, &sfs);
>> +       } while (r && errno == EINTR);
> 
> Can this really happen? What about EAGAIN? The retry logic really
> wants to live in tools/kvm/read-write.c as an xstatfs() wrapper if we
> do need this.

I don't think it can.  As per the comment, I thought QEMU knew something I
didn't but I haven't seen any other reason for doing this.  I'll remove it,
thanks for the sanity jolt.


Matt

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/tools/kvm/builtin-run.c b/tools/kvm/builtin-run.c
index 7969901..0acfe81 100644
--- a/tools/kvm/builtin-run.c
+++ b/tools/kvm/builtin-run.c
@@ -82,6 +82,7 @@  static const char *guest_mac;
 static const char *host_mac;
 static const char *script;
 static const char *guest_name;
+static const char *hugetlbfs_path;
 static struct virtio_net_params *net_params;
 static bool single_step;
 static bool readonly_image[MAX_DISK_IMAGES];
@@ -422,6 +423,7 @@  static const struct option options[] = {
 	OPT_CALLBACK('\0', "tty", NULL, "tty id",
 		     "Remap guest TTY into a pty on the host",
 		     tty_parser),
+	OPT_STRING('\0', "hugetlbfs", &hugetlbfs_path, "path", "Hugetlbfs path"),
 
 	OPT_GROUP("Kernel options:"),
 	OPT_STRING('k', "kernel", &kernel_filename, "kernel",
@@ -807,7 +809,7 @@  int kvm_cmd_run(int argc, const char **argv, const char *prefix)
 		guest_name = default_name;
 	}
 
-	kvm = kvm__init(dev, ram_size, guest_name);
+	kvm = kvm__init(dev, hugetlbfs_path, ram_size, guest_name);
 
 	kvm->single_step = single_step;
 
diff --git a/tools/kvm/include/kvm/kvm.h b/tools/kvm/include/kvm/kvm.h
index 5fe6e75..7159952 100644
--- a/tools/kvm/include/kvm/kvm.h
+++ b/tools/kvm/include/kvm/kvm.h
@@ -30,7 +30,7 @@  struct kvm_ext {
 void kvm__set_dir(const char *fmt, ...);
 const char *kvm__get_dir(void);
 
-struct kvm *kvm__init(const char *kvm_dev, u64 ram_size, const char *name);
+struct kvm *kvm__init(const char *kvm_dev, const char *hugetlbfs_path, u64 ram_size, const char *name);
 int kvm__recommended_cpus(struct kvm *kvm);
 int kvm__max_cpus(struct kvm *kvm);
 void kvm__init_ram(struct kvm *kvm);
@@ -54,7 +54,7 @@  int kvm__enumerate_instances(int (*callback)(const char *name, int pid));
 void kvm__remove_socket(const char *name);
 
 void kvm__arch_set_cmdline(char *cmdline, bool video);
-void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, u64 ram_size, const char *name);
+void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, const char *hugetlbfs_path, u64 ram_size, const char *name);
 void kvm__arch_setup_firmware(struct kvm *kvm);
 bool kvm__arch_cpu_supports_vm(void);
 void kvm__arch_periodic_poll(struct kvm *kvm);
diff --git a/tools/kvm/include/kvm/util.h b/tools/kvm/include/kvm/util.h
index dc2e0b9..1f6fbbd 100644
--- a/tools/kvm/include/kvm/util.h
+++ b/tools/kvm/include/kvm/util.h
@@ -20,6 +20,7 @@ 
 #include <limits.h>
 #include <sys/param.h>
 #include <sys/types.h>
+#include <linux/types.h>
 
 #ifdef __GNUC__
 #define NORETURN __attribute__((__noreturn__))
@@ -75,4 +76,7 @@  static inline void msleep(unsigned int msecs)
 {
 	usleep(MSECS_TO_USECS(msecs));
 }
+
+void *mmap_hugetlbfs(const char *htlbfs_path, u64 size);
+
 #endif /* KVM__UTIL_H */
diff --git a/tools/kvm/kvm.c b/tools/kvm/kvm.c
index c54f886..35ca2c5 100644
--- a/tools/kvm/kvm.c
+++ b/tools/kvm/kvm.c
@@ -306,7 +306,7 @@  int kvm__max_cpus(struct kvm *kvm)
 	return ret;
 }
 
-struct kvm *kvm__init(const char *kvm_dev, u64 ram_size, const char *name)
+struct kvm *kvm__init(const char *kvm_dev, const char *hugetlbfs_path, u64 ram_size, const char *name)
 {
 	struct kvm *kvm;
 	int ret;
@@ -339,7 +339,7 @@  struct kvm *kvm__init(const char *kvm_dev, u64 ram_size, const char *name)
 	if (kvm__check_extensions(kvm))
 		die("A required KVM extention is not supported by OS");
 
-	kvm__arch_init(kvm, kvm_dev, ram_size, name);
+	kvm__arch_init(kvm, kvm_dev, hugetlbfs_path, ram_size, name);
 
 	kvm->name = name;
 
diff --git a/tools/kvm/util.c b/tools/kvm/util.c
index 4efbce9..90b6a3b 100644
--- a/tools/kvm/util.c
+++ b/tools/kvm/util.c
@@ -4,6 +4,11 @@ 
 
 #include "kvm/util.h"
 
+#include <linux/magic.h>	/* For HUGETLBFS_MAGIC */
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+
 static void report(const char *prefix, const char *err, va_list params)
 {
 	char msg[1024];
@@ -99,3 +104,43 @@  size_t strlcat(char *dest, const char *src, size_t count)
 
 	return res;
 }
+
+/*
+ * Map @size bytes of guest RAM from an unlinked temporary file on the
+ * hugetlbfs mount at @htlbfs_path.  Returns the mmap() result, which may be
+ * MAP_FAILED; every earlier setup failure is reported through die().
+ */
+void *mmap_hugetlbfs(const char *htlbfs_path, u64 size)
+{
+	char mpath[PATH_MAX];
+	struct statfs sfs;
+	void *addr;
+	int fd, r;
+
+	/* statfs(2) lists EINTR as a possible error, so retry on it. */
+	do {
+		r = statfs(htlbfs_path, &sfs);
+	} while (r && errno == EINTR);
+	if (r)
+		die("Can't stat %s\n", htlbfs_path);
+
+	if (sfs.f_type != HUGETLBFS_MAGIC)
+		die("%s is not hugetlbfs!\n", htlbfs_path);
+
+	/* f_bsize is the huge page size; RAM must cover at least one page. */
+	if (sfs.f_bsize == 0 || (unsigned long)sfs.f_bsize > size)
+		die("Can't use hugetlbfs pagesize %ld for mem size %lld\n",
+		    (long)sfs.f_bsize, (long long)size);
+
+	snprintf(mpath, PATH_MAX, "%s/kvmtoolXXXXXX", htlbfs_path);
+	fd = mkstemp(mpath);
+	if (fd < 0)
+		die("Can't open %s for hugetlbfs map\n", mpath);
+	unlink(mpath);	/* the open fd keeps the file; no name remains */
+	if (ftruncate(fd, size) < 0)
+		die("Can't ftruncate to %lld bytes\n", (long long)size);
+	addr = mmap(NULL, size, PROT_RW, MAP_PRIVATE, fd, 0);
+	close(fd);	/* the mapping outlives the descriptor */
+
+	return addr;
+}
diff --git a/tools/kvm/x86/kvm.c b/tools/kvm/x86/kvm.c
index a116f4b..31671e7 100644
--- a/tools/kvm/x86/kvm.c
+++ b/tools/kvm/x86/kvm.c
@@ -130,8 +130,22 @@  void kvm__arch_set_cmdline(char *cmdline, bool video)
 		strcat(cmdline, " console=ttyS0 earlyprintk=serial i8042.noaux=1");
 }
 
+/*
+ * Allocate guest RAM of the given size.  Mapping from hugetlbfs is optional
+ * on x86: it is used only when the user supplied a hugetlbfs path, otherwise
+ * an ordinary anonymous mapping is created.  Returns whatever the chosen
+ * mapping call returns.
+ */
+static void *mmap_anon_or_hugetlbfs(const char *hugetlbfs_path, u64 size)
+{
+	if (!hugetlbfs_path)
+		return mmap(NULL, size, PROT_RW, MAP_ANON_NORESERVE, -1, 0);
+
+	return mmap_hugetlbfs(hugetlbfs_path, size);
+}
+
 /* Architecture-specific KVM init */
-void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, u64 ram_size, const char *name)
+void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, const char *hugetlbfs_path, u64 ram_size, const char *name)
 {
 	struct kvm_pit_config pit_config = { .flags = 0, };
 	int ret;
@@ -147,9 +161,9 @@  void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, u64 ram_size, const ch
 	kvm->ram_size		= ram_size;
 
 	if (kvm->ram_size < KVM_32BIT_GAP_START) {
-		kvm->ram_start = mmap(NULL, ram_size, PROT_RW, MAP_ANON_NORESERVE, -1, 0);
+		kvm->ram_start = mmap_anon_or_hugetlbfs(hugetlbfs_path, ram_size);
 	} else {
-		kvm->ram_start = mmap(NULL, ram_size + KVM_32BIT_GAP_SIZE, PROT_RW, MAP_ANON_NORESERVE, -1, 0);
+		kvm->ram_start = mmap_anon_or_hugetlbfs(hugetlbfs_path, ram_size + KVM_32BIT_GAP_SIZE);
 		if (kvm->ram_start != MAP_FAILED) {
 			/*
 			 * We mprotect the gap (see kvm__init_ram() for details) PROT_NONE so that