diff mbox

ppc: Huge page detection mechanism fixes - Episode III

Message ID 1468847944-24533-1-git-send-email-thuth@redhat.com
State New
Headers show

Commit Message

Thomas Huth July 18, 2016, 1:19 p.m. UTC
After already fixing two issues with the huge page detection mechanism
(see commit 159d2e39a860 and 86b50f2e1bef), Greg Kurz noticed another
case that caused the guest to crash where QEMU announces huge pages
though they should not be available for the guest:

qemu-system-ppc64 -enable-kvm ... -mem-path /dev/hugepages \
 -m 1G,slots=4,maxmem=32G
 -object memory-backend-ram,policy=default,size=1G,id=mem-mem1 \
 -device pc-dimm,id=dimm-mem1,memdev=mem-mem1 -smp 2 \
 -numa node,nodeid=0 -numa node,nodeid=1

That means if there is a global mem-path option, we still have
to look at the memory-backend objects that have been specified
additionally and return their minimum page size if that value
is smaller than the page size of the main memory.

Reported-by: Greg Kurz <groug@kaod.org>
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
 target-ppc/kvm.c | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

Comments

Greg Kurz July 18, 2016, 3:18 p.m. UTC | #1
On Mon, 18 Jul 2016 15:19:04 +0200
Thomas Huth <thuth@redhat.com> wrote:

> After already fixing two issues with the huge page detection mechanism
> (see commit 159d2e39a860 and 86b50f2e1bef), Greg Kurz noticed another
> case that caused the guest to crash where QEMU announces huge pages
> though they should not be available for the guest:
> 
> qemu-system-ppc64 -enable-kvm ... -mem-path /dev/hugepages \
>  -m 1G,slots=4,maxmem=32G
>  -object memory-backend-ram,policy=default,size=1G,id=mem-mem1 \
>  -device pc-dimm,id=dimm-mem1,memdev=mem-mem1 -smp 2 \
>  -numa node,nodeid=0 -numa node,nodeid=1
> 
> That means if there is a global mem-path option, we still have
> to look at the memory-backend objects that have been specified
> additionally and return their minimum page size if that value
> is smaller than the page size of the main memory.
> 
> Reported-by: Greg Kurz <groug@kaod.org>
> Signed-off-by: Thomas Huth <thuth@redhat.com>
> ---

Just one remark, see below, but apart from that:

Reviewed-by: Greg Kurz <groug@kaod.org>
Tested-by: Greg Kurz <groug@kaod.org>

>  target-ppc/kvm.c | 27 ++++++++++++++-------------
>  1 file changed, 14 insertions(+), 13 deletions(-)
> 
> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
> index 7a8f555..97ab450 100644
> --- a/target-ppc/kvm.c
> +++ b/target-ppc/kvm.c
> @@ -366,10 +366,13 @@ static int find_max_supported_pagesize(Object *obj, void *opaque)
>  static long getrampagesize(void)
>  {
>      long hpsize = LONG_MAX;
> +    long mainrampagesize;
>      Object *memdev_root;
>  
>      if (mem_path) {
> -        return gethugepagesize(mem_path);
> +        mainrampagesize = gethugepagesize(mem_path);
> +    } else {
> +        mainrampagesize = getpagesize();
>      }
>  
>      /* it's possible we have memory-backend objects with
> @@ -383,28 +386,26 @@ static long getrampagesize(void)
>       * backend isn't backed by hugepages.
>       */
>      memdev_root = object_resolve_path("/objects", NULL);
> -    if (!memdev_root) {
> -        return getpagesize();
> +    if (memdev_root) {
> +        object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
>      }
> -
> -    object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
> -
> -    if (hpsize == LONG_MAX || hpsize == getpagesize()) {
> -        return getpagesize();
> +    if (hpsize == LONG_MAX) {
> +        /* No additional memory regions found ==> Report main RAM page size */
> +        return mainrampagesize;
>      }
>  
>      /* If NUMA is disabled or the NUMA nodes are not backed with a
> -     * memory-backend, then there is at least one node using "normal"
> -     * RAM. And since normal RAM has not been configured with "-mem-path"
> -     * (what we've checked earlier here already), we can not use huge pages!
> +     * memory-backend, then there is at least one node using "normal" RAM,
> +     * so if its page size is smaller we have got to report that size instead.
>       */
> -    if (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL) {
> +    if (hpsize > mainrampagesize &&
> +        (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
>          static bool warned;
>          if (!warned) {
>              error_report("Huge page support disabled (n/a for main memory).");

Maybe update the error message since we have another condition ?

Something like:

"Huge page support disabled (at least one numa uses standard page size)"

>              warned = true;
>          }
> -        return getpagesize();
> +        return mainrampagesize;
>      }
>  
>      return hpsize;
David Gibson July 19, 2016, 3:34 a.m. UTC | #2
On Mon, Jul 18, 2016 at 03:19:04PM +0200, Thomas Huth wrote:
> After already fixing two issues with the huge page detection mechanism
> (see commit 159d2e39a860 and 86b50f2e1bef), Greg Kurz noticed another
> case that caused the guest to crash where QEMU announces huge pages
> though they should not be available for the guest:
> 
> qemu-system-ppc64 -enable-kvm ... -mem-path /dev/hugepages \
>  -m 1G,slots=4,maxmem=32G
>  -object memory-backend-ram,policy=default,size=1G,id=mem-mem1 \
>  -device pc-dimm,id=dimm-mem1,memdev=mem-mem1 -smp 2 \
>  -numa node,nodeid=0 -numa node,nodeid=1
> 
> That means if there is a global mem-path option, we still have
> to look at the memory-backend objects that have been specified
> additionally and return their minimum page size if that value
> is smaller than the page size of the main memory.
> 
> Reported-by: Greg Kurz <groug@kaod.org>
> Signed-off-by: Thomas Huth <thuth@redhat.com>

Applied to ppc-for-2.7, thanks.

> ---
>  target-ppc/kvm.c | 27 ++++++++++++++-------------
>  1 file changed, 14 insertions(+), 13 deletions(-)
> 
> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
> index 7a8f555..97ab450 100644
> --- a/target-ppc/kvm.c
> +++ b/target-ppc/kvm.c
> @@ -366,10 +366,13 @@ static int find_max_supported_pagesize(Object *obj, void *opaque)
>  static long getrampagesize(void)
>  {
>      long hpsize = LONG_MAX;
> +    long mainrampagesize;
>      Object *memdev_root;
>  
>      if (mem_path) {
> -        return gethugepagesize(mem_path);
> +        mainrampagesize = gethugepagesize(mem_path);
> +    } else {
> +        mainrampagesize = getpagesize();
>      }
>  
>      /* it's possible we have memory-backend objects with
> @@ -383,28 +386,26 @@ static long getrampagesize(void)
>       * backend isn't backed by hugepages.
>       */
>      memdev_root = object_resolve_path("/objects", NULL);
> -    if (!memdev_root) {
> -        return getpagesize();
> +    if (memdev_root) {
> +        object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
>      }
> -
> -    object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
> -
> -    if (hpsize == LONG_MAX || hpsize == getpagesize()) {
> -        return getpagesize();
> +    if (hpsize == LONG_MAX) {
> +        /* No additional memory regions found ==> Report main RAM page size */
> +        return mainrampagesize;
>      }
>  
>      /* If NUMA is disabled or the NUMA nodes are not backed with a
> -     * memory-backend, then there is at least one node using "normal"
> -     * RAM. And since normal RAM has not been configured with "-mem-path"
> -     * (what we've checked earlier here already), we can not use huge pages!
> +     * memory-backend, then there is at least one node using "normal" RAM,
> +     * so if its page size is smaller we have got to report that size instead.
>       */
> -    if (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL) {
> +    if (hpsize > mainrampagesize &&
> +        (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
>          static bool warned;
>          if (!warned) {
>              error_report("Huge page support disabled (n/a for main memory).");
>              warned = true;
>          }
> -        return getpagesize();
> +        return mainrampagesize;
>      }
>  
>      return hpsize;
Thomas Huth July 19, 2016, 6:23 a.m. UTC | #3
On 18.07.2016 17:18, Greg Kurz wrote:
> On Mon, 18 Jul 2016 15:19:04 +0200
> Thomas Huth <thuth@redhat.com> wrote:
> 
>> After already fixing two issues with the huge page detection mechanism
>> (see commit 159d2e39a860 and 86b50f2e1bef), Greg Kurz noticed another
>> case that caused the guest to crash where QEMU announces huge pages
>> though they should not be available for the guest:
>>
>> qemu-system-ppc64 -enable-kvm ... -mem-path /dev/hugepages \
>>  -m 1G,slots=4,maxmem=32G
>>  -object memory-backend-ram,policy=default,size=1G,id=mem-mem1 \
>>  -device pc-dimm,id=dimm-mem1,memdev=mem-mem1 -smp 2 \
>>  -numa node,nodeid=0 -numa node,nodeid=1
>>
>> That means if there is a global mem-path option, we still have
>> to look at the memory-backend objects that have been specified
>> additionally and return their minimum page size if that value
>> is smaller than the page size of the main memory.
>>
>> Reported-by: Greg Kurz <groug@kaod.org>
>> Signed-off-by: Thomas Huth <thuth@redhat.com>
>> ---
> 
> Just one remark, see below, but apart from that:
> 
> Reviewed-by: Greg Kurz <groug@kaod.org>
> Tested-by: Greg Kurz <groug@kaod.org>
> 
>>  target-ppc/kvm.c | 27 ++++++++++++++-------------
>>  1 file changed, 14 insertions(+), 13 deletions(-)
>>
>> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
>> index 7a8f555..97ab450 100644
>> --- a/target-ppc/kvm.c
>> +++ b/target-ppc/kvm.c
>> @@ -366,10 +366,13 @@ static int find_max_supported_pagesize(Object *obj, void *opaque)
>>  static long getrampagesize(void)
>>  {
>>      long hpsize = LONG_MAX;
>> +    long mainrampagesize;
>>      Object *memdev_root;
>>  
>>      if (mem_path) {
>> -        return gethugepagesize(mem_path);
>> +        mainrampagesize = gethugepagesize(mem_path);
>> +    } else {
>> +        mainrampagesize = getpagesize();
>>      }
>>  
>>      /* it's possible we have memory-backend objects with
>> @@ -383,28 +386,26 @@ static long getrampagesize(void)
>>       * backend isn't backed by hugepages.
>>       */
>>      memdev_root = object_resolve_path("/objects", NULL);
>> -    if (!memdev_root) {
>> -        return getpagesize();
>> +    if (memdev_root) {
>> +        object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
>>      }
>> -
>> -    object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
>> -
>> -    if (hpsize == LONG_MAX || hpsize == getpagesize()) {
>> -        return getpagesize();
>> +    if (hpsize == LONG_MAX) {
>> +        /* No additional memory regions found ==> Report main RAM page size */
>> +        return mainrampagesize;
>>      }
>>  
>>      /* If NUMA is disabled or the NUMA nodes are not backed with a
>> -     * memory-backend, then there is at least one node using "normal"
>> -     * RAM. And since normal RAM has not been configured with "-mem-path"
>> -     * (what we've checked earlier here already), we can not use huge pages!
>> +     * memory-backend, then there is at least one node using "normal" RAM,
>> +     * so if its page size is smaller we have got to report that size instead.
>>       */
>> -    if (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL) {
>> +    if (hpsize > mainrampagesize &&
>> +        (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
>>          static bool warned;
>>          if (!warned) {
>>              error_report("Huge page support disabled (n/a for main memory).");
> 
> Maybe update the error message since we have another condition ?
> 
> Something like:
> 
> "Huge page support disabled (at least one numa uses standard page size)"

That sounds also a little bit confusing since the error message could
occur when there is no numa configured at all. I think refering to "main
memory" is better here so that the users have a chance to know that they
might need to specify the global "-mem-path" parameter here, too.

 Thomas
Greg Kurz July 19, 2016, 6:29 a.m. UTC | #4
On Tue, 19 Jul 2016 08:23:46 +0200
Thomas Huth <thuth@redhat.com> wrote:

> On 18.07.2016 17:18, Greg Kurz wrote:
> > On Mon, 18 Jul 2016 15:19:04 +0200
> > Thomas Huth <thuth@redhat.com> wrote:
> >   
> >> After already fixing two issues with the huge page detection mechanism
> >> (see commit 159d2e39a860 and 86b50f2e1bef), Greg Kurz noticed another
> >> case that caused the guest to crash where QEMU announces huge pages
> >> though they should not be available for the guest:
> >>
> >> qemu-system-ppc64 -enable-kvm ... -mem-path /dev/hugepages \
> >>  -m 1G,slots=4,maxmem=32G
> >>  -object memory-backend-ram,policy=default,size=1G,id=mem-mem1 \
> >>  -device pc-dimm,id=dimm-mem1,memdev=mem-mem1 -smp 2 \
> >>  -numa node,nodeid=0 -numa node,nodeid=1
> >>
> >> That means if there is a global mem-path option, we still have
> >> to look at the memory-backend objects that have been specified
> >> additionally and return their minimum page size if that value
> >> is smaller than the page size of the main memory.
> >>
> >> Reported-by: Greg Kurz <groug@kaod.org>
> >> Signed-off-by: Thomas Huth <thuth@redhat.com>
> >> ---  
> > 
> > Just one remark, see below, but apart from that:
> > 
> > Reviewed-by: Greg Kurz <groug@kaod.org>
> > Tested-by: Greg Kurz <groug@kaod.org>
> >   
> >>  target-ppc/kvm.c | 27 ++++++++++++++-------------
> >>  1 file changed, 14 insertions(+), 13 deletions(-)
> >>
> >> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
> >> index 7a8f555..97ab450 100644
> >> --- a/target-ppc/kvm.c
> >> +++ b/target-ppc/kvm.c
> >> @@ -366,10 +366,13 @@ static int find_max_supported_pagesize(Object *obj, void *opaque)
> >>  static long getrampagesize(void)
> >>  {
> >>      long hpsize = LONG_MAX;
> >> +    long mainrampagesize;
> >>      Object *memdev_root;
> >>  
> >>      if (mem_path) {
> >> -        return gethugepagesize(mem_path);
> >> +        mainrampagesize = gethugepagesize(mem_path);
> >> +    } else {
> >> +        mainrampagesize = getpagesize();
> >>      }
> >>  
> >>      /* it's possible we have memory-backend objects with
> >> @@ -383,28 +386,26 @@ static long getrampagesize(void)
> >>       * backend isn't backed by hugepages.
> >>       */
> >>      memdev_root = object_resolve_path("/objects", NULL);
> >> -    if (!memdev_root) {
> >> -        return getpagesize();
> >> +    if (memdev_root) {
> >> +        object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
> >>      }
> >> -
> >> -    object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
> >> -
> >> -    if (hpsize == LONG_MAX || hpsize == getpagesize()) {
> >> -        return getpagesize();
> >> +    if (hpsize == LONG_MAX) {
> >> +        /* No additional memory regions found ==> Report main RAM page size */
> >> +        return mainrampagesize;
> >>      }
> >>  
> >>      /* If NUMA is disabled or the NUMA nodes are not backed with a
> >> -     * memory-backend, then there is at least one node using "normal"
> >> -     * RAM. And since normal RAM has not been configured with "-mem-path"
> >> -     * (what we've checked earlier here already), we can not use huge pages!
> >> +     * memory-backend, then there is at least one node using "normal" RAM,
> >> +     * so if its page size is smaller we have got to report that size instead.
> >>       */
> >> -    if (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL) {
> >> +    if (hpsize > mainrampagesize &&
> >> +        (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
> >>          static bool warned;
> >>          if (!warned) {
> >>              error_report("Huge page support disabled (n/a for main memory).");  
> > 
> > Maybe update the error message since we have another condition ?
> > 
> > Something like:
> > 
> > "Huge page support disabled (at least one numa uses standard page size)"  
> 
> That sounds also a little bit confusing since the error message could
> occur when there is no numa configured at all. I think refering to "main
> memory" is better here so that the users have a chance to know that they
> might need to specify the global "-mem-path" parameter here, too.
> 
>  Thomas
> 

Fair enough. And anyway, David has already applied the patch.

Cheers.

--
Greg
diff mbox

Patch

diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 7a8f555..97ab450 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -366,10 +366,13 @@  static int find_max_supported_pagesize(Object *obj, void *opaque)
 static long getrampagesize(void)
 {
     long hpsize = LONG_MAX;
+    long mainrampagesize;
     Object *memdev_root;
 
     if (mem_path) {
-        return gethugepagesize(mem_path);
+        mainrampagesize = gethugepagesize(mem_path);
+    } else {
+        mainrampagesize = getpagesize();
     }
 
     /* it's possible we have memory-backend objects with
@@ -383,28 +386,26 @@  static long getrampagesize(void)
      * backend isn't backed by hugepages.
      */
     memdev_root = object_resolve_path("/objects", NULL);
-    if (!memdev_root) {
-        return getpagesize();
+    if (memdev_root) {
+        object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
     }
-
-    object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
-
-    if (hpsize == LONG_MAX || hpsize == getpagesize()) {
-        return getpagesize();
+    if (hpsize == LONG_MAX) {
+        /* No additional memory regions found ==> Report main RAM page size */
+        return mainrampagesize;
     }
 
     /* If NUMA is disabled or the NUMA nodes are not backed with a
-     * memory-backend, then there is at least one node using "normal"
-     * RAM. And since normal RAM has not been configured with "-mem-path"
-     * (what we've checked earlier here already), we can not use huge pages!
+     * memory-backend, then there is at least one node using "normal" RAM,
+     * so if its page size is smaller we have got to report that size instead.
      */
-    if (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL) {
+    if (hpsize > mainrampagesize &&
+        (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
         static bool warned;
         if (!warned) {
             error_report("Huge page support disabled (n/a for main memory).");
             warned = true;
         }
-        return getpagesize();
+        return mainrampagesize;
     }
 
     return hpsize;