diff mbox

memory: info mtree check mr range overflow

Message ID 1489496187-624-1-git-send-email-peterx@redhat.com
State New
Headers show

Commit Message

Peter Xu March 14, 2017, 12:56 p.m. UTC
The address of memory regions might overflow when something wrong
happened, like reported in:

https://lists.gnu.org/archive/html/qemu-devel/2017-03/msg02043.html

For easier debugging, let's try to detect it.

Reported-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Signed-off-by: Peter Xu <peterx@redhat.com>
---
 memory.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

Comments

Michael S. Tsirkin March 14, 2017, 2:59 p.m. UTC | #1
On Tue, Mar 14, 2017 at 08:56:27PM +0800, Peter Xu wrote:
> The address of memory regions might overflow when something wrong
> happened, like reported in:
> 
> https://lists.gnu.org/archive/html/qemu-devel/2017-03/msg02043.html
> 
> For easier debugging, let's try to detect it.
> 
> Reported-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
> Signed-off-by: Peter Xu <peterx@redhat.com>

I think I already commented on this.
Please use Int128 math instead of hacks around 64 bit math.


> ---
>  memory.c | 21 ++++++++++++++++-----
>  1 file changed, 16 insertions(+), 5 deletions(-)
> 
> diff --git a/memory.c b/memory.c
> index 284894b..64b0a60 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -2494,6 +2494,7 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f,
>      MemoryRegionListHead submr_print_queue;
>      const MemoryRegion *submr;
>      unsigned int i;
> +    hwaddr cur_start, cur_end;
>  
>      if (!mr) {
>          return;
> @@ -2503,6 +2504,18 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f,
>          mon_printf(f, MTREE_INDENT);
>      }
>  
> +    cur_start = base + mr->addr;
> +    cur_end = cur_start + MR_SIZE(mr->size);
> +
> +    /*
> +     * Try to detect overflow of memory region. This should never
> +     * happen normally. When it happens, we dump something to warn the
> +     * user who is observing this.
> +     */
> +    if (cur_start < base || cur_end < cur_start) {
> +        mon_printf(f, "[DETECTED OVERFLOW!] ");
> +    }
> +
>      if (mr->alias) {
>          MemoryRegionList *ml;
>          bool found = false;
> @@ -2522,8 +2535,7 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f,
>          mon_printf(f, TARGET_FMT_plx "-" TARGET_FMT_plx
>                     " (prio %d, %s): alias %s @%s " TARGET_FMT_plx
>                     "-" TARGET_FMT_plx "%s\n",
> -                   base + mr->addr,
> -                   base + mr->addr + MR_SIZE(mr->size),
> +                   cur_start, cur_end,
>                     mr->priority,
>                     memory_region_type((MemoryRegion *)mr),
>                     memory_region_name(mr),
> @@ -2534,8 +2546,7 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f,
>      } else {
>          mon_printf(f,
>                     TARGET_FMT_plx "-" TARGET_FMT_plx " (prio %d, %s): %s%s\n",
> -                   base + mr->addr,
> -                   base + mr->addr + MR_SIZE(mr->size),
> +                   cur_start, cur_end,
>                     mr->priority,
>                     memory_region_type((MemoryRegion *)mr),
>                     memory_region_name(mr),
> @@ -2562,7 +2573,7 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f,
>      }
>  
>      QTAILQ_FOREACH(ml, &submr_print_queue, queue) {
> -        mtree_print_mr(mon_printf, f, ml->mr, level + 1, base + mr->addr,
> +        mtree_print_mr(mon_printf, f, ml->mr, level + 1, cur_start,
>                         alias_print_queue);
>      }
>  
> -- 
> 2.7.4
Michael S. Tsirkin March 15, 2017, 1:24 a.m. UTC | #2
On Tue, Mar 14, 2017 at 08:56:27PM +0800, Peter Xu wrote:
> The address of memory regions might overflow when something wrong
> happened, like reported in:
> 
> https://lists.gnu.org/archive/html/qemu-devel/2017-03/msg02043.html
> 
> For easier debugging, let's try to detect it.
> 
> Reported-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
> Signed-off-by: Peter Xu <peterx@redhat.com>


After a chat with Paolo, I think the following is a more general fix

- fix info mtree to do 128 bit math and display more than
  16 digits if necessary
- add info about region visibility
  how much info is appropriate is arguable - after all we already have info mtree -f
  we probably should report if region is not visible at all,
  how about partially occluded ones? listing all windows is probably not
  needed - we have the -f flag for that.

> ---
>  memory.c | 21 ++++++++++++++++-----
>  1 file changed, 16 insertions(+), 5 deletions(-)
> 
> diff --git a/memory.c b/memory.c
> index 284894b..64b0a60 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -2494,6 +2494,7 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f,
>      MemoryRegionListHead submr_print_queue;
>      const MemoryRegion *submr;
>      unsigned int i;
> +    hwaddr cur_start, cur_end;
>  
>      if (!mr) {
>          return;
> @@ -2503,6 +2504,18 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f,
>          mon_printf(f, MTREE_INDENT);
>      }
>  
> +    cur_start = base + mr->addr;
> +    cur_end = cur_start + MR_SIZE(mr->size);
> +
> +    /*
> +     * Try to detect overflow of memory region. This should never
> +     * happen normally. When it happens, we dump something to warn the
> +     * user who is observing this.
> +     */
> +    if (cur_start < base || cur_end < cur_start) {
> +        mon_printf(f, "[DETECTED OVERFLOW!] ");
> +    }
> +
>      if (mr->alias) {
>          MemoryRegionList *ml;
>          bool found = false;
> @@ -2522,8 +2535,7 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f,
>          mon_printf(f, TARGET_FMT_plx "-" TARGET_FMT_plx
>                     " (prio %d, %s): alias %s @%s " TARGET_FMT_plx
>                     "-" TARGET_FMT_plx "%s\n",
> -                   base + mr->addr,
> -                   base + mr->addr + MR_SIZE(mr->size),
> +                   cur_start, cur_end,
>                     mr->priority,
>                     memory_region_type((MemoryRegion *)mr),
>                     memory_region_name(mr),
> @@ -2534,8 +2546,7 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f,
>      } else {
>          mon_printf(f,
>                     TARGET_FMT_plx "-" TARGET_FMT_plx " (prio %d, %s): %s%s\n",
> -                   base + mr->addr,
> -                   base + mr->addr + MR_SIZE(mr->size),
> +                   cur_start, cur_end,
>                     mr->priority,
>                     memory_region_type((MemoryRegion *)mr),
>                     memory_region_name(mr),
> @@ -2562,7 +2573,7 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f,
>      }
>  
>      QTAILQ_FOREACH(ml, &submr_print_queue, queue) {
> -        mtree_print_mr(mon_printf, f, ml->mr, level + 1, base + mr->addr,
> +        mtree_print_mr(mon_printf, f, ml->mr, level + 1, cur_start,
>                         alias_print_queue);
>      }
>  
> -- 
> 2.7.4
Peter Xu March 15, 2017, 3:15 a.m. UTC | #3
On Wed, Mar 15, 2017 at 03:24:04AM +0200, Michael S. Tsirkin wrote:
> On Tue, Mar 14, 2017 at 08:56:27PM +0800, Peter Xu wrote:
> > The address of memory regions might overflow when something wrong
> > happened, like reported in:
> > 
> > https://lists.gnu.org/archive/html/qemu-devel/2017-03/msg02043.html
> > 
> > For easier debugging, let's try to detect it.
> > 
> > Reported-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
> > Signed-off-by: Peter Xu <peterx@redhat.com>
> 
> 
> After a chat with Paolo, I think the following is a more general fix
> 
> - fix info mtree to do 128 bit math and display more than
>   16 digits if necessary

Could you help elaborate in what case will we really need that 128 bit
address?

Btw, thanks for pointing out in the other thread that your patch
wasn't printing 128 bits but 64 bits, actually I didn't notice that
before... but even with that, I would still slightly prefer this one
though considering readability and simplicity.

> - add info about region visibility
>   how much info is appropriate is arguable - after all we already have info mtree -f
>   we probably should report if region is not visible at all,
>   how about partially occluded ones? listing all windows is probably not
>   needed - we have the -f flag for that.

For me, "info mtree" and its "-f" form work good enough. So I'll leave
the discussion on this one to people who know better than me...

Thanks,

-- peterx
Michael S. Tsirkin March 15, 2017, 3:30 a.m. UTC | #4
On Wed, Mar 15, 2017 at 11:15:50AM +0800, Peter Xu wrote:
> On Wed, Mar 15, 2017 at 03:24:04AM +0200, Michael S. Tsirkin wrote:
> > On Tue, Mar 14, 2017 at 08:56:27PM +0800, Peter Xu wrote:
> > > The address of memory regions might overflow when something wrong
> > > happened, like reported in:
> > > 
> > > https://lists.gnu.org/archive/html/qemu-devel/2017-03/msg02043.html
> > > 
> > > For easier debugging, let's try to detect it.
> > > 
> > > Reported-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
> > > Signed-off-by: Peter Xu <peterx@redhat.com>
> > 
> > 
> > After a chat with Paolo, I think the following is a more general fix
> > 
> > - fix info mtree to do 128 bit math and display more than
> >   16 digits if necessary
> 
> Could you help elaborate in what case will we really need that 128 bit
> address?

This is how memory API works. It uses 128 bit addresses (in reality
it typically only needs 64 bit addresses but 128 means it can do
math without worrying about it too much).
Thus a region at offset 0xf << 60 in parent with address 0x1 << 60
and size 0x1 << 20 is not "overflowing" it is simply at and address
0x1 << 64 which is outside the range of parent so not visible
in the flat view.
But same can be said for region at offset 0x1 << 60 in same parent
and your patch does nothing to help detect it.

> Btw, thanks for pointing out in the other thread that your patch
> wasn't printing 128 bits but 64 bits, actually I didn't notice that
> before... but even with that, I would still slightly prefer this one
> though considering readability and simplicity.

Right but it's just trying to address the specific problem with
the given device. Which is unlikely to trigger again exactly
in the same way. The general issue is that the child region
address is outside the range of the parent.

> > - add info about region visibility
> >   how much info is appropriate is arguable - after all we already have info mtree -f
> >   we probably should report if region is not visible at all,
> >   how about partially occluded ones? listing all windows is probably not
> >   needed - we have the -f flag for that.
> 
> For me, "info mtree" and its "-f" form work good enough. So I'll leave
> the discussion on this one to people who know better than me...
> 
> Thanks,
> 
> -- peterx
Peter Xu March 15, 2017, 4:04 a.m. UTC | #5
On Wed, Mar 15, 2017 at 05:30:56AM +0200, Michael S. Tsirkin wrote:
> On Wed, Mar 15, 2017 at 11:15:50AM +0800, Peter Xu wrote:
> > On Wed, Mar 15, 2017 at 03:24:04AM +0200, Michael S. Tsirkin wrote:
> > > On Tue, Mar 14, 2017 at 08:56:27PM +0800, Peter Xu wrote:
> > > > The address of memory regions might overflow when something wrong
> > > > happened, like reported in:
> > > > 
> > > > https://lists.gnu.org/archive/html/qemu-devel/2017-03/msg02043.html
> > > > 
> > > > For easier debugging, let's try to detect it.
> > > > 
> > > > Reported-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
> > > > Signed-off-by: Peter Xu <peterx@redhat.com>
> > > 
> > > 
> > > After a chat with Paolo, I think the following is a more general fix
> > > 
> > > - fix info mtree to do 128 bit math and display more than
> > >   16 digits if necessary
> > 
> > Could you help elaborate in what case will we really need that 128 bit
> > address?
> 
> This is how memory API works. It uses 128 bit addresses (in reality
> it typically only needs 64 bit addresses but 128 means it can do
> math without worrying about it too much).

Yes. To be more specific, could I ask why do we need 128 bits here
when doing "info mtree"?

> Thus a region at offset 0xf << 60 in parent with address 0x1 << 60
> and size 0x1 << 20 is not "overflowing" it is simply at and address
> 0x1 << 64 which is outside the range of parent so not visible
> in the flat view.
> But same can be said for region at offset 0x1 << 60 in same parent
> and your patch does nothing to help detect it.

Not sure I fully understand the case mentioned above... I believe for
above example, current patch (either with, or without) will print:

    0x2000000000000000

And even with the patch "memory: use 128 bit in info mtree", it should
print the same. IIUC this is what we want, no? Did I miss anything?

> 
> > Btw, thanks for pointing out in the other thread that your patch
> > wasn't printing 128 bits but 64 bits, actually I didn't notice that
> > before... but even with that, I would still slightly prefer this one
> > though considering readability and simplicity.
> 
> Right but it's just trying to address the specific problem with
> the given device. Which is unlikely to trigger again exactly
> in the same way. The general issue is that the child region
> address is outside the range of the parent.

Hmm... frankly speaking I don't know whether current memory API would
allow this happen. I just see no danger if that happens, as long as we
will make sure those outranged regions will never be used during
rendering.

Anyway, IMHO that's another topic. This patch should be solely solving
the issue that was reported. Thanks,

-- peterx
Michael S. Tsirkin March 15, 2017, 4:23 a.m. UTC | #6
On Wed, Mar 15, 2017 at 12:04:27PM +0800, Peter Xu wrote:
> On Wed, Mar 15, 2017 at 05:30:56AM +0200, Michael S. Tsirkin wrote:
> > On Wed, Mar 15, 2017 at 11:15:50AM +0800, Peter Xu wrote:
> > > On Wed, Mar 15, 2017 at 03:24:04AM +0200, Michael S. Tsirkin wrote:
> > > > On Tue, Mar 14, 2017 at 08:56:27PM +0800, Peter Xu wrote:
> > > > > The address of memory regions might overflow when something wrong
> > > > > happened, like reported in:
> > > > > 
> > > > > https://lists.gnu.org/archive/html/qemu-devel/2017-03/msg02043.html
> > > > > 
> > > > > For easier debugging, let's try to detect it.
> > > > > 
> > > > > Reported-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
> > > > > Signed-off-by: Peter Xu <peterx@redhat.com>
> > > > 
> > > > 
> > > > After a chat with Paolo, I think the following is a more general fix
> > > > 
> > > > - fix info mtree to do 128 bit math and display more than
> > > >   16 digits if necessary
> > > 
> > > Could you help elaborate in what case will we really need that 128 bit
> > > address?
> > 
> > This is how memory API works. It uses 128 bit addresses (in reality
> > it typically only needs 64 bit addresses but 128 means it can do
> > math without worrying about it too much).
> 
> Yes. To be more specific, could I ask why do we need 128 bits here
> when doing "info mtree"?

Because when you add two 64 bit addresses you sometimes get a 67
bit one. info mtree shows some fictitious data: base/end addresses
that region would have had if it was fully visible in a flatview.
67 bit addresses are never visible there, that is true,
but that is not the only kind of address that is not visible
in flatview yet shown by info mtree.

> > Thus a region at offset 0xf << 60 in parent with address 0x1 << 60
> > and size 0x1 << 20 is not "overflowing" it is simply at and address
> > 0x1 << 64 which is outside the range of parent so not visible
> > in the flat view.
> > But same can be said for region at offset 0x1 << 60 in same parent
> > and your patch does nothing to help detect it.
> 
> Not sure I fully understand the case mentioned above... I believe for
> above example, current patch (either with, or without) will print:
> 
>     0x2000000000000000
> 
> And even with the patch "memory: use 128 bit in info mtree", it should
> print the same. IIUC this is what we want, no? Did I miss anything?

What are you trying to achieve though? The issue that started
it all is an openbios bug which did not init 64 bit bars
correctly. As a result the bar was not visible to guest
in the flatview and device did not work.


> > 
> > > Btw, thanks for pointing out in the other thread that your patch
> > > wasn't printing 128 bits but 64 bits, actually I didn't notice that
> > > before... but even with that, I would still slightly prefer this one
> > > though considering readability and simplicity.
> > 
> > Right but it's just trying to address the specific problem with
> > the given device. Which is unlikely to trigger again exactly
> > in the same way. The general issue is that the child region
> > address is outside the range of the parent.
> 
> Hmm... frankly speaking I don't know whether current memory API would
> allow this happen. I just see no danger if that happens, as long as we
> will make sure those outranged regions will never be used during
> rendering.
> 
> Anyway, IMHO that's another topic. This patch should be solely solving
> the issue that was reported. Thanks,
> 
> -- peterx


I think we need to address the root issue which is 64 bit math
which is the wrong thing to do within memory core.
Mark Cave-Ayland March 15, 2017, 1:30 p.m. UTC | #7
On 15/03/17 04:23, Michael S. Tsirkin wrote:

>>>> Could you help elaborate in what case will we really need that 128 bit
>>>> address?
>>>
>>> This is how memory API works. It uses 128 bit addresses (in reality
>>> it typically only needs 64 bit addresses but 128 means it can do
>>> math without worrying about it too much).
>>
>> Yes. To be more specific, could I ask why do we need 128 bits here
>> when doing "info mtree"?
> 
> Because when you add two 64 bit addresses you sometimes get a 67
> bit one. info mtree shows some fictitious data: base/end addresses
> that region would have had if it was fully visible in a flatview.
> 67 bit addresses are never visible there, that is true,
> but that is not the only kind of address that is not visible
> in flatview yet shown by info mtree.

Note that I'm going to send a PR for the OpenBIOS fix hopefully later
today which means the original test case shouldn't occur anymore.

However I will manually test any patches that are submitted to verify
that they work correctly for this particular case.


ATB,

Mark.
diff mbox

Patch

diff --git a/memory.c b/memory.c
index 284894b..64b0a60 100644
--- a/memory.c
+++ b/memory.c
@@ -2494,6 +2494,7 @@  static void mtree_print_mr(fprintf_function mon_printf, void *f,
     MemoryRegionListHead submr_print_queue;
     const MemoryRegion *submr;
     unsigned int i;
+    hwaddr cur_start, cur_end;
 
     if (!mr) {
         return;
@@ -2503,6 +2504,18 @@  static void mtree_print_mr(fprintf_function mon_printf, void *f,
         mon_printf(f, MTREE_INDENT);
     }
 
+    cur_start = base + mr->addr;
+    cur_end = cur_start + MR_SIZE(mr->size);
+
+    /*
+     * Try to detect overflow of memory region. This should never
+     * happen normally. When it happens, we dump something to warn the
+     * user who is observing this.
+     */
+    if (cur_start < base || cur_end < cur_start) {
+        mon_printf(f, "[DETECTED OVERFLOW!] ");
+    }
+
     if (mr->alias) {
         MemoryRegionList *ml;
         bool found = false;
@@ -2522,8 +2535,7 @@  static void mtree_print_mr(fprintf_function mon_printf, void *f,
         mon_printf(f, TARGET_FMT_plx "-" TARGET_FMT_plx
                    " (prio %d, %s): alias %s @%s " TARGET_FMT_plx
                    "-" TARGET_FMT_plx "%s\n",
-                   base + mr->addr,
-                   base + mr->addr + MR_SIZE(mr->size),
+                   cur_start, cur_end,
                    mr->priority,
                    memory_region_type((MemoryRegion *)mr),
                    memory_region_name(mr),
@@ -2534,8 +2546,7 @@  static void mtree_print_mr(fprintf_function mon_printf, void *f,
     } else {
         mon_printf(f,
                    TARGET_FMT_plx "-" TARGET_FMT_plx " (prio %d, %s): %s%s\n",
-                   base + mr->addr,
-                   base + mr->addr + MR_SIZE(mr->size),
+                   cur_start, cur_end,
                    mr->priority,
                    memory_region_type((MemoryRegion *)mr),
                    memory_region_name(mr),
@@ -2562,7 +2573,7 @@  static void mtree_print_mr(fprintf_function mon_printf, void *f,
     }
 
     QTAILQ_FOREACH(ml, &submr_print_queue, queue) {
-        mtree_print_mr(mon_printf, f, ml->mr, level + 1, base + mr->addr,
+        mtree_print_mr(mon_printf, f, ml->mr, level + 1, cur_start,
                        alias_print_queue);
     }