diff mbox

extend limit of physical sections number

Message ID 1380300560-21086-1-git-send-email-akong@redhat.com
State New
Headers show

Commit Message

Amos Kong Sept. 27, 2013, 4:49 p.m. UTC
# qemu -drive file=/disk0,if=none,id=v0,format=qcow2 \
 -device virtio-blk-pci,drive=v0,id=v00,multifunction=on,addr=0x04.0
 ....

Launching guest with more than 32 virtio-blk disks,
qemu will crash, because there are too many BARs.

This patch brings the limit of non-tcg up by a factor
of 8 (32767 / 4096), i.e. 32*8 = 256.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Amos Kong <akong@redhat.com>
---
 exec.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

Comments

Paolo Bonzini Sept. 27, 2013, 4:52 p.m. UTC | #1
Il 27/09/2013 18:49, Amos Kong ha scritto:
>  # qemu -drive file=/disk0,if=none,id=v0,format=qcow2 \
>  -device virtio-blk-pci,drive=v0,id=v00,multifunction=on,addr=0x04.0
>  ....
> 
> Launching guest with more than 32 virtio-blk disks,
> qemu will crash, because there are too many BARs.
> 
> This patch brings the limit of non-tcg up by a factor
> of 8 (32767 / 4096), i.e. 32*8 = 256.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Amos Kong <akong@redhat.com>
> ---
>  exec.c | 17 ++++++++++++-----
>  1 file changed, 12 insertions(+), 5 deletions(-)
> 
> diff --git a/exec.c b/exec.c
> index 5aef833..f639c01 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -763,11 +763,18 @@ void phys_mem_set_alloc(void *(*alloc)(ram_addr_t))
>  
>  static uint16_t phys_section_add(MemoryRegionSection *section)
>  {
> -    /* The physical section number is ORed with a page-aligned
> -     * pointer to produce the iotlb entries.  Thus it should
> -     * never overflow into the page-aligned value.
> -     */
> -    assert(next_map.sections_nb < TARGET_PAGE_SIZE);
> +    if (tcg_enabled()) {
> +        /* The physical section number is ORed with a page-aligned
> +         * pointer to produce the iotlb entries.  Thus it should
> +         * never overflow into the page-aligned value.
> +         */
> +        assert(next_map.sections_nb < TARGET_PAGE_SIZE);
> +    } else {
> +        /* For KVM or Xen we can use the full range of the ptr field
> +         * in PhysPageEntry.
> +         */
> +        assert(next_map.sections_nb < SHRT_MAX);
> +    }
>  
>      if (next_map.sections_nb == next_map.sections_nb_alloc) {
>          next_map.sections_nb_alloc = MAX(next_map.sections_nb_alloc * 2,
> 

Thanks for testing this patch Amos!

Paolo
Amos Kong Nov. 5, 2013, 12:21 a.m. UTC | #2
On Fri, Sep 27, 2013 at 06:52:50PM +0200, Paolo Bonzini wrote:
> Il 27/09/2013 18:49, Amos Kong ha scritto:
> >  # qemu -drive file=/disk0,if=none,id=v0,format=qcow2 \
> >  -device virtio-blk-pci,drive=v0,id=v00,multifunction=on,addr=0x04.0
> >  ....
> > 
> > Launching guest with more than 32 virtio-blk disks,
> > qemu will crash, because there are too many BARs.
> > 
> > This patch brings the limit of non-tcg up by a factor
> > of 8 (32767 / 4096), i.e. 32*8 = 256.
> > 
> > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> > Signed-off-by: Amos Kong <akong@redhat.com>

Hi Anthony, others


Can you help to review this patch?


Thanks, Amos

> > ---
> >  exec.c | 17 ++++++++++++-----
> >  1 file changed, 12 insertions(+), 5 deletions(-)
> > 
> > diff --git a/exec.c b/exec.c
> > index 5aef833..f639c01 100644
> > --- a/exec.c
> > +++ b/exec.c
> > @@ -763,11 +763,18 @@ void phys_mem_set_alloc(void *(*alloc)(ram_addr_t))
> >  
> >  static uint16_t phys_section_add(MemoryRegionSection *section)
> >  {
> > -    /* The physical section number is ORed with a page-aligned
> > -     * pointer to produce the iotlb entries.  Thus it should
> > -     * never overflow into the page-aligned value.
> > -     */
> > -    assert(next_map.sections_nb < TARGET_PAGE_SIZE);
> > +    if (tcg_enabled()) {
> > +        /* The physical section number is ORed with a page-aligned
> > +         * pointer to produce the iotlb entries.  Thus it should
> > +         * never overflow into the page-aligned value.
> > +         */
> > +        assert(next_map.sections_nb < TARGET_PAGE_SIZE);
> > +    } else {
> > +        /* For KVM or Xen we can use the full range of the ptr field
> > +         * in PhysPageEntry.
> > +         */
> > +        assert(next_map.sections_nb < SHRT_MAX);
> > +    }
> >  
> >      if (next_map.sections_nb == next_map.sections_nb_alloc) {
> >          next_map.sections_nb_alloc = MAX(next_map.sections_nb_alloc * 2,
> > 
> 
> Thanks for testing this patch Amos!
> 
> Paolo
Peter Maydell Nov. 5, 2013, 12:36 a.m. UTC | #3
On 27 September 2013 17:49, Amos Kong <akong@redhat.com> wrote:
>  # qemu -drive file=/disk0,if=none,id=v0,format=qcow2 \
>  -device virtio-blk-pci,drive=v0,id=v00,multifunction=on,addr=0x04.0
>  ....
>
> Launching guest with more than 32 virtio-blk disks,
> qemu will crash, because there are too many BARs.
>
> This patch brings the limit of non-tcg up by a factor
> of 8 (32767 / 4096), i.e. 32*8 = 256.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Amos Kong <akong@redhat.com>
> ---
>  exec.c | 17 ++++++++++++-----
>  1 file changed, 12 insertions(+), 5 deletions(-)
>
> diff --git a/exec.c b/exec.c
> index 5aef833..f639c01 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -763,11 +763,18 @@ void phys_mem_set_alloc(void *(*alloc)(ram_addr_t))
>
>  static uint16_t phys_section_add(MemoryRegionSection *section)
>  {
> -    /* The physical section number is ORed with a page-aligned
> -     * pointer to produce the iotlb entries.  Thus it should
> -     * never overflow into the page-aligned value.
> -     */
> -    assert(next_map.sections_nb < TARGET_PAGE_SIZE);
> +    if (tcg_enabled()) {
> +        /* The physical section number is ORed with a page-aligned
> +         * pointer to produce the iotlb entries.  Thus it should
> +         * never overflow into the page-aligned value.
> +         */
> +        assert(next_map.sections_nb < TARGET_PAGE_SIZE);
> +    } else {
> +        /* For KVM or Xen we can use the full range of the ptr field
> +         * in PhysPageEntry.
> +         */
> +        assert(next_map.sections_nb < SHRT_MAX);
> +    }

This looks really weird. Why should the memory subsystem
care whether we're using TCG or KVM or Xen?

-- PMM
Paolo Bonzini Nov. 5, 2013, 9 a.m. UTC | #4
Il 05/11/2013 01:36, Peter Maydell ha scritto:
> On 27 September 2013 17:49, Amos Kong <akong@redhat.com> wrote:
>>  # qemu -drive file=/disk0,if=none,id=v0,format=qcow2 \
>>  -device virtio-blk-pci,drive=v0,id=v00,multifunction=on,addr=0x04.0
>>  ....
>>
>> Launching guest with more than 32 virtio-blk disks,
>> qemu will crash, because there are too many BARs.
>>
>> This patch brings the limit of non-tcg up by a factor
>> of 8 (32767 / 4096), i.e. 32*8 = 256.
>>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> Signed-off-by: Amos Kong <akong@redhat.com>
>> ---
>>  exec.c | 17 ++++++++++++-----
>>  1 file changed, 12 insertions(+), 5 deletions(-)
>>
>> diff --git a/exec.c b/exec.c
>> index 5aef833..f639c01 100644
>> --- a/exec.c
>> +++ b/exec.c
>> @@ -763,11 +763,18 @@ void phys_mem_set_alloc(void *(*alloc)(ram_addr_t))
>>
>>  static uint16_t phys_section_add(MemoryRegionSection *section)
>>  {
>> -    /* The physical section number is ORed with a page-aligned
>> -     * pointer to produce the iotlb entries.  Thus it should
>> -     * never overflow into the page-aligned value.
>> -     */
>> -    assert(next_map.sections_nb < TARGET_PAGE_SIZE);
>> +    if (tcg_enabled()) {
>> +        /* The physical section number is ORed with a page-aligned
>> +         * pointer to produce the iotlb entries.  Thus it should
>> +         * never overflow into the page-aligned value.
>> +         */
>> +        assert(next_map.sections_nb < TARGET_PAGE_SIZE);
>> +    } else {
>> +        /* For KVM or Xen we can use the full range of the ptr field
>> +         * in PhysPageEntry.
>> +         */
>> +        assert(next_map.sections_nb < SHRT_MAX);
>> +    }
> 
> This looks really weird. Why should the memory subsystem
> care whether we're using TCG or KVM or Xen?

Because only TCG stores the section number in the low bits of the iotlb
entry.  This is exactly what is explained in the comments.

Paolo
Peter Maydell Nov. 5, 2013, 12:23 p.m. UTC | #5
On 5 November 2013 09:00, Paolo Bonzini <pbonzini@redhat.com> wrote:
> Il 05/11/2013 01:36, Peter Maydell ha scritto:
>> On 27 September 2013 17:49, Amos Kong <akong@redhat.com> wrote:
>>>  # qemu -drive file=/disk0,if=none,id=v0,format=qcow2 \
>>>  -device virtio-blk-pci,drive=v0,id=v00,multifunction=on,addr=0x04.0
>>>  ....
>>>
>>> Launching guest with more than 32 virtio-blk disks,
>>> qemu will crash, because there are too many BARs.
>>>
>>> This patch brings the limit of non-tcg up by a factor
>>> of 8 (32767 / 4096), i.e. 32*8 = 256.

>> This looks really weird. Why should the memory subsystem
>> care whether we're using TCG or KVM or Xen?
>
> Because only TCG stores the section number in the low bits of the iotlb
> entry.  This is exactly what is explained in the comments.

So presumably we still crash if there are more than
32 virtio-blk disks on TCG (and indeed if more than 256
on KVM)? That doesn't seem very satisfactory...

-- PMM
Paolo Bonzini Nov. 5, 2013, 12:27 p.m. UTC | #6
Il 05/11/2013 13:23, Peter Maydell ha scritto:
>>> >> This looks really weird. Why should the memory subsystem
>>> >> care whether we're using TCG or KVM or Xen?
>> >
>> > Because only TCG stores the section number in the low bits of the iotlb
>> > entry.  This is exactly what is explained in the comments.
> So presumably we still crash if there are more than
> 32 virtio-blk disks on TCG (and indeed if more than 256
> on KVM)? That doesn't seem very satisfactory...

It isn't, do you have any idea on how to make the threshold equal for
TCG and KVM?

I guess the code could be made clearer like this:

    assert (... < SHRT_MAX);
    if (tcg_enabled()) {
        /* TCG has a stricter limit due to iotlb etc. etc. */
        assert (... < TARGET_PAGE_SIZE);
    }

but that's pretty much it...

Paolo
Amos Kong Nov. 19, 2013, 11:11 a.m. UTC | #7
On Sat, Sep 28, 2013 at 12:49:20AM +0800, Amos Kong wrote:
>  # qemu -drive file=/disk0,if=none,id=v0,format=qcow2 \
>  -device virtio-blk-pci,drive=v0,id=v00,multifunction=on,addr=0x04.0
>  ....
> 
> Launching guest with more than 32 virtio-blk disks,
> qemu will crash, because there are too many BARs.
> 
> This patch brings the limit of non-tcg up by a factor
> of 8 (32767 / 4096), i.e. 32*8 = 256.

I re-tested with latest guest kernel (3.10.0-rc5),
crash still occurred after applied this patch.


1. phys_section_add: assert(next_map.sections_nb < TARGET_PAGE_SIZE);
   crash

2. phys_section_add: assert(next_map.sections_nb < SHRT_MAX);
   crash

3. without this assert of next_map.sections_nb in phys_section_add
   crash occurred at register_subpage():
   assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);

 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Amos Kong <akong@redhat.com>
> ---
>  exec.c | 17 ++++++++++++-----
>  1 file changed, 12 insertions(+), 5 deletions(-)
> 
> diff --git a/exec.c b/exec.c
> index 5aef833..f639c01 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -763,11 +763,18 @@ void phys_mem_set_alloc(void *(*alloc)(ram_addr_t))
>  
>  static uint16_t phys_section_add(MemoryRegionSection *section)
>  {
> -    /* The physical section number is ORed with a page-aligned
> -     * pointer to produce the iotlb entries.  Thus it should
> -     * never overflow into the page-aligned value.
> -     */
> -    assert(next_map.sections_nb < TARGET_PAGE_SIZE);
> +    if (tcg_enabled()) {
> +        /* The physical section number is ORed with a page-aligned
> +         * pointer to produce the iotlb entries.  Thus it should
> +         * never overflow into the page-aligned value.
> +         */
> +        assert(next_map.sections_nb < TARGET_PAGE_SIZE);
> +    } else {
> +        /* For KVM or Xen we can use the full range of the ptr field
> +         * in PhysPageEntry.
> +         */
> +        assert(next_map.sections_nb < SHRT_MAX);
> +    }
>  
>      if (next_map.sections_nb == next_map.sections_nb_alloc) {
>          next_map.sections_nb_alloc = MAX(next_map.sections_nb_alloc * 2,
> -- 
> 1.8.3.1
>
diff mbox

Patch

diff --git a/exec.c b/exec.c
index 5aef833..f639c01 100644
--- a/exec.c
+++ b/exec.c
@@ -763,11 +763,18 @@  void phys_mem_set_alloc(void *(*alloc)(ram_addr_t))
 
 static uint16_t phys_section_add(MemoryRegionSection *section)
 {
-    /* The physical section number is ORed with a page-aligned
-     * pointer to produce the iotlb entries.  Thus it should
-     * never overflow into the page-aligned value.
-     */
-    assert(next_map.sections_nb < TARGET_PAGE_SIZE);
+    if (tcg_enabled()) {
+        /* The physical section number is ORed with a page-aligned
+         * pointer to produce the iotlb entries.  Thus it should
+         * never overflow into the page-aligned value.
+         */
+        assert(next_map.sections_nb < TARGET_PAGE_SIZE);
+    } else {
+        /* For KVM or Xen we can use the full range of the ptr field
+         * in PhysPageEntry.
+         */
+        assert(next_map.sections_nb < SHRT_MAX);
+    }
 
     if (next_map.sections_nb == next_map.sections_nb_alloc) {
         next_map.sections_nb_alloc = MAX(next_map.sections_nb_alloc * 2,