diff mbox

[RFC] pci-assign: Fix memory out of bound when MSI-X table not fit in a single page

Message ID 33183CC9F5247A488A2544077AF19020815DC9F8@SZXEMA503-MBS.china.huawei.com
State New
Headers show

Commit Message

Gonglei (Arei) April 1, 2014, 3:23 p.m. UTC
Hi,

I have a problem about SR-IOV pass-through. 

The PF is Emulex Corporation OneConnect NIC (Lancer)(rev 10),
and the VF pci config is as follow:

LINUX:/sys/bus/pci/devices/0000:04:00.6 # hexdump config
0000000 ffff ffff 0000 0010 0010 0200 0000 0080
0000010 0000 0000 0000 0000 0000 0000 0000 0000
0000020 0000 0000 0000 0000 0000 0000 10df e264
0000030 0000 0000 0054 0000 0000 0000 0000 0000
0000040 0000 0000 0008 0000 0000 0000 0000 0000
0000050 0000 0000 6009 0008 2b41 c002 0000 0000
0000060 7805 018a 0000 0000 0000 0000 0000 0000
0000070 0000 0000 0000 0000 8411 03ff 4000 0000
0000080 3400 0000 9403 0000 0000 0000 0000 0000
0000090 0000 0000 0010 0002 8724 1000 0000 0000
00000a0 dc83 0041 0000 0000 0000 0000 0000 0000
00000b0 0000 0000 0000 0000 001f 0010 0000 0000
00000c0 000e 0000 0000 0000 0000 0000 0000 0000
00000d0 0000 0000 0000 0000 0000 0000 0000 0000

We can see the msix_max is 0x3ff and msix_table_entry is 0x4000 (4 pages). But QEMU 
only mmap MSIX_PAGE_SIZE memory for all pci devices in funciton assigned_dev_register_msix_mmio, 
meanwhile the set the one page memmory to zero, so the rest memory will be random value
(maybe etnry.data is not 0). 

In function assigned_dev_update_msix_mmio maybe occur the issue of entry_nr > 256, 
and the kmod reports the EINVAL error.

My patch fix this issue which alloc memory according to the real size of pci device config.

Any ideas? Thnaks.

Signed-off-by: Gonglei <arei.gonglei@huawei.com>
---
 hw/i386/kvm/pci-assign.c |   24 +++++++++++++++++++-----
 1 files changed, 19 insertions(+), 5 deletions(-)

Comments

Michael S. Tsirkin April 1, 2014, 3:45 p.m. UTC | #1
On Tue, Apr 01, 2014 at 03:23:36PM +0000, Gonglei (Arei) wrote:
> Hi,
> 
> I have a problem about SR-IOV pass-through. 
> 
> The PF is Emulex Corporation OneConnect NIC (Lancer)(rev 10),
> and the VF pci config is as follow:
> 
> LINUX:/sys/bus/pci/devices/0000:04:00.6 # hexdump config
> 0000000 ffff ffff 0000 0010 0010 0200 0000 0080
> 0000010 0000 0000 0000 0000 0000 0000 0000 0000
> 0000020 0000 0000 0000 0000 0000 0000 10df e264
> 0000030 0000 0000 0054 0000 0000 0000 0000 0000
> 0000040 0000 0000 0008 0000 0000 0000 0000 0000
> 0000050 0000 0000 6009 0008 2b41 c002 0000 0000
> 0000060 7805 018a 0000 0000 0000 0000 0000 0000
> 0000070 0000 0000 0000 0000 8411 03ff 4000 0000
> 0000080 3400 0000 9403 0000 0000 0000 0000 0000
> 0000090 0000 0000 0010 0002 8724 1000 0000 0000
> 00000a0 dc83 0041 0000 0000 0000 0000 0000 0000
> 00000b0 0000 0000 0000 0000 001f 0010 0000 0000
> 00000c0 000e 0000 0000 0000 0000 0000 0000 0000
> 00000d0 0000 0000 0000 0000 0000 0000 0000 0000
> 
> We can see the msix_max is 0x3ff and msix_table_entry is 0x4000 (4 pages). But QEMU 
> only mmap MSIX_PAGE_SIZE memory for all pci devices in funciton assigned_dev_register_msix_mmio, 
> meanwhile the set the one page memmory to zero, so the rest memory will be random value
> (maybe etnry.data is not 0). 
> 
> In function assigned_dev_update_msix_mmio maybe occur the issue of entry_nr > 256, 
> and the kmod reports the EINVAL error.
> 
> My patch fix this issue which alloc memory according to the real size of pci device config.
> 
> Any ideas? Thnaks.
> 
> Signed-off-by: Gonglei <arei.gonglei@huawei.com>
> ---
>  hw/i386/kvm/pci-assign.c |   24 +++++++++++++++++++-----
>  1 files changed, 19 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c
> index a825871..daa191c 100644
> --- a/hw/i386/kvm/pci-assign.c
> +++ b/hw/i386/kvm/pci-assign.c
> @@ -1591,10 +1591,6 @@ static void assigned_dev_msix_reset(AssignedDevice *dev)
>      MSIXTableEntry *entry;
>      int i;
> 
> -    if (!dev->msix_table) {
> -        return;
> -    }
> -
>      memset(dev->msix_table, 0, MSIX_PAGE_SIZE);
> 
>      for (i = 0, entry = dev->msix_table; i < dev->msix_max; i++, entry++) {
> @@ -1604,13 +1600,31 @@ static void assigned_dev_msix_reset(AssignedDevice *dev)
> 
>  static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
>  {
> -    dev->msix_table = mmap(NULL, MSIX_PAGE_SIZE, PROT_READ|PROT_WRITE,
> +    int nr_pages;
> +    int size;
> +    int entry_per_page = MSIX_PAGE_SIZE / sizeof(struct MSIXTableEntry);
> +    
> +    if (dev->msix_max > entry_per_page) {
> +        nr_pages = dev->msix_max / entry_per_page;
> +        if (dev->msix_max % entry_per_page) {
> +            nr_pages += 1;
> +        }
> +    } else {
> +        nr_pages = 1;
> +    }

It's usually not a good idea to special-case corner-cases like this.


> +
> +    size = MSIX_PAGE_SIZE * nr_pages;

Just use ROUND_UP?

> +    dev->msix_table = mmap(NULL, size, PROT_READ|PROT_WRITE,
>                             MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);

Need to fix unmap as well?

>      if (dev->msix_table == MAP_FAILED) {
>          error_report("fail allocate msix_table! %s", strerror(errno));
>          return -EFAULT;
>      }
> +    if (!dev->msix_table) {
> +        return -EFAULT;
> +    }
> 
> +    memset(dev->msix_table, 0, size);
>      assigned_dev_msix_reset(dev);
> 
>      memory_region_init_io(&dev->mmio, OBJECT(dev), &assigned_dev_msix_mmio_ops,
> -- 
> 1.6.0.2
> 
> Best regards,
> -Gonglei
>
Gonglei (Arei) April 2, 2014, 3:12 a.m. UTC | #2
> > Hi,
> >
> > I have a problem about SR-IOV pass-through.
> >
> > The PF is Emulex Corporation OneConnect NIC (Lancer)(rev 10),
> > and the VF pci config is as follow:
> >
> > LINUX:/sys/bus/pci/devices/0000:04:00.6 # hexdump config
> > 0000000 ffff ffff 0000 0010 0010 0200 0000 0080
> > 0000010 0000 0000 0000 0000 0000 0000 0000 0000
> > 0000020 0000 0000 0000 0000 0000 0000 10df e264
> > 0000030 0000 0000 0054 0000 0000 0000 0000 0000
> > 0000040 0000 0000 0008 0000 0000 0000 0000 0000
> > 0000050 0000 0000 6009 0008 2b41 c002 0000 0000
> > 0000060 7805 018a 0000 0000 0000 0000 0000 0000
> > 0000070 0000 0000 0000 0000 8411 03ff 4000 0000
> > 0000080 3400 0000 9403 0000 0000 0000 0000 0000
> > 0000090 0000 0000 0010 0002 8724 1000 0000 0000
> > 00000a0 dc83 0041 0000 0000 0000 0000 0000 0000
> > 00000b0 0000 0000 0000 0000 001f 0010 0000 0000
> > 00000c0 000e 0000 0000 0000 0000 0000 0000 0000
> > 00000d0 0000 0000 0000 0000 0000 0000 0000 0000
> >
> > We can see the msix_max is 0x3ff and msix_table_entry is 0x4000 (4 pages).
> But QEMU
> > only mmap MSIX_PAGE_SIZE memory for all pci devices in funciton
> assigned_dev_register_msix_mmio,
> > meanwhile the set the one page memmory to zero, so the rest memory will
> be random value
> > (maybe etnry.data is not 0).
> >
> > In function assigned_dev_update_msix_mmio maybe occur the issue of
> entry_nr > 256,
> > and the kmod reports the EINVAL error.
> >
> > My patch fix this issue which alloc memory according to the real size of pci
> device config.
> >
> > Any ideas? Thnaks.
> >
> > Signed-off-by: Gonglei <arei.gonglei@huawei.com>
> > ---
> >  hw/i386/kvm/pci-assign.c |   24 +++++++++++++++++++-----
> >  1 files changed, 19 insertions(+), 5 deletions(-)
> >
> > diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c
> > index a825871..daa191c 100644
> > --- a/hw/i386/kvm/pci-assign.c
> > +++ b/hw/i386/kvm/pci-assign.c
> > @@ -1591,10 +1591,6 @@ static void
> assigned_dev_msix_reset(AssignedDevice *dev)
> >      MSIXTableEntry *entry;
> >      int i;
> >
> > -    if (!dev->msix_table) {
> > -        return;
> > -    }
> > -
> >      memset(dev->msix_table, 0, MSIX_PAGE_SIZE);
> >
> >      for (i = 0, entry = dev->msix_table; i < dev->msix_max; i++, entry++) {
> > @@ -1604,13 +1600,31 @@ static void
> assigned_dev_msix_reset(AssignedDevice *dev)
> >
> >  static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
> >  {
> > -    dev->msix_table = mmap(NULL, MSIX_PAGE_SIZE,
> PROT_READ|PROT_WRITE,
> > +    int nr_pages;
> > +    int size;
> > +    int entry_per_page = MSIX_PAGE_SIZE / sizeof(struct MSIXTableEntry);
> > +
> > +    if (dev->msix_max > entry_per_page) {
> > +        nr_pages = dev->msix_max / entry_per_page;
> > +        if (dev->msix_max % entry_per_page) {
> > +            nr_pages += 1;
> > +        }
> > +    } else {
> > +        nr_pages = 1;
> > +    }
> 
> It's usually not a good idea to special-case corner-cases like this.
> 
IMHO, we should assure the memory page-aligned, so I use ROUND_UP according to dev->msix_max.

> 
> > +
> > +    size = MSIX_PAGE_SIZE * nr_pages;
> 
> Just use ROUND_UP?
> 
> > +    dev->msix_table = mmap(NULL, size, PROT_READ|PROT_WRITE,
> >                             MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
> 
> Need to fix unmap as well?
> 
Yep, it should be unmap for new size memory in assigned_dev_unregister_msix_mmio. Thanks, Michael.

BTW, do you think the KVM should upsize the max support MSI-X entry to 2048.
Because the MSI-X supports a maximum table size of 2048 entries, which is descript in 
PCI specification 3.0 version 6.8.3.2: "MSI-X Configuration".

The history patch about downsize the MSI-X entry size to 256:
http://thread.gmane.org/gmane.comp.emulators.kvm.devel/38852/focus=38849

Best regards,
-Gonglei
Alex Williamson April 2, 2014, 3:45 a.m. UTC | #3
On Tue, 2014-04-01 at 15:23 +0000, Gonglei (Arei) wrote:
> Hi,
> 
> I have a problem about SR-IOV pass-through. 
> 
> The PF is Emulex Corporation OneConnect NIC (Lancer)(rev 10),
> and the VF pci config is as follow:
> 
> LINUX:/sys/bus/pci/devices/0000:04:00.6 # hexdump config
> 0000000 ffff ffff 0000 0010 0010 0200 0000 0080
> 0000010 0000 0000 0000 0000 0000 0000 0000 0000
> 0000020 0000 0000 0000 0000 0000 0000 10df e264
> 0000030 0000 0000 0054 0000 0000 0000 0000 0000
> 0000040 0000 0000 0008 0000 0000 0000 0000 0000
> 0000050 0000 0000 6009 0008 2b41 c002 0000 0000
> 0000060 7805 018a 0000 0000 0000 0000 0000 0000
> 0000070 0000 0000 0000 0000 8411 03ff 4000 0000
> 0000080 3400 0000 9403 0000 0000 0000 0000 0000
> 0000090 0000 0000 0010 0002 8724 1000 0000 0000
> 00000a0 dc83 0041 0000 0000 0000 0000 0000 0000
> 00000b0 0000 0000 0000 0000 001f 0010 0000 0000
> 00000c0 000e 0000 0000 0000 0000 0000 0000 0000
> 00000d0 0000 0000 0000 0000 0000 0000 0000 0000
> 
> We can see the msix_max is 0x3ff and msix_table_entry is 0x4000 (4 pages). But QEMU 
> only mmap MSIX_PAGE_SIZE memory for all pci devices in funciton assigned_dev_register_msix_mmio, 
> meanwhile the set the one page memmory to zero, so the rest memory will be random value
> (maybe etnry.data is not 0). 
> 
> In function assigned_dev_update_msix_mmio maybe occur the issue of entry_nr > 256, 
> and the kmod reports the EINVAL error.
> 
> My patch fix this issue which alloc memory according to the real size of pci device config.
> 
> Any ideas? Thnaks.

Isn't this already fixed if you use vfio-pci?  pci-assign is not well
supported anymore.

> Signed-off-by: Gonglei <arei.gonglei@huawei.com>
> ---
>  hw/i386/kvm/pci-assign.c |   24 +++++++++++++++++++-----
>  1 files changed, 19 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c
> index a825871..daa191c 100644
> --- a/hw/i386/kvm/pci-assign.c
> +++ b/hw/i386/kvm/pci-assign.c
> @@ -1591,10 +1591,6 @@ static void assigned_dev_msix_reset(AssignedDevice *dev)
>      MSIXTableEntry *entry;
>      int i;
> 
> -    if (!dev->msix_table) {
> -        return;
> -    }
> -

How would this not result in a segfault if the assigned device doesn't
support msix?

>      memset(dev->msix_table, 0, MSIX_PAGE_SIZE);

This memset may no longer cover the entire table

> 
>      for (i = 0, entry = dev->msix_table; i < dev->msix_max; i++, entry++) {
> @@ -1604,13 +1600,31 @@ static void assigned_dev_msix_reset(AssignedDevice *dev)
> 
>  static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
>  {
> -    dev->msix_table = mmap(NULL, MSIX_PAGE_SIZE, PROT_READ|PROT_WRITE,
> +    int nr_pages;
> +    int size;
> +    int entry_per_page = MSIX_PAGE_SIZE / sizeof(struct MSIXTableEntry);
> +    
> +    if (dev->msix_max > entry_per_page) {
> +        nr_pages = dev->msix_max / entry_per_page;
> +        if (dev->msix_max % entry_per_page) {
> +            nr_pages += 1;
> +        }
> +    } else {
> +        nr_pages = 1;
> +    }
> +
> +    size = MSIX_PAGE_SIZE * nr_pages;

Agree with the ROUND_UP comments:

size = ROUND_UP(dev->msix_max * sizeof(struct MSIXTableEntry), MSIX_PAGE_SIZE);

> +    dev->msix_table = mmap(NULL, size, PROT_READ|PROT_WRITE,
>                             MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
>      if (dev->msix_table == MAP_FAILED) {
>          error_report("fail allocate msix_table! %s", strerror(errno));
>          return -EFAULT;
>      }
> +    if (!dev->msix_table) {
> +        return -EFAULT;
> +    }

This is a bogus test for mmap(2)

> 
> +    memset(dev->msix_table, 0, size);

This is unnecessary when assigned_dev_msix_reset() is fixed to memset
the whole mmap.

>      assigned_dev_msix_reset(dev);
> 
>      memory_region_init_io(&dev->mmio, OBJECT(dev), &assigned_dev_msix_mmio_ops,

This memory_region_init_io also requires a size parameter that isn't
updated for the new size.

As noted by other comments, the munmap() size isn't addressed.

IMHO, the correct way to fix this would be to update pci-assign to use
the msix infrastructure, but legacy KVM assignment is being phased out
and replaced by VFIO.  Is there something that ties you to pci-assign
instead of using vfio-pci?  Thanks,

Alex
Gonglei (Arei) April 2, 2014, 4:18 a.m. UTC | #4
> > Hi,

> >

> > I have a problem about SR-IOV pass-through.

> >

> > The PF is Emulex Corporation OneConnect NIC (Lancer)(rev 10),

> > and the VF pci config is as follow:

> >

> > LINUX:/sys/bus/pci/devices/0000:04:00.6 # hexdump config

> > 0000000 ffff ffff 0000 0010 0010 0200 0000 0080

> > 0000010 0000 0000 0000 0000 0000 0000 0000 0000

> > 0000020 0000 0000 0000 0000 0000 0000 10df e264

> > 0000030 0000 0000 0054 0000 0000 0000 0000 0000

> > 0000040 0000 0000 0008 0000 0000 0000 0000 0000

> > 0000050 0000 0000 6009 0008 2b41 c002 0000 0000

> > 0000060 7805 018a 0000 0000 0000 0000 0000 0000

> > 0000070 0000 0000 0000 0000 8411 03ff 4000 0000

> > 0000080 3400 0000 9403 0000 0000 0000 0000 0000

> > 0000090 0000 0000 0010 0002 8724 1000 0000 0000

> > 00000a0 dc83 0041 0000 0000 0000 0000 0000 0000

> > 00000b0 0000 0000 0000 0000 001f 0010 0000 0000

> > 00000c0 000e 0000 0000 0000 0000 0000 0000 0000

> > 00000d0 0000 0000 0000 0000 0000 0000 0000 0000

> >

> > We can see the msix_max is 0x3ff and msix_table_entry is 0x4000 (4 pages).

> But QEMU

> > only mmap MSIX_PAGE_SIZE memory for all pci devices in funciton

> assigned_dev_register_msix_mmio,

> > meanwhile the set the one page memmory to zero, so the rest memory will

> be random value

> > (maybe etnry.data is not 0).

> >

> > In function assigned_dev_update_msix_mmio maybe occur the issue of

> entry_nr > 256,

> > and the kmod reports the EINVAL error.

> >

> > My patch fix this issue which alloc memory according to the real size of pci

> device config.

> >

> > Any ideas? Thnaks.

> 

> Isn't this already fixed if you use vfio-pci?  pci-assign is not well

> supported anymore.

> 

No, I haven't tried it use vfio-pci. Maybe I will have a try.

> > Signed-off-by: Gonglei <arei.gonglei@huawei.com>

> > ---

> >  hw/i386/kvm/pci-assign.c |   24 +++++++++++++++++++-----

> >  1 files changed, 19 insertions(+), 5 deletions(-)

> >

> > diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c

> > index a825871..daa191c 100644

> > --- a/hw/i386/kvm/pci-assign.c

> > +++ b/hw/i386/kvm/pci-assign.c

> > @@ -1591,10 +1591,6 @@ static void

> assigned_dev_msix_reset(AssignedDevice *dev)

> >      MSIXTableEntry *entry;

> >      int i;

> >

> > -    if (!dev->msix_table) {

> > -        return;

> > -    }

> > -

> 

> How would this not result in a segfault if the assigned device doesn't

> support msix?

> 

Actually I move the judge in function assigned_dev_register_msix_mmio.
Because assigned_dev_register_msix_mmio do not address the return value,
if dev->msix_table is null, this will result a segfault. Right?

> >      memset(dev->msix_table, 0, MSIX_PAGE_SIZE);

> 

> This memset may no longer cover the entire table

> 

Yeah, this memset is useless. Do it in assigned_dev_register_msix_mmio.

> >

> >      for (i = 0, entry = dev->msix_table; i < dev->msix_max; i++, entry++) {

> > @@ -1604,13 +1600,31 @@ static void

> assigned_dev_msix_reset(AssignedDevice *dev)

> >

> >  static int assigned_dev_register_msix_mmio(AssignedDevice *dev)

> >  {

> > -    dev->msix_table = mmap(NULL, MSIX_PAGE_SIZE,

> PROT_READ|PROT_WRITE,

> > +    int nr_pages;

> > +    int size;

> > +    int entry_per_page = MSIX_PAGE_SIZE / sizeof(struct MSIXTableEntry);

> > +

> > +    if (dev->msix_max > entry_per_page) {

> > +        nr_pages = dev->msix_max / entry_per_page;

> > +        if (dev->msix_max % entry_per_page) {

> > +            nr_pages += 1;

> > +        }

> > +    } else {

> > +        nr_pages = 1;

> > +    }

> > +

> > +    size = MSIX_PAGE_SIZE * nr_pages;

> 

> Agree with the ROUND_UP comments:

> 

> size = ROUND_UP(dev->msix_max * sizeof(struct MSIXTableEntry),

> MSIX_PAGE_SIZE);

> 

Nice. I don't know the macro before. Thank you so much!
> > +    dev->msix_table = mmap(NULL, size, PROT_READ|PROT_WRITE,

> >                             MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);

> >      if (dev->msix_table == MAP_FAILED) {

> >          error_report("fail allocate msix_table! %s", strerror(errno));

> >          return -EFAULT;

> >      }

> > +    if (!dev->msix_table) {

> > +        return -EFAULT;

> > +    }

> 

> This is a bogus test for mmap(2)

> 

> >

> > +    memset(dev->msix_table, 0, size);

> 

> This is unnecessary when assigned_dev_msix_reset() is fixed to memset

> the whole mmap.

> 

> >      assigned_dev_msix_reset(dev);

> >

> >      memory_region_init_io(&dev->mmio, OBJECT(dev),

> &assigned_dev_msix_mmio_ops,

> 

> This memory_region_init_io also requires a size parameter that isn't

> updated for the new size.

> 

Nice catch.
> As noted by other comments, the munmap() size isn't addressed.

> 

Get it.
> IMHO, the correct way to fix this would be to update pci-assign to use

> the msix infrastructure, but legacy KVM assignment is being phased out

> and replaced by VFIO.  Is there something that ties you to pci-assign

> instead of using vfio-pci?  Thanks,

> 

I will have a try. Alex, What's your opinion about my former letter about the MSI-X maximum entry. 
Thanks,

Best regards,
-Gonglei
Alex Williamson April 2, 2014, 4:47 a.m. UTC | #5
On Wed, 2014-04-02 at 04:18 +0000, Gonglei (Arei) wrote:
> > > Hi,
> > >
> > > I have a problem about SR-IOV pass-through.
> > >
> > > The PF is Emulex Corporation OneConnect NIC (Lancer)(rev 10),
> > > and the VF pci config is as follow:
> > >
> > > LINUX:/sys/bus/pci/devices/0000:04:00.6 # hexdump config
> > > 0000000 ffff ffff 0000 0010 0010 0200 0000 0080
> > > 0000010 0000 0000 0000 0000 0000 0000 0000 0000
> > > 0000020 0000 0000 0000 0000 0000 0000 10df e264
> > > 0000030 0000 0000 0054 0000 0000 0000 0000 0000
> > > 0000040 0000 0000 0008 0000 0000 0000 0000 0000
> > > 0000050 0000 0000 6009 0008 2b41 c002 0000 0000
> > > 0000060 7805 018a 0000 0000 0000 0000 0000 0000
> > > 0000070 0000 0000 0000 0000 8411 03ff 4000 0000
> > > 0000080 3400 0000 9403 0000 0000 0000 0000 0000
> > > 0000090 0000 0000 0010 0002 8724 1000 0000 0000
> > > 00000a0 dc83 0041 0000 0000 0000 0000 0000 0000
> > > 00000b0 0000 0000 0000 0000 001f 0010 0000 0000
> > > 00000c0 000e 0000 0000 0000 0000 0000 0000 0000
> > > 00000d0 0000 0000 0000 0000 0000 0000 0000 0000
> > >
> > > We can see the msix_max is 0x3ff and msix_table_entry is 0x4000 (4 pages).
> > But QEMU
> > > only mmap MSIX_PAGE_SIZE memory for all pci devices in funciton
> > assigned_dev_register_msix_mmio,
> > > meanwhile the set the one page memmory to zero, so the rest memory will
> > be random value
> > > (maybe etnry.data is not 0).
> > >
> > > In function assigned_dev_update_msix_mmio maybe occur the issue of
> > entry_nr > 256,
> > > and the kmod reports the EINVAL error.
> > >
> > > My patch fix this issue which alloc memory according to the real size of pci
> > device config.
> > >
> > > Any ideas? Thnaks.
> > 
> > Isn't this already fixed if you use vfio-pci?  pci-assign is not well
> > supported anymore.
> > 
> No, I haven't tried it use vfio-pci. Maybe I will have a try.
> 
> > > Signed-off-by: Gonglei <arei.gonglei@huawei.com>
> > > ---
> > >  hw/i386/kvm/pci-assign.c |   24 +++++++++++++++++++-----
> > >  1 files changed, 19 insertions(+), 5 deletions(-)
> > >
> > > diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c
> > > index a825871..daa191c 100644
> > > --- a/hw/i386/kvm/pci-assign.c
> > > +++ b/hw/i386/kvm/pci-assign.c
> > > @@ -1591,10 +1591,6 @@ static void
> > assigned_dev_msix_reset(AssignedDevice *dev)
> > >      MSIXTableEntry *entry;
> > >      int i;
> > >
> > > -    if (!dev->msix_table) {
> > > -        return;
> > > -    }
> > > -
> > 
> > How would this not result in a segfault if the assigned device doesn't
> > support msix?
> > 
> Actually I move the judge in function assigned_dev_register_msix_mmio.
> Because assigned_dev_register_msix_mmio do not address the return value,
> if dev->msix_table is null, this will result a segfault. Right?

I see the confusion, there is a bug there but I think it should be fixed
by setting msix_table to NULL in the MAP_FAILED case.  The intent of
assigned_dev_msix_reset() is to put the msix table in the state it would
be in at reset.  Without a memset() that doesn't happen.  I believe the
reason we test msix_table in assigned_dev_msix_reset() is so that the
function could be called from anywhere, such as reset_assigned_device()
even though it's currently not called from there.  So, if the memset()
gets removed, then the whole function should be dissolved.  I'd prefer
to keep it and store or recalculate the size for memset.

> > >      memset(dev->msix_table, 0, MSIX_PAGE_SIZE);
> > 
> > This memset may no longer cover the entire table
> > 
> Yeah, this memset is useless. Do it in assigned_dev_register_msix_mmio.

Not useless, see above.

> > >
> > >      for (i = 0, entry = dev->msix_table; i < dev->msix_max; i++, entry++) {
> > > @@ -1604,13 +1600,31 @@ static void
> > assigned_dev_msix_reset(AssignedDevice *dev)
> > >
> > >  static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
> > >  {
> > > -    dev->msix_table = mmap(NULL, MSIX_PAGE_SIZE,
> > PROT_READ|PROT_WRITE,
> > > +    int nr_pages;
> > > +    int size;
> > > +    int entry_per_page = MSIX_PAGE_SIZE / sizeof(struct MSIXTableEntry);
> > > +
> > > +    if (dev->msix_max > entry_per_page) {
> > > +        nr_pages = dev->msix_max / entry_per_page;
> > > +        if (dev->msix_max % entry_per_page) {
> > > +            nr_pages += 1;
> > > +        }
> > > +    } else {
> > > +        nr_pages = 1;
> > > +    }
> > > +
> > > +    size = MSIX_PAGE_SIZE * nr_pages;
> > 
> > Agree with the ROUND_UP comments:
> > 
> > size = ROUND_UP(dev->msix_max * sizeof(struct MSIXTableEntry),
> > MSIX_PAGE_SIZE);
> > 
> Nice. I don't know the macro before. Thank you so much!
> > > +    dev->msix_table = mmap(NULL, size, PROT_READ|PROT_WRITE,
> > >                             MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
> > >      if (dev->msix_table == MAP_FAILED) {
> > >          error_report("fail allocate msix_table! %s", strerror(errno));
> > >          return -EFAULT;
> > >      }
> > > +    if (!dev->msix_table) {
> > > +        return -EFAULT;
> > > +    }
> > 
> > This is a bogus test for mmap(2)
> > 
> > >
> > > +    memset(dev->msix_table, 0, size);
> > 
> > This is unnecessary when assigned_dev_msix_reset() is fixed to memset
> > the whole mmap.
> > 
> > >      assigned_dev_msix_reset(dev);
> > >
> > >      memory_region_init_io(&dev->mmio, OBJECT(dev),
> > &assigned_dev_msix_mmio_ops,
> > 
> > This memory_region_init_io also requires a size parameter that isn't
> > updated for the new size.
> > 
> Nice catch.
> > As noted by other comments, the munmap() size isn't addressed.
> > 
> Get it.
> > IMHO, the correct way to fix this would be to update pci-assign to use
> > the msix infrastructure, but legacy KVM assignment is being phased out
> > and replaced by VFIO.  Is there something that ties you to pci-assign
> > instead of using vfio-pci?  Thanks,
> > 
> I will have a try. Alex, What's your opinion about my former letter about the MSI-X maximum entry. 

From other thread:

> BTW, do you think the KVM should upsize the max support MSI-X entry to 2048.
> Because the MSI-X supports a maximum table size of 2048 entries, which is descript in 
> PCI specification 3.0 version 6.8.3.2: "MSI-X Configuration".
>
> The history patch about downsize the MSI-X entry size to 256:
> http://thread.gmane.org/gmane.comp.emulators.kvm.devel/38852/focus=38849

No, I think we should deprecate KVM device assignment and use VFIO.
Thanks,

Alex
Gonglei (Arei) April 2, 2014, 8:50 a.m. UTC | #6
Hi, 
> > >

> > Actually I move the judge in function assigned_dev_register_msix_mmio.

> > Because assigned_dev_register_msix_mmio do not address the return value,

> > if dev->msix_table is null, this will result a segfault. Right?

> 

> I see the confusion, there is a bug there but I think it should be fixed

> by setting msix_table to NULL in the MAP_FAILED case.  The intent of

> assigned_dev_msix_reset() is to put the msix table in the state it would

> be in at reset.  Without a memset() that doesn't happen.  I believe the

> reason we test msix_table in assigned_dev_msix_reset() is so that the

> function could be called from anywhere, such as reset_assigned_device()

> even though it's currently not called from there.  So, if the memset()

> gets removed, then the whole function should be dissolved.  I'd prefer

> to keep it and store or recalculate the size for memset.

> 

Yep, agreed. Thank you so much. I want to post a formal patch, can I?

> > > >      memset(dev->msix_table, 0, MSIX_PAGE_SIZE);

> > >

> > > This memset may no longer cover the entire table

> > >

> > Yeah, this memset is useless. Do it in assigned_dev_register_msix_mmio.

> 

> Not useless, see above.

> 

> > > >

> > > >      for (i = 0, entry = dev->msix_table; i < dev->msix_max; i++, entry++)

> {

> > > > @@ -1604,13 +1600,31 @@ static void

> > > assigned_dev_msix_reset(AssignedDevice *dev)

> > > >

> > > >  static int assigned_dev_register_msix_mmio(AssignedDevice *dev)

> > > >  {

> > > > -    dev->msix_table = mmap(NULL, MSIX_PAGE_SIZE,

> > > PROT_READ|PROT_WRITE,

> > > > +    int nr_pages;

> > > > +    int size;

> > > > +    int entry_per_page = MSIX_PAGE_SIZE / sizeof(struct

> MSIXTableEntry);

> > > > +

> > > > +    if (dev->msix_max > entry_per_page) {

> > > > +        nr_pages = dev->msix_max / entry_per_page;

> > > > +        if (dev->msix_max % entry_per_page) {

> > > > +            nr_pages += 1;

> > > > +        }

> > > > +    } else {

> > > > +        nr_pages = 1;

> > > > +    }

> > > > +

> > > > +    size = MSIX_PAGE_SIZE * nr_pages;

> > >

> > > Agree with the ROUND_UP comments:

> > >

> > > size = ROUND_UP(dev->msix_max * sizeof(struct MSIXTableEntry),

> > > MSIX_PAGE_SIZE);

> > >

> > Nice. I don't know the macro before. Thank you so much!

> > > > +    dev->msix_table = mmap(NULL, size, PROT_READ|PROT_WRITE,

> > > >                             MAP_ANONYMOUS|MAP_PRIVATE, 0,

> 0);

> > > >      if (dev->msix_table == MAP_FAILED) {

> > > >          error_report("fail allocate msix_table! %s", strerror(errno));

> > > >          return -EFAULT;

> > > >      }

> > > > +    if (!dev->msix_table) {

> > > > +        return -EFAULT;

> > > > +    }

> > >

> > > This is a bogus test for mmap(2)

> > >

> > > >

> > > > +    memset(dev->msix_table, 0, size);

> > >

> > > This is unnecessary when assigned_dev_msix_reset() is fixed to memset

> > > the whole mmap.

> > >

> > > >      assigned_dev_msix_reset(dev);

> > > >

> > > >      memory_region_init_io(&dev->mmio, OBJECT(dev),

> > > &assigned_dev_msix_mmio_ops,

> > >

> > > This memory_region_init_io also requires a size parameter that isn't

> > > updated for the new size.

> > >

> > Nice catch.

> > > As noted by other comments, the munmap() size isn't addressed.

> > >

> > Get it.

> > > IMHO, the correct way to fix this would be to update pci-assign to use

> > > the msix infrastructure, but legacy KVM assignment is being phased out

> > > and replaced by VFIO.  Is there something that ties you to pci-assign

> > > instead of using vfio-pci?  Thanks,

> > >

> > I will have a try. Alex, What's your opinion about my former letter about the

> MSI-X maximum entry.

> 

> From other thread:

> 

> > BTW, do you think the KVM should upsize the max support MSI-X entry to

> 2048.

> > Because the MSI-X supports a maximum table size of 2048 entries, which is

> descript in

> > PCI specification 3.0 version 6.8.3.2: "MSI-X Configuration".

> >

> > The history patch about downsize the MSI-X entry size to 256:

> >

> http://thread.gmane.org/gmane.comp.emulators.kvm.devel/38852/focus=388

> 49

> 

> No, I think we should deprecate KVM device assignment and use VFIO.

> Thanks,

> 

OK. I will send the result of using VFIO later.

Best regards,
-Gonglei
Alex Williamson April 2, 2014, 3:41 p.m. UTC | #7
On Wed, 2014-04-02 at 08:50 +0000, Gonglei (Arei) wrote:
> Hi, 
> > > >
> > > Actually I move the judge in function assigned_dev_register_msix_mmio.
> > > Because assigned_dev_register_msix_mmio do not address the return value,
> > > if dev->msix_table is null, this will result a segfault. Right?
> > 
> > I see the confusion, there is a bug there but I think it should be fixed
> > by setting msix_table to NULL in the MAP_FAILED case.  The intent of
> > assigned_dev_msix_reset() is to put the msix table in the state it would
> > be in at reset.  Without a memset() that doesn't happen.  I believe the
> > reason we test msix_table in assigned_dev_msix_reset() is so that the
> > function could be called from anywhere, such as reset_assigned_device()
> > even though it's currently not called from there.  So, if the memset()
> > gets removed, then the whole function should be dissolved.  I'd prefer
> > to keep it and store or recalculate the size for memset.
> > 
> Yep, agreed. Thank you so much. I want to post a formal patch, can I?

Of course, bug fixes are always welcome.  Features and significant code
churn to pci-assign probably require some discussion as we'd really like
to see it phased out in favor of vfio-pci.

> > > > >      memset(dev->msix_table, 0, MSIX_PAGE_SIZE);
> > > >
> > > > This memset may no longer cover the entire table
> > > >
> > > Yeah, this memset is useless. Do it in assigned_dev_register_msix_mmio.
> > 
> > Not useless, see above.
> > 
> > > > >
> > > > >      for (i = 0, entry = dev->msix_table; i < dev->msix_max; i++, entry++)
> > {
> > > > > @@ -1604,13 +1600,31 @@ static void
> > > > assigned_dev_msix_reset(AssignedDevice *dev)
> > > > >
> > > > >  static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
> > > > >  {
> > > > > -    dev->msix_table = mmap(NULL, MSIX_PAGE_SIZE,
> > > > PROT_READ|PROT_WRITE,
> > > > > +    int nr_pages;
> > > > > +    int size;
> > > > > +    int entry_per_page = MSIX_PAGE_SIZE / sizeof(struct
> > MSIXTableEntry);
> > > > > +
> > > > > +    if (dev->msix_max > entry_per_page) {
> > > > > +        nr_pages = dev->msix_max / entry_per_page;
> > > > > +        if (dev->msix_max % entry_per_page) {
> > > > > +            nr_pages += 1;
> > > > > +        }
> > > > > +    } else {
> > > > > +        nr_pages = 1;
> > > > > +    }
> > > > > +
> > > > > +    size = MSIX_PAGE_SIZE * nr_pages;
> > > >
> > > > Agree with the ROUND_UP comments:
> > > >
> > > > size = ROUND_UP(dev->msix_max * sizeof(struct MSIXTableEntry),
> > > > MSIX_PAGE_SIZE);
> > > >
> > > Nice. I don't know the macro before. Thank you so much!
> > > > > +    dev->msix_table = mmap(NULL, size, PROT_READ|PROT_WRITE,
> > > > >                             MAP_ANONYMOUS|MAP_PRIVATE, 0,
> > 0);
> > > > >      if (dev->msix_table == MAP_FAILED) {
> > > > >          error_report("fail allocate msix_table! %s", strerror(errno));
> > > > >          return -EFAULT;
> > > > >      }
> > > > > +    if (!dev->msix_table) {
> > > > > +        return -EFAULT;
> > > > > +    }
> > > >
> > > > This is a bogus test for mmap(2)
> > > >
> > > > >
> > > > > +    memset(dev->msix_table, 0, size);
> > > >
> > > > This is unnecessary when assigned_dev_msix_reset() is fixed to memset
> > > > the whole mmap.
> > > >
> > > > >      assigned_dev_msix_reset(dev);
> > > > >
> > > > >      memory_region_init_io(&dev->mmio, OBJECT(dev),
> > > > &assigned_dev_msix_mmio_ops,
> > > >
> > > > This memory_region_init_io also requires a size parameter that isn't
> > > > updated for the new size.
> > > >
> > > Nice catch.
> > > > As noted by other comments, the munmap() size isn't addressed.
> > > >
> > > Get it.
> > > > IMHO, the correct way to fix this would be to update pci-assign to use
> > > > the msix infrastructure, but legacy KVM assignment is being phased out
> > > > and replaced by VFIO.  Is there something that ties you to pci-assign
> > > > instead of using vfio-pci?  Thanks,
> > > >
> > > I will have a try. Alex, What's your opinion about my former letter about the
> > MSI-X maximum entry.
> > 
> > From other thread:
> > 
> > > BTW, do you think the KVM should upsize the max support MSI-X entry to
> > 2048.
> > > Because the MSI-X supports a maximum table size of 2048 entries, which is
> > descript in
> > > PCI specification 3.0 version 6.8.3.2: "MSI-X Configuration".
> > >
> > > The history patch about downsize the MSI-X entry size to 256:
> > >
> > http://thread.gmane.org/gmane.comp.emulators.kvm.devel/38852/focus=388
> > 49
> > 
> > No, I think we should deprecate KVM device assignment and use VFIO.
> > Thanks,
> > 
> OK. I will send the result of using VFIO later.

Great, please let me know if you have any problems.  Thanks,

Alex
diff mbox

Patch

diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c
index a825871..daa191c 100644
--- a/hw/i386/kvm/pci-assign.c
+++ b/hw/i386/kvm/pci-assign.c
@@ -1591,10 +1591,6 @@  static void assigned_dev_msix_reset(AssignedDevice *dev)
     MSIXTableEntry *entry;
     int i;

-    if (!dev->msix_table) {
-        return;
-    }
-
     memset(dev->msix_table, 0, MSIX_PAGE_SIZE);

     for (i = 0, entry = dev->msix_table; i < dev->msix_max; i++, entry++) {
@@ -1604,13 +1600,31 @@  static void assigned_dev_msix_reset(AssignedDevice *dev)

 static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
 {
-    dev->msix_table = mmap(NULL, MSIX_PAGE_SIZE, PROT_READ|PROT_WRITE,
+    int nr_pages;
+    int size;
+    int entry_per_page = MSIX_PAGE_SIZE / sizeof(struct MSIXTableEntry);
+    
+    if (dev->msix_max > entry_per_page) {
+        nr_pages = dev->msix_max / entry_per_page;
+        if (dev->msix_max % entry_per_page) {
+            nr_pages += 1;
+        }
+    } else {
+        nr_pages = 1;
+    }
+
+    size = MSIX_PAGE_SIZE * nr_pages;
+    dev->msix_table = mmap(NULL, size, PROT_READ|PROT_WRITE,
                            MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
     if (dev->msix_table == MAP_FAILED) {
         error_report("fail allocate msix_table! %s", strerror(errno));
         return -EFAULT;
     }
+    if (!dev->msix_table) {
+        return -EFAULT;
+    }

+    memset(dev->msix_table, 0, size);
     assigned_dev_msix_reset(dev);

     memory_region_init_io(&dev->mmio, OBJECT(dev), &assigned_dev_msix_mmio_ops,