Patchwork [v4,3/6] pci: set PCI multi-function bit appropriately.

login
register
mail settings
Submitter Isaku Yamahata
Date June 21, 2010, 6:03 a.m.
Message ID <99d849f1b4eeb12893447e78a6950c26a32088ac.1277100005.git.yamahata@valinux.co.jp>
Download mbox | patch
Permalink /patch/56291/
State New
Headers show

Comments

Isaku Yamahata - June 21, 2010, 6:03 a.m.
Set the PCI multi-function bit according to the multifunction property.
The PCI address, devfn, is exported to users as the addr property,
so users can populate a PCI function (PCIDevice in qemu)
at an arbitrary devfn.
This means each function (PCIDevice) doesn't know whether the PCI device
(PCIDevice[8]) is multi-function or not.
So this patch allows the user to set the multifunction bit via a property
and checks whether the multifunction bit is set correctly.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>

---
changes v3 -> v4:
- introduce multifunction property.

changes v2 -> v3:
- introduce PCI_FUNC_MAX
- more commit log

changes v1 -> v2:
---
 hw/pci.c |   60 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 hw/pci.h |    4 ++++
 2 files changed, 61 insertions(+), 3 deletions(-)
Juan Quintela - June 21, 2010, 9:45 a.m.
Isaku Yamahata <yamahata@valinux.co.jp> wrote:
> Set PCI multi-function bit according to multifunction property.
> PCI address, devfn ,is exported to users as addr property,
> so users can populate pci function(PCIDevice in qemu)
> at arbitrary devfn.
> It means each function(PCIDevice) don't know whether pci device
> (PCIDevice[8]) is multi function or not.
> So this patch allows user to set multifunction bit via property
> and checks whether multifunction bit is set correctly.

[...]

> @@ -629,6 +668,9 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
>      if (is_bridge) {
>          pci_init_wmask_bridge(pci_dev);
>      }
> +    if (pci_init_multifunction(bus, pci_dev)) {
> +        return NULL;
> +    }

Don't we have a memory leak here?
What frees the memory that pci_config_alloc() allocated in the error case?

Later, Juan.
Isaku Yamahata - June 21, 2010, 9:52 a.m.
On Mon, Jun 21, 2010 at 11:45:15AM +0200, Juan Quintela wrote:
> Isaku Yamahata <yamahata@valinux.co.jp> wrote:
> > Set PCI multi-function bit according to multifunction property.
> > PCI address, devfn ,is exported to users as addr property,
> > so users can populate pci function(PCIDevice in qemu)
> > at arbitrary devfn.
> > It means each function(PCIDevice) don't know whether pci device
> > (PCIDevice[8]) is multi function or not.
> > So this patch allows user to set multifunction bit via property
> > and checks whether multifunction bit is set correctly.
> 
> [...]
> 
> > @@ -629,6 +668,9 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
> >      if (is_bridge) {
> >          pci_init_wmask_bridge(pci_dev);
> >      }
> > +    if (pci_init_multifunction(bus, pci_dev)) {
> > +        return NULL;
> > +    }
> 
> Don't we have here a mem leak?
> what frees what pci_config_alloc() assignates in the error case?

Oh, you're right. Will fix it.
Michael S. Tsirkin - June 21, 2010, 12:36 p.m.
On Mon, Jun 21, 2010 at 03:03:58PM +0900, Isaku Yamahata wrote:
> Set PCI multi-function bit according to multifunction property.
> PCI address, devfn ,is exported to users as addr property,
> so users can populate pci function(PCIDevice in qemu)
> at arbitrary devfn.
> It means each function(PCIDevice) don't know whether pci device
> (PCIDevice[8]) is multi function or not.
> So this patch allows user to set multifunction bit via property
> and checks whether multifunction bit is set correctly.
> 
> Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>

Applying it this way will break bisect.
We also need to handle migration compatibility.
I propose we split it this way:
- patch to add multifunction property (ignored)
- set property in builtin devices where appropriate
- patch to look at property and set bit in header

> ---
> changes v3 -> v4:
> - introduce multifunction property.
> 
> changes v2 -> v3:
> - introduce PCI_FUNC_MAX
> - more commit log
> 
> changes v1 -> v2:
> ---
>  hw/pci.c |   60 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
>  hw/pci.h |    4 ++++
>  2 files changed, 61 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/pci.c b/hw/pci.c
> index b6c0a10..abc3c1d 100644
> --- a/hw/pci.c
> +++ b/hw/pci.c
> @@ -67,6 +67,7 @@ static struct BusInfo pci_bus_info = {
>          DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1),
>          DEFINE_PROP_STRING("romfile", PCIDevice, romfile),
>          DEFINE_PROP_UINT32("rombar",  PCIDevice, rom_bar, 1),
> +        DEFINE_PROP_UINT8("multifunction",  PCIDevice, mf, 0),

Please make this a bit property, not UINT8. It can be stored in
cap_present.

>          DEFINE_PROP_END_OF_LIST()
>      }
>  };
> @@ -575,6 +576,44 @@ static void pci_init_wmask_bridge(PCIDevice *d)
>      pci_set_word(d->wmask + PCI_BRIDGE_CONTROL, 0xffff);
>  }
>  
> +static int pci_init_multifunction(PCIBus *bus, PCIDevice *dev)
> +{

IMO we should just add in pci_register_device:

	if (dev->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
		dev->config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
	} else if (PCI_FUNC(dev->devfn)) {
		error_report("PCI: single function device can't be populated %x.%x",
			     PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
		return -1;
	}

And be done with it.

> +    uint8_t slot = PCI_SLOT(dev->devfn);
> +    uint8_t func = PCI_FUNC(dev->devfn);
> +
> +    /* we are here before bus->devices[dev->devfn] = dev */
> +    assert(!bus->devices[dev->devfn]);

Can users trigger this?
If yes, this needs an error, not an assert.

> +
> +    if (dev->mf) {
> +        dev->config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
> +    }
> +
> +    if (func) {

Please open-code func above.

> +        PCIDevice *d = bus->devices[PCI_DEVFN(slot, 0)];
> +        if (d && !d->mf) {
> +            /* function 0 should set multifunction bit */
> +            error_report("PCI: single function device can't be populated "
> +                         "in function %x.%x", slot, func);
> +            return -1;
> +        }
> +        return 0;
> +    }
> +
> +    if (dev->mf) {
> +        return 0;
> +    }
> +    /* function 0 indicates single function, so function > 0 must be NULL */


We don't need the below test: each function will be checked
when it is added.

> +    for (func = 1; func < PCI_FUNC_MAX; ++func) {
> +        if (bus->devices[PCI_DEVFN(slot, func)]) {
> +            error_report("PCI: %x.0 indicates single function, "
> +                         "but %x.%x is already populated.",
> +                         slot, slot, func);
> +            return -1;
> +        }
> +    }
> +    return 0;
> +}
> +
>  static void pci_config_alloc(PCIDevice *pci_dev)
>  {
>      int config_size = pci_config_size(pci_dev);
> @@ -629,6 +668,9 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
>      if (is_bridge) {
>          pci_init_wmask_bridge(pci_dev);
>      }
> +    if (pci_init_multifunction(bus, pci_dev)) {
> +        return NULL;
> +    }
>  
>      if (!config_read)
>          config_read = pci_default_read_config;
> @@ -1652,22 +1694,34 @@ void pci_qdev_register_many(PCIDeviceInfo *info)
>      }
>  }
>  
> -PCIDevice *pci_create(PCIBus *bus, int devfn, const char *name)
> +PCIDevice *pci_create_mf(PCIBus *bus, int devfn, uint8_t mf, const char *name)
>  {
>      DeviceState *dev;
>  
>      dev = qdev_create(&bus->qbus, name);
>      qdev_prop_set_uint32(dev, "addr", devfn);
> +    qdev_prop_set_uint8(dev, "multifunction", mf);
>      return DO_UPCAST(PCIDevice, qdev, dev);
>  }
>  
> -PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name)
> +PCIDevice *pci_create_simple_mf(PCIBus *bus, int devfn, uint8_t mf,
> +                                const char *name)
>  {
> -    PCIDevice *dev = pci_create(bus, devfn, name);
> +    PCIDevice *dev = pci_create_mf(bus, devfn, mf, name);
>      qdev_init_nofail(&dev->qdev);
>      return dev;
>  }
>  
> +PCIDevice *pci_create(PCIBus *bus, int devfn, const char *name)
> +{
> +    return pci_create_mf(bus, devfn, 0, name);
> +}
> +
> +PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name)
> +{
> +    return pci_create_simple_mf(bus, devfn, 0, name);
> +}
> +
>  static int pci_find_space(PCIDevice *pdev, uint8_t size)
>  {
>      int config_size = pci_config_size(pdev);
> diff --git a/hw/pci.h b/hw/pci.h
> index 76adc66..685fd44 100644
> --- a/hw/pci.h
> +++ b/hw/pci.h
> @@ -131,6 +131,7 @@ struct PCIDevice {
>      /* the following fields are read only */
>      PCIBus *bus;
>      uint32_t devfn;
> +    uint8_t mf;         /* multi function capabile device */

Add a bit in cap_present please.

>      char name[64];
>      PCIIORegion io_regions[PCI_NUM_REGIONS];
>  
> @@ -343,6 +344,9 @@ typedef struct {
>  void pci_qdev_register(PCIDeviceInfo *info);
>  void pci_qdev_register_many(PCIDeviceInfo *info);
>  
> +PCIDevice *pci_create_mf(PCIBus *bus, int devfn, uint8_t mf, const char *name);
> +PCIDevice *pci_create_simple_mf(PCIBus *bus, int devfn, uint8_t mf,
> +                                const char *name);

mf->multifunction

But do we need the extra functions? I thought qdev can handle
the flag?

>  PCIDevice *pci_create(PCIBus *bus, int devfn, const char *name);
>  PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name);
>  
> -- 
> 1.6.6.1
Isaku Yamahata - June 23, 2010, 7:25 a.m.
On Mon, Jun 21, 2010 at 03:36:00PM +0300, Michael S. Tsirkin wrote:
> On Mon, Jun 21, 2010 at 03:03:58PM +0900, Isaku Yamahata wrote:
> > Set PCI multi-function bit according to multifunction property.
> > PCI address, devfn ,is exported to users as addr property,
> > so users can populate pci function(PCIDevice in qemu)
> > at arbitrary devfn.
> > It means each function(PCIDevice) don't know whether pci device
> > (PCIDevice[8]) is multi function or not.
> > So this patch allows user to set multifunction bit via property
> > and checks whether multifunction bit is set correctly.
> > 
> > Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
> 
> Applying it this way will break bisect.
> We also need to handle migration compatibility.
> I propose we split it this way:
> - patch to add multifunction property (ignored)
> - set property in builtin devices where appropriate
> - patch to look at property and set bit in header

Done.

> 
> > ---
> > changes v3 -> v4:
> > - introduce multifunction property.
> > 
> > changes v2 -> v3:
> > - introduce PCI_FUNC_MAX
> > - more commit log
> > 
> > changes v1 -> v2:
> > ---
> >  hw/pci.c |   60 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
> >  hw/pci.h |    4 ++++
> >  2 files changed, 61 insertions(+), 3 deletions(-)
> > 
> > diff --git a/hw/pci.c b/hw/pci.c
> > index b6c0a10..abc3c1d 100644
> > --- a/hw/pci.c
> > +++ b/hw/pci.c
> > @@ -67,6 +67,7 @@ static struct BusInfo pci_bus_info = {
> >          DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1),
> >          DEFINE_PROP_STRING("romfile", PCIDevice, romfile),
> >          DEFINE_PROP_UINT32("rombar",  PCIDevice, rom_bar, 1),
> > +        DEFINE_PROP_UINT8("multifunction",  PCIDevice, mf, 0),
> 
> Please make this a bit property, not UINT8. It can be stored in
> cap_present.
> 
> >          DEFINE_PROP_END_OF_LIST()
> >      }
> >  };
> > @@ -575,6 +576,44 @@ static void pci_init_wmask_bridge(PCIDevice *d)
> >      pci_set_word(d->wmask + PCI_BRIDGE_CONTROL, 0xffff);
> >  }
> >  
> > +static int pci_init_multifunction(PCIBus *bus, PCIDevice *dev)
> > +{
> 
> IMO we should just add in pci_register_device:
> 
> 	if (d->cap_resent & QEMU_PCI_CAP_MULTIFUNCTION) {
> 		dev->config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
> 	} else if (PCI_FUNC(dev->devfn)) {
> 		error_report("PCI: single function device can't be populated %x.%x",
> 			     PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
> 		return -1;
> 	}
> 
> And be done with it.

Unfortunately there are two ways to set the bit.
- set the bit in all the functions.
  Example: Intel X58 (north bridge).
- set the bit in function 0 only.
  Example: PIIX3, PIIX4, ... ICH10.

lspci -x would help to see what your pc has.


> 
> > +    uint8_t slot = PCI_SLOT(dev->devfn);
> > +    uint8_t func = PCI_FUNC(dev->devfn);
> > +
> > +    /* we are here before bus->devices[dev->devfn] = dev */
> > +    assert(!bus->devices[dev->devfn]);
> 
> Can users trigger this?
> If yes, this needs and error, not an assert.
> 
> > +
> > +    if (dev->mf) {
> > +        dev->config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
> > +    }
> > +
> > +    if (func) {
> 
> Please open-code func above.
> 
> > +        PCIDevice *d = bus->devices[PCI_DEVFN(slot, 0)];
> > +        if (d && !d->mf) {
> > +            /* function 0 should set multifunction bit */
> > +            error_report("PCI: single function device can't be populated "
> > +                         "in function %x.%x", slot, func);
> > +            return -1;
> > +        }
> > +        return 0;
> > +    }
> > +
> > +    if (dev->mf) {
> > +        return 0;
> > +    }
> > +    /* function 0 indicates single function, so function > 0 must be NULL */
> 
> 
> We don't need the below test: each function will be checked
> when it is added.
> 
> > +    for (func = 1; func < PCI_FUNC_MAX; ++func) {
> > +        if (bus->devices[PCI_DEVFN(slot, func)]) {
> > +            error_report("PCI: %x.0 indicates single function, "
> > +                         "but %x.%x is already populated.",
> > +                         slot, slot, func);
> > +            return -1;
> > +        }
> > +    }
> > +    return 0;
> > +}
> > +
> >  static void pci_config_alloc(PCIDevice *pci_dev)
> >  {
> >      int config_size = pci_config_size(pci_dev);
> > @@ -629,6 +668,9 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
> >      if (is_bridge) {
> >          pci_init_wmask_bridge(pci_dev);
> >      }
> > +    if (pci_init_multifunction(bus, pci_dev)) {
> > +        return NULL;
> > +    }
> >  
> >      if (!config_read)
> >          config_read = pci_default_read_config;
> > @@ -1652,22 +1694,34 @@ void pci_qdev_register_many(PCIDeviceInfo *info)
> >      }
> >  }
> >  
> > -PCIDevice *pci_create(PCIBus *bus, int devfn, const char *name)
> > +PCIDevice *pci_create_mf(PCIBus *bus, int devfn, uint8_t mf, const char *name)
> >  {
> >      DeviceState *dev;
> >  
> >      dev = qdev_create(&bus->qbus, name);
> >      qdev_prop_set_uint32(dev, "addr", devfn);
> > +    qdev_prop_set_uint8(dev, "multifunction", mf);
> >      return DO_UPCAST(PCIDevice, qdev, dev);
> >  }
> >  
> > -PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name)
> > +PCIDevice *pci_create_simple_mf(PCIBus *bus, int devfn, uint8_t mf,
> > +                                const char *name)
> >  {
> > -    PCIDevice *dev = pci_create(bus, devfn, name);
> > +    PCIDevice *dev = pci_create_mf(bus, devfn, mf, name);
> >      qdev_init_nofail(&dev->qdev);
> >      return dev;
> >  }
> >  
> > +PCIDevice *pci_create(PCIBus *bus, int devfn, const char *name)
> > +{
> > +    return pci_create_mf(bus, devfn, 0, name);
> > +}
> > +
> > +PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name)
> > +{
> > +    return pci_create_simple_mf(bus, devfn, 0, name);
> > +}
> > +
> >  static int pci_find_space(PCIDevice *pdev, uint8_t size)
> >  {
> >      int config_size = pci_config_size(pdev);
> > diff --git a/hw/pci.h b/hw/pci.h
> > index 76adc66..685fd44 100644
> > --- a/hw/pci.h
> > +++ b/hw/pci.h
> > @@ -131,6 +131,7 @@ struct PCIDevice {
> >      /* the following fields are read only */
> >      PCIBus *bus;
> >      uint32_t devfn;
> > +    uint8_t mf;         /* multi function capabile device */
> 
> Add a bit in cap_present please.
> 
> >      char name[64];
> >      PCIIORegion io_regions[PCI_NUM_REGIONS];
> >  
> > @@ -343,6 +344,9 @@ typedef struct {
> >  void pci_qdev_register(PCIDeviceInfo *info);
> >  void pci_qdev_register_many(PCIDeviceInfo *info);
> >  
> > +PCIDevice *pci_create_mf(PCIBus *bus, int devfn, uint8_t mf, const char *name);
> > +PCIDevice *pci_create_simple_mf(PCIBus *bus, int devfn, uint8_t mf,
> > +                                const char *name);
> 
> mf->multifunction
> 
> But do we need the extra functions? I thought qdev can handle
> the flag?

If the device tree were fully created via qdev from a configuration file,
that would be correct.
However, the conversion isn't complete, so convenience functions
are exported to the code which creates the device tree.
Please note that devfn here is also a property.

While I could add one more parameter to pci_create_simple(),
I would have to touch many callers.

> 
> >  PCIDevice *pci_create(PCIBus *bus, int devfn, const char *name);
> >  PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name);
> >  
> > -- 
> > 1.6.6.1
>
Michael S. Tsirkin - June 23, 2010, 9:52 a.m.
On Wed, Jun 23, 2010 at 04:25:20PM +0900, Isaku Yamahata wrote:
> On Mon, Jun 21, 2010 at 03:36:00PM +0300, Michael S. Tsirkin wrote:
> > On Mon, Jun 21, 2010 at 03:03:58PM +0900, Isaku Yamahata wrote:
> > > Set PCI multi-function bit according to multifunction property.
> > > PCI address, devfn ,is exported to users as addr property,
> > > so users can populate pci function(PCIDevice in qemu)
> > > at arbitrary devfn.
> > > It means each function(PCIDevice) don't know whether pci device
> > > (PCIDevice[8]) is multi function or not.
> > > So this patch allows user to set multifunction bit via property
> > > and checks whether multifunction bit is set correctly.
> > > 
> > > Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
> > 
> > Applying it this way will break bisect.
> > We also need to handle migration compatibility.
> > I propose we split it this way:
> > - patch to add multifunction property (ignored)
> > - set property in builtin devices where appropriate
> > - patch to look at property and set bit in header
> 
> Done.
> 
> > 
> > > ---
> > > changes v3 -> v4:
> > > - introduce multifunction property.
> > > 
> > > changes v2 -> v3:
> > > - introduce PCI_FUNC_MAX
> > > - more commit log
> > > 
> > > changes v1 -> v2:
> > > ---
> > >  hw/pci.c |   60 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
> > >  hw/pci.h |    4 ++++
> > >  2 files changed, 61 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/hw/pci.c b/hw/pci.c
> > > index b6c0a10..abc3c1d 100644
> > > --- a/hw/pci.c
> > > +++ b/hw/pci.c
> > > @@ -67,6 +67,7 @@ static struct BusInfo pci_bus_info = {
> > >          DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1),
> > >          DEFINE_PROP_STRING("romfile", PCIDevice, romfile),
> > >          DEFINE_PROP_UINT32("rombar",  PCIDevice, rom_bar, 1),
> > > +        DEFINE_PROP_UINT8("multifunction",  PCIDevice, mf, 0),
> > 
> > Please make this a bit property, not UINT8. It can be stored in
> > cap_present.
> > 
> > >          DEFINE_PROP_END_OF_LIST()
> > >      }
> > >  };
> > > @@ -575,6 +576,44 @@ static void pci_init_wmask_bridge(PCIDevice *d)
> > >      pci_set_word(d->wmask + PCI_BRIDGE_CONTROL, 0xffff);
> > >  }
> > >  
> > > +static int pci_init_multifunction(PCIBus *bus, PCIDevice *dev)
> > > +{
> > 
> > IMO we should just add in pci_register_device:
> > 
> > 	if (d->cap_resent & QEMU_PCI_CAP_MULTIFUNCTION) {
> > 		dev->config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
> > 	} else if (PCI_FUNC(dev->devfn)) {
> > 		error_report("PCI: single function device can't be populated %x.%x",
> > 			     PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
> > 		return -1;
> > 	}
> > 
> > And be done with it.
> 
> Unfortunately there are two ways to set the bit.
> - set the bit of all the function.
>   Example: Intel X58(north bridge.)
> - set the bit of only function = 0.
>   Example: PIIX3, PIIX4, ... ICH10.
> 
> lspci -x would help to see what your pc has.

This is correct:
	The order in which configuration software probes devices residing on a
	bus segment is not specified. Typically, configuration software either
	starts with Device Number 0 and works up or starts at Device Number 31
	and works down. If a single function device is detected (i.e., bit 7 in
	the Header Type register of function 0 is 0), no more functions for that
	Device Number will be checked. If a multi-function device is detected
	(i.e., bit 7 in the Header Type register of function 0 is 1), then all
	remaining Function Numbers will be checked.

So what my proposal would do is set the bit for all functions.
I don't think it matters - do you?
If you want to try and match the behaviour you observe
in actual hardware exactly, we can add
	/* Some devices only set multifunction status bit in function 0. */
	static void pci_clear_multifunction(...) {
		if (PCI_FUNC(dev->devfn))
			dev->config[PCI_HEADER_TYPE] &= ~PCI_HEADER_TYPE_MULTI_FUNCTION;
	}

and devices can call this in their init routine.

> > 
> > > +    uint8_t slot = PCI_SLOT(dev->devfn);
> > > +    uint8_t func = PCI_FUNC(dev->devfn);
> > > +
> > > +    /* we are here before bus->devices[dev->devfn] = dev */
> > > +    assert(!bus->devices[dev->devfn]);
> > 
> > Can users trigger this?
> > If yes, this needs and error, not an assert.
> > 
> > > +
> > > +    if (dev->mf) {
> > > +        dev->config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
> > > +    }
> > > +
> > > +    if (func) {
> > 
> > Please open-code func above.
> > 
> > > +        PCIDevice *d = bus->devices[PCI_DEVFN(slot, 0)];
> > > +        if (d && !d->mf) {
> > > +            /* function 0 should set multifunction bit */
> > > +            error_report("PCI: single function device can't be populated "
> > > +                         "in function %x.%x", slot, func);
> > > +            return -1;
> > > +        }
> > > +        return 0;
> > > +    }
> > > +
> > > +    if (dev->mf) {
> > > +        return 0;
> > > +    }
> > > +    /* function 0 indicates single function, so function > 0 must be NULL */
> > 
> > 
> > We don't need the below test: each function will be checked
> > when it is added.
> > 
> > > +    for (func = 1; func < PCI_FUNC_MAX; ++func) {
> > > +        if (bus->devices[PCI_DEVFN(slot, func)]) {
> > > +            error_report("PCI: %x.0 indicates single function, "
> > > +                         "but %x.%x is already populated.",
> > > +                         slot, slot, func);
> > > +            return -1;
> > > +        }
> > > +    }
> > > +    return 0;
> > > +}
> > > +
> > >  static void pci_config_alloc(PCIDevice *pci_dev)
> > >  {
> > >      int config_size = pci_config_size(pci_dev);
> > > @@ -629,6 +668,9 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
> > >      if (is_bridge) {
> > >          pci_init_wmask_bridge(pci_dev);
> > >      }
> > > +    if (pci_init_multifunction(bus, pci_dev)) {
> > > +        return NULL;
> > > +    }
> > >  
> > >      if (!config_read)
> > >          config_read = pci_default_read_config;
> > > @@ -1652,22 +1694,34 @@ void pci_qdev_register_many(PCIDeviceInfo *info)
> > >      }
> > >  }
> > >  
> > > -PCIDevice *pci_create(PCIBus *bus, int devfn, const char *name)
> > > +PCIDevice *pci_create_mf(PCIBus *bus, int devfn, uint8_t mf, const char *name)
> > >  {
> > >      DeviceState *dev;
> > >  
> > >      dev = qdev_create(&bus->qbus, name);
> > >      qdev_prop_set_uint32(dev, "addr", devfn);
> > > +    qdev_prop_set_uint8(dev, "multifunction", mf);
> > >      return DO_UPCAST(PCIDevice, qdev, dev);
> > >  }
> > >  
> > > -PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name)
> > > +PCIDevice *pci_create_simple_mf(PCIBus *bus, int devfn, uint8_t mf,
> > > +                                const char *name)
> > >  {
> > > -    PCIDevice *dev = pci_create(bus, devfn, name);
> > > +    PCIDevice *dev = pci_create_mf(bus, devfn, mf, name);
> > >      qdev_init_nofail(&dev->qdev);
> > >      return dev;
> > >  }
> > >  
> > > +PCIDevice *pci_create(PCIBus *bus, int devfn, const char *name)
> > > +{
> > > +    return pci_create_mf(bus, devfn, 0, name);
> > > +}
> > > +
> > > +PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name)
> > > +{
> > > +    return pci_create_simple_mf(bus, devfn, 0, name);
> > > +}
> > > +
> > >  static int pci_find_space(PCIDevice *pdev, uint8_t size)
> > >  {
> > >      int config_size = pci_config_size(pdev);
> > > diff --git a/hw/pci.h b/hw/pci.h
> > > index 76adc66..685fd44 100644
> > > --- a/hw/pci.h
> > > +++ b/hw/pci.h
> > > @@ -131,6 +131,7 @@ struct PCIDevice {
> > >      /* the following fields are read only */
> > >      PCIBus *bus;
> > >      uint32_t devfn;
> > > +    uint8_t mf;         /* multi function capabile device */
> > 
> > Add a bit in cap_present please.
> > 
> > >      char name[64];
> > >      PCIIORegion io_regions[PCI_NUM_REGIONS];
> > >  
> > > @@ -343,6 +344,9 @@ typedef struct {
> > >  void pci_qdev_register(PCIDeviceInfo *info);
> > >  void pci_qdev_register_many(PCIDeviceInfo *info);
> > >  
> > > +PCIDevice *pci_create_mf(PCIBus *bus, int devfn, uint8_t mf, const char *name);
> > > +PCIDevice *pci_create_simple_mf(PCIBus *bus, int devfn, uint8_t mf,
> > > +                                const char *name);
> > 
> > mf->multifunction
> > 
> > But do we need the extra functions? I thought qdev can handle
> > the flag?
> 
> If the device tree was fully created via qdev from configuration file,
> that would be correct.
> However the  conversion isn't completed, so convenient functions
> are exported to the code which creates the device tree.
> Please notice that devfn here is also property.
> 
> While I could add one more parameter to pci_create_simple(),
> I would have to touch many callers.
> 
> > 
> > >  PCIDevice *pci_create(PCIBus *bus, int devfn, const char *name);
> > >  PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name);
> > >  
> > > -- 
> > > 1.6.6.1
> > 
> 
> -- 
> yamahata
Isaku Yamahata - June 23, 2010, 10:13 a.m.
On Wed, Jun 23, 2010 at 12:52:10PM +0300, Michael S. Tsirkin wrote:
> > > > @@ -575,6 +576,44 @@ static void pci_init_wmask_bridge(PCIDevice *d)
> > > >      pci_set_word(d->wmask + PCI_BRIDGE_CONTROL, 0xffff);
> > > >  }
> > > >  
> > > > +static int pci_init_multifunction(PCIBus *bus, PCIDevice *dev)
> > > > +{
> > > 
> > > IMO we should just add in pci_register_device:
> > > 
> > > 	if (d->cap_resent & QEMU_PCI_CAP_MULTIFUNCTION) {
> > > 		dev->config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
> > > 	} else if (PCI_FUNC(dev->devfn)) {
> > > 		error_report("PCI: single function device can't be populated %x.%x",
> > > 			     PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
> > > 		return -1;
> > > 	}
> > > 
> > > And be done with it.
> > 
> > Unfortunately there are two ways to set the bit.
> > - set the bit of all the function.
> >   Example: Intel X58(north bridge.)
> > - set the bit of only function = 0.
> >   Example: PIIX3, PIIX4, ... ICH10.
> > 
> > lspci -x would help to see what your pc has.
> 
> This is correct:
> 	The order in which configuration software probes devices residing on a
> 	bus segment is not specified. Typically, configuration software either
> 	starts with Device Number 0 and works up or starts at Device Number 31
> 	and works down. If a single function device is detected (i.e., bit 7 in
> 	the Header Type register of function 0 is 0), no more functions for that
> 	Device Number will be checked. If a multi-function device is detected
> 	(i.e., bit 7 in the Header Type register of function 0 is 1), then all
> 	remaining Function Numbers will be checked.
> 
> So what my proposal would do is set the bit for all functions.
> I don't think it matters - do you?
> If you want to try and match the behaviour you observe
> in actual hardware exactly, we can add
> 	/* Some devices only set multifunction status bit in function 0. */
> 	static void pci_clear_multifunction(...) {
> 		if (PCI_FUNC(dev->devfn))
> 			dev->config[PCI_HEADER_TYPE] &= ~PCI_HEADER_TYPE_MULTI_FUNCTION;
> 	}
> 
> and devices can call this in their init routine.

Personally I'm okay with either way as long as you accept the patch series.

In fact the existing qemu PIIX3/4 sets the bit only in function 0
and doesn't set the bit in functions > 0.
- It would be better not to change the existing behavior.
- If all functions in a device are required to set the multifunction bit,
  the pci ide and ohci usb initialization code must be touched
  for pc and mips malta.

That said, which way do you want to go?
- The current patches (v5 9/9).
  My preference.

- Require all functions in a device to set the multifunction bit.
  Patch pci ide, ohci usb.
  This will result in a qemu behavior change.

- Require all functions in a device to set the multifunction bit.
  Patch pci ide, ohci usb.
  But try not to change the existing qemu behavior by using
  pci_clear_multifunction().
Michael S. Tsirkin - June 23, 2010, 10:41 a.m.
On Wed, Jun 23, 2010 at 07:13:38PM +0900, Isaku Yamahata wrote:
> On Wed, Jun 23, 2010 at 12:52:10PM +0300, Michael S. Tsirkin wrote:
> > > > > @@ -575,6 +576,44 @@ static void pci_init_wmask_bridge(PCIDevice *d)
> > > > >      pci_set_word(d->wmask + PCI_BRIDGE_CONTROL, 0xffff);
> > > > >  }
> > > > >  
> > > > > +static int pci_init_multifunction(PCIBus *bus, PCIDevice *dev)
> > > > > +{
> > > > 
> > > > IMO we should just add in pci_register_device:
> > > > 
> > > > 	if (d->cap_resent & QEMU_PCI_CAP_MULTIFUNCTION) {
> > > > 		dev->config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
> > > > 	} else if (PCI_FUNC(dev->devfn)) {
> > > > 		error_report("PCI: single function device can't be populated %x.%x",
> > > > 			     PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
> > > > 		return -1;
> > > > 	}
> > > > 
> > > > And be done with it.
> > > 
> > > Unfortunately there are two ways to set the bit.
> > > - set the bit of all the function.
> > >   Example: Intel X58(north bridge.)
> > > - set the bit of only function = 0.
> > >   Example: PIIX3, PIIX4, ... ICH10.
> > > 
> > > lspci -x would help to see what your pc has.
> > 
> > This is correct:
> > 	The order in which configuration software probes devices residing on a
> > 	bus segment is not specified. Typically, configuration software either
> > 	starts with Device Number 0 and works up or starts at Device Number 31
> > 	and works down. If a single function device is detected (i.e., bit 7 in
> > 	the Header Type register of function 0 is 0), no more functions for that
> > 	Device Number will be checked. If a multi-function device is detected
> > 	(i.e., bit 7 in the Header Type register of function 0 is 1), then all
> > 	remaining Function Numbers will be checked.
> > 
> > So what my proposal would do is set the bit for all functions.
> > I don't think it matters - do you?
> > If you want to try and match the behaviour you observe
> > in actual hardware exactly, we can add
> > 	/* Some devices only set multifunction status bit in function 0. */
> > 	static void pci_clear_multifunction(...) {
> > 		if (PCI_FUNC(dev->devfn))
> > 			dev->config[PCI_HEADER_TYPE] &= ~PCI_HEADER_TYPE_MULTI_FUNCTION;
> > 	}
> > 
> > and devices can call this in their init routine.
> 
> Personally I'm okay with either way as long as you accept the patch series.
> 
> In fact the existing qemu PIIX3/4 sets the bit of only function 0
> and doesn't set the bit of function > 0.
> - It would be better not to change the existing behavior.
> - If all functions in a device are required to set multifunction bit,
>   pci ide and ochi usb initialization code must be touched
>   for pc and mips malta.
> Said that, which way do you want to go?
> - The current patches.(v5 9/9)
>   My preference.

I think that your patchset is correct, I'll take it after a bit of review.
I will try to find a bit of time to rearrange the code in pci.c a bit,
but this can come afterwards.

I think it's unfortunate that we need to scan the bus to check
other functions in the same slot, but I don't have better ideas.

> - require all functions in a device to set multi function bit.
>   patch pci ide, ochi usb
>   It will result in qemu behavior change.
> 
> - require all functions in a device to set multi function bit.
>   patch pci ide, ochi usb.
>   But try not to chage the existing qemu behavior by using
>   pci_clear_multifunction()
> 
> -- 
> yamahata
Isaku Yamahata - June 23, 2010, 11:48 p.m.
On Wed, Jun 23, 2010 at 01:41:57PM +0300, Michael S. Tsirkin wrote:
> I will try to find a bit of time to rearrange the code in pci.c a bit,
> but this can come afterwards.

I'd like to move pci_bridge_xxx into pci_bridge.c because
pci.c has grown.
Are you okay with it? Or should I wait for your rearrangement?
I'd like to avoid stepping on each other's toes.
Michael S. Tsirkin - June 24, 2010, 8:19 a.m.
On Thu, Jun 24, 2010 at 08:48:42AM +0900, Isaku Yamahata wrote:
> On Wed, Jun 23, 2010 at 01:41:57PM +0300, Michael S. Tsirkin wrote:
> > I will try to find a bit of time to rearrange the code in pci.c a bit,
> > but this can come afterwards.
> 
> I'd like to move pci_bridge_xxx into pci_bridge.c because
> pci.c has grown.
> Are you okay with it? Or should I wait for your rearrange?
> I'd like to avoid stepping on each other.

I'm looking at rearranging it myself, but doing some API cleanup first.
It will be easier if you keep adding to pci.c meanwhile.

> -- 
> yamahata

Patch

diff --git a/hw/pci.c b/hw/pci.c
index b6c0a10..abc3c1d 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -67,6 +67,7 @@  static struct BusInfo pci_bus_info = {
         DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1),
         DEFINE_PROP_STRING("romfile", PCIDevice, romfile),
         DEFINE_PROP_UINT32("rombar",  PCIDevice, rom_bar, 1),
+        DEFINE_PROP_UINT8("multifunction",  PCIDevice, mf, 0),
         DEFINE_PROP_END_OF_LIST()
     }
 };
@@ -575,6 +576,44 @@  static void pci_init_wmask_bridge(PCIDevice *d)
     pci_set_word(d->wmask + PCI_BRIDGE_CONTROL, 0xffff);
 }
 
+static int pci_init_multifunction(PCIBus *bus, PCIDevice *dev)
+{
+    uint8_t slot = PCI_SLOT(dev->devfn);
+    uint8_t func = PCI_FUNC(dev->devfn);
+
+    /* we are here before bus->devices[dev->devfn] = dev */
+    assert(!bus->devices[dev->devfn]);
+
+    if (dev->mf) {
+        dev->config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
+    }
+
+    if (func) {
+        PCIDevice *d = bus->devices[PCI_DEVFN(slot, 0)];
+        if (d && !d->mf) {
+            /* function 0 should set multifunction bit */
+            error_report("PCI: single function device can't be populated "
+                         "in function %x.%x", slot, func);
+            return -1;
+        }
+        return 0;
+    }
+
+    if (dev->mf) {
+        return 0;
+    }
+    /* function 0 indicates single function, so function > 0 must be NULL */
+    for (func = 1; func < PCI_FUNC_MAX; ++func) {
+        if (bus->devices[PCI_DEVFN(slot, func)]) {
+            error_report("PCI: %x.0 indicates single function, "
+                         "but %x.%x is already populated.",
+                         slot, slot, func);
+            return -1;
+        }
+    }
+    return 0;
+}
+
 static void pci_config_alloc(PCIDevice *pci_dev)
 {
     int config_size = pci_config_size(pci_dev);
@@ -629,6 +668,9 @@  static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
     if (is_bridge) {
         pci_init_wmask_bridge(pci_dev);
     }
+    if (pci_init_multifunction(bus, pci_dev)) {
+        return NULL;
+    }
 
     if (!config_read)
         config_read = pci_default_read_config;
@@ -1652,22 +1694,34 @@  void pci_qdev_register_many(PCIDeviceInfo *info)
     }
 }
 
-PCIDevice *pci_create(PCIBus *bus, int devfn, const char *name)
+PCIDevice *pci_create_mf(PCIBus *bus, int devfn, uint8_t mf, const char *name)
 {
     DeviceState *dev;
 
     dev = qdev_create(&bus->qbus, name);
     qdev_prop_set_uint32(dev, "addr", devfn);
+    qdev_prop_set_uint8(dev, "multifunction", mf);
     return DO_UPCAST(PCIDevice, qdev, dev);
 }
 
-PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name)
+PCIDevice *pci_create_simple_mf(PCIBus *bus, int devfn, uint8_t mf,
+                                const char *name)
 {
-    PCIDevice *dev = pci_create(bus, devfn, name);
+    PCIDevice *dev = pci_create_mf(bus, devfn, mf, name);
     qdev_init_nofail(&dev->qdev);
     return dev;
 }
 
+PCIDevice *pci_create(PCIBus *bus, int devfn, const char *name)
+{
+    return pci_create_mf(bus, devfn, 0, name);
+}
+
+PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name)
+{
+    return pci_create_simple_mf(bus, devfn, 0, name);
+}
+
 static int pci_find_space(PCIDevice *pdev, uint8_t size)
 {
     int config_size = pci_config_size(pdev);
diff --git a/hw/pci.h b/hw/pci.h
index 76adc66..685fd44 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -131,6 +131,7 @@  struct PCIDevice {
     /* the following fields are read only */
     PCIBus *bus;
     uint32_t devfn;
+    uint8_t mf;         /* multi function capable device */
     char name[64];
     PCIIORegion io_regions[PCI_NUM_REGIONS];
 
@@ -343,6 +344,9 @@  typedef struct {
 void pci_qdev_register(PCIDeviceInfo *info);
 void pci_qdev_register_many(PCIDeviceInfo *info);
 
+PCIDevice *pci_create_mf(PCIBus *bus, int devfn, uint8_t mf, const char *name);
+PCIDevice *pci_create_simple_mf(PCIBus *bus, int devfn, uint8_t mf,
+                                const char *name);
 PCIDevice *pci_create(PCIBus *bus, int devfn, const char *name);
 PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name);