Patchwork [1/6] Make config space accessor host bus trapable

login
register
mail settings
Submitter Alexander Graf
Date Jan. 3, 2010, 1:50 a.m.
Message ID <1262483450-15206-2-git-send-email-agraf@suse.de>
Download mbox | patch
Permalink /patch/42034/
State New
Headers show

Comments

Alexander Graf - Jan. 3, 2010, 1:50 a.m.
Different host buses may have different layouts for config space accessors.

The Mac U3 for example uses the following define to access Type 0 (directly
attached) devices:

  #define MACRISC_CFA0(devfn, off)        \
        ((1 << (unsigned int)PCI_SLOT(dev_fn)) \
        | (((unsigned int)PCI_FUNC(dev_fn)) << 8) \
        | (((unsigned int)(off)) & 0xFCUL))

Obviously, this is different from the encoding we interpret in qemu. In
order to let host controllers take some influence there, we can just
introduce a converter function that takes whatever accessor we have and
makes a qemu understandable one out of it.

This patch is the groundwork for Uninorth PCI config space fixes.

Signed-off-by: Alexander Graf <agraf@suse.de>
CC: Michael S. Tsirkin <mst@redhat.com>
---
 hw/apb_pci.c           |    1 +
 hw/grackle_pci.c       |    1 +
 hw/gt64xxx.c           |    1 +
 hw/pci_host.c          |   11 +++++++++++
 hw/pci_host.h          |    5 +++++
 hw/pci_host_template.h |   30 ++++++++++++++++++------------
 hw/piix_pci.c          |    1 +
 hw/ppc4xx_pci.c        |    1 +
 hw/ppce500_pci.c       |    1 +
 hw/unin_pci.c          |    1 +
 10 files changed, 41 insertions(+), 12 deletions(-)
Michael S. Tsirkin - Jan. 3, 2010, 3:45 p.m.
On Sun, Jan 03, 2010 at 02:50:45AM +0100, Alexander Graf wrote:
> Different host buses may have different layouts for config space accessors.
> 
> The Mac U3 for example uses the following define to access Type 0 (directly
> attached) devices:
> 
>   #define MACRISC_CFA0(devfn, off)        \
>         ((1 << (unsigned int)PCI_SLOT(dev_fn)) \
>         | (((unsigned int)PCI_FUNC(dev_fn)) << 8) \
>         | (((unsigned int)(off)) & 0xFCUL))
> 
> Obviously, this is different from the encoding we interpret in qemu. In
> order to let host controllers take some influence there, we can just
> introduce a converter function that takes whatever accessor we have and
> makes a qemu understandable one out of it.
> 
> This patch is the groundwork for Uninorth PCI config space fixes.
> 
> Signed-off-by: Alexander Graf <agraf@suse.de>
> CC: Michael S. Tsirkin <mst@redhat.com>

Thanks!

It always bothered me that the code in pci_host uses x86 encoding and
other architectures are converted to x86.  As long as we are adding
redirection, maybe we should get rid of this, and make get_config_reg
return register and devfn separately, so we don't need to encode/decode
back and forth?

OTOH if we are after a quick fix, won't it be simpler to have
unin_pci simply use its own routines instead of pci_host_conf_register_mmio
etc?

> ---
>  hw/apb_pci.c           |    1 +
>  hw/grackle_pci.c       |    1 +
>  hw/gt64xxx.c           |    1 +
>  hw/pci_host.c          |   11 +++++++++++
>  hw/pci_host.h          |    5 +++++
>  hw/pci_host_template.h |   30 ++++++++++++++++++------------
>  hw/piix_pci.c          |    1 +
>  hw/ppc4xx_pci.c        |    1 +
>  hw/ppce500_pci.c       |    1 +
>  hw/unin_pci.c          |    1 +
>  10 files changed, 41 insertions(+), 12 deletions(-)
> 
> diff --git a/hw/apb_pci.c b/hw/apb_pci.c
> index f05308b..fedb84c 100644
> --- a/hw/apb_pci.c
> +++ b/hw/apb_pci.c
> @@ -222,6 +222,7 @@ PCIBus *pci_apb_init(target_phys_addr_t special_base,
>      /* mem_data */
>      sysbus_mmio_map(s, 3, mem_base);
>      d = FROM_SYSBUS(APBState, s);
> +    pci_host_init(&d->host_state);
>      d->host_state.bus = pci_register_bus(&d->busdev.qdev, "pci",
>                                           pci_apb_set_irq, pci_pbm_map_irq, pic,
>                                           0, 32);
> diff --git a/hw/grackle_pci.c b/hw/grackle_pci.c
> index ee4fed5..7feba63 100644
> --- a/hw/grackle_pci.c
> +++ b/hw/grackle_pci.c
> @@ -88,6 +88,7 @@ PCIBus *pci_grackle_init(uint32_t base, qemu_irq *pic)
>      qdev_init_nofail(dev);
>      s = sysbus_from_qdev(dev);
>      d = FROM_SYSBUS(GrackleState, s);
> +    pci_host_init(&d->host_state);
>      d->host_state.bus = pci_register_bus(&d->busdev.qdev, "pci",
>                                           pci_grackle_set_irq,
>                                           pci_grackle_map_irq,
> diff --git a/hw/gt64xxx.c b/hw/gt64xxx.c
> index fb7f5bd..8cf93ca 100644
> --- a/hw/gt64xxx.c
> +++ b/hw/gt64xxx.c
> @@ -1120,6 +1120,7 @@ PCIBus *pci_gt64120_init(qemu_irq *pic)
>      s = qemu_mallocz(sizeof(GT64120State));
>      s->pci = qemu_mallocz(sizeof(GT64120PCIState));
>  
> +    pci_host_init(s->pci);
>      s->pci->bus = pci_register_bus(NULL, "pci",
>                                     pci_gt64120_set_irq, pci_gt64120_map_irq,
>                                     pic, 144, 4);
> diff --git a/hw/pci_host.c b/hw/pci_host.c
> index eeb8dee..2d12887 100644
> --- a/hw/pci_host.c
> +++ b/hw/pci_host.c
> @@ -228,3 +228,14 @@ void pci_host_data_register_ioport(pio_addr_t ioport, PCIHostState *s)
>      register_ioport_read(ioport, 4, 2, pci_host_data_readw_ioport, s);
>      register_ioport_read(ioport, 4, 4, pci_host_data_readl_ioport, s);
>  }
> +
> +/* This implements the default x86 way of interpreting the config register */
> +static uint32_t pci_host_get_config_reg(PCIHostState *s, uint32_t addr)
> +{
> +    return s->config_reg | (addr & 3);
> +}
> +
> +void pci_host_init(PCIHostState *s)
> +{
> +    s->get_config_reg = pci_host_get_config_reg;
> +}

So config_reg is always set but only used for PC now?
This is not very pretty ...

> diff --git a/hw/pci_host.h b/hw/pci_host.h
> index a006687..90a4c63 100644
> --- a/hw/pci_host.h
> +++ b/hw/pci_host.h
> @@ -30,14 +30,19 @@
>  
>  #include "sysbus.h"
>  
> +/* for config space access */
> +typedef uint32_t (*pci_config_reg_fn)(PCIHostState *s, uint32_t addr);
> +
>  struct PCIHostState {
>      SysBusDevice busdev;
> +    pci_config_reg_fn get_config_reg;
>      uint32_t config_reg;
>      PCIBus *bus;
>  };
>  
>  void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, int len);
>  uint32_t pci_data_read(PCIBus *s, uint32_t addr, int len);
> +void pci_host_init(PCIHostState *s);
>  
>  /* for mmio */
>  int pci_host_conf_register_mmio(PCIHostState *s);
> diff --git a/hw/pci_host_template.h b/hw/pci_host_template.h
> index 11e6c88..55aa85e 100644
> --- a/hw/pci_host_template.h
> +++ b/hw/pci_host_template.h
> @@ -29,48 +29,52 @@ static void glue(pci_host_data_writeb, PCI_HOST_SUFFIX)(
>      void* opaque, PCI_ADDR_T addr, uint32_t val)
>  {
>      PCIHostState *s = opaque;
> +    uint32_t config_reg = s->get_config_reg(s, (uint32_t)addr);
>  
>      PCI_DPRINTF("writeb addr " TARGET_FMT_plx " val %x\n",
>                  (target_phys_addr_t)addr, val);
> -    if (s->config_reg & (1u << 31))
> -        pci_data_write(s->bus, s->config_reg | (addr & 3), val, 1);
> +    if (config_reg & (1u << 31))
> +        pci_data_write(s->bus, config_reg, val, 1);
>  }
>  
>  static void glue(pci_host_data_writew, PCI_HOST_SUFFIX)(
>      void* opaque, PCI_ADDR_T addr, uint32_t val)
>  {
>      PCIHostState *s = opaque;
> +    uint32_t config_reg = s->get_config_reg(s, (uint32_t)addr);
>  #ifdef TARGET_WORDS_BIGENDIAN
>      val = bswap16(val);
>  #endif
>      PCI_DPRINTF("writew addr " TARGET_FMT_plx " val %x\n",
>                  (target_phys_addr_t)addr, val);
> -    if (s->config_reg & (1u << 31))
> -        pci_data_write(s->bus, s->config_reg | (addr & 3), val, 2);
> +    if (config_reg & (1u << 31))
> +        pci_data_write(s->bus, config_reg, val, 2);
>  }
>  
>  static void glue(pci_host_data_writel, PCI_HOST_SUFFIX)(
>      void* opaque, PCI_ADDR_T addr, uint32_t val)
>  {
>      PCIHostState *s = opaque;
> +    uint32_t config_reg = s->get_config_reg(s, (uint32_t)addr);
>  #ifdef TARGET_WORDS_BIGENDIAN
>      val = bswap32(val);
>  #endif
>      PCI_DPRINTF("writel addr " TARGET_FMT_plx " val %x\n",
>                  (target_phys_addr_t)addr, val);
> -    if (s->config_reg & (1u << 31))
> -        pci_data_write(s->bus, s->config_reg, val, 4);
> +    if (config_reg & (1u << 31))
> +        pci_data_write(s->bus, config_reg, val, 4);
>  }
>  
>  static uint32_t glue(pci_host_data_readb, PCI_HOST_SUFFIX)(
>      void* opaque, PCI_ADDR_T addr)
>  {
>      PCIHostState *s = opaque;
> +    uint32_t config_reg = s->get_config_reg(s, (uint32_t)addr);
>      uint32_t val;
>  
> -    if (!(s->config_reg & (1 << 31)))
> +    if (!(config_reg & (1 << 31)))
>          return 0xff;
> -    val = pci_data_read(s->bus, s->config_reg | (addr & 3), 1);
> +    val = pci_data_read(s->bus, config_reg, 1);
>      PCI_DPRINTF("readb addr " TARGET_FMT_plx " val %x\n",
>                  (target_phys_addr_t)addr, val);
>      return val;
> @@ -80,10 +84,11 @@ static uint32_t glue(pci_host_data_readw, PCI_HOST_SUFFIX)(
>      void* opaque, PCI_ADDR_T addr)
>  {
>      PCIHostState *s = opaque;
> +    uint32_t config_reg = s->get_config_reg(s, (uint32_t)addr);
>      uint32_t val;
> -    if (!(s->config_reg & (1 << 31)))
> +    if (!(config_reg & (1 << 31)))
>          return 0xffff;
> -    val = pci_data_read(s->bus, s->config_reg | (addr & 3), 2);
> +    val = pci_data_read(s->bus, config_reg, 2);
>      PCI_DPRINTF("readw addr " TARGET_FMT_plx " val %x\n",
>                  (target_phys_addr_t)addr, val);
>  #ifdef TARGET_WORDS_BIGENDIAN
> @@ -96,10 +101,11 @@ static uint32_t glue(pci_host_data_readl, PCI_HOST_SUFFIX)(
>      void* opaque, PCI_ADDR_T addr)
>  {
>      PCIHostState *s = opaque;
> +    uint32_t config_reg = s->get_config_reg(s, (uint32_t)addr);
>      uint32_t val;
> -    if (!(s->config_reg & (1 << 31)))
> +    if (!(config_reg))
>          return 0xffffffff;
> -    val = pci_data_read(s->bus, s->config_reg | (addr & 3), 4);
> +    val = pci_data_read(s->bus, config_reg, 4);
>      PCI_DPRINTF("readl addr " TARGET_FMT_plx " val %x\n",
>                  (target_phys_addr_t)addr, val);
>  #ifdef TARGET_WORDS_BIGENDIAN
> diff --git a/hw/piix_pci.c b/hw/piix_pci.c
> index 1b67475..97500e0 100644
> --- a/hw/piix_pci.c
> +++ b/hw/piix_pci.c
> @@ -211,6 +211,7 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix3_devfn, qemu_irq *
>  
>      dev = qdev_create(NULL, "i440FX-pcihost");
>      s = FROM_SYSBUS(I440FXState, sysbus_from_qdev(dev));
> +    pci_host_init(s);
>      b = pci_bus_new(&s->busdev.qdev, NULL, 0);
>      s->bus = b;
>      qdev_init_nofail(dev);
> diff --git a/hw/ppc4xx_pci.c b/hw/ppc4xx_pci.c
> index 2d00b61..dd8e290 100644
> --- a/hw/ppc4xx_pci.c
> +++ b/hw/ppc4xx_pci.c
> @@ -357,6 +357,7 @@ PCIBus *ppc4xx_pci_init(CPUState *env, qemu_irq pci_irqs[4],
>  
>      controller = qemu_mallocz(sizeof(PPC4xxPCIState));
>  
> +    pci_host_init(&controller->pci_state);
>      controller->pci_state.bus = pci_register_bus(NULL, "pci",
>                                                   ppc4xx_pci_set_irq,
>                                                   ppc4xx_pci_map_irq,
> diff --git a/hw/ppce500_pci.c b/hw/ppce500_pci.c
> index a72fb86..5914183 100644
> --- a/hw/ppce500_pci.c
> +++ b/hw/ppce500_pci.c
> @@ -278,6 +278,7 @@ PCIBus *ppce500_pci_init(qemu_irq pci_irqs[4], target_phys_addr_t registers)
>  
>      controller = qemu_mallocz(sizeof(PPCE500PCIState));
>  
> +    pci_host_init(&controller->pci_state);
>      controller->pci_state.bus = pci_register_bus(NULL, "pci",
>                                                   mpc85xx_pci_set_irq,
>                                                   mpc85xx_pci_map_irq,
> diff --git a/hw/unin_pci.c b/hw/unin_pci.c
> index 3ae4e7a..fdb9401 100644
> --- a/hw/unin_pci.c
> +++ b/hw/unin_pci.c
> @@ -84,6 +84,7 @@ static int pci_unin_main_init_device(SysBusDevice *dev)
>      /* Uninorth main bus */
>      s = FROM_SYSBUS(UNINState, dev);
>  
> +    pci_host_init(&s->host_state);
>      pci_mem_config = pci_host_conf_register_mmio(&s->host_state);
>      pci_mem_data = pci_host_data_register_mmio(&s->host_state);
>      sysbus_init_mmio(dev, 0x1000, pci_mem_config);
> -- 
> 1.6.0.2
> 
>
Alexander Graf - Jan. 3, 2010, 4:09 p.m.
On 03.01.2010, at 16:45, Michael S. Tsirkin wrote:

> On Sun, Jan 03, 2010 at 02:50:45AM +0100, Alexander Graf wrote:
>> Different host buses may have different layouts for config space accessors.
>> 
>> The Mac U3 for example uses the following define to access Type 0 (directly
>> attached) devices:
>> 
>>  #define MACRISC_CFA0(devfn, off)        \
>>        ((1 << (unsigned int)PCI_SLOT(dev_fn)) \
>>        | (((unsigned int)PCI_FUNC(dev_fn)) << 8) \
>>        | (((unsigned int)(off)) & 0xFCUL))
>> 
>> Obviously, this is different from the encoding we interpret in qemu. In
>> order to let host controllers take some influence there, we can just
>> introduce a converter function that takes whatever accessor we have and
>> makes a qemu understandable one out of it.
>> 
>> This patch is the groundwork for Uninorth PCI config space fixes.
>> 
>> Signed-off-by: Alexander Graf <agraf@suse.de>
>> CC: Michael S. Tsirkin <mst@redhat.com>
> 
> Thanks!
> 
> It always bothered me that the code in pci_host uses x86 encoding and
> other architectures are converted to x86.  As long as we are adding
> redirection, maybe we should get rid of this, and make get_config_reg
> return register and devfn separately, so we don't need to encode/decode
> back and forth?

Hmm, when touching code I'm not 100% sure what I'm doing I usually try to build on stuff that is there already. That's exactly what happened here.

I'm personally not against defining the x86 format as qemu default. That way everyone knows what to deal with. I'm not sure if PCI defines anything that couldn't be represented by the x86 encoding in 32 bits. I actually doubt it. So it seems like a pretty good fit, especially given that all the other code is already in place.

> OTOH if we are after a quick fix, won't it be simpler to have
> unin_pci simply use its own routines instead of pci_host_conf_register_mmio
> etc?

Hm, I figured this is less work :-).

> 
>> ---
>> hw/apb_pci.c           |    1 +
>> hw/grackle_pci.c       |    1 +
>> hw/gt64xxx.c           |    1 +
>> hw/pci_host.c          |   11 +++++++++++
>> hw/pci_host.h          |    5 +++++
>> hw/pci_host_template.h |   30 ++++++++++++++++++------------
>> hw/piix_pci.c          |    1 +
>> hw/ppc4xx_pci.c        |    1 +
>> hw/ppce500_pci.c       |    1 +
>> hw/unin_pci.c          |    1 +
>> 10 files changed, 41 insertions(+), 12 deletions(-)
>> 
>> diff --git a/hw/apb_pci.c b/hw/apb_pci.c
>> index f05308b..fedb84c 100644
>> --- a/hw/apb_pci.c
>> +++ b/hw/apb_pci.c
>> @@ -222,6 +222,7 @@ PCIBus *pci_apb_init(target_phys_addr_t special_base,
>>     /* mem_data */
>>     sysbus_mmio_map(s, 3, mem_base);
>>     d = FROM_SYSBUS(APBState, s);
>> +    pci_host_init(&d->host_state);
>>     d->host_state.bus = pci_register_bus(&d->busdev.qdev, "pci",
>>                                          pci_apb_set_irq, pci_pbm_map_irq, pic,
>>                                          0, 32);
>> diff --git a/hw/grackle_pci.c b/hw/grackle_pci.c
>> index ee4fed5..7feba63 100644
>> --- a/hw/grackle_pci.c
>> +++ b/hw/grackle_pci.c
>> @@ -88,6 +88,7 @@ PCIBus *pci_grackle_init(uint32_t base, qemu_irq *pic)
>>     qdev_init_nofail(dev);
>>     s = sysbus_from_qdev(dev);
>>     d = FROM_SYSBUS(GrackleState, s);
>> +    pci_host_init(&d->host_state);
>>     d->host_state.bus = pci_register_bus(&d->busdev.qdev, "pci",
>>                                          pci_grackle_set_irq,
>>                                          pci_grackle_map_irq,
>> diff --git a/hw/gt64xxx.c b/hw/gt64xxx.c
>> index fb7f5bd..8cf93ca 100644
>> --- a/hw/gt64xxx.c
>> +++ b/hw/gt64xxx.c
>> @@ -1120,6 +1120,7 @@ PCIBus *pci_gt64120_init(qemu_irq *pic)
>>     s = qemu_mallocz(sizeof(GT64120State));
>>     s->pci = qemu_mallocz(sizeof(GT64120PCIState));
>> 
>> +    pci_host_init(s->pci);
>>     s->pci->bus = pci_register_bus(NULL, "pci",
>>                                    pci_gt64120_set_irq, pci_gt64120_map_irq,
>>                                    pic, 144, 4);
>> diff --git a/hw/pci_host.c b/hw/pci_host.c
>> index eeb8dee..2d12887 100644
>> --- a/hw/pci_host.c
>> +++ b/hw/pci_host.c
>> @@ -228,3 +228,14 @@ void pci_host_data_register_ioport(pio_addr_t ioport, PCIHostState *s)
>>     register_ioport_read(ioport, 4, 2, pci_host_data_readw_ioport, s);
>>     register_ioport_read(ioport, 4, 4, pci_host_data_readl_ioport, s);
>> }
>> +
>> +/* This implements the default x86 way of interpreting the config register */
>> +static uint32_t pci_host_get_config_reg(PCIHostState *s, uint32_t addr)
>> +{
>> +    return s->config_reg | (addr & 3);
>> +}
>> +
>> +void pci_host_init(PCIHostState *s)
>> +{
>> +    s->get_config_reg = pci_host_get_config_reg;
>> +}
> 
> So config_reg is always set but only used for PC now?
> This is not very pretty ...

config_reg is used by the template.h helpers. So everyone should use it. get_config_reg is always set. It's set to pci_host_get_config_reg as default and can be overridden by a host bus if it knows it's using a different layout.


Alex
Michael S. Tsirkin - Jan. 3, 2010, 5:29 p.m.
On Sun, Jan 03, 2010 at 05:09:32PM +0100, Alexander Graf wrote:
> 
> On 03.01.2010, at 16:45, Michael S. Tsirkin wrote:
> 
> > On Sun, Jan 03, 2010 at 02:50:45AM +0100, Alexander Graf wrote:
> >> Different host buses may have different layouts for config space accessors.
> >> 
> >> The Mac U3 for example uses the following define to access Type 0 (directly
> >> attached) devices:
> >> 
> >>  #define MACRISC_CFA0(devfn, off)        \
> >>        ((1 << (unsigned int)PCI_SLOT(dev_fn)) \
> >>        | (((unsigned int)PCI_FUNC(dev_fn)) << 8) \
> >>        | (((unsigned int)(off)) & 0xFCUL))
> >> 
> >> Obviously, this is different from the encoding we interpret in qemu. In
> >> order to let host controllers take some influence there, we can just
> >> introduce a converter function that takes whatever accessor we have and
> >> makes a qemu understandable one out of it.
> >> 
> >> This patch is the groundwork for Uninorth PCI config space fixes.
> >> 
> >> Signed-off-by: Alexander Graf <agraf@suse.de>
> >> CC: Michael S. Tsirkin <mst@redhat.com>
> > 
> > Thanks!
> > 
> > It always bothered me that the code in pci_host uses x86 encoding and
> > other architectures are converted to x86.  As long as we are adding
> > redirection, maybe we should get rid of this, and make get_config_reg
> > return register and devfn separately, so we don't need to encode/decode
> > back and forth?
> 
> Hmm, when touching code I'm not 100% sure what I'm doing I usually try to build on stuff that is there already. That's exactly what happened here.
> 
> I'm personally not against defining the x86 format as qemu default. That way everyone knows what to deal with. I'm not sure if PCI defines anything that couldn't be represented by the x86 encoding in 32 bits. I actually doubt it. So it seems like a pretty good fit, especially given that all the other code is already in place.
> 
> > OTOH if we are after a quick fix, won't it be simpler to have
> > unin_pci simply use its own routines instead of pci_host_conf_register_mmio
> > etc?
> 
> Hm, I figured this is less work :-).

Let me explain the idea: we have:

	static void pci_host_config_writel_ioport(void *opaque,
						  uint32_t addr, uint32_t val)
	{
	    PCIHostState *s = opaque;

	    PCI_DPRINTF("%s addr %"PRIx32 " val %"PRIx32"\n", __func__, addr,
	val);
	    s->config_reg = val;
	}

	static uint32_t pci_host_config_readl_ioport(void *opaque, uint32_t
	addr)
	{
	    PCIHostState *s = opaque;
	    uint32_t val = s->config_reg;

	    PCI_DPRINTF("%s addr %"PRIx32" val %"PRIx32"\n", __func__, addr,
	val);
	    return val;
	}

	void pci_host_conf_register_ioport(pio_addr_t ioport, PCIHostState *s)
	{
	    register_ioport_write(ioport, 4, 4, pci_host_config_writel_ioport,
	s);
	    register_ioport_read(ioport, 4, 4, pci_host_config_readl_ioport, s);
	}

If instead of a simple config_reg = val we translate to pc format
here, the rest will work. No?


> > 
> >> ---
> >> hw/apb_pci.c           |    1 +
> >> hw/grackle_pci.c       |    1 +
> >> hw/gt64xxx.c           |    1 +
> >> hw/pci_host.c          |   11 +++++++++++
> >> hw/pci_host.h          |    5 +++++
> >> hw/pci_host_template.h |   30 ++++++++++++++++++------------
> >> hw/piix_pci.c          |    1 +
> >> hw/ppc4xx_pci.c        |    1 +
> >> hw/ppce500_pci.c       |    1 +
> >> hw/unin_pci.c          |    1 +
> >> 10 files changed, 41 insertions(+), 12 deletions(-)
> >> 
> >> diff --git a/hw/apb_pci.c b/hw/apb_pci.c
> >> index f05308b..fedb84c 100644
> >> --- a/hw/apb_pci.c
> >> +++ b/hw/apb_pci.c
> >> @@ -222,6 +222,7 @@ PCIBus *pci_apb_init(target_phys_addr_t special_base,
> >>     /* mem_data */
> >>     sysbus_mmio_map(s, 3, mem_base);
> >>     d = FROM_SYSBUS(APBState, s);
> >> +    pci_host_init(&d->host_state);
> >>     d->host_state.bus = pci_register_bus(&d->busdev.qdev, "pci",
> >>                                          pci_apb_set_irq, pci_pbm_map_irq, pic,
> >>                                          0, 32);
> >> diff --git a/hw/grackle_pci.c b/hw/grackle_pci.c
> >> index ee4fed5..7feba63 100644
> >> --- a/hw/grackle_pci.c
> >> +++ b/hw/grackle_pci.c
> >> @@ -88,6 +88,7 @@ PCIBus *pci_grackle_init(uint32_t base, qemu_irq *pic)
> >>     qdev_init_nofail(dev);
> >>     s = sysbus_from_qdev(dev);
> >>     d = FROM_SYSBUS(GrackleState, s);
> >> +    pci_host_init(&d->host_state);
> >>     d->host_state.bus = pci_register_bus(&d->busdev.qdev, "pci",
> >>                                          pci_grackle_set_irq,
> >>                                          pci_grackle_map_irq,
> >> diff --git a/hw/gt64xxx.c b/hw/gt64xxx.c
> >> index fb7f5bd..8cf93ca 100644
> >> --- a/hw/gt64xxx.c
> >> +++ b/hw/gt64xxx.c
> >> @@ -1120,6 +1120,7 @@ PCIBus *pci_gt64120_init(qemu_irq *pic)
> >>     s = qemu_mallocz(sizeof(GT64120State));
> >>     s->pci = qemu_mallocz(sizeof(GT64120PCIState));
> >> 
> >> +    pci_host_init(s->pci);
> >>     s->pci->bus = pci_register_bus(NULL, "pci",
> >>                                    pci_gt64120_set_irq, pci_gt64120_map_irq,
> >>                                    pic, 144, 4);
> >> diff --git a/hw/pci_host.c b/hw/pci_host.c
> >> index eeb8dee..2d12887 100644
> >> --- a/hw/pci_host.c
> >> +++ b/hw/pci_host.c
> >> @@ -228,3 +228,14 @@ void pci_host_data_register_ioport(pio_addr_t ioport, PCIHostState *s)
> >>     register_ioport_read(ioport, 4, 2, pci_host_data_readw_ioport, s);
> >>     register_ioport_read(ioport, 4, 4, pci_host_data_readl_ioport, s);
> >> }
> >> +
> >> +/* This implements the default x86 way of interpreting the config register */
> >> +static uint32_t pci_host_get_config_reg(PCIHostState *s, uint32_t addr)
> >> +{
> >> +    return s->config_reg | (addr & 3);
> >> +}
> >> +
> >> +void pci_host_init(PCIHostState *s)
> >> +{
> >> +    s->get_config_reg = pci_host_get_config_reg;
> >> +}
> > 
> > So config_reg is always set but only used for PC now?
> > This is not very pretty ...
> 
> config_reg is used by the template.h helpers. So everyone should use it. get_config_reg is always set. It's set to pci_host_get_config_reg as default and can be overridden by a host bus if it knows it's using a different layout.
> 
> 
> Alex
Alexander Graf - Jan. 3, 2010, 5:40 p.m.
On 03.01.2010, at 18:29, Michael S. Tsirkin wrote:

> On Sun, Jan 03, 2010 at 05:09:32PM +0100, Alexander Graf wrote:
>> 
>> On 03.01.2010, at 16:45, Michael S. Tsirkin wrote:
>> 
>>> On Sun, Jan 03, 2010 at 02:50:45AM +0100, Alexander Graf wrote:
>>>> Different host buses may have different layouts for config space accessors.
>>>> 
>>>> The Mac U3 for example uses the following define to access Type 0 (directly
>>>> attached) devices:
>>>> 
>>>> #define MACRISC_CFA0(devfn, off)        \
>>>>       ((1 << (unsigned int)PCI_SLOT(dev_fn)) \
>>>>       | (((unsigned int)PCI_FUNC(dev_fn)) << 8) \
>>>>       | (((unsigned int)(off)) & 0xFCUL))
>>>> 
>>>> Obviously, this is different from the encoding we interpret in qemu. In
>>>> order to let host controllers take some influence there, we can just
>>>> introduce a converter function that takes whatever accessor we have and
>>>> makes a qemu understandable one out of it.
>>>> 
>>>> This patch is the groundwork for Uninorth PCI config space fixes.
>>>> 
>>>> Signed-off-by: Alexander Graf <agraf@suse.de>
>>>> CC: Michael S. Tsirkin <mst@redhat.com>
>>> 
>>> Thanks!
>>> 
>>> It always bothered me that the code in pci_host uses x86 encoding and
>>> other architectures are converted to x86.  As long as we are adding
>>> redirection, maybe we should get rid of this, and make get_config_reg
>>> return register and devfn separately, so we don't need to encode/decode
>>> back and forth?
>> 
>> Hmm, when touching code I'm not 100% sure what I'm doing I usually try to build on stuff that is there already. That's exactly what happened here.
>> 
>> I'm personally not against defining the x86 format as qemu default. That way everyone knows what to deal with. I'm not sure if PCI defines anything that couldn't be represented by the x86 encoding in 32 bits. I actually doubt it. So it seems like a pretty good fit, especially given that all the other code is already in place.
>> 
>>> OTOH if we are after a quick fix, won't it be simpler to have
>>> unin_pci simply use its own routines instead of pci_host_conf_register_mmio
>>> etc?
>> 
>> Hm, I figured this is less work :-).
> 
> Let me explain the idea: we have:
> 
> 	static void pci_host_config_writel_ioport(void *opaque,
> 						  uint32_t addr, uint32_t val)
> 	{
> 	    PCIHostState *s = opaque;
> 
> 	    PCI_DPRINTF("%s addr %"PRIx32 " val %"PRIx32"\n", __func__, addr,
> 	val);
> 	    s->config_reg = val;
> 	}
> 
> 	static uint32_t pci_host_config_readl_ioport(void *opaque, uint32_t
> 	addr)
> 	{
> 	    PCIHostState *s = opaque;
> 	    uint32_t val = s->config_reg;
> 
> 	    PCI_DPRINTF("%s addr %"PRIx32" val %"PRIx32"\n", __func__, addr,
> 	val);
> 	    return val;
> 	}
> 
> 	void pci_host_conf_register_ioport(pio_addr_t ioport, PCIHostState *s)
> 	{
> 	    register_ioport_write(ioport, 4, 4, pci_host_config_writel_ioport,
> 	s);
> 	    register_ioport_read(ioport, 4, 4, pci_host_config_readl_ioport, s);
> 	}
> 
> If instead of a simple config_reg = val we translate to pc format
> here, the rest will work. No?

Well, that'd mean I'd have to implement a config_reg read function and do the conversion backwards again there. Or I could just save it off in the unin state ... hmm ...

Either way, let's better get this done right. I'd rather want to have a proper framework in place in case someone else stumbles across something similar.

Alex
Michael S. Tsirkin - Jan. 3, 2010, 5:44 p.m.
On Sun, Jan 03, 2010 at 06:40:52PM +0100, Alexander Graf wrote:
> 
> On 03.01.2010, at 18:29, Michael S. Tsirkin wrote:
> 
> > On Sun, Jan 03, 2010 at 05:09:32PM +0100, Alexander Graf wrote:
> >> 
> >> On 03.01.2010, at 16:45, Michael S. Tsirkin wrote:
> >> 
> >>> On Sun, Jan 03, 2010 at 02:50:45AM +0100, Alexander Graf wrote:
> >>>> Different host buses may have different layouts for config space accessors.
> >>>> 
> >>>> The Mac U3 for example uses the following define to access Type 0 (directly
> >>>> attached) devices:
> >>>> 
> >>>> #define MACRISC_CFA0(devfn, off)        \
> >>>>       ((1 << (unsigned int)PCI_SLOT(dev_fn)) \
> >>>>       | (((unsigned int)PCI_FUNC(dev_fn)) << 8) \
> >>>>       | (((unsigned int)(off)) & 0xFCUL))
> >>>> 
> >>>> Obviously, this is different from the encoding we interpret in qemu. In
> >>>> order to let host controllers take some influence there, we can just
> >>>> introduce a converter function that takes whatever accessor we have and
> >>>> makes a qemu understandable one out of it.
> >>>> 
> >>>> This patch is the groundwork for Uninorth PCI config space fixes.
> >>>> 
> >>>> Signed-off-by: Alexander Graf <agraf@suse.de>
> >>>> CC: Michael S. Tsirkin <mst@redhat.com>
> >>> 
> >>> Thanks!
> >>> 
> >>> It always bothered me that the code in pci_host uses x86 encoding and
> >>> other architectures are converted to x86.  As long as we are adding
> >>> redirection, maybe we should get rid of this, and make get_config_reg
> >>> return register and devfn separately, so we don't need to encode/decode
> >>> back and forth?
> >> 
> >> Hmm, when touching code I'm not 100% sure what I'm doing I usually try to build on stuff that is there already. That's exactly what happened here.
> >> 
> >> I'm personally not against defining the x86 format as qemu default. That way everyone knows what to deal with. I'm not sure if PCI defines anything that couldn't be represented by the x86 encoding in 32 bits. I actually doubt it. So it seems like a pretty good fit, especially given that all the other code is already in place.
> >> 
> >>> OTOH if we are after a quick fix, won't it be simpler to have
> >>> unin_pci simply use its own routines instead of pci_host_conf_register_mmio
> >>> etc?
> >> 
> >> Hm, I figured this is less work :-).
> > 
> > Let me explain the idea: we have:
> > 
> > 	static void pci_host_config_writel_ioport(void *opaque,
> > 						  uint32_t addr, uint32_t val)
> > 	{
> > 	    PCIHostState *s = opaque;
> > 
> > 	    PCI_DPRINTF("%s addr %"PRIx32 " val %"PRIx32"\n", __func__, addr,
> > 	val);
> > 	    s->config_reg = val;
> > 	}
> > 
> > 	static uint32_t pci_host_config_readl_ioport(void *opaque, uint32_t
> > 	addr)
> > 	{
> > 	    PCIHostState *s = opaque;
> > 	    uint32_t val = s->config_reg;
> > 
> > 	    PCI_DPRINTF("%s addr %"PRIx32" val %"PRIx32"\n", __func__, addr,
> > 	val);
> > 	    return val;
> > 	}
> > 
> > 	void pci_host_conf_register_ioport(pio_addr_t ioport, PCIHostState *s)
> > 	{
> > 	    register_ioport_write(ioport, 4, 4, pci_host_config_writel_ioport,
> > 	s);
> > 	    register_ioport_read(ioport, 4, 4, pci_host_config_readl_ioport, s);
> > 	}
> > 
> > If instead of a simple config_reg = val we translate to pc format
> > here, the rest will work. No?
> 
> Well, that'd mean I'd have to implement a config_reg read function and do the conversion backwards again there. Or I could just save it off in the unin state ... hmm ...

Right.

> Either way, let's better get this done right. I'd rather want to have a proper framework in place in case someone else stumbles across something similar.
> 
> Alex

There's already ugliness with swap/noswap versions in there.  Anything
that claims to be a proper framework would need to address that as well
IMO.
Alexander Graf - Jan. 3, 2010, 5:50 p.m.
On 03.01.2010, at 18:44, Michael S. Tsirkin wrote:

> On Sun, Jan 03, 2010 at 06:40:52PM +0100, Alexander Graf wrote:
>> 
>> On 03.01.2010, at 18:29, Michael S. Tsirkin wrote:
>> 
>>> On Sun, Jan 03, 2010 at 05:09:32PM +0100, Alexander Graf wrote:
>>>> 
>>>> On 03.01.2010, at 16:45, Michael S. Tsirkin wrote:
>>>> 
>>>>> On Sun, Jan 03, 2010 at 02:50:45AM +0100, Alexander Graf wrote:
>>>>>> Different host buses may have different layouts for config space accessors.
>>>>>> 
>>>>>> The Mac U3 for example uses the following define to access Type 0 (directly
>>>>>> attached) devices:
>>>>>> 
>>>>>> #define MACRISC_CFA0(devfn, off)        \
>>>>>>      ((1 << (unsigned int)PCI_SLOT(dev_fn)) \
>>>>>>      | (((unsigned int)PCI_FUNC(dev_fn)) << 8) \
>>>>>>      | (((unsigned int)(off)) & 0xFCUL))
>>>>>> 
>>>>>> Obviously, this is different from the encoding we interpret in qemu. In
>>>>>> order to let host controllers take some influence there, we can just
>>>>>> introduce a converter function that takes whatever accessor we have and
>>>>>> makes a qemu understandable one out of it.
>>>>>> 
>>>>>> This patch is the groundwork for Uninorth PCI config space fixes.
>>>>>> 
>>>>>> Signed-off-by: Alexander Graf <agraf@suse.de>
>>>>>> CC: Michael S. Tsirkin <mst@redhat.com>
>>>>> 
>>>>> Thanks!
>>>>> 
>>>>> It always bothered me that the code in pci_host uses x86 encoding and
>>>>> other architectures are converted to x86.  As long as we are adding
>>>>> redirection, maybe we should get rid of this, and make get_config_reg
>>>>> return register and devfn separately, so we don't need to encode/decode
>>>>> back and forth?
>>>> 
>>>> Hmm, when touching code I'm not 100% sure what I'm doing I usually try to build on stuff that is there already. That's exactly what happened here.
>>>> 
>>>> I'm personally not against defining the x86 format as qemu default. That way everyone knows what to deal with. I'm not sure if PCI defines anything that couldn't be represented by the x86 encoding in 32 bits. I actually doubt it. So it seems like a pretty good fit, especially given that all the other code is already in place.
>>>> 
>>>>> OTOH if we are after a quick fix, won't it be simpler to have
>>>>> unin_pci simply use its own routines instead of pci_host_conf_register_mmio
>>>>> etc?
>>>> 
>>>> Hm, I figured this is less work :-).
>>> 
>>> Let me explain the idea: we have:
>>> 
>>> 	static void pci_host_config_writel_ioport(void *opaque,
>>> 						  uint32_t addr, uint32_t val)
>>> 	{
>>> 	    PCIHostState *s = opaque;
>>> 
>>> 	    PCI_DPRINTF("%s addr %"PRIx32 " val %"PRIx32"\n", __func__, addr,
>>> 	val);
>>> 	    s->config_reg = val;
>>> 	}
>>> 
>>> 	static uint32_t pci_host_config_readl_ioport(void *opaque, uint32_t
>>> 	addr)
>>> 	{
>>> 	    PCIHostState *s = opaque;
>>> 	    uint32_t val = s->config_reg;
>>> 
>>> 	    PCI_DPRINTF("%s addr %"PRIx32" val %"PRIx32"\n", __func__, addr,
>>> 	val);
>>> 	    return val;
>>> 	}
>>> 
>>> 	void pci_host_conf_register_ioport(pio_addr_t ioport, PCIHostState *s)
>>> 	{
>>> 	    register_ioport_write(ioport, 4, 4, pci_host_config_writel_ioport,
>>> 	s);
>>> 	    register_ioport_read(ioport, 4, 4, pci_host_config_readl_ioport, s);
>>> 	}
>>> 
>>> If instead of a simple config_reg = val we translate to pc format
>>> here, the rest will work. No?
>> 
>> Well, that'd mean I'd have to implement a config_reg read function and do the conversion backwards again there. Or I could just save it off in the unin state ... hmm ...
> 
> Right.
> 
>> Either way, let's better get this done right. I'd rather want to have a proper framework in place in case someone else stumbles across something similar.
>> 
>> Alex
> 
> There's already ugliness with swap/noswap versions in there.  Anything
> that claims to be a proper framework would need to address that as well
> IMO.

Hm, so you'd rather wait for 5 years to have an all-in-one rework than incrementally improving the existing code? I can do the hacky version then :-).


Alex
Michael S. Tsirkin - Jan. 3, 2010, 6:06 p.m.
On Sun, Jan 03, 2010 at 06:50:15PM +0100, Alexander Graf wrote:
> 
> On 03.01.2010, at 18:44, Michael S. Tsirkin wrote:
> 
> > On Sun, Jan 03, 2010 at 06:40:52PM +0100, Alexander Graf wrote:
> >> 
> >> On 03.01.2010, at 18:29, Michael S. Tsirkin wrote:
> >> 
> >>> On Sun, Jan 03, 2010 at 05:09:32PM +0100, Alexander Graf wrote:
> >>>> 
> >>>> On 03.01.2010, at 16:45, Michael S. Tsirkin wrote:
> >>>> 
> >>>>> On Sun, Jan 03, 2010 at 02:50:45AM +0100, Alexander Graf wrote:
> >>>>>> Different host buses may have different layouts for config space accessors.
> >>>>>> 
> >>>>>> The Mac U3 for example uses the following define to access Type 0 (directly
> >>>>>> attached) devices:
> >>>>>> 
> >>>>>> #define MACRISC_CFA0(devfn, off)        \
> >>>>>>      ((1 << (unsigned int)PCI_SLOT(dev_fn)) \
> >>>>>>      | (((unsigned int)PCI_FUNC(dev_fn)) << 8) \
> >>>>>>      | (((unsigned int)(off)) & 0xFCUL))
> >>>>>> 
> >>>>>> Obviously, this is different from the encoding we interpret in qemu. In
> >>>>>> order to let host controllers take some influence there, we can just
> >>>>>> introduce a converter function that takes whatever accessor we have and
> >>>>>> makes a qemu understandable one out of it.
> >>>>>> 
> >>>>>> This patch is the groundwork for Uninorth PCI config space fixes.
> >>>>>> 
> >>>>>> Signed-off-by: Alexander Graf <agraf@suse.de>
> >>>>>> CC: Michael S. Tsirkin <mst@redhat.com>
> >>>>> 
> >>>>> Thanks!
> >>>>> 
> >>>>> It always bothered me that the code in pci_host uses x86 encoding and
> >>>>> other architectures are converted to x86.  As long as we are adding
> >>>>> redirection, maybe we should get rid of this, and make get_config_reg
> >>>>> return register and devfn separately, so we don't need to encode/decode
> >>>>> back and forth?
> >>>> 
> >>>> Hmm, when touching code I'm not 100% sure what I'm doing I usually try to build on stuff that is there already. That's exactly what happened here.
> >>>> 
> >>>> I'm personally not against defining the x86 format as qemu default. That way everyone knows what to deal with. I'm not sure if PCI defines anything that couldn't be represented by the x86 encoding in 32 bits. I actually doubt it. So it seems like a pretty good fit, especially given that all the other code is already in place.
> >>>> 
> >>>>> OTOH if we are after a quick fix, won't it be simpler to have
> >>>>> unin_pci simply use its own routines instead of pci_host_conf_register_mmio
> >>>>> etc?
> >>>> 
> >>>> Hm, I figured this is less work :-).
> >>> 
> >>> Let me explain the idea: we have:
> >>> 
> >>> 	static void pci_host_config_writel_ioport(void *opaque,
> >>> 						  uint32_t addr, uint32_t val)
> >>> 	{
> >>> 	    PCIHostState *s = opaque;
> >>> 
> >>> 	    PCI_DPRINTF("%s addr %"PRIx32 " val %"PRIx32"\n", __func__, addr,
> >>> 	val);
> >>> 	    s->config_reg = val;
> >>> 	}
> >>> 
> >>> 	static uint32_t pci_host_config_readl_ioport(void *opaque, uint32_t
> >>> 	addr)
> >>> 	{
> >>> 	    PCIHostState *s = opaque;
> >>> 	    uint32_t val = s->config_reg;
> >>> 
> >>> 	    PCI_DPRINTF("%s addr %"PRIx32" val %"PRIx32"\n", __func__, addr,
> >>> 	val);
> >>> 	    return val;
> >>> 	}
> >>> 
> >>> 	void pci_host_conf_register_ioport(pio_addr_t ioport, PCIHostState *s)
> >>> 	{
> >>> 	    register_ioport_write(ioport, 4, 4, pci_host_config_writel_ioport,
> >>> 	s);
> >>> 	    register_ioport_read(ioport, 4, 4, pci_host_config_readl_ioport, s);
> >>> 	}
> >>> 
> >>> If instead of a simple config_reg = val we translate to pc format
> >>> here, the rest will work. No?
> >> 
> >> Well, that'd mean I'd have to implement a config_reg read function and do the conversion backwards again there. Or I could just save it off in the unin state ... hmm ...
> > 
> > Right.
> > 
> >> Either way, let's better get this done right. I'd rather want to have a proper framework in place in case someone else stumbles across something similar.
> >> 
> >> Alex
> > 
> > There's already ugliness with swap/noswap versions in there.  Anything
> > that claims to be a proper framework would need to address that as well
> > IMO.
> 
> Hm, so you'd rather wait for 5 years to have an all-in-one rework than
> incrementally improving the existing code?

Not really, incremental improvements are good.  But what you posted does
not seem to clean up most code, what it really does is fixes ppc
unin_pci.  My feeling was since there's only one user for now maybe it
is better to just have device specific code rather than complicate
generic code. Makes sense?

We need to see several users before we add yet another level of
indirection.  If there is a level of indirection that solves the
swap/noswap ugliness as well that would show this is a good abstraction.
I think I even know what a good abstraction would be: decode address on
write and keep it in decoded form.

> I can do the hacky version then :-).
> 
> 
> Alex


I haven't seen the hacky version one so it's hard to evaluate which way
of fixing unin_pci is better. What is your opinion?
Blue Swirl - Jan. 3, 2010, 7:18 p.m.
On Sun, Jan 3, 2010 at 6:06 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
> On Sun, Jan 03, 2010 at 06:50:15PM +0100, Alexander Graf wrote:
>>
>> On 03.01.2010, at 18:44, Michael S. Tsirkin wrote:
>>
>> > On Sun, Jan 03, 2010 at 06:40:52PM +0100, Alexander Graf wrote:
>> >>
>> >> On 03.01.2010, at 18:29, Michael S. Tsirkin wrote:
>> >>
>> >>> On Sun, Jan 03, 2010 at 05:09:32PM +0100, Alexander Graf wrote:
>> >>>>
>> >>>> On 03.01.2010, at 16:45, Michael S. Tsirkin wrote:
>> >>>>
>> >>>>> On Sun, Jan 03, 2010 at 02:50:45AM +0100, Alexander Graf wrote:
>> >>>>>> Different host buses may have different layouts for config space accessors.
>> >>>>>>
>> >>>>>> The Mac U3 for example uses the following define to access Type 0 (directly
>> >>>>>> attached) devices:
>> >>>>>>
>> >>>>>> #define MACRISC_CFA0(devfn, off)        \
>> >>>>>>      ((1 << (unsigned int)PCI_SLOT(dev_fn)) \
>> >>>>>>      | (((unsigned int)PCI_FUNC(dev_fn)) << 8) \
>> >>>>>>      | (((unsigned int)(off)) & 0xFCUL))
>> >>>>>>
>> >>>>>> Obviously, this is different from the encoding we interpret in qemu. In
>> >>>>>> order to let host controllers take some influence there, we can just
>> >>>>>> introduce a converter function that takes whatever accessor we have and
>> >>>>>> makes a qemu understandable one out of it.
>> >>>>>>
>> >>>>>> This patch is the groundwork for Uninorth PCI config space fixes.
>> >>>>>>
>> >>>>>> Signed-off-by: Alexander Graf <agraf@suse.de>
>> >>>>>> CC: Michael S. Tsirkin <mst@redhat.com>
>> >>>>>
>> >>>>> Thanks!
>> >>>>>
>> >>>>> It always bothered me that the code in pci_host uses x86 encoding and
>> >>>>> other architectures are converted to x86.  As long as we are adding
>> >>>>> redirection, maybe we should get rid of this, and make get_config_reg
>> >>>>> return register and devfn separately, so we don't need to encode/decode
>> >>>>> back and forth?
>> >>>>
>> >>>> Hmm, when touching code I'm not 100% sure what I'm doing I usually try to build on stuff that is there already. That's exactly what happened here.
>> >>>>
>> >>>> I'm personally not against defining the x86 format as qemu default. That way everyone knows what to deal with. I'm not sure if PCI defines anything that couldn't be represented by the x86 encoding in 32 bits. I actually doubt it. So it seems like a pretty good fit, especially given that all the other code is already in place.
>> >>>>
>> >>>>> OTOH if we are after a quick fix, won't it be simpler to have
>> >>>>> unin_pci simply use its own routines instead of pci_host_conf_register_mmio
>> >>>>> etc?
>> >>>>
>> >>>> Hm, I figured this is less work :-).
>> >>>
>> >>> Let me explain the idea: we have:
>> >>>
>> >>>   static void pci_host_config_writel_ioport(void *opaque,
>> >>>                                             uint32_t addr, uint32_t val)
>> >>>   {
>> >>>       PCIHostState *s = opaque;
>> >>>
>> >>>       PCI_DPRINTF("%s addr %"PRIx32 " val %"PRIx32"\n", __func__, addr,
>> >>>   val);
>> >>>       s->config_reg = val;
>> >>>   }
>> >>>
>> >>>   static uint32_t pci_host_config_readl_ioport(void *opaque, uint32_t
>> >>>   addr)
>> >>>   {
>> >>>       PCIHostState *s = opaque;
>> >>>       uint32_t val = s->config_reg;
>> >>>
>> >>>       PCI_DPRINTF("%s addr %"PRIx32" val %"PRIx32"\n", __func__, addr,
>> >>>   val);
>> >>>       return val;
>> >>>   }
>> >>>
>> >>>   void pci_host_conf_register_ioport(pio_addr_t ioport, PCIHostState *s)
>> >>>   {
>> >>>       register_ioport_write(ioport, 4, 4, pci_host_config_writel_ioport,
>> >>>   s);
>> >>>       register_ioport_read(ioport, 4, 4, pci_host_config_readl_ioport, s);
>> >>>   }
>> >>>
>> >>> If instead of a simple config_reg = val we translate to pc format
>> >>> here, the rest will work. No?
>> >>
>> >> Well, that'd mean I'd have to implement a config_reg read function and do the conversion backwards again there. Or I could just save it off in the unin state ... hmm ...
>> >
>> > Right.
>> >
>> >> Either way, let's better get this done right. I'd rather want to have a proper framework in place in case someone else stumbles across something similar.
>> >>
>> >> Alex
>> >
>> > There's already ugliness with swap/noswap versions in there.  Anything
>> > that claims to be a proper framework would need to address that as well
>> > IMO.
>>
>> Hm, so you'd rather wait for 5 years to have an all-in-one rework than
>> incrementally improving the existing code?
>
> Not really, incremental improvements are good.  But what you posted does
> not seem to clean up most code, what it really does is fixes ppc
> unin_pci.  My feeling was since there's only one user for now maybe it
> is better to just have device specific code rather than complicate
> generic code. Makes sense?
>
> We need to see several users before we add yet another level of
> indirection.  If there is a level of indirection that solves the
> swap/noswap ugliness as well that would show this is a good abstraction.
> I think I even know what a good abstraction would be: decode address on
> write and keep it in decoded form.

I think Sparc64 PBM configuration cycles are also a bit different.
It's described in UltraSPARC User's Manual 805-0087, p.325.

Currently both QEMU and OpenBIOS just use something common, but as
soon as Linux boot gets so far as to probe PCI bus, we'd have to fix
that.
Alexander Graf - Jan. 3, 2010, 8:27 p.m.
On 03.01.2010, at 19:06, Michael S. Tsirkin wrote:

> On Sun, Jan 03, 2010 at 06:50:15PM +0100, Alexander Graf wrote:
>> 
>> On 03.01.2010, at 18:44, Michael S. Tsirkin wrote:
>> 
>>> On Sun, Jan 03, 2010 at 06:40:52PM +0100, Alexander Graf wrote:
>>>> 
>>>> On 03.01.2010, at 18:29, Michael S. Tsirkin wrote:
>>>> 
>>>>> On Sun, Jan 03, 2010 at 05:09:32PM +0100, Alexander Graf wrote:
>>>>>> 
>>>>>> On 03.01.2010, at 16:45, Michael S. Tsirkin wrote:
>>>>>> 
>>>>>>> On Sun, Jan 03, 2010 at 02:50:45AM +0100, Alexander Graf wrote:
>>>>>>>> Different host buses may have different layouts for config space accessors.
>>>>>>>> 
>>>>>>>> The Mac U3 for example uses the following define to access Type 0 (directly
>>>>>>>> attached) devices:
>>>>>>>> 
>>>>>>>> #define MACRISC_CFA0(devfn, off)        \
>>>>>>>>     ((1 << (unsigned int)PCI_SLOT(dev_fn)) \
>>>>>>>>     | (((unsigned int)PCI_FUNC(dev_fn)) << 8) \
>>>>>>>>     | (((unsigned int)(off)) & 0xFCUL))
>>>>>>>> 
>>>>>>>> Obviously, this is different from the encoding we interpret in qemu. In
>>>>>>>> order to let host controllers take some influence there, we can just
>>>>>>>> introduce a converter function that takes whatever accessor we have and
>>>>>>>> makes a qemu understandable one out of it.
>>>>>>>> 
>>>>>>>> This patch is the groundwork for Uninorth PCI config space fixes.
>>>>>>>> 
>>>>>>>> Signed-off-by: Alexander Graf <agraf@suse.de>
>>>>>>>> CC: Michael S. Tsirkin <mst@redhat.com>
>>>>>>> 
>>>>>>> Thanks!
>>>>>>> 
>>>>>>> It always bothered me that the code in pci_host uses x86 encoding and
>>>>>>> other architectures are converted to x86.  As long as we are adding
>>>>>>> redirection, maybe we should get rid of this, and make get_config_reg
>>>>>>> return register and devfn separately, so we don't need to encode/decode
>>>>>>> back and forth?
>>>>>> 
>>>>>> Hmm, when touching code I'm not 100% sure what I'm doing I usually try to build on stuff that is there already. That's exactly what happened here.
>>>>>> 
>>>>>> I'm personally not against defining the x86 format as qemu default. That way everyone knows what to deal with. I'm not sure if PCI defines anything that couldn't be represented by the x86 encoding in 32 bits. I actually doubt it. So it seems like a pretty good fit, especially given that all the other code is already in place.
>>>>>> 
>>>>>>> OTOH if we are after a quick fix, won't it be simpler to have
>>>>>>> unin_pci simply use its own routines instead of pci_host_conf_register_mmio
>>>>>>> etc?
>>>>>> 
>>>>>> Hm, I figured this is less work :-).
>>>>> 
>>>>> Let me explain the idea: we have:
>>>>> 
>>>>> 	static void pci_host_config_writel_ioport(void *opaque,
>>>>> 						  uint32_t addr, uint32_t val)
>>>>> 	{
>>>>> 	    PCIHostState *s = opaque;
>>>>> 
>>>>> 	    PCI_DPRINTF("%s addr %"PRIx32 " val %"PRIx32"\n", __func__, addr,
>>>>> 	val);
>>>>> 	    s->config_reg = val;
>>>>> 	}
>>>>> 
>>>>> 	static uint32_t pci_host_config_readl_ioport(void *opaque, uint32_t
>>>>> 	addr)
>>>>> 	{
>>>>> 	    PCIHostState *s = opaque;
>>>>> 	    uint32_t val = s->config_reg;
>>>>> 
>>>>> 	    PCI_DPRINTF("%s addr %"PRIx32" val %"PRIx32"\n", __func__, addr,
>>>>> 	val);
>>>>> 	    return val;
>>>>> 	}
>>>>> 
>>>>> 	void pci_host_conf_register_ioport(pio_addr_t ioport, PCIHostState *s)
>>>>> 	{
>>>>> 	    register_ioport_write(ioport, 4, 4, pci_host_config_writel_ioport,
>>>>> 	s);
>>>>> 	    register_ioport_read(ioport, 4, 4, pci_host_config_readl_ioport, s);
>>>>> 	}
>>>>> 
>>>>> If instead of a simple config_reg = val we translate to pc format
>>>>> here, the rest will work. No?
>>>> 
>>>> Well, that'd mean I'd have to implement a config_reg read function and do the conversion backwards again there. Or I could just save it off in the unin state ... hmm ...
>>> 
>>> Right.
>>> 
>>>> Either way, let's better get this done right. I'd rather want to have a proper framework in place in case someone else stumbles across something similar.
>>>> 
>>>> Alex
>>> 
>>> There's already ugliness with swap/noswap versions in there.  Anything
>>> that claims to be a proper framework would need to address that as well
>>> IMO.
>> 
>> Hm, so you'd rather wait for 5 years to have an all-in-one rework than
>> incrementally improving the existing code?
> 
> Not really, incremental improvements are good.  But what you posted does
> not seem to clean up most code, what it really does is fixes ppc
> unin_pci.  My feeling was since there's only one user for now maybe it
> is better to just have device specific code rather than complicate
> generic code. Makes sense?
> 
> We need to see several users before we add yet another level of
> indirection.  If there is a level of indirection that solves the
> swap/noswap ugliness as well that would show this is a good abstraction.
> I think I even know what a good abstraction would be: decode address on
> write and keep it in decoded form.

Sounds like a good idea.

> 
>> I can do the hacky version then :-).
>> 
>> 
>> Alex
> 
> 
> I haven't seen the hacky version one so it's hard to evaluate which way
> of fixing unin_pci is better. What is your opinion?

I think if unin_pci is the only user, it'd be better to do it hacky inside unin_pci.c. But if there's a chance there's another user, it'd be better to make it generic.

Since this is the first time I ever stumbled across type 0 and type 1 PCI config space addresses, I simply don't know if there are any. Blue Swirls comment indicated that there are. Ben also sounded as if it's rather common to not use the x86 format. On the other hand, it looks like nobody in qemu needed it so far - and we're implementing ARM, MIPS and all other sorts of platforms.

So if anyone with knowledge in PCI could shed some light here, please do so.

Alex
Benjamin Herrenschmidt - Jan. 3, 2010, 8:50 p.m.
On Sun, 2010-01-03 at 21:27 +0100, Alexander Graf wrote:

> I think if unin_pci is the only user, it'd be better to do it hacky
> inside unin_pci.c. But if there's a chance there's another user, it'd
> be better to make it generic.
> 
> Since this is the first time I ever stumbled across type 0 and type 1
> PCI config space addresses, I simply don't know if there are any. Blue
> Swirls comment indicated that there are. Ben also sounded as if it's
> rather common to not use the x86 format. On the other hand, it looks
> like nobody in qemu needed it so far - and we're implementing ARM,
> MIPS and all other sorts of platforms.
> 
> So if anyone with knowledge in PCI could shed some light here, please
> do so.

My feeling is that what you're better off doing is to have the qemu core
take an abstract struct to identify a device config space location, that
consists of separate fields for:

 - The PCI domain (which is what host bridge it hangs off since bus
numbers are not unique between domains)

 - The bus number

 - The device number (aka slot ID)

 - The function number

Now, you can then provide a "helper" that translate the standard x86
type 0 and type 1 cf8/cfc accesses into the above for most bridges to
use, and uninorth or other non-x86 that use different scheme can do
their own decoding.

The reason you want the above is to be more future proof, ie, you'll
probably want to deal with PCIe extended function numbers for example,
or implement different methods of MMCONFIG etc... and it's better to
disconnect the low level decoding of the config space access from the
internal representation in qemu.

Cheers,
Ben.
Alexander Graf - Jan. 4, 2010, 3:26 a.m.
On 03.01.2010, at 21:50, Benjamin Herrenschmidt wrote:

> On Sun, 2010-01-03 at 21:27 +0100, Alexander Graf wrote:
> 
>> I think if unin_pci is the only user, it'd be better to do it hacky
>> inside unin_pci.c. But if there's a chance there's another user, it'd
>> be better to make it generic.
>> 
>> Since this is the first time I ever stumbled across type 0 and type 1
>> PCI config space addresses, I simply don't know if there are any. Blue
>> Swirls comment indicated that there are. Ben also sounded as if it's
>> rather common to not use the x86 format. On the other hand, it looks
>> like nobody in qemu needed it so far - and we're implementing ARM,
>> MIPS and all other sorts of platforms.
>> 
>> So if anyone with knowledge in PCI could shed some light here, please
>> do so.
> 
> My feeling is that what you're better off doing is to have the qemu core
> take an abstract struct to identify a device config space location, that
> consists of separate fields for:
> 
> - The PCI domain (which is what host bridge it hangs off since bus
> numbers are not unique between domains)
> 
> - The bus number

Hm, I think it'd make more sense to just store a PCIBus pointer in there. We could then fetch the bus and domain id from there.

I'll write something up :-).


Alex
Blue Swirl - Jan. 10, 2010, 6:41 p.m.
On Sun, Jan 3, 2010 at 7:18 PM, Blue Swirl <blauwirbel@gmail.com> wrote:
> On Sun, Jan 3, 2010 at 6:06 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
>> On Sun, Jan 03, 2010 at 06:50:15PM +0100, Alexander Graf wrote:
>>>
>>> On 03.01.2010, at 18:44, Michael S. Tsirkin wrote:
>>>
>>> > On Sun, Jan 03, 2010 at 06:40:52PM +0100, Alexander Graf wrote:
>>> >>
>>> >> On 03.01.2010, at 18:29, Michael S. Tsirkin wrote:
>>> >>
>>> >>> On Sun, Jan 03, 2010 at 05:09:32PM +0100, Alexander Graf wrote:
>>> >>>>
>>> >>>> On 03.01.2010, at 16:45, Michael S. Tsirkin wrote:
>>> >>>>
>>> >>>>> On Sun, Jan 03, 2010 at 02:50:45AM +0100, Alexander Graf wrote:
>>> >>>>>> Different host buses may have different layouts for config space accessors.
>>> >>>>>>
>>> >>>>>> The Mac U3 for example uses the following define to access Type 0 (directly
>>> >>>>>> attached) devices:
>>> >>>>>>
>>> >>>>>> #define MACRISC_CFA0(devfn, off)        \
>>> >>>>>>      ((1 << (unsigned int)PCI_SLOT(dev_fn)) \
>>> >>>>>>      | (((unsigned int)PCI_FUNC(dev_fn)) << 8) \
>>> >>>>>>      | (((unsigned int)(off)) & 0xFCUL))
>>> >>>>>>
>>> >>>>>> Obviously, this is different from the encoding we interpret in qemu. In
>>> >>>>>> order to let host controllers take some influence there, we can just
>>> >>>>>> introduce a converter function that takes whatever accessor we have and
>>> >>>>>> makes a qemu understandable one out of it.
>>> >>>>>>
>>> >>>>>> This patch is the groundwork for Uninorth PCI config space fixes.
>>> >>>>>>
>>> >>>>>> Signed-off-by: Alexander Graf <agraf@suse.de>
>>> >>>>>> CC: Michael S. Tsirkin <mst@redhat.com>
>>> >>>>>
>>> >>>>> Thanks!
>>> >>>>>
>>> >>>>> It always bothered me that the code in pci_host uses x86 encoding and
>>> >>>>> other architectures are converted to x86.  As long as we are adding
>>> >>>>> redirection, maybe we should get rid of this, and make get_config_reg
>>> >>>>> return register and devfn separately, so we don't need to encode/decode
>>> >>>>> back and forth?
>>> >>>>
>>> >>>> Hmm, when touching code I'm not 100% sure what I'm doing I usually try to build on stuff that is there already. That's exactly what happened here.
>>> >>>>
>>> >>>> I'm personally not against defining the x86 format as qemu default. That way everyone knows what to deal with. I'm not sure if PCI defines anything that couldn't be represented by the x86 encoding in 32 bits. I actually doubt it. So it seems like a pretty good fit, especially given that all the other code is already in place.
>>> >>>>
>>> >>>>> OTOH if we are after a quick fix, won't it be simpler to have
>>> >>>>> unin_pci simply use its own routines instead of pci_host_conf_register_mmio
>>> >>>>> etc?
>>> >>>>
>>> >>>> Hm, I figured this is less work :-).
>>> >>>
>>> >>> Let me explain the idea: we have:
>>> >>>
>>> >>>   static void pci_host_config_writel_ioport(void *opaque,
>>> >>>                                             uint32_t addr, uint32_t val)
>>> >>>   {
>>> >>>       PCIHostState *s = opaque;
>>> >>>
>>> >>>       PCI_DPRINTF("%s addr %"PRIx32 " val %"PRIx32"\n", __func__, addr,
>>> >>>   val);
>>> >>>       s->config_reg = val;
>>> >>>   }
>>> >>>
>>> >>>   static uint32_t pci_host_config_readl_ioport(void *opaque, uint32_t
>>> >>>   addr)
>>> >>>   {
>>> >>>       PCIHostState *s = opaque;
>>> >>>       uint32_t val = s->config_reg;
>>> >>>
>>> >>>       PCI_DPRINTF("%s addr %"PRIx32" val %"PRIx32"\n", __func__, addr,
>>> >>>   val);
>>> >>>       return val;
>>> >>>   }
>>> >>>
>>> >>>   void pci_host_conf_register_ioport(pio_addr_t ioport, PCIHostState *s)
>>> >>>   {
>>> >>>       register_ioport_write(ioport, 4, 4, pci_host_config_writel_ioport,
>>> >>>   s);
>>> >>>       register_ioport_read(ioport, 4, 4, pci_host_config_readl_ioport, s);
>>> >>>   }
>>> >>>
>>> >>> If instead of a simple config_reg = val we translate to pc format
>>> >>> here, the rest will work. No?
>>> >>
>>> >> Well, that'd mean I'd have to implement a config_reg read function and do the conversion backwards again there. Or I could just save it off in the unin state ... hmm ...
>>> >
>>> > Right.
>>> >
>>> >> Either way, let's better get this done right. I'd rather want to have a proper framework in place in case someone else stumbles across something similar.
>>> >>
>>> >> Alex
>>> >
>>> > There's already ugliness with swap/noswap versions in there.  Anything
>>> > that claims to be a proper framework would need to address that as well
>>> > IMO.
>>>
>>> Hm, so you'd rather wait for 5 years to have an all-in-one rework than
>>> incrementally improving the existing code?
>>
>> Not really, incremental improvements are good.  But what you posted does
>> not seem to clean up most code, what it really does is fixes ppc
>> unin_pci.  My feeling was since there's only one user for now maybe it
>> is better to just have device specific code rather than complicate
>> generic code. Makes sense?
>>
>> We need to see several users before we add yet another level of
>> indirection.  If there is a level of indirection that solves the
>> swap/noswap ugliness as well that would show this is a good abstraction.
>> I think I even know what a good abstraction would be: decode address on
>> write and keep it in decoded form.
>
> I think Sparc64 PBM configuration cycles are also a bit different.
> It's described in UltraSPARC User's Manual 805-0087, p.325.
>
> Currently both QEMU and OpenBIOS just use something common, but as
> soon as Linux boot gets so far as to probe PCI bus, we'd have to fix
> that.

That time is very close, with latest QEMU and OpenBIOS, PCI probe
starts (with GDB tricks for calibrate_delay, which won't be needed
after %tick_cmpr is fixed):

[sparc64] Kernel already loaded

PROMLIB: Sun IEEE Boot Prom 'OBP 3.10.24 1999/01/01 01:01'
PROMLIB: Root node compatible: sun4u
Linux version 2.6.32 (test@host) (gcc version 4.2.4) #3 Sat Jan 9
20:36:42 UTC 2010
bootconsole [earlyprom0] enabled
ARCH: SUN4U
Ethernet address: 52:54:00:12:34:56
Kernel: Using 1 locked TLB entries for main kernel image.
Remapping the kernel... done.
OF stdout device is: /pci@1fe,0/pci@1/pci@1,1/ebus@3/su@1fe
PROM: Built device tree with 32165 bytes of memory.
Top of RAM: 0x7e80000, Total RAM: 0x7e80000
Memory hole size: 0MB
Zone PFN ranges:
  Normal   0x00000000 -> 0x00003f40
Movable zone start PFN for each node
early_node_map[1] active PFN ranges
    0: 0x00000000 -> 0x00003f40
On node 0 totalpages: 16192
  Normal zone: 127 pages used for memmap
  Normal zone: 0 pages reserved
  Normal zone: 16065 pages, LIFO batch:3
Booting Linux...
Built 1 zonelists in Zone order, mobility grouping on.  Total pages: 16065
Kernel command line: root=/dev/hdc1 console=ttyS0 -p debug
PID hash table entries: 512 (order: -1, 4096 bytes)
Dentry cache hash table entries: 16384 (order: 4, 131072 bytes)
Inode-cache hash table entries: 8192 (order: 3, 65536 bytes)
Memory: 118480k available (1472k kernel code, 488k data, 104k init)
[fffff80000000000,0000000007e80000]
SLUB: Genslabs=14, HWalign=32, Order=0-3, MinObjects=0, CPUs=1, Nodes=1
Hierarchical RCU implementation.
NR_IRQS:255
clocksource: mult[a0000] shift[16]
clockevent: mult[19999999] shift[32]
Console: colour dummy device 80x25
Calibrating delay loop (skipped) preset value.. 5000.00 BogoMIPS (lpj=10000000)
Mount-cache hash table entries: 512
/pci: PCI IO[1fe02000000] MEM[1ff00000000]
/pci: SABRE PCI Bus Module ver[0:0]
PCI: Scanning PBM /pci

There it hangs because the probed address is bad:
(gdb)
0x000000000043a1a8      75              __asm__ __volatile__("membar #Sync\n\t"
1: x/i $pc
0x43a1a8 <pci_config_read16+40>:        lduha  [ %o0 ] (29), %o4
(gdb) p $o0
$12 = 0x1fe01000806
Blue Swirl - Jan. 11, 2010, 9:29 p.m.
On Sun, Jan 10, 2010 at 6:41 PM, Blue Swirl <blauwirbel@gmail.com> wrote:
> On Sun, Jan 3, 2010 at 7:18 PM, Blue Swirl <blauwirbel@gmail.com> wrote:
>> On Sun, Jan 3, 2010 at 6:06 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
>>> On Sun, Jan 03, 2010 at 06:50:15PM +0100, Alexander Graf wrote:
>>>>
>>>> On 03.01.2010, at 18:44, Michael S. Tsirkin wrote:
>>>>
>>>> > On Sun, Jan 03, 2010 at 06:40:52PM +0100, Alexander Graf wrote:
>>>> >>
>>>> >> On 03.01.2010, at 18:29, Michael S. Tsirkin wrote:
>>>> >>
>>>> >>> On Sun, Jan 03, 2010 at 05:09:32PM +0100, Alexander Graf wrote:
>>>> >>>>
>>>> >>>> On 03.01.2010, at 16:45, Michael S. Tsirkin wrote:
>>>> >>>>
>>>> >>>>> On Sun, Jan 03, 2010 at 02:50:45AM +0100, Alexander Graf wrote:
>>>> >>>>>> Different host buses may have different layouts for config space accessors.
>>>> >>>>>>
>>>> >>>>>> The Mac U3 for example uses the following define to access Type 0 (directly
>>>> >>>>>> attached) devices:
>>>> >>>>>>
>>>> >>>>>> #define MACRISC_CFA0(devfn, off)        \
>>>> >>>>>>      ((1 << (unsigned int)PCI_SLOT(dev_fn)) \
>>>> >>>>>>      | (((unsigned int)PCI_FUNC(dev_fn)) << 8) \
>>>> >>>>>>      | (((unsigned int)(off)) & 0xFCUL))
>>>> >>>>>>
>>>> >>>>>> Obviously, this is different from the encoding we interpret in qemu. In
>>>> >>>>>> order to let host controllers take some influence there, we can just
>>>> >>>>>> introduce a converter function that takes whatever accessor we have and
>>>> >>>>>> makes a qemu understandable one out of it.
>>>> >>>>>>
>>>> >>>>>> This patch is the groundwork for Uninorth PCI config space fixes.
>>>> >>>>>>
>>>> >>>>>> Signed-off-by: Alexander Graf <agraf@suse.de>
>>>> >>>>>> CC: Michael S. Tsirkin <mst@redhat.com>
>>>> >>>>>
>>>> >>>>> Thanks!
>>>> >>>>>
>>>> >>>>> It always bothered me that the code in pci_host uses x86 encoding and
>>>> >>>>> other architectures are converted to x86.  As long as we are adding
>>>> >>>>> redirection, maybe we should get rid of this, and make get_config_reg
>>>> >>>>> return register and devfn separately, so we don't need to encode/decode
>>>> >>>>> back and forth?
>>>> >>>>
>>>> >>>> Hmm, when touching code I'm not 100% sure what I'm doing I usually try to build on stuff that is there already. That's exactly what happened here.
>>>> >>>>
>>>> >>>> I'm personally not against defining the x86 format as qemu default. That way everyone knows what to deal with. I'm not sure if PCI defines anything that couldn't be represented by the x86 encoding in 32 bits. I actually doubt it. So it seems like a pretty good fit, especially given that all the other code is already in place.
>>>> >>>>
>>>> >>>>> OTOH if we are after a quick fix, won't it be simpler to have
>>>> >>>>> unin_pci simply use its own routines instead of pci_host_conf_register_mmio
>>>> >>>>> etc?
>>>> >>>>
>>>> >>>> Hm, I figured this is less work :-).
>>>> >>>
>>>> >>> Let me explain the idea: we have:
>>>> >>>
>>>> >>>   static void pci_host_config_writel_ioport(void *opaque,
>>>> >>>                                             uint32_t addr, uint32_t val)
>>>> >>>   {
>>>> >>>       PCIHostState *s = opaque;
>>>> >>>
>>>> >>>       PCI_DPRINTF("%s addr %"PRIx32 " val %"PRIx32"\n", __func__, addr,
>>>> >>>   val);
>>>> >>>       s->config_reg = val;
>>>> >>>   }
>>>> >>>
>>>> >>>   static uint32_t pci_host_config_readl_ioport(void *opaque, uint32_t
>>>> >>>   addr)
>>>> >>>   {
>>>> >>>       PCIHostState *s = opaque;
>>>> >>>       uint32_t val = s->config_reg;
>>>> >>>
>>>> >>>       PCI_DPRINTF("%s addr %"PRIx32" val %"PRIx32"\n", __func__, addr,
>>>> >>>   val);
>>>> >>>       return val;
>>>> >>>   }
>>>> >>>
>>>> >>>   void pci_host_conf_register_ioport(pio_addr_t ioport, PCIHostState *s)
>>>> >>>   {
>>>> >>>       register_ioport_write(ioport, 4, 4, pci_host_config_writel_ioport,
>>>> >>>   s);
>>>> >>>       register_ioport_read(ioport, 4, 4, pci_host_config_readl_ioport, s);
>>>> >>>   }
>>>> >>>
>>>> >>> If instead of a simple config_reg = val we translate to pc format
>>>> >>> here, the rest will work. No?
>>>> >>
>>>> >> Well, that'd mean I'd have to implement a config_reg read function and do the conversion backwards again there. Or I could just save it off in the unin state ... hmm ...
>>>> >
>>>> > Right.
>>>> >
>>>> >> Either way, let's better get this done right. I'd rather want to have a proper framework in place in case someone else stumbles across something similar.
>>>> >>
>>>> >> Alex
>>>> >
>>>> > There's already ugliness with swap/noswap versions in there.  Anything
>>>> > that claims to be a proper framework would need to address that as well
>>>> > IMO.
>>>>
>>>> Hm, so you'd rather wait for 5 years to have an all-in-one rework than
>>>> incrementally improving the existing code?
>>>
>>> Not really, incremental improvements are good.  But what you posted does
>>> not seem to clean up most code, what it really does is fixes ppc
>>> unin_pci.  My feeling was since there's only one user for now maybe it
>>> is better to just have device specific code rather than complicate
>>> generic code. Makes sense?
>>>
>>> We need to see several users before we add yet another level of
>>> indirection.  If there is a level of indirection that solves the
>>> swap/noswap ugliness as well that would show this is a good abstraction.
>>> I think I even know what a good abstraction would be: decode address on
>>> write and keep it in decoded form.
>>
>> I think Sparc64 PBM configuration cycles are also a bit different.
>> It's described in UltraSPARC User's Manual 805-0087, p.325.
>>
>> Currently both QEMU and OpenBIOS just use something common, but as
>> soon as Linux boot gets so far as to probe PCI bus, we'd have to fix
>> that.
>
> That time is very close, with latest QEMU and OpenBIOS, PCI probe
> starts (with GDB tricks for calibrate_delay, which won't be needed
> after %tick_cmpr is fixed):

Now I get this:
[sparc64] Kernel already loaded

PROMLIB: Sun IEEE Boot Prom 'OBP 3.10.24 1999/01/01 01:01'
PROMLIB: Root node compatible: sun4u
Linux version 2.6.32 (test@host) (gcc version 4.2.4) #3 Sat Jan 9
20:36:42 UTC 2010
bootconsole [earlyprom0] enabled
ARCH: SUN4U
Ethernet address: 52:54:00:12:34:56
Kernel: Using 1 locked TLB entries for main kernel image.
Remapping the kernel... done.
OF stdout device is: /pci@1fe,0/pci@1/pci@1,1/ebus@3/su@1fe
PROM: Built device tree with 32176 bytes of memory.
Top of RAM: 0x7e80000, Total RAM: 0x7e80000
Memory hole size: 0MB
Zone PFN ranges:
  Normal   0x00000000 -> 0x00003f40
Movable zone start PFN for each node
early_node_map[1] active PFN ranges
    0: 0x00000000 -> 0x00003f40
On node 0 totalpages: 16192
  Normal zone: 127 pages used for memmap
  Normal zone: 0 pages reserved
  Normal zone: 16065 pages, LIFO batch:3
Booting Linux...
Built 1 zonelists in Zone order, mobility grouping on.  Total pages: 16065
Kernel command line: root=/dev/hda1 console=ttyS0 -p debug lpj=100000
PID hash table entries: 512 (order: -1, 4096 bytes)
Dentry cache hash table entries: 16384 (order: 4, 131072 bytes)
Inode-cache hash table entries: 8192 (order: 3, 65536 bytes)
Memory: 118480k available (1472k kernel code, 488k data, 104k init)
[fffff80000000000,0000000007e80000]
SLUB: Genslabs=14, HWalign=32, Order=0-3, MinObjects=0, CPUs=1, Nodes=1
Hierarchical RCU implementation.
NR_IRQS:255
clocksource: mult[a0000] shift[16]
clockevent: mult[19999999] shift[32]
Console: colour dummy device 80x25
Calibrating delay loop (skipped) preset value.. 50.00 BogoMIPS (lpj=100000)
Mount-cache hash table entries: 512
/pci: PCI IO[1fe02000000] MEM[1ff00000000]
/pci: SABRE PCI Bus Module ver[0:0]
PCI: Scanning PBM /pci
------------[ cut here ]------------
WARNING: at /src/linux.git/fs/sysfs/dir.c:491 sysfs_add_one+0x8c/0xc0()
sysfs: cannot create duplicate filename '/class/pci_bus/0000:00'
Call Trace:
 [0000000000450ea8] warn_slowpath_fmt+0x28/0x40
 [00000000004e92ec] sysfs_add_one+0x8c/0xc0
 [00000000004ea480] sysfs_do_create_link+0x80/0x140
 [000000000053fc88] device_add+0x3c8/0x500
 [0000000000513f34] pci_bus_add_child+0x14/0x80
 [0000000000514144] pci_bus_add_devices+0x144/0x200
 [00000000005140ec] pci_bus_add_devices+0xec/0x200
 [000000000056ceac] pci_scan_one_pbm+0x6c/0xa0
 [000000000056d6f0] sabre_probe+0x2d0/0x5a0
 [0000000000569d9c] of_platform_device_probe+0x3c/0x80
 [00000000005427b0] driver_probe_device+0x70/0x160
 [0000000000542918] __driver_attach+0x78/0xa0
 [0000000000541bcc] bus_for_each_dev+0x4c/0x80
 [00000000005421dc] bus_add_driver+0x9c/0x240
 [0000000000542c10] driver_register+0x50/0x160
 [0000000000426a98] do_one_initcall+0x18/0x160
---[ end trace 139ce121c98e96c9 ]---
pci 0000:00:01.1: Error adding bus, continuing
------------[ cut here ]------------
WARNING: at /src/linux.git/fs/sysfs/dir.c:491 sysfs_add_one+0x8c/0xc0()
sysfs: cannot create duplicate filename '/class/pci_bus/0000:00'
Call Trace:
 [0000000000450ea8] warn_slowpath_fmt+0x28/0x40
 [00000000004e92ec] sysfs_add_one+0x8c/0xc0
 [00000000004ea480] sysfs_do_create_link+0x80/0x140
 [000000000053fc88] device_add+0x3c8/0x500
 [0000000000513f34] pci_bus_add_child+0x14/0x80
 [0000000000514144] pci_bus_add_devices+0x144/0x200
 [000000000056ceac] pci_scan_one_pbm+0x6c/0xa0
 [000000000056d6f0] sabre_probe+0x2d0/0x5a0
 [0000000000569d9c] of_platform_device_probe+0x3c/0x80
 [00000000005427b0] driver_probe_device+0x70/0x160
 [0000000000542918] __driver_attach+0x78/0xa0
 [0000000000541bcc] bus_for_each_dev+0x4c/0x80
 [00000000005421dc] bus_add_driver+0x9c/0x240
 [0000000000542c10] driver_register+0x50/0x160
 [0000000000426a98] do_one_initcall+0x18/0x160
 [00000000005f4274] kernel_init+0xd4/0x140
---[ end trace 139ce121c98e96ca ]---
pci 0000:00:01.0: Error adding bus, continuing
bio: create slab <bio-0> at 0
vgaarb: loaded
Switching to clocksource tick
io scheduler noop registered (default)
------------[ cut here ]------------
WARNING: at /src/linux.git/fs/proc/generic.c:590 proc_register+0xe8/0x1c0()
proc_dir_entry 'pci/0000:00' already registered
Call Trace:
 [0000000000450ea8] warn_slowpath_fmt+0x28/0x40
 [00000000004e2948] proc_register+0xe8/0x1c0
 [00000000004e2c30] proc_mkdir_mode+0x30/0x60
 [000000000051ce14] pci_proc_attach_device+0xb4/0xe0
 [00000000006027fc] pci_proc_init+0x5c/0xa0
 [0000000000426a98] do_one_initcall+0x18/0x160
 [00000000005f4274] kernel_init+0xd4/0x140
 [000000000042b130] kernel_thread+0x30/0x60
 [000000000056a8f8] rest_init+0x18/0x80
---[ end trace 139ce121c98e96cb ]---
------------[ cut here ]------------
WARNING: at /src/linux.git/fs/proc/generic.c:590 proc_register+0xe8/0x1c0()
proc_dir_entry 'pci/0000:00' already registered
Call Trace:
 [0000000000450ea8] warn_slowpath_fmt+0x28/0x40
 [00000000004e2948] proc_register+0xe8/0x1c0
 [00000000004e2c30] proc_mkdir_mode+0x30/0x60
 [000000000051ce14] pci_proc_attach_device+0xb4/0xe0
 [00000000006027fc] pci_proc_init+0x5c/0xa0
 [0000000000426a98] do_one_initcall+0x18/0x160
 [00000000005f4274] kernel_init+0xd4/0x140
 [000000000042b130] kernel_thread+0x30/0x60
 [000000000056a8f8] rest_init+0x18/0x80
---[ end trace 139ce121c98e96cc ]---
Uniform Multi-Platform E-IDE driver
cmd64x 0000:00:05.0: IDE controller (0x1095:0x0646 rev 0x00)
PCI: Enabling device: (0000:00:05.0), cmd 301
cmd64x 0000:00:05.0: unable to enable IDE controller
cmd64x 0000:00:05.0: IDE controller (0x1095:0x0646 rev 0x00)
CMD64x_IDE 0000:00:05.0: BAR 0: can't reserve I/O region
[0x1fe02000500-0x1fe02000507]
cmd64x 0000:00:05.0: can't reserve resources
CMD64x_IDE: probe of 0000:00:05.0 failed with error -16
ide-gd driver 1.18
ide-cd driver 5.00
mice: PS/2 mouse device common for all mice
turn off boot console earlyprom0

So PCI probe seems to work. The errors show some bugs with the APB bridges.
Igor V. Kovalenko - Jan. 11, 2010, 10:33 p.m.
On Tue, Jan 12, 2010 at 12:29 AM, Blue Swirl <blauwirbel@gmail.com> wrote:
> On Sun, Jan 10, 2010 at 6:41 PM, Blue Swirl <blauwirbel@gmail.com> wrote:
>> On Sun, Jan 3, 2010 at 7:18 PM, Blue Swirl <blauwirbel@gmail.com> wrote:
>>> On Sun, Jan 3, 2010 at 6:06 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
>>>> On Sun, Jan 03, 2010 at 06:50:15PM +0100, Alexander Graf wrote:
>>>>>
>>>>> On 03.01.2010, at 18:44, Michael S. Tsirkin wrote:
>>>>>
>>>>> > On Sun, Jan 03, 2010 at 06:40:52PM +0100, Alexander Graf wrote:
>>>>> >>
>>>>> >> On 03.01.2010, at 18:29, Michael S. Tsirkin wrote:
>>>>> >>
>>>>> >>> On Sun, Jan 03, 2010 at 05:09:32PM +0100, Alexander Graf wrote:
>>>>> >>>>
>>>>> >>>> On 03.01.2010, at 16:45, Michael S. Tsirkin wrote:
>>>>> >>>>
>>>>> >>>>> On Sun, Jan 03, 2010 at 02:50:45AM +0100, Alexander Graf wrote:
>>>>> >>>>>> Different host buses may have different layouts for config space accessors.
>>>>> >>>>>>
>>>>> >>>>>> The Mac U3 for example uses the following define to access Type 0 (directly
>>>>> >>>>>> attached) devices:
>>>>> >>>>>>
>>>>> >>>>>> #define MACRISC_CFA0(devfn, off)        \
>>>>> >>>>>>      ((1 << (unsigned int)PCI_SLOT(dev_fn)) \
>>>>> >>>>>>      | (((unsigned int)PCI_FUNC(dev_fn)) << 8) \
>>>>> >>>>>>      | (((unsigned int)(off)) & 0xFCUL))
>>>>> >>>>>>
>>>>> >>>>>> Obviously, this is different from the encoding we interpret in qemu. In
>>>>> >>>>>> order to let host controllers take some influence there, we can just
>>>>> >>>>>> introduce a converter function that takes whatever accessor we have and
>>>>> >>>>>> makes a qemu understandable one out of it.
>>>>> >>>>>>
>>>>> >>>>>> This patch is the groundwork for Uninorth PCI config space fixes.
>>>>> >>>>>>
>>>>> >>>>>> Signed-off-by: Alexander Graf <agraf@suse.de>
>>>>> >>>>>> CC: Michael S. Tsirkin <mst@redhat.com>
>>>>> >>>>>
>>>>> >>>>> Thanks!
>>>>> >>>>>
>>>>> >>>>> It always bothered me that the code in pci_host uses x86 encoding and
>>>>> >>>>> other architectures are converted to x86.  As long as we are adding
>>>>> >>>>> redirection, maybe we should get rid of this, and make get_config_reg
>>>>> >>>>> return register and devfn separately, so we don't need to encode/decode
>>>>> >>>>> back and forth?
>>>>> >>>>
>>>>> >>>> Hmm, when touching code I'm not 100% sure what I'm doing I usually try to build on stuff that is there already. That's exactly what happened here.
>>>>> >>>>
>>>>> >>>> I'm personally not against defining the x86 format as qemu default. That way everyone knows what to deal with. I'm not sure if PCI defines anything that couldn't be represented by the x86 encoding in 32 bits. I actually doubt it. So it seems like a pretty good fit, especially given that all the other code is already in place.
>>>>> >>>>
>>>>> >>>>> OTOH if we are after a quick fix, won't it be simpler to have
>>>>> >>>>> unin_pci simply use its own routines instead of pci_host_conf_register_mmio
>>>>> >>>>> etc?
>>>>> >>>>
>>>>> >>>> Hm, I figured this is less work :-).
>>>>> >>>
>>>>> >>> Let me explain the idea: we have:
>>>>> >>>
>>>>> >>>   static void pci_host_config_writel_ioport(void *opaque,
>>>>> >>>                                             uint32_t addr, uint32_t val)
>>>>> >>>   {
>>>>> >>>       PCIHostState *s = opaque;
>>>>> >>>
>>>>> >>>       PCI_DPRINTF("%s addr %"PRIx32 " val %"PRIx32"\n", __func__, addr,
>>>>> >>>   val);
>>>>> >>>       s->config_reg = val;
>>>>> >>>   }
>>>>> >>>
>>>>> >>>   static uint32_t pci_host_config_readl_ioport(void *opaque, uint32_t
>>>>> >>>   addr)
>>>>> >>>   {
>>>>> >>>       PCIHostState *s = opaque;
>>>>> >>>       uint32_t val = s->config_reg;
>>>>> >>>
>>>>> >>>       PCI_DPRINTF("%s addr %"PRIx32" val %"PRIx32"\n", __func__, addr,
>>>>> >>>   val);
>>>>> >>>       return val;
>>>>> >>>   }
>>>>> >>>
>>>>> >>>   void pci_host_conf_register_ioport(pio_addr_t ioport, PCIHostState *s)
>>>>> >>>   {
>>>>> >>>       register_ioport_write(ioport, 4, 4, pci_host_config_writel_ioport,
>>>>> >>>   s);
>>>>> >>>       register_ioport_read(ioport, 4, 4, pci_host_config_readl_ioport, s);
>>>>> >>>   }
>>>>> >>>
>>>>> >>> If instead of a simple config_reg = val we translate to pc format
>>>>> >>> here, the rest will work. No?
>>>>> >>
>>>>> >> Well, that'd mean I'd have to implement a config_reg read function and do the conversion backwards again there. Or I could just save it off in the unin state ... hmm ...
>>>>> >
>>>>> > Right.
>>>>> >
>>>>> >> Either way, let's better get this done right. I'd rather want to have a proper framework in place in case someone else stumbles across something similar.
>>>>> >>
>>>>> >> Alex
>>>>> >
>>>>> > There's already ugliness with swap/noswap versions in there.  Anything
>>>>> > that claims to be a proper framework would need to address that as well
>>>>> > IMO.
>>>>>
>>>>> Hm, so you'd rather wait for 5 years to have an all-in-one rework than
>>>>> incrementally improving the existing code?
>>>>
>>>> Not really, incremental improvements are good.  But what you posted does
>>>> not seem to clean up most code, what it really does is fixes ppc
>>>> unin_pci.  My feeling was since there's only one user for now maybe it
>>>> is better to just have device specific code rather than complicate
>>>> generic code. Makes sense?
>>>>
>>>> We need to see several users before we add yet another level of
>>>> indirection.  If there is a level of indirection that solves the
>>>> swap/noswap ugliness as well that would show this is a good abstraction.
>>>> I think I even know what a good abstraction would be: decode address on
>>>> write and keep it in decoded form.
>>>
>>> I think Sparc64 PBM configuration cycles are also a bit different.
>>> It's described in UltraSPARC User's Manual 805-0087, p.325.
>>>
>>> Currently both QEMU and OpenBIOS just use something common, but as
>>> soon as Linux boot gets so far as to probe PCI bus, we'd have to fix
>>> that.
>>
>> That time is very close, with latest QEMU and OpenBIOS, PCI probe
>> starts (with GDB tricks for calibrate_delay, which won't be needed
>> after %tick_cmpr is fixed):
>
> Now I get this:
> [sparc64] Kernel already loaded
>
> PROMLIB: Sun IEEE Boot Prom 'OBP 3.10.24 1999/01/01 01:01'
> PROMLIB: Root node compatible: sun4u
> Linux version 2.6.32 (test@host) (gcc version 4.2.4) #3 Sat Jan 9
> 20:36:42 UTC 2010
> bootconsole [earlyprom0] enabled
> ARCH: SUN4U
> Ethernet address: 52:54:00:12:34:56
> Kernel: Using 1 locked TLB entries for main kernel image.
> Remapping the kernel... done.
> OF stdout device is: /pci@1fe,0/pci@1/pci@1,1/ebus@3/su@1fe
> PROM: Built device tree with 32176 bytes of memory.
> Top of RAM: 0x7e80000, Total RAM: 0x7e80000
> Memory hole size: 0MB
> Zone PFN ranges:
>  Normal   0x00000000 -> 0x00003f40
> Movable zone start PFN for each node
> early_node_map[1] active PFN ranges
>    0: 0x00000000 -> 0x00003f40
> On node 0 totalpages: 16192
>  Normal zone: 127 pages used for memmap
>  Normal zone: 0 pages reserved
>  Normal zone: 16065 pages, LIFO batch:3
> Booting Linux...
> Built 1 zonelists in Zone order, mobility grouping on.  Total pages: 16065
> Kernel command line: root=/dev/hda1 console=ttyS0 -p debug lpj=100000
> PID hash table entries: 512 (order: -1, 4096 bytes)
> Dentry cache hash table entries: 16384 (order: 4, 131072 bytes)
> Inode-cache hash table entries: 8192 (order: 3, 65536 bytes)
> Memory: 118480k available (1472k kernel code, 488k data, 104k init)
> [fffff80000000000,0000000007e80000]
> SLUB: Genslabs=14, HWalign=32, Order=0-3, MinObjects=0, CPUs=1, Nodes=1
> Hierarchical RCU implementation.
> NR_IRQS:255
> clocksource: mult[a0000] shift[16]
> clockevent: mult[19999999] shift[32]
> Console: colour dummy device 80x25
> Calibrating delay loop (skipped) preset value.. 50.00 BogoMIPS (lpj=100000)
> Mount-cache hash table entries: 512
> /pci: PCI IO[1fe02000000] MEM[1ff00000000]
> /pci: SABRE PCI Bus Module ver[0:0]
> PCI: Scanning PBM /pci
> ------------[ cut here ]------------
> WARNING: at /src/linux.git/fs/sysfs/dir.c:491 sysfs_add_one+0x8c/0xc0()
> sysfs: cannot create duplicate filename '/class/pci_bus/0000:00'
> Call Trace:
>  [0000000000450ea8] warn_slowpath_fmt+0x28/0x40
>  [00000000004e92ec] sysfs_add_one+0x8c/0xc0
>  [00000000004ea480] sysfs_do_create_link+0x80/0x140
>  [000000000053fc88] device_add+0x3c8/0x500
>  [0000000000513f34] pci_bus_add_child+0x14/0x80
>  [0000000000514144] pci_bus_add_devices+0x144/0x200
>  [00000000005140ec] pci_bus_add_devices+0xec/0x200
>  [000000000056ceac] pci_scan_one_pbm+0x6c/0xa0
>  [000000000056d6f0] sabre_probe+0x2d0/0x5a0
>  [0000000000569d9c] of_platform_device_probe+0x3c/0x80
>  [00000000005427b0] driver_probe_device+0x70/0x160
>  [0000000000542918] __driver_attach+0x78/0xa0
>  [0000000000541bcc] bus_for_each_dev+0x4c/0x80
>  [00000000005421dc] bus_add_driver+0x9c/0x240
>  [0000000000542c10] driver_register+0x50/0x160
>  [0000000000426a98] do_one_initcall+0x18/0x160
> ---[ end trace 139ce121c98e96c9 ]---
> pci 0000:00:01.1: Error adding bus, continuing
> ------------[ cut here ]------------
> WARNING: at /src/linux.git/fs/sysfs/dir.c:491 sysfs_add_one+0x8c/0xc0()
> sysfs: cannot create duplicate filename '/class/pci_bus/0000:00'
> Call Trace:
>  [0000000000450ea8] warn_slowpath_fmt+0x28/0x40
>  [00000000004e92ec] sysfs_add_one+0x8c/0xc0
>  [00000000004ea480] sysfs_do_create_link+0x80/0x140
>  [000000000053fc88] device_add+0x3c8/0x500
>  [0000000000513f34] pci_bus_add_child+0x14/0x80
>  [0000000000514144] pci_bus_add_devices+0x144/0x200
>  [000000000056ceac] pci_scan_one_pbm+0x6c/0xa0
>  [000000000056d6f0] sabre_probe+0x2d0/0x5a0
>  [0000000000569d9c] of_platform_device_probe+0x3c/0x80
>  [00000000005427b0] driver_probe_device+0x70/0x160
>  [0000000000542918] __driver_attach+0x78/0xa0
>  [0000000000541bcc] bus_for_each_dev+0x4c/0x80
>  [00000000005421dc] bus_add_driver+0x9c/0x240
>  [0000000000542c10] driver_register+0x50/0x160
>  [0000000000426a98] do_one_initcall+0x18/0x160
>  [00000000005f4274] kernel_init+0xd4/0x140
> ---[ end trace 139ce121c98e96ca ]---
> pci 0000:00:01.0: Error adding bus, continuing
> bio: create slab <bio-0> at 0
> vgaarb: loaded
> Switching to clocksource tick
> io scheduler noop registered (default)
> ------------[ cut here ]------------
> WARNING: at /src/linux.git/fs/proc/generic.c:590 proc_register+0xe8/0x1c0()
> proc_dir_entry 'pci/0000:00' already registered
> Call Trace:
>  [0000000000450ea8] warn_slowpath_fmt+0x28/0x40
>  [00000000004e2948] proc_register+0xe8/0x1c0
>  [00000000004e2c30] proc_mkdir_mode+0x30/0x60
>  [000000000051ce14] pci_proc_attach_device+0xb4/0xe0
>  [00000000006027fc] pci_proc_init+0x5c/0xa0
>  [0000000000426a98] do_one_initcall+0x18/0x160
>  [00000000005f4274] kernel_init+0xd4/0x140
>  [000000000042b130] kernel_thread+0x30/0x60
>  [000000000056a8f8] rest_init+0x18/0x80
> ---[ end trace 139ce121c98e96cb ]---
> ------------[ cut here ]------------
> WARNING: at /src/linux.git/fs/proc/generic.c:590 proc_register+0xe8/0x1c0()
> proc_dir_entry 'pci/0000:00' already registered
> Call Trace:
>  [0000000000450ea8] warn_slowpath_fmt+0x28/0x40
>  [00000000004e2948] proc_register+0xe8/0x1c0
>  [00000000004e2c30] proc_mkdir_mode+0x30/0x60
>  [000000000051ce14] pci_proc_attach_device+0xb4/0xe0
>  [00000000006027fc] pci_proc_init+0x5c/0xa0
>  [0000000000426a98] do_one_initcall+0x18/0x160
>  [00000000005f4274] kernel_init+0xd4/0x140
>  [000000000042b130] kernel_thread+0x30/0x60
>  [000000000056a8f8] rest_init+0x18/0x80
> ---[ end trace 139ce121c98e96cc ]---
> Uniform Multi-Platform E-IDE driver
> cmd64x 0000:00:05.0: IDE controller (0x1095:0x0646 rev 0x00)
> PCI: Enabling device: (0000:00:05.0), cmd 301
> cmd64x 0000:00:05.0: unable to enable IDE controller
> cmd64x 0000:00:05.0: IDE controller (0x1095:0x0646 rev 0x00)
> CMD64x_IDE 0000:00:05.0: BAR 0: can't reserve I/O region
> [0x1fe02000500-0x1fe02000507]
> cmd64x 0000:00:05.0: can't reserve resources
> CMD64x_IDE: probe of 0000:00:05.0 failed with error -16
> ide-gd driver 1.18
> ide-cd driver 5.00
> mice: PS/2 mouse device common for all mice
> turn off boot console earlyprom0
>
> So PCI probe seems to work. The errors show some bugs with the APB bridges.

You have just committed a bug, writes and reads should list methods in
{b,w,l} order.
See cpu_register_io_memory_fixed where we do map byte access at index 0 etc..
Not shure if it helps solving current issue, but the code should look
like the following:

 static CPUWriteMemoryFunc * const apb_pci_config_writes[] = {
-    &apb_pci_config_writel,
-    &apb_pci_config_writew,
     &apb_pci_config_writeb,
+    &apb_pci_config_writew,
+    &apb_pci_config_writel,
 };

 static CPUReadMemoryFunc * const apb_pci_config_reads[] = {
-    &apb_pci_config_readl,
-    &apb_pci_config_readw,
     &apb_pci_config_readb,
+    &apb_pci_config_readw,
+    &apb_pci_config_readl,
 };
Blue Swirl - Jan. 12, 2010, 7:29 p.m.
On Mon, Jan 11, 2010 at 10:33 PM, Igor Kovalenko
<igor.v.kovalenko@gmail.com> wrote:
> On Tue, Jan 12, 2010 at 12:29 AM, Blue Swirl <blauwirbel@gmail.com> wrote:
>> On Sun, Jan 10, 2010 at 6:41 PM, Blue Swirl <blauwirbel@gmail.com> wrote:
>>> On Sun, Jan 3, 2010 at 7:18 PM, Blue Swirl <blauwirbel@gmail.com> wrote:
>>>> On Sun, Jan 3, 2010 at 6:06 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
>>>>> On Sun, Jan 03, 2010 at 06:50:15PM +0100, Alexander Graf wrote:
>>>>>>
>>>>>> On 03.01.2010, at 18:44, Michael S. Tsirkin wrote:
>>>>>>
>>>>>> > On Sun, Jan 03, 2010 at 06:40:52PM +0100, Alexander Graf wrote:
>>>>>> >>
>>>>>> >> On 03.01.2010, at 18:29, Michael S. Tsirkin wrote:
>>>>>> >>
>>>>>> >>> On Sun, Jan 03, 2010 at 05:09:32PM +0100, Alexander Graf wrote:
>>>>>> >>>>
>>>>>> >>>> On 03.01.2010, at 16:45, Michael S. Tsirkin wrote:
>>>>>> >>>>
>>>>>> >>>>> On Sun, Jan 03, 2010 at 02:50:45AM +0100, Alexander Graf wrote:
>>>>>> >>>>>> Different host buses may have different layouts for config space accessors.
>>>>>> >>>>>>
>>>>>> >>>>>> The Mac U3 for example uses the following define to access Type 0 (directly
>>>>>> >>>>>> attached) devices:
>>>>>> >>>>>>
>>>>>> >>>>>> #define MACRISC_CFA0(devfn, off)        \
>>>>>> >>>>>>      ((1 << (unsigned int)PCI_SLOT(dev_fn)) \
>>>>>> >>>>>>      | (((unsigned int)PCI_FUNC(dev_fn)) << 8) \
>>>>>> >>>>>>      | (((unsigned int)(off)) & 0xFCUL))
>>>>>> >>>>>>
>>>>>> >>>>>> Obviously, this is different from the encoding we interpret in qemu. In
>>>>>> >>>>>> order to let host controllers take some influence there, we can just
>>>>>> >>>>>> introduce a converter function that takes whatever accessor we have and
>>>>>> >>>>>> makes a qemu understandable one out of it.
>>>>>> >>>>>>
>>>>>> >>>>>> This patch is the groundwork for Uninorth PCI config space fixes.
>>>>>> >>>>>>
>>>>>> >>>>>> Signed-off-by: Alexander Graf <agraf@suse.de>
>>>>>> >>>>>> CC: Michael S. Tsirkin <mst@redhat.com>
>>>>>> >>>>>
>>>>>> >>>>> Thanks!
>>>>>> >>>>>
>>>>>> >>>>> It always bothered me that the code in pci_host uses x86 encoding and
>>>>>> >>>>> other architectures are converted to x86.  As long as we are adding
>>>>>> >>>>> redirection, maybe we should get rid of this, and make get_config_reg
>>>>>> >>>>> return register and devfn separately, so we don't need to encode/decode
>>>>>> >>>>> back and forth?
>>>>>> >>>>
>>>>>> >>>> Hmm, when touching code I'm not 100% sure what I'm doing I usually try to build on stuff that is there already. That's exactly what happened here.
>>>>>> >>>>
>>>>>> >>>> I'm personally not against defining the x86 format as qemu default. That way everyone knows what to deal with. I'm not sure if PCI defines anything that couldn't be represented by the x86 encoding in 32 bits. I actually doubt it. So it seems like a pretty good fit, especially given that all the other code is already in place.
>>>>>> >>>>
>>>>>> >>>>> OTOH if we are after a quick fix, won't it be simpler to have
>>>>>> >>>>> unin_pci simply use its own routines instead of pci_host_conf_register_mmio
>>>>>> >>>>> etc?
>>>>>> >>>>
>>>>>> >>>> Hm, I figured this is less work :-).
>>>>>> >>>
>>>>>> >>> Let me explain the idea: we have:
>>>>>> >>>
>>>>>> >>>   static void pci_host_config_writel_ioport(void *opaque,
>>>>>> >>>                                             uint32_t addr, uint32_t val)
>>>>>> >>>   {
>>>>>> >>>       PCIHostState *s = opaque;
>>>>>> >>>
>>>>>> >>>       PCI_DPRINTF("%s addr %"PRIx32 " val %"PRIx32"\n", __func__, addr,
>>>>>> >>>   val);
>>>>>> >>>       s->config_reg = val;
>>>>>> >>>   }
>>>>>> >>>
>>>>>> >>>   static uint32_t pci_host_config_readl_ioport(void *opaque, uint32_t
>>>>>> >>>   addr)
>>>>>> >>>   {
>>>>>> >>>       PCIHostState *s = opaque;
>>>>>> >>>       uint32_t val = s->config_reg;
>>>>>> >>>
>>>>>> >>>       PCI_DPRINTF("%s addr %"PRIx32" val %"PRIx32"\n", __func__, addr,
>>>>>> >>>   val);
>>>>>> >>>       return val;
>>>>>> >>>   }
>>>>>> >>>
>>>>>> >>>   void pci_host_conf_register_ioport(pio_addr_t ioport, PCIHostState *s)
>>>>>> >>>   {
>>>>>> >>>       register_ioport_write(ioport, 4, 4, pci_host_config_writel_ioport,
>>>>>> >>>   s);
>>>>>> >>>       register_ioport_read(ioport, 4, 4, pci_host_config_readl_ioport, s);
>>>>>> >>>   }
>>>>>> >>>
>>>>>> >>> If instead of a simple config_reg = val we translate to pc format
>>>>>> >>> here, the rest will work. No?
>>>>>> >>
>>>>>> >> Well, that'd mean I'd have to implement a config_reg read function and do the conversion backwards again there. Or I could just save it off in the unin state ... hmm ...
>>>>>> >
>>>>>> > Right.
>>>>>> >
>>>>>> >> Either way, let's better get this done right. I'd rather want to have a proper framework in place in case someone else stumbles across something similar.
>>>>>> >>
>>>>>> >> Alex
>>>>>> >
>>>>>> > There's already ugliness with swap/noswap versions in there.  Anything
>>>>>> > that claims to be a proper framework would need to address that as well
>>>>>> > IMO.
>>>>>>
>>>>>> Hm, so you'd rather wait for 5 years to have an all-in-one rework than
>>>>>> incrementally improving the existing code?
>>>>>
>>>>> Not really, incremental improvements are good.  But what you posted does
>>>>> not seem to clean up most code, what it really does is fixes ppc
>>>>> unin_pci.  My feeling was since there's only one user for now maybe it
>>>>> is better to just have device specific code rather than complicate
>>>>> generic code. Makes sense?
>>>>>
>>>>> We need to see several users before we add yet another level of
>>>>> indirection.  If there is a level of indirection that solves the
>>>>> swap/noswap ugliness as well that would show this is a good abstraction.
>>>>> I think I even know what a good abstraction would be: decode address on
>>>>> write and keep it in decoded form.
>>>>
>>>> I think Sparc64 PBM configuration cycles are also a bit different.
>>>> It's described in UltraSPARC User's Manual 805-0087, p.325.
>>>>
>>>> Currently both QEMU and OpenBIOS just use something common, but as
>>>> soon as Linux boot gets so far as to probe PCI bus, we'd have to fix
>>>> that.
>>>
>>> That time is very close, with latest QEMU and OpenBIOS, PCI probe
>>> starts (with GDB tricks for calibrate_delay, which won't be needed
>>> after %tick_cmpr is fixed):
>>
>> Now I get this:
>> [sparc64] Kernel already loaded
>>
>> PROMLIB: Sun IEEE Boot Prom 'OBP 3.10.24 1999/01/01 01:01'
>> PROMLIB: Root node compatible: sun4u
>> Linux version 2.6.32 (test@host) (gcc version 4.2.4) #3 Sat Jan 9
>> 20:36:42 UTC 2010
>> bootconsole [earlyprom0] enabled
>> ARCH: SUN4U
>> Ethernet address: 52:54:00:12:34:56
>> Kernel: Using 1 locked TLB entries for main kernel image.
>> Remapping the kernel... done.
>> OF stdout device is: /pci@1fe,0/pci@1/pci@1,1/ebus@3/su@1fe
>> PROM: Built device tree with 32176 bytes of memory.
>> Top of RAM: 0x7e80000, Total RAM: 0x7e80000
>> Memory hole size: 0MB
>> Zone PFN ranges:
>>  Normal   0x00000000 -> 0x00003f40
>> Movable zone start PFN for each node
>> early_node_map[1] active PFN ranges
>>    0: 0x00000000 -> 0x00003f40
>> On node 0 totalpages: 16192
>>  Normal zone: 127 pages used for memmap
>>  Normal zone: 0 pages reserved
>>  Normal zone: 16065 pages, LIFO batch:3
>> Booting Linux...
>> Built 1 zonelists in Zone order, mobility grouping on.  Total pages: 16065
>> Kernel command line: root=/dev/hda1 console=ttyS0 -p debug lpj=100000
>> PID hash table entries: 512 (order: -1, 4096 bytes)
>> Dentry cache hash table entries: 16384 (order: 4, 131072 bytes)
>> Inode-cache hash table entries: 8192 (order: 3, 65536 bytes)
>> Memory: 118480k available (1472k kernel code, 488k data, 104k init)
>> [fffff80000000000,0000000007e80000]
>> SLUB: Genslabs=14, HWalign=32, Order=0-3, MinObjects=0, CPUs=1, Nodes=1
>> Hierarchical RCU implementation.
>> NR_IRQS:255
>> clocksource: mult[a0000] shift[16]
>> clockevent: mult[19999999] shift[32]
>> Console: colour dummy device 80x25
>> Calibrating delay loop (skipped) preset value.. 50.00 BogoMIPS (lpj=100000)
>> Mount-cache hash table entries: 512
>> /pci: PCI IO[1fe02000000] MEM[1ff00000000]
>> /pci: SABRE PCI Bus Module ver[0:0]
>> PCI: Scanning PBM /pci
>> ------------[ cut here ]------------
>> WARNING: at /src/linux.git/fs/sysfs/dir.c:491 sysfs_add_one+0x8c/0xc0()
>> sysfs: cannot create duplicate filename '/class/pci_bus/0000:00'
>> Call Trace:
>>  [0000000000450ea8] warn_slowpath_fmt+0x28/0x40
>>  [00000000004e92ec] sysfs_add_one+0x8c/0xc0
>>  [00000000004ea480] sysfs_do_create_link+0x80/0x140
>>  [000000000053fc88] device_add+0x3c8/0x500
>>  [0000000000513f34] pci_bus_add_child+0x14/0x80
>>  [0000000000514144] pci_bus_add_devices+0x144/0x200
>>  [00000000005140ec] pci_bus_add_devices+0xec/0x200
>>  [000000000056ceac] pci_scan_one_pbm+0x6c/0xa0
>>  [000000000056d6f0] sabre_probe+0x2d0/0x5a0
>>  [0000000000569d9c] of_platform_device_probe+0x3c/0x80
>>  [00000000005427b0] driver_probe_device+0x70/0x160
>>  [0000000000542918] __driver_attach+0x78/0xa0
>>  [0000000000541bcc] bus_for_each_dev+0x4c/0x80
>>  [00000000005421dc] bus_add_driver+0x9c/0x240
>>  [0000000000542c10] driver_register+0x50/0x160
>>  [0000000000426a98] do_one_initcall+0x18/0x160
>> ---[ end trace 139ce121c98e96c9 ]---
>> pci 0000:00:01.1: Error adding bus, continuing
>> ------------[ cut here ]------------
>> WARNING: at /src/linux.git/fs/sysfs/dir.c:491 sysfs_add_one+0x8c/0xc0()
>> sysfs: cannot create duplicate filename '/class/pci_bus/0000:00'
>> Call Trace:
>>  [0000000000450ea8] warn_slowpath_fmt+0x28/0x40
>>  [00000000004e92ec] sysfs_add_one+0x8c/0xc0
>>  [00000000004ea480] sysfs_do_create_link+0x80/0x140
>>  [000000000053fc88] device_add+0x3c8/0x500
>>  [0000000000513f34] pci_bus_add_child+0x14/0x80
>>  [0000000000514144] pci_bus_add_devices+0x144/0x200
>>  [000000000056ceac] pci_scan_one_pbm+0x6c/0xa0
>>  [000000000056d6f0] sabre_probe+0x2d0/0x5a0
>>  [0000000000569d9c] of_platform_device_probe+0x3c/0x80
>>  [00000000005427b0] driver_probe_device+0x70/0x160
>>  [0000000000542918] __driver_attach+0x78/0xa0
>>  [0000000000541bcc] bus_for_each_dev+0x4c/0x80
>>  [00000000005421dc] bus_add_driver+0x9c/0x240
>>  [0000000000542c10] driver_register+0x50/0x160
>>  [0000000000426a98] do_one_initcall+0x18/0x160
>>  [00000000005f4274] kernel_init+0xd4/0x140
>> ---[ end trace 139ce121c98e96ca ]---
>> pci 0000:00:01.0: Error adding bus, continuing
>> bio: create slab <bio-0> at 0
>> vgaarb: loaded
>> Switching to clocksource tick
>> io scheduler noop registered (default)
>> ------------[ cut here ]------------
>> WARNING: at /src/linux.git/fs/proc/generic.c:590 proc_register+0xe8/0x1c0()
>> proc_dir_entry 'pci/0000:00' already registered
>> Call Trace:
>>  [0000000000450ea8] warn_slowpath_fmt+0x28/0x40
>>  [00000000004e2948] proc_register+0xe8/0x1c0
>>  [00000000004e2c30] proc_mkdir_mode+0x30/0x60
>>  [000000000051ce14] pci_proc_attach_device+0xb4/0xe0
>>  [00000000006027fc] pci_proc_init+0x5c/0xa0
>>  [0000000000426a98] do_one_initcall+0x18/0x160
>>  [00000000005f4274] kernel_init+0xd4/0x140
>>  [000000000042b130] kernel_thread+0x30/0x60
>>  [000000000056a8f8] rest_init+0x18/0x80
>> ---[ end trace 139ce121c98e96cb ]---
>> ------------[ cut here ]------------
>> WARNING: at /src/linux.git/fs/proc/generic.c:590 proc_register+0xe8/0x1c0()
>> proc_dir_entry 'pci/0000:00' already registered
>> Call Trace:
>>  [0000000000450ea8] warn_slowpath_fmt+0x28/0x40
>>  [00000000004e2948] proc_register+0xe8/0x1c0
>>  [00000000004e2c30] proc_mkdir_mode+0x30/0x60
>>  [000000000051ce14] pci_proc_attach_device+0xb4/0xe0
>>  [00000000006027fc] pci_proc_init+0x5c/0xa0
>>  [0000000000426a98] do_one_initcall+0x18/0x160
>>  [00000000005f4274] kernel_init+0xd4/0x140
>>  [000000000042b130] kernel_thread+0x30/0x60
>>  [000000000056a8f8] rest_init+0x18/0x80
>> ---[ end trace 139ce121c98e96cc ]---
>> Uniform Multi-Platform E-IDE driver
>> cmd64x 0000:00:05.0: IDE controller (0x1095:0x0646 rev 0x00)
>> PCI: Enabling device: (0000:00:05.0), cmd 301
>> cmd64x 0000:00:05.0: unable to enable IDE controller
>> cmd64x 0000:00:05.0: IDE controller (0x1095:0x0646 rev 0x00)
>> CMD64x_IDE 0000:00:05.0: BAR 0: can't reserve I/O region
>> [0x1fe02000500-0x1fe02000507]
>> cmd64x 0000:00:05.0: can't reserve resources
>> CMD64x_IDE: probe of 0000:00:05.0 failed with error -16
>> ide-gd driver 1.18
>> ide-cd driver 5.00
>> mice: PS/2 mouse device common for all mice
>> turn off boot console earlyprom0
>>
>> So PCI probe seems to work. The errors show some bugs with the APB bridges.
>
> You have just committed a bug, writes and reads should list methods in
> {b,w,l} order.
> See cpu_register_io_memory_fixed where we do map byte access at index 0 etc..
> Not shure if it helps solving current issue, but the code should look
> like the following:
>
>  static CPUWriteMemoryFunc * const apb_pci_config_writes[] = {
> -    &apb_pci_config_writel,
> -    &apb_pci_config_writew,
>     &apb_pci_config_writeb,
> +    &apb_pci_config_writew,
> +    &apb_pci_config_writel,
>  };
>
>  static CPUReadMemoryFunc * const apb_pci_config_reads[] = {
> -    &apb_pci_config_readl,
> -    &apb_pci_config_readw,
>     &apb_pci_config_readb,
> +    &apb_pci_config_readw,
> +    &apb_pci_config_readl,
>  };

Good catch. I actually did try the correct order, but maybe I edited
the wrong place and it didn't work then.

With your change, CMD646 revision is shown correctly:
cmd64x 0000:00:05.0: IDE controller (0x1095:0x0646 rev 0x01)
PCI: Enabling device: (0000:00:05.0), cmd 301
cmd64x 0000:00:05.0: unable to enable IDE controller
cmd64x 0000:00:05.0: IDE controller (0x1095:0x0646 rev 0x01)
Blue Swirl - Jan. 18, 2010, 7:47 p.m.
On Tue, Jan 12, 2010 at 7:29 PM, Blue Swirl <blauwirbel@gmail.com> wrote:
> On Mon, Jan 11, 2010 at 10:33 PM, Igor Kovalenko
> <igor.v.kovalenko@gmail.com> wrote:
>> On Tue, Jan 12, 2010 at 12:29 AM, Blue Swirl <blauwirbel@gmail.com> wrote:
>>> On Sun, Jan 10, 2010 at 6:41 PM, Blue Swirl <blauwirbel@gmail.com> wrote:
>>>> On Sun, Jan 3, 2010 at 7:18 PM, Blue Swirl <blauwirbel@gmail.com> wrote:
>>>>> On Sun, Jan 3, 2010 at 6:06 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
>>>>>> On Sun, Jan 03, 2010 at 06:50:15PM +0100, Alexander Graf wrote:
>>>>>>>
>>>>>>> On 03.01.2010, at 18:44, Michael S. Tsirkin wrote:
>>>>>>>
>>>>>>> > On Sun, Jan 03, 2010 at 06:40:52PM +0100, Alexander Graf wrote:
>>>>>>> >>
>>>>>>> >> On 03.01.2010, at 18:29, Michael S. Tsirkin wrote:
>>>>>>> >>
>>>>>>> >>> On Sun, Jan 03, 2010 at 05:09:32PM +0100, Alexander Graf wrote:
>>>>>>> >>>>
>>>>>>> >>>> On 03.01.2010, at 16:45, Michael S. Tsirkin wrote:
>>>>>>> >>>>
>>>>>>> >>>>> On Sun, Jan 03, 2010 at 02:50:45AM +0100, Alexander Graf wrote:
>>>>>>> >>>>>> Different host buses may have different layouts for config space accessors.
>>>>>>> >>>>>>
>>>>>>> >>>>>> The Mac U3 for example uses the following define to access Type 0 (directly
>>>>>>> >>>>>> attached) devices:
>>>>>>> >>>>>>
>>>>>>> >>>>>> #define MACRISC_CFA0(devfn, off)        \
>>>>>>> >>>>>>      ((1 << (unsigned int)PCI_SLOT(dev_fn)) \
>>>>>>> >>>>>>      | (((unsigned int)PCI_FUNC(dev_fn)) << 8) \
>>>>>>> >>>>>>      | (((unsigned int)(off)) & 0xFCUL))
>>>>>>> >>>>>>
>>>>>>> >>>>>> Obviously, this is different from the encoding we interpret in qemu. In
>>>>>>> >>>>>> order to let host controllers take some influence there, we can just
>>>>>>> >>>>>> introduce a converter function that takes whatever accessor we have and
>>>>>>> >>>>>> makes a qemu understandable one out of it.
>>>>>>> >>>>>>
>>>>>>> >>>>>> This patch is the groundwork for Uninorth PCI config space fixes.
>>>>>>> >>>>>>
>>>>>>> >>>>>> Signed-off-by: Alexander Graf <agraf@suse.de>
>>>>>>> >>>>>> CC: Michael S. Tsirkin <mst@redhat.com>
>>>>>>> >>>>>
>>>>>>> >>>>> Thanks!
>>>>>>> >>>>>
>>>>>>> >>>>> It always bothered me that the code in pci_host uses x86 encoding and
>>>>>>> >>>>> other architectures are converted to x86.  As long as we are adding
>>>>>>> >>>>> redirection, maybe we should get rid of this, and make get_config_reg
>>>>>>> >>>>> return register and devfn separately, so we don't need to encode/decode
>>>>>>> >>>>> back and forth?
>>>>>>> >>>>
>>>>>>> >>>> Hmm, when touching code I'm not 100% sure what I'm doing I usually try to build on stuff that is there already. That's exactly what happened here.
>>>>>>> >>>>
>>>>>>> >>>> I'm personally not against defining the x86 format as qemu default. That way everyone knows what to deal with. I'm not sure if PCI defines anything that couldn't be represented by the x86 encoding in 32 bits. I actually doubt it. So it seems like a pretty good fit, especially given that all the other code is already in place.
>>>>>>> >>>>
>>>>>>> >>>>> OTOH if we are after a quick fix, won't it be simpler to have
>>>>>>> >>>>> unin_pci simply use its own routines instead of pci_host_conf_register_mmio
>>>>>>> >>>>> etc?
>>>>>>> >>>>
>>>>>>> >>>> Hm, I figured this is less work :-).
>>>>>>> >>>
>>>>>>> >>> Let me explain the idea: we have:
>>>>>>> >>>
>>>>>>> >>>   static void pci_host_config_writel_ioport(void *opaque,
>>>>>>> >>>                                             uint32_t addr, uint32_t val)
>>>>>>> >>>   {
>>>>>>> >>>       PCIHostState *s = opaque;
>>>>>>> >>>
>>>>>>> >>>       PCI_DPRINTF("%s addr %"PRIx32 " val %"PRIx32"\n", __func__, addr,
>>>>>>> >>>   val);
>>>>>>> >>>       s->config_reg = val;
>>>>>>> >>>   }
>>>>>>> >>>
>>>>>>> >>>   static uint32_t pci_host_config_readl_ioport(void *opaque, uint32_t
>>>>>>> >>>   addr)
>>>>>>> >>>   {
>>>>>>> >>>       PCIHostState *s = opaque;
>>>>>>> >>>       uint32_t val = s->config_reg;
>>>>>>> >>>
>>>>>>> >>>       PCI_DPRINTF("%s addr %"PRIx32" val %"PRIx32"\n", __func__, addr,
>>>>>>> >>>   val);
>>>>>>> >>>       return val;
>>>>>>> >>>   }
>>>>>>> >>>
>>>>>>> >>>   void pci_host_conf_register_ioport(pio_addr_t ioport, PCIHostState *s)
>>>>>>> >>>   {
>>>>>>> >>>       register_ioport_write(ioport, 4, 4, pci_host_config_writel_ioport,
>>>>>>> >>>   s);
>>>>>>> >>>       register_ioport_read(ioport, 4, 4, pci_host_config_readl_ioport, s);
>>>>>>> >>>   }
>>>>>>> >>>
>>>>>>> >>> If instead of a simple config_reg = val we translate to pc format
>>>>>>> >>> here, the rest will work. No?
>>>>>>> >>
>>>>>>> >> Well, that'd mean I'd have to implement a config_reg read function and do the conversion backwards again there. Or I could just save it off in the unin state ... hmm ...
>>>>>>> >
>>>>>>> > Right.
>>>>>>> >
>>>>>>> >> Either way, let's better get this done right. I'd rather want to have a proper framework in place in case someone else stumbles across something similar.
>>>>>>> >>
>>>>>>> >> Alex
>>>>>>> >
>>>>>>> > There's already ugliness with swap/noswap versions in there.  Anything
>>>>>>> > that claims to be a proper framework would need to address that as well
>>>>>>> > IMO.
>>>>>>>
>>>>>>> Hm, so you'd rather wait for 5 years to have an all-in-one rework than
>>>>>>> incrementally improving the existing code?
>>>>>>
>>>>>> Not really, incremental improvements are good.  But what you posted does
>>>>>> not seem to clean up most code, what it really does is fixes ppc
>>>>>> unin_pci.  My feeling was since there's only one user for now maybe it
>>>>>> is better to just have device specific code rather than complicate
>>>>>> generic code. Makes sense?
>>>>>>
>>>>>> We need to see several users before we add yet another level of
>>>>>> indirection.  If there is a level of indirection that solves the
>>>>>> swap/noswap ugliness as well that would show this is a good abstraction.
>>>>>> I think I even know what a good abstraction would be: decode address on
>>>>>> write and keep it in decoded form.
>>>>>
>>>>> I think Sparc64 PBM configuration cycles are also a bit different.
>>>>> It's described in UltraSPARC User's Manual 805-0087, p.325.
>>>>>
>>>>> Currently both QEMU and OpenBIOS just use something common, but as
>>>>> soon as Linux boot gets so far as to probe PCI bus, we'd have to fix
>>>>> that.
>>>>
>>>> That time is very close, with latest QEMU and OpenBIOS, PCI probe
>>>> starts (with GDB tricks for calibrate_delay, which won't be needed
>>>> after %tick_cmpr is fixed):
>>>
>>> Now I get this:
>>> [sparc64] Kernel already loaded
>>>
>>> PROMLIB: Sun IEEE Boot Prom 'OBP 3.10.24 1999/01/01 01:01'
>>> PROMLIB: Root node compatible: sun4u
>>> Linux version 2.6.32 (test@host) (gcc version 4.2.4) #3 Sat Jan 9
>>> 20:36:42 UTC 2010
>>> bootconsole [earlyprom0] enabled
>>> ARCH: SUN4U
>>> Ethernet address: 52:54:00:12:34:56
>>> Kernel: Using 1 locked TLB entries for main kernel image.
>>> Remapping the kernel... done.
>>> OF stdout device is: /pci@1fe,0/pci@1/pci@1,1/ebus@3/su@1fe
>>> PROM: Built device tree with 32176 bytes of memory.
>>> Top of RAM: 0x7e80000, Total RAM: 0x7e80000
>>> Memory hole size: 0MB
>>> Zone PFN ranges:
>>>  Normal   0x00000000 -> 0x00003f40
>>> Movable zone start PFN for each node
>>> early_node_map[1] active PFN ranges
>>>    0: 0x00000000 -> 0x00003f40
>>> On node 0 totalpages: 16192
>>>  Normal zone: 127 pages used for memmap
>>>  Normal zone: 0 pages reserved
>>>  Normal zone: 16065 pages, LIFO batch:3
>>> Booting Linux...
>>> Built 1 zonelists in Zone order, mobility grouping on.  Total pages: 16065
>>> Kernel command line: root=/dev/hda1 console=ttyS0 -p debug lpj=100000
>>> PID hash table entries: 512 (order: -1, 4096 bytes)
>>> Dentry cache hash table entries: 16384 (order: 4, 131072 bytes)
>>> Inode-cache hash table entries: 8192 (order: 3, 65536 bytes)
>>> Memory: 118480k available (1472k kernel code, 488k data, 104k init)
>>> [fffff80000000000,0000000007e80000]
>>> SLUB: Genslabs=14, HWalign=32, Order=0-3, MinObjects=0, CPUs=1, Nodes=1
>>> Hierarchical RCU implementation.
>>> NR_IRQS:255
>>> clocksource: mult[a0000] shift[16]
>>> clockevent: mult[19999999] shift[32]
>>> Console: colour dummy device 80x25
>>> Calibrating delay loop (skipped) preset value.. 50.00 BogoMIPS (lpj=100000)
>>> Mount-cache hash table entries: 512
>>> /pci: PCI IO[1fe02000000] MEM[1ff00000000]
>>> /pci: SABRE PCI Bus Module ver[0:0]
>>> PCI: Scanning PBM /pci
>>> ------------[ cut here ]------------
>>> WARNING: at /src/linux.git/fs/sysfs/dir.c:491 sysfs_add_one+0x8c/0xc0()
>>> sysfs: cannot create duplicate filename '/class/pci_bus/0000:00'
>>> Call Trace:
>>>  [0000000000450ea8] warn_slowpath_fmt+0x28/0x40
>>>  [00000000004e92ec] sysfs_add_one+0x8c/0xc0
>>>  [00000000004ea480] sysfs_do_create_link+0x80/0x140
>>>  [000000000053fc88] device_add+0x3c8/0x500
>>>  [0000000000513f34] pci_bus_add_child+0x14/0x80
>>>  [0000000000514144] pci_bus_add_devices+0x144/0x200
>>>  [00000000005140ec] pci_bus_add_devices+0xec/0x200
>>>  [000000000056ceac] pci_scan_one_pbm+0x6c/0xa0
>>>  [000000000056d6f0] sabre_probe+0x2d0/0x5a0
>>>  [0000000000569d9c] of_platform_device_probe+0x3c/0x80
>>>  [00000000005427b0] driver_probe_device+0x70/0x160
>>>  [0000000000542918] __driver_attach+0x78/0xa0
>>>  [0000000000541bcc] bus_for_each_dev+0x4c/0x80
>>>  [00000000005421dc] bus_add_driver+0x9c/0x240
>>>  [0000000000542c10] driver_register+0x50/0x160
>>>  [0000000000426a98] do_one_initcall+0x18/0x160
>>> ---[ end trace 139ce121c98e96c9 ]---
>>> pci 0000:00:01.1: Error adding bus, continuing
>>> ------------[ cut here ]------------
>>> WARNING: at /src/linux.git/fs/sysfs/dir.c:491 sysfs_add_one+0x8c/0xc0()
>>> sysfs: cannot create duplicate filename '/class/pci_bus/0000:00'
>>> Call Trace:
>>>  [0000000000450ea8] warn_slowpath_fmt+0x28/0x40
>>>  [00000000004e92ec] sysfs_add_one+0x8c/0xc0
>>>  [00000000004ea480] sysfs_do_create_link+0x80/0x140
>>>  [000000000053fc88] device_add+0x3c8/0x500
>>>  [0000000000513f34] pci_bus_add_child+0x14/0x80
>>>  [0000000000514144] pci_bus_add_devices+0x144/0x200
>>>  [000000000056ceac] pci_scan_one_pbm+0x6c/0xa0
>>>  [000000000056d6f0] sabre_probe+0x2d0/0x5a0
>>>  [0000000000569d9c] of_platform_device_probe+0x3c/0x80
>>>  [00000000005427b0] driver_probe_device+0x70/0x160
>>>  [0000000000542918] __driver_attach+0x78/0xa0
>>>  [0000000000541bcc] bus_for_each_dev+0x4c/0x80
>>>  [00000000005421dc] bus_add_driver+0x9c/0x240
>>>  [0000000000542c10] driver_register+0x50/0x160
>>>  [0000000000426a98] do_one_initcall+0x18/0x160
>>>  [00000000005f4274] kernel_init+0xd4/0x140
>>> ---[ end trace 139ce121c98e96ca ]---
>>> pci 0000:00:01.0: Error adding bus, continuing
>>> bio: create slab <bio-0> at 0
>>> vgaarb: loaded
>>> Switching to clocksource tick
>>> io scheduler noop registered (default)
>>> ------------[ cut here ]------------
>>> WARNING: at /src/linux.git/fs/proc/generic.c:590 proc_register+0xe8/0x1c0()
>>> proc_dir_entry 'pci/0000:00' already registered
>>> Call Trace:
>>>  [0000000000450ea8] warn_slowpath_fmt+0x28/0x40
>>>  [00000000004e2948] proc_register+0xe8/0x1c0
>>>  [00000000004e2c30] proc_mkdir_mode+0x30/0x60
>>>  [000000000051ce14] pci_proc_attach_device+0xb4/0xe0
>>>  [00000000006027fc] pci_proc_init+0x5c/0xa0
>>>  [0000000000426a98] do_one_initcall+0x18/0x160
>>>  [00000000005f4274] kernel_init+0xd4/0x140
>>>  [000000000042b130] kernel_thread+0x30/0x60
>>>  [000000000056a8f8] rest_init+0x18/0x80
>>> ---[ end trace 139ce121c98e96cb ]---
>>> ------------[ cut here ]------------
>>> WARNING: at /src/linux.git/fs/proc/generic.c:590 proc_register+0xe8/0x1c0()
>>> proc_dir_entry 'pci/0000:00' already registered
>>> Call Trace:
>>>  [0000000000450ea8] warn_slowpath_fmt+0x28/0x40
>>>  [00000000004e2948] proc_register+0xe8/0x1c0
>>>  [00000000004e2c30] proc_mkdir_mode+0x30/0x60
>>>  [000000000051ce14] pci_proc_attach_device+0xb4/0xe0
>>>  [00000000006027fc] pci_proc_init+0x5c/0xa0
>>>  [0000000000426a98] do_one_initcall+0x18/0x160
>>>  [00000000005f4274] kernel_init+0xd4/0x140
>>>  [000000000042b130] kernel_thread+0x30/0x60
>>>  [000000000056a8f8] rest_init+0x18/0x80
>>> ---[ end trace 139ce121c98e96cc ]---
>>> Uniform Multi-Platform E-IDE driver
>>> cmd64x 0000:00:05.0: IDE controller (0x1095:0x0646 rev 0x00)
>>> PCI: Enabling device: (0000:00:05.0), cmd 301
>>> cmd64x 0000:00:05.0: unable to enable IDE controller
>>> cmd64x 0000:00:05.0: IDE controller (0x1095:0x0646 rev 0x00)
>>> CMD64x_IDE 0000:00:05.0: BAR 0: can't reserve I/O region
>>> [0x1fe02000500-0x1fe02000507]
>>> cmd64x 0000:00:05.0: can't reserve resources
>>> CMD64x_IDE: probe of 0000:00:05.0 failed with error -16
>>> ide-gd driver 1.18
>>> ide-cd driver 5.00
>>> mice: PS/2 mouse device common for all mice
>>> turn off boot console earlyprom0
>>>
>>> So PCI probe seems to work. The errors show some bugs with the APB bridges.
>>
>> You have just committed a bug, writes and reads should list methods in
>> {b,w,l} order.
>> See cpu_register_io_memory_fixed where we do map byte access at index 0 etc..
>> Not shure if it helps solving current issue, but the code should look
>> like the following:
>>
>>  static CPUWriteMemoryFunc * const apb_pci_config_writes[] = {
>> -    &apb_pci_config_writel,
>> -    &apb_pci_config_writew,
>>     &apb_pci_config_writeb,
>> +    &apb_pci_config_writew,
>> +    &apb_pci_config_writel,
>>  };
>>
>>  static CPUReadMemoryFunc * const apb_pci_config_reads[] = {
>> -    &apb_pci_config_readl,
>> -    &apb_pci_config_readw,
>>     &apb_pci_config_readb,
>> +    &apb_pci_config_readw,
>> +    &apb_pci_config_readl,
>>  };
>
> Good catch. I actually did try the correct order, but maybe I edited
> the wrong place and it didn't work then.
>
> With your change, CMD646 revision is shown correctly:
> cmd64x 0000:00:05.0: IDE controller (0x1095:0x0646 rev 0x01)
> PCI: Enabling device: (0000:00:05.0), cmd 301
> cmd64x 0000:00:05.0: unable to enable IDE controller
> cmd64x 0000:00:05.0: IDE controller (0x1095:0x0646 rev 0x01)

Maybe there is some other bug with the revision field. Just for fun, I
enabled virtio for Sparc64 but Linux sees the zero revision field as 1
(used as ABI version):

cmd64x 0000:00:05.0: IDE controller (0x1095:0x0646 rev 0x01)
CMD64x_IDE 0000:00:05.0: BAR 0: can't reserve I/O region
[0x1fe02000500-0x1fe02000507]
cmd64x 0000:00:05.0: can't reserve resources
CMD64x_IDE: probe of 0000:00:05.0 failed with error -16
ide-gd driver 1.18
ne2k-pci.c:v1.03 9/22/2003 D. Becker/P. Gortmaker
PCI: Enabling device: (0000:00:04.0), cmd 301
eth0: RealTek RTL-8029 found at 0x1fe02000400, IRQ 1, 52:54:00:12:34:56.
pcnet32.c:v1.35 21.Apr.2008 tsbogend@alpha.franken.de
mice: PS/2 mouse device common for all mice
virtio_pci: expected ABI version 0, got 1

However, RTL-8029 gets the MAC address read out correctly.

Patch

diff --git a/hw/apb_pci.c b/hw/apb_pci.c
index f05308b..fedb84c 100644
--- a/hw/apb_pci.c
+++ b/hw/apb_pci.c
@@ -222,6 +222,7 @@  PCIBus *pci_apb_init(target_phys_addr_t special_base,
     /* mem_data */
     sysbus_mmio_map(s, 3, mem_base);
     d = FROM_SYSBUS(APBState, s);
+    pci_host_init(&d->host_state);
     d->host_state.bus = pci_register_bus(&d->busdev.qdev, "pci",
                                          pci_apb_set_irq, pci_pbm_map_irq, pic,
                                          0, 32);
diff --git a/hw/grackle_pci.c b/hw/grackle_pci.c
index ee4fed5..7feba63 100644
--- a/hw/grackle_pci.c
+++ b/hw/grackle_pci.c
@@ -88,6 +88,7 @@  PCIBus *pci_grackle_init(uint32_t base, qemu_irq *pic)
     qdev_init_nofail(dev);
     s = sysbus_from_qdev(dev);
     d = FROM_SYSBUS(GrackleState, s);
+    pci_host_init(&d->host_state);
     d->host_state.bus = pci_register_bus(&d->busdev.qdev, "pci",
                                          pci_grackle_set_irq,
                                          pci_grackle_map_irq,
diff --git a/hw/gt64xxx.c b/hw/gt64xxx.c
index fb7f5bd..8cf93ca 100644
--- a/hw/gt64xxx.c
+++ b/hw/gt64xxx.c
@@ -1120,6 +1120,7 @@  PCIBus *pci_gt64120_init(qemu_irq *pic)
     s = qemu_mallocz(sizeof(GT64120State));
     s->pci = qemu_mallocz(sizeof(GT64120PCIState));
 
+    pci_host_init(s->pci);
     s->pci->bus = pci_register_bus(NULL, "pci",
                                    pci_gt64120_set_irq, pci_gt64120_map_irq,
                                    pic, 144, 4);
diff --git a/hw/pci_host.c b/hw/pci_host.c
index eeb8dee..2d12887 100644
--- a/hw/pci_host.c
+++ b/hw/pci_host.c
@@ -228,3 +228,14 @@  void pci_host_data_register_ioport(pio_addr_t ioport, PCIHostState *s)
     register_ioport_read(ioport, 4, 2, pci_host_data_readw_ioport, s);
     register_ioport_read(ioport, 4, 4, pci_host_data_readl_ioport, s);
 }
+
+/* This implements the default x86 way of interpreting the config register */
+static uint32_t pci_host_get_config_reg(PCIHostState *s, uint32_t addr)
+{
+    return s->config_reg | (addr & 3);
+}
+
+void pci_host_init(PCIHostState *s)
+{
+    s->get_config_reg = pci_host_get_config_reg;
+}
diff --git a/hw/pci_host.h b/hw/pci_host.h
index a006687..90a4c63 100644
--- a/hw/pci_host.h
+++ b/hw/pci_host.h
@@ -30,14 +30,19 @@ 
 
 #include "sysbus.h"
 
+/* for config space access */
+typedef uint32_t (*pci_config_reg_fn)(PCIHostState *s, uint32_t addr);
+
 struct PCIHostState {
     SysBusDevice busdev;
+    pci_config_reg_fn get_config_reg;
     uint32_t config_reg;
     PCIBus *bus;
 };
 
 void pci_data_write(PCIBus *s, uint32_t addr, uint32_t val, int len);
 uint32_t pci_data_read(PCIBus *s, uint32_t addr, int len);
+void pci_host_init(PCIHostState *s);
 
 /* for mmio */
 int pci_host_conf_register_mmio(PCIHostState *s);
diff --git a/hw/pci_host_template.h b/hw/pci_host_template.h
index 11e6c88..55aa85e 100644
--- a/hw/pci_host_template.h
+++ b/hw/pci_host_template.h
@@ -29,48 +29,52 @@  static void glue(pci_host_data_writeb, PCI_HOST_SUFFIX)(
     void* opaque, PCI_ADDR_T addr, uint32_t val)
 {
     PCIHostState *s = opaque;
+    uint32_t config_reg = s->get_config_reg(s, (uint32_t)addr);
 
     PCI_DPRINTF("writeb addr " TARGET_FMT_plx " val %x\n",
                 (target_phys_addr_t)addr, val);
-    if (s->config_reg & (1u << 31))
-        pci_data_write(s->bus, s->config_reg | (addr & 3), val, 1);
+    if (config_reg & (1u << 31))
+        pci_data_write(s->bus, config_reg, val, 1);
 }
 
 static void glue(pci_host_data_writew, PCI_HOST_SUFFIX)(
     void* opaque, PCI_ADDR_T addr, uint32_t val)
 {
     PCIHostState *s = opaque;
+    uint32_t config_reg = s->get_config_reg(s, (uint32_t)addr);
 #ifdef TARGET_WORDS_BIGENDIAN
     val = bswap16(val);
 #endif
     PCI_DPRINTF("writew addr " TARGET_FMT_plx " val %x\n",
                 (target_phys_addr_t)addr, val);
-    if (s->config_reg & (1u << 31))
-        pci_data_write(s->bus, s->config_reg | (addr & 3), val, 2);
+    if (config_reg & (1u << 31))
+        pci_data_write(s->bus, config_reg, val, 2);
 }
 
 static void glue(pci_host_data_writel, PCI_HOST_SUFFIX)(
     void* opaque, PCI_ADDR_T addr, uint32_t val)
 {
     PCIHostState *s = opaque;
+    uint32_t config_reg = s->get_config_reg(s, (uint32_t)addr);
 #ifdef TARGET_WORDS_BIGENDIAN
     val = bswap32(val);
 #endif
     PCI_DPRINTF("writel addr " TARGET_FMT_plx " val %x\n",
                 (target_phys_addr_t)addr, val);
-    if (s->config_reg & (1u << 31))
-        pci_data_write(s->bus, s->config_reg, val, 4);
+    if (config_reg & (1u << 31))
+        pci_data_write(s->bus, config_reg, val, 4);
 }
 
 static uint32_t glue(pci_host_data_readb, PCI_HOST_SUFFIX)(
     void* opaque, PCI_ADDR_T addr)
 {
     PCIHostState *s = opaque;
+    uint32_t config_reg = s->get_config_reg(s, (uint32_t)addr);
     uint32_t val;
 
-    if (!(s->config_reg & (1 << 31)))
+    if (!(config_reg & (1 << 31)))
         return 0xff;
-    val = pci_data_read(s->bus, s->config_reg | (addr & 3), 1);
+    val = pci_data_read(s->bus, config_reg, 1);
     PCI_DPRINTF("readb addr " TARGET_FMT_plx " val %x\n",
                 (target_phys_addr_t)addr, val);
     return val;
@@ -80,10 +84,11 @@  static uint32_t glue(pci_host_data_readw, PCI_HOST_SUFFIX)(
     void* opaque, PCI_ADDR_T addr)
 {
     PCIHostState *s = opaque;
+    uint32_t config_reg = s->get_config_reg(s, (uint32_t)addr);
     uint32_t val;
-    if (!(s->config_reg & (1 << 31)))
+    if (!(config_reg & (1 << 31)))
         return 0xffff;
-    val = pci_data_read(s->bus, s->config_reg | (addr & 3), 2);
+    val = pci_data_read(s->bus, config_reg, 2);
     PCI_DPRINTF("readw addr " TARGET_FMT_plx " val %x\n",
                 (target_phys_addr_t)addr, val);
 #ifdef TARGET_WORDS_BIGENDIAN
@@ -96,10 +101,11 @@  static uint32_t glue(pci_host_data_readl, PCI_HOST_SUFFIX)(
     void* opaque, PCI_ADDR_T addr)
 {
     PCIHostState *s = opaque;
+    uint32_t config_reg = s->get_config_reg(s, (uint32_t)addr);
     uint32_t val;
-    if (!(s->config_reg & (1 << 31)))
+    if (!(config_reg))
         return 0xffffffff;
-    val = pci_data_read(s->bus, s->config_reg | (addr & 3), 4);
+    val = pci_data_read(s->bus, config_reg, 4);
     PCI_DPRINTF("readl addr " TARGET_FMT_plx " val %x\n",
                 (target_phys_addr_t)addr, val);
 #ifdef TARGET_WORDS_BIGENDIAN
diff --git a/hw/piix_pci.c b/hw/piix_pci.c
index 1b67475..97500e0 100644
--- a/hw/piix_pci.c
+++ b/hw/piix_pci.c
@@ -211,6 +211,7 @@  PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix3_devfn, qemu_irq *
 
     dev = qdev_create(NULL, "i440FX-pcihost");
     s = FROM_SYSBUS(I440FXState, sysbus_from_qdev(dev));
+    pci_host_init(s);
     b = pci_bus_new(&s->busdev.qdev, NULL, 0);
     s->bus = b;
     qdev_init_nofail(dev);
diff --git a/hw/ppc4xx_pci.c b/hw/ppc4xx_pci.c
index 2d00b61..dd8e290 100644
--- a/hw/ppc4xx_pci.c
+++ b/hw/ppc4xx_pci.c
@@ -357,6 +357,7 @@  PCIBus *ppc4xx_pci_init(CPUState *env, qemu_irq pci_irqs[4],
 
     controller = qemu_mallocz(sizeof(PPC4xxPCIState));
 
+    pci_host_init(&controller->pci_state);
     controller->pci_state.bus = pci_register_bus(NULL, "pci",
                                                  ppc4xx_pci_set_irq,
                                                  ppc4xx_pci_map_irq,
diff --git a/hw/ppce500_pci.c b/hw/ppce500_pci.c
index a72fb86..5914183 100644
--- a/hw/ppce500_pci.c
+++ b/hw/ppce500_pci.c
@@ -278,6 +278,7 @@  PCIBus *ppce500_pci_init(qemu_irq pci_irqs[4], target_phys_addr_t registers)
 
     controller = qemu_mallocz(sizeof(PPCE500PCIState));
 
+    pci_host_init(&controller->pci_state);
     controller->pci_state.bus = pci_register_bus(NULL, "pci",
                                                  mpc85xx_pci_set_irq,
                                                  mpc85xx_pci_map_irq,
diff --git a/hw/unin_pci.c b/hw/unin_pci.c
index 3ae4e7a..fdb9401 100644
--- a/hw/unin_pci.c
+++ b/hw/unin_pci.c
@@ -84,6 +84,7 @@  static int pci_unin_main_init_device(SysBusDevice *dev)
     /* Uninorth main bus */
     s = FROM_SYSBUS(UNINState, dev);
 
+    pci_host_init(&s->host_state);
     pci_mem_config = pci_host_conf_register_mmio(&s->host_state);
     pci_mem_data = pci_host_data_register_mmio(&s->host_state);
     sysbus_init_mmio(dev, 0x1000, pci_mem_config);