Patchwork [v2] piix: define a TOM register to report the base of PCI

login
register
mail settings
Submitter Hao, Xudong
Date Feb. 25, 2013, 6:53 a.m.
Message ID <1361775217-3454-1-git-send-email-xudong.hao@intel.com>
Download mbox | patch
Permalink /patch/222853/
State New
Headers show

Comments

Hao, Xudong - Feb. 25, 2013, 6:53 a.m.
v2:
* Use "piix: " in the subject rather than "qemu: "
* Define TOM register as one byte
* Define default TOM value instead of hardcode 0xe0000000 in more that one place
* Use API pci_set_byte for pci config access
* Use dev->config instead of the indirect d->dev.config

Define a TOM(top of memory) register to report the base of PCI memory, update
memory region dynamically. TOM register are defined to one byte in PCI configure
space, because that only upper 4 bit of PCI memory takes effect for Xen, so
it requires bios set TOM with 16M-aligned.

Signed-off-by: Xudong Hao <xudong.hao@intel.com>
Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
---
 hw/pc.h       |  7 +++---
 hw/pc_piix.c  | 12 +++-------
 hw/piix_pci.c | 73 +++++++++++++++++++++++++++++++++++++++++++----------------
 3 files changed, 59 insertions(+), 33 deletions(-)
Ian Campbell - Feb. 25, 2013, 3:38 p.m.
On Mon, 2013-02-25 at 06:53 +0000, Xudong Hao wrote:
> v2:
> * Use "piix: " in the subject rather than "qemu: "
> * Define TOM register as one byte
> * Define default TOM value instead of hardcode 0xe0000000 in more that one place
> * Use API pci_set_byte for pci config access
> * Use dev->config instead of the indirect d->dev.config
> 
> Define a TOM(top of memory) register to report the base of PCI memory, update
> memory region dynamically. TOM register are defined to one byte in PCI configure
> space, because that only upper 4 bit of PCI memory takes effect for Xen, so
> it requires bios set TOM with 16M-aligned.

Is it really OK to just add registers to the PIIX like this? This is
supposed to be emulating an actual real life device after all.

Perhaps an HVM param or some other out of band mechanism would be better
here?

Ian.
Stefano Stabellini - Feb. 25, 2013, 4:05 p.m.
On Mon, 25 Feb 2013, Xudong Hao wrote:
> v2:
> * Use "piix: " in the subject rather than "qemu: "
> * Define TOM register as one byte
> * Define default TOM value instead of hardcode 0xe0000000 in more that one place
> * Use API pci_set_byte for pci config access
> * Use dev->config instead of the indirect d->dev.config
> 
> Define a TOM(top of memory) register to report the base of PCI memory, update
> memory region dynamically. TOM register are defined to one byte in PCI configure
> space, because that only upper 4 bit of PCI memory takes effect for Xen, so
> it requires bios set TOM with 16M-aligned.
> 
> Signed-off-by: Xudong Hao <xudong.hao@intel.com>
> Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>

The patch is OK from my POV, but I think that Ian raised a valid
concern: we are supposed to emulate a real piece of hardware, maybe
another mechanism (xenbus? hvmop?) to pass the information from
hvmloader to QEMU would be a better fit than this.
Otherwise at least we would need to advertize this feature somehow: if
hvmloader can use it, the guest kernel can use it too...



>  hw/pc.h       |  7 +++---
>  hw/pc_piix.c  | 12 +++-------
>  hw/piix_pci.c | 73 +++++++++++++++++++++++++++++++++++++++++++----------------
>  3 files changed, 59 insertions(+), 33 deletions(-)
> 
> diff --git a/hw/pc.h b/hw/pc.h
> index fbcf43d..62adcc5 100644
> --- a/hw/pc.h
> +++ b/hw/pc.h
> @@ -129,15 +129,14 @@ extern int no_hpet;
>  struct PCII440FXState;
>  typedef struct PCII440FXState PCII440FXState;
>  
> +#define I440FX_TOM     0xe0000000
> +#define I440FX_XEN_TOM 0xf0000000
> +
>  PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix_devfn,
>                      ISABus **isa_bus, qemu_irq *pic,
>                      MemoryRegion *address_space_mem,
>                      MemoryRegion *address_space_io,
>                      ram_addr_t ram_size,
> -                    hwaddr pci_hole_start,
> -                    hwaddr pci_hole_size,
> -                    hwaddr pci_hole64_start,
> -                    hwaddr pci_hole64_size,
>                      MemoryRegion *pci_memory,
>                      MemoryRegion *ram_memory);
>  
> diff --git a/hw/pc_piix.c b/hw/pc_piix.c
> index 0a6923d..2eef510 100644
> --- a/hw/pc_piix.c
> +++ b/hw/pc_piix.c
> @@ -93,9 +93,9 @@ static void pc_init1(MemoryRegion *system_memory,
>          kvmclock_create();
>      }
>  
> -    if (ram_size >= 0xe0000000 ) {
> -        above_4g_mem_size = ram_size - 0xe0000000;
> -        below_4g_mem_size = 0xe0000000;
> +    if (ram_size >= I440FX_TOM) {
> +        above_4g_mem_size = ram_size - I440FX_TOM;
> +        below_4g_mem_size = I440FX_TOM;
>      } else {
>          above_4g_mem_size = 0;
>          below_4g_mem_size = ram_size;
> @@ -130,12 +130,6 @@ static void pc_init1(MemoryRegion *system_memory,
>      if (pci_enabled) {
>          pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, &isa_bus, gsi,
>                                system_memory, system_io, ram_size,
> -                              below_4g_mem_size,
> -                              0x100000000ULL - below_4g_mem_size,
> -                              0x100000000ULL + above_4g_mem_size,
> -                              (sizeof(hwaddr) == 4
> -                               ? 0
> -                               : ((uint64_t)1 << 62)),
>                                pci_memory, ram_memory);
>      } else {
>          pci_bus = NULL;
> diff --git a/hw/piix_pci.c b/hw/piix_pci.c
> index 3d79c73..3e5a25c 100644
> --- a/hw/piix_pci.c
> +++ b/hw/piix_pci.c
> @@ -86,6 +86,14 @@ struct PCII440FXState {
>  #define I440FX_PAM_SIZE 7
>  #define I440FX_SMRAM    0x72
>  
> +/* The maximum vaule of TOM(top of memory) register in I440FX
> + * is 1G, so it doesn't meet any popular virutal machines, so
> + * define another register to report the base of PCI memory.
> + * Use one byte 0xb0 for the upper 8 bit, they are originally
> + * resevered for host bridge.
> + * */
> +#define I440FX_PCI_HOLE_BASE 0xb0
> +
>  static void piix3_set_irq(void *opaque, int pirq, int level);
>  static PCIINTxRoute piix3_route_intx_pin_to_irq(void *opaque, int pci_intx);
>  static void piix3_write_config_xen(PCIDevice *dev,
> @@ -101,6 +109,43 @@ static int pci_slot_get_pirq(PCIDevice *pci_dev, int pci_intx)
>      return (pci_intx + slot_addend) & 3;
>  }
>  
> +
> +static void i440fx_update_pci_mem_hole(PCII440FXState *f, bool del)
> +{
> +    ram_addr_t above_4g_mem_size;
> +    hwaddr pci_hole_start, pci_hole_size, pci_hole64_start, pci_hole64_size;
> +
> +    pci_hole_start = pci_default_read_config(&f->dev, I440FX_PCI_HOLE_BASE, 1) << 24;
> +    pci_hole_size = 0x100000000ULL - pci_hole_start;
> +
> +    if (ram_size >= pci_hole_start) {
> +        above_4g_mem_size = ram_size - pci_hole_start;
> +    } else {
> +        above_4g_mem_size = 0;
> +    }
> +    pci_hole64_start = 0x100000000ULL + above_4g_mem_size;
> +    pci_hole64_size = sizeof(hwaddr) == 4 ? 0 : ((uint64_t)1 << 62);
> +
> +    if (del) {
> +        memory_region_del_subregion(f->system_memory, &f->pci_hole);
> +        if (pci_hole64_size) {
> +            memory_region_del_subregion(f->system_memory, &f->pci_hole_64bit);
> +        }
> +    }
> +
> +    memory_region_init_alias(&f->pci_hole, "pci-hole", f->pci_address_space,
> +                             pci_hole_start, pci_hole_size);
> +    memory_region_add_subregion(f->system_memory, pci_hole_start, &f->pci_hole);
> +    memory_region_init_alias(&f->pci_hole_64bit, "pci-hole64",
> +                             f->pci_address_space,
> +                             pci_hole64_start, pci_hole64_size);
> +    if (pci_hole64_size) {
> +        memory_region_add_subregion(f->system_memory, pci_hole64_start,
> +                                    &f->pci_hole_64bit);
> +    }
> +}
> +
> +
>  static void i440fx_update_memory_mappings(PCII440FXState *d)
>  {
>      int i;
> @@ -136,6 +181,9 @@ static void i440fx_write_config(PCIDevice *dev,
>          range_covers_byte(address, len, I440FX_SMRAM)) {
>          i440fx_update_memory_mappings(d);
>      }
> +    if (range_covers_byte(address, len, I440FX_PCI_HOLE_BASE)) {
> +        i440fx_update_pci_mem_hole(d, true);
> +    }
>  }
>  
>  static int i440fx_load_old(QEMUFile* f, void *opaque, int version_id)
> @@ -203,6 +251,10 @@ static int i440fx_initfn(PCIDevice *dev)
>  
>      d->dev.config[I440FX_SMRAM] = 0x02;
>  
> +    /* Emulate top of memory, here use 0xe0000000 as default val*/
> +    uint32_t addr = xen_enabled() ? I440FX_XEN_TOM : I440FX_TOM;
> +    pci_set_byte(dev->config + I440FX_PCI_HOLE_BASE, (uint8_t)(addr >> 24));
> +
>      cpu_smm_register(&i440fx_set_smm, d);
>      return 0;
>  }
> @@ -214,10 +266,6 @@ static PCIBus *i440fx_common_init(const char *device_name,
>                                    MemoryRegion *address_space_mem,
>                                    MemoryRegion *address_space_io,
>                                    ram_addr_t ram_size,
> -                                  hwaddr pci_hole_start,
> -                                  hwaddr pci_hole_size,
> -                                  hwaddr pci_hole64_start,
> -                                  hwaddr pci_hole64_size,
>                                    MemoryRegion *pci_address_space,
>                                    MemoryRegion *ram_memory)
>  {
> @@ -244,16 +292,6 @@ static PCIBus *i440fx_common_init(const char *device_name,
>      f->system_memory = address_space_mem;
>      f->pci_address_space = pci_address_space;
>      f->ram_memory = ram_memory;
> -    memory_region_init_alias(&f->pci_hole, "pci-hole", f->pci_address_space,
> -                             pci_hole_start, pci_hole_size);
> -    memory_region_add_subregion(f->system_memory, pci_hole_start, &f->pci_hole);
> -    memory_region_init_alias(&f->pci_hole_64bit, "pci-hole64",
> -                             f->pci_address_space,
> -                             pci_hole64_start, pci_hole64_size);
> -    if (pci_hole64_size) {
> -        memory_region_add_subregion(f->system_memory, pci_hole64_start,
> -                                    &f->pci_hole_64bit);
> -    }
>      memory_region_init_alias(&f->smram_region, "smram-region",
>                               f->pci_address_space, 0xa0000, 0x20000);
>      memory_region_add_subregion_overlap(f->system_memory, 0xa0000,
> @@ -295,6 +333,7 @@ static PCIBus *i440fx_common_init(const char *device_name,
>      (*pi440fx_state)->dev.config[0x57]=ram_size;
>  
>      i440fx_update_memory_mappings(f);
> +    i440fx_update_pci_mem_hole(f, false);
>  
>      return b;
>  }
> @@ -304,10 +343,6 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix3_devfn,
>                      MemoryRegion *address_space_mem,
>                      MemoryRegion *address_space_io,
>                      ram_addr_t ram_size,
> -                    hwaddr pci_hole_start,
> -                    hwaddr pci_hole_size,
> -                    hwaddr pci_hole64_start,
> -                    hwaddr pci_hole64_size,
>                      MemoryRegion *pci_memory, MemoryRegion *ram_memory)
>  
>  {
> @@ -315,8 +350,6 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix3_devfn,
>  
>      b = i440fx_common_init("i440FX", pi440fx_state, piix3_devfn, isa_bus, pic,
>                             address_space_mem, address_space_io, ram_size,
> -                           pci_hole_start, pci_hole_size,
> -                           pci_hole64_start, pci_hole64_size,
>                             pci_memory, ram_memory);
>      return b;
>  }
> -- 
> 1.7.12.1
>
Hao, Xudong - Feb. 26, 2013, 3:32 a.m.
> -----Original Message-----
> From: qemu-devel-bounces+xudong.hao=intel.com@nongnu.org
> [mailto:qemu-devel-bounces+xudong.hao=intel.com@nongnu.org] On Behalf
> Of Stefano Stabellini
> Sent: Tuesday, February 26, 2013 12:06 AM
> To: Hao, Xudong
> Cc: aliguori@us.ibm.com; Stefano Stabellini; mst@redhat.com;
> qemu-devel@nongnu.org; xen-devel@lists.xen.org; JBeulich@suse.com;
> afaerber@suse.de
> Subject: Re: [Qemu-devel] [PATCH v2] piix: define a TOM register to report the
> base of PCI
> 
> On Mon, 25 Feb 2013, Xudong Hao wrote:
> > v2:
> > * Use "piix: " in the subject rather than "qemu: "
> > * Define TOM register as one byte
> > * Define default TOM value instead of hardcode 0xe0000000 in more that one
> place
> > * Use API pci_set_byte for pci config access
> > * Use dev->config instead of the indirect d->dev.config
> >
> > Define a TOM(top of memory) register to report the base of PCI memory,
> update
> > memory region dynamically. TOM register are defined to one byte in PCI
> configure
> > space, because that only upper 4 bit of PCI memory takes effect for Xen, so
> > it requires bios set TOM with 16M-aligned.
> >
> > Signed-off-by: Xudong Hao <xudong.hao@intel.com>
> > Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
> 
> The patch is OK from my POV, but I think that Ian raised a valid
> concern: we are supposed to emulate a real piece of hardware, maybe
> another mechanism (xenbus? hvmop?) to pass the information from
> hvmloader to QEMU would be a better fit than this.
> Otherwise at least we would need to advertize this feature somehow: if
> hvmloader can use it, the guest kernel can use it too...
> 
Hi, Ian and Stefano

Here adding this faking register in bios is a hack way. 
However, what we emulated is not full real piece of hardware at all times, the max of TOM for i440fx is 1G, and we already adjust the TOM in qemu.
The faking register is only effective by bios and device model. This register is reserved by host bridge, so guest can't access this register, guest kernel should handle well when accessing a reserved region. 

-Thanks
Xudong
> 
> 
> >  hw/pc.h       |  7 +++---
> >  hw/pc_piix.c  | 12 +++-------
> >  hw/piix_pci.c | 73
> +++++++++++++++++++++++++++++++++++++++++++----------------
> >  3 files changed, 59 insertions(+), 33 deletions(-)
> >
> > diff --git a/hw/pc.h b/hw/pc.h
> > index fbcf43d..62adcc5 100644
> > --- a/hw/pc.h
> > +++ b/hw/pc.h
> > @@ -129,15 +129,14 @@ extern int no_hpet;
> >  struct PCII440FXState;
> >  typedef struct PCII440FXState PCII440FXState;
> >
> > +#define I440FX_TOM     0xe0000000
> > +#define I440FX_XEN_TOM 0xf0000000
> > +
> >  PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix_devfn,
> >                      ISABus **isa_bus, qemu_irq *pic,
> >                      MemoryRegion *address_space_mem,
> >                      MemoryRegion *address_space_io,
> >                      ram_addr_t ram_size,
> > -                    hwaddr pci_hole_start,
> > -                    hwaddr pci_hole_size,
> > -                    hwaddr pci_hole64_start,
> > -                    hwaddr pci_hole64_size,
> >                      MemoryRegion *pci_memory,
> >                      MemoryRegion *ram_memory);
> >
> > diff --git a/hw/pc_piix.c b/hw/pc_piix.c
> > index 0a6923d..2eef510 100644
> > --- a/hw/pc_piix.c
> > +++ b/hw/pc_piix.c
> > @@ -93,9 +93,9 @@ static void pc_init1(MemoryRegion *system_memory,
> >          kvmclock_create();
> >      }
> >
> > -    if (ram_size >= 0xe0000000 ) {
> > -        above_4g_mem_size = ram_size - 0xe0000000;
> > -        below_4g_mem_size = 0xe0000000;
> > +    if (ram_size >= I440FX_TOM) {
> > +        above_4g_mem_size = ram_size - I440FX_TOM;
> > +        below_4g_mem_size = I440FX_TOM;
> >      } else {
> >          above_4g_mem_size = 0;
> >          below_4g_mem_size = ram_size;
> > @@ -130,12 +130,6 @@ static void pc_init1(MemoryRegion
> *system_memory,
> >      if (pci_enabled) {
> >          pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, &isa_bus, gsi,
> >                                system_memory, system_io,
> ram_size,
> > -                              below_4g_mem_size,
> > -                              0x100000000ULL -
> below_4g_mem_size,
> > -                              0x100000000ULL +
> above_4g_mem_size,
> > -                              (sizeof(hwaddr) == 4
> > -                               ? 0
> > -                               : ((uint64_t)1 << 62)),
> >                                pci_memory, ram_memory);
> >      } else {
> >          pci_bus = NULL;
> > diff --git a/hw/piix_pci.c b/hw/piix_pci.c
> > index 3d79c73..3e5a25c 100644
> > --- a/hw/piix_pci.c
> > +++ b/hw/piix_pci.c
> > @@ -86,6 +86,14 @@ struct PCII440FXState {
> >  #define I440FX_PAM_SIZE 7
> >  #define I440FX_SMRAM    0x72
> >
> > +/* The maximum vaule of TOM(top of memory) register in I440FX
> > + * is 1G, so it doesn't meet any popular virutal machines, so
> > + * define another register to report the base of PCI memory.
> > + * Use one byte 0xb0 for the upper 8 bit, they are originally
> > + * resevered for host bridge.
> > + * */
> > +#define I440FX_PCI_HOLE_BASE 0xb0
> > +
> >  static void piix3_set_irq(void *opaque, int pirq, int level);
> >  static PCIINTxRoute piix3_route_intx_pin_to_irq(void *opaque, int
> pci_intx);
> >  static void piix3_write_config_xen(PCIDevice *dev,
> > @@ -101,6 +109,43 @@ static int pci_slot_get_pirq(PCIDevice *pci_dev, int
> pci_intx)
> >      return (pci_intx + slot_addend) & 3;
> >  }
> >
> > +
> > +static void i440fx_update_pci_mem_hole(PCII440FXState *f, bool del)
> > +{
> > +    ram_addr_t above_4g_mem_size;
> > +    hwaddr pci_hole_start, pci_hole_size, pci_hole64_start,
> pci_hole64_size;
> > +
> > +    pci_hole_start = pci_default_read_config(&f->dev,
> I440FX_PCI_HOLE_BASE, 1) << 24;
> > +    pci_hole_size = 0x100000000ULL - pci_hole_start;
> > +
> > +    if (ram_size >= pci_hole_start) {
> > +        above_4g_mem_size = ram_size - pci_hole_start;
> > +    } else {
> > +        above_4g_mem_size = 0;
> > +    }
> > +    pci_hole64_start = 0x100000000ULL + above_4g_mem_size;
> > +    pci_hole64_size = sizeof(hwaddr) == 4 ? 0 : ((uint64_t)1 << 62);
> > +
> > +    if (del) {
> > +        memory_region_del_subregion(f->system_memory,
> &f->pci_hole);
> > +        if (pci_hole64_size) {
> > +            memory_region_del_subregion(f->system_memory,
> &f->pci_hole_64bit);
> > +        }
> > +    }
> > +
> > +    memory_region_init_alias(&f->pci_hole, "pci-hole",
> f->pci_address_space,
> > +                             pci_hole_start, pci_hole_size);
> > +    memory_region_add_subregion(f->system_memory, pci_hole_start,
> &f->pci_hole);
> > +    memory_region_init_alias(&f->pci_hole_64bit, "pci-hole64",
> > +                             f->pci_address_space,
> > +                             pci_hole64_start, pci_hole64_size);
> > +    if (pci_hole64_size) {
> > +        memory_region_add_subregion(f->system_memory,
> pci_hole64_start,
> > +                                    &f->pci_hole_64bit);
> > +    }
> > +}
> > +
> > +
> >  static void i440fx_update_memory_mappings(PCII440FXState *d)
> >  {
> >      int i;
> > @@ -136,6 +181,9 @@ static void i440fx_write_config(PCIDevice *dev,
> >          range_covers_byte(address, len, I440FX_SMRAM)) {
> >          i440fx_update_memory_mappings(d);
> >      }
> > +    if (range_covers_byte(address, len, I440FX_PCI_HOLE_BASE)) {
> > +        i440fx_update_pci_mem_hole(d, true);
> > +    }
> >  }
> >
> >  static int i440fx_load_old(QEMUFile* f, void *opaque, int version_id)
> > @@ -203,6 +251,10 @@ static int i440fx_initfn(PCIDevice *dev)
> >
> >      d->dev.config[I440FX_SMRAM] = 0x02;
> >
> > +    /* Emulate top of memory, here use 0xe0000000 as default val*/
> > +    uint32_t addr = xen_enabled() ? I440FX_XEN_TOM : I440FX_TOM;
> > +    pci_set_byte(dev->config + I440FX_PCI_HOLE_BASE, (uint8_t)(addr >>
> 24));
> > +
> >      cpu_smm_register(&i440fx_set_smm, d);
> >      return 0;
> >  }
> > @@ -214,10 +266,6 @@ static PCIBus *i440fx_common_init(const char
> *device_name,
> >                                    MemoryRegion
> *address_space_mem,
> >                                    MemoryRegion
> *address_space_io,
> >                                    ram_addr_t ram_size,
> > -                                  hwaddr pci_hole_start,
> > -                                  hwaddr pci_hole_size,
> > -                                  hwaddr pci_hole64_start,
> > -                                  hwaddr pci_hole64_size,
> >                                    MemoryRegion
> *pci_address_space,
> >                                    MemoryRegion *ram_memory)
> >  {
> > @@ -244,16 +292,6 @@ static PCIBus *i440fx_common_init(const char
> *device_name,
> >      f->system_memory = address_space_mem;
> >      f->pci_address_space = pci_address_space;
> >      f->ram_memory = ram_memory;
> > -    memory_region_init_alias(&f->pci_hole, "pci-hole",
> f->pci_address_space,
> > -                             pci_hole_start, pci_hole_size);
> > -    memory_region_add_subregion(f->system_memory, pci_hole_start,
> &f->pci_hole);
> > -    memory_region_init_alias(&f->pci_hole_64bit, "pci-hole64",
> > -                             f->pci_address_space,
> > -                             pci_hole64_start, pci_hole64_size);
> > -    if (pci_hole64_size) {
> > -        memory_region_add_subregion(f->system_memory,
> pci_hole64_start,
> > -                                    &f->pci_hole_64bit);
> > -    }
> >      memory_region_init_alias(&f->smram_region, "smram-region",
> >                               f->pci_address_space, 0xa0000,
> 0x20000);
> >      memory_region_add_subregion_overlap(f->system_memory,
> 0xa0000,
> > @@ -295,6 +333,7 @@ static PCIBus *i440fx_common_init(const char
> *device_name,
> >      (*pi440fx_state)->dev.config[0x57]=ram_size;
> >
> >      i440fx_update_memory_mappings(f);
> > +    i440fx_update_pci_mem_hole(f, false);
> >
> >      return b;
> >  }
> > @@ -304,10 +343,6 @@ PCIBus *i440fx_init(PCII440FXState
> **pi440fx_state, int *piix3_devfn,
> >                      MemoryRegion *address_space_mem,
> >                      MemoryRegion *address_space_io,
> >                      ram_addr_t ram_size,
> > -                    hwaddr pci_hole_start,
> > -                    hwaddr pci_hole_size,
> > -                    hwaddr pci_hole64_start,
> > -                    hwaddr pci_hole64_size,
> >                      MemoryRegion *pci_memory, MemoryRegion
> *ram_memory)
> >
> >  {
> > @@ -315,8 +350,6 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state,
> int *piix3_devfn,
> >
> >      b = i440fx_common_init("i440FX", pi440fx_state, piix3_devfn, isa_bus,
> pic,
> >                             address_space_mem, address_space_io,
> ram_size,
> > -                           pci_hole_start, pci_hole_size,
> > -                           pci_hole64_start, pci_hole64_size,
> >                             pci_memory, ram_memory);
> >      return b;
> >  }
> > --
> > 1.7.12.1
> >
Zhang, Xiantao - Feb. 26, 2013, 5:10 a.m.
For real i440fx, its TOM is fixed to 1G, I think Xen or other VMMs playing with Qemu should break this hardware rule.  Maybe we can implement this register as a write-only one, so that OS can't see its existence.   If OS reads this register, Qemu always return 0xff, and for any write operations to this register, it may impact hardware's behavior.  This conforms to the behavior of OS accessing a reserved register.  
Xiantao

> -----Original Message-----
> From: qemu-devel-bounces+xiantao.zhang=intel.com@nongnu.org
> [mailto:qemu-devel-bounces+xiantao.zhang=intel.com@nongnu.org] On Behalf
> Of Hao, Xudong
> Sent: Tuesday, February 26, 2013 11:33 AM
> To: Stefano Stabellini; Ian Campbell
> Cc: aliguori@us.ibm.com; mst@redhat.com; qemu-devel@nongnu.org; xen-
> devel@lists.xen.org; JBeulich@suse.com; afaerber@suse.de
> Subject: Re: [Qemu-devel] [PATCH v2] piix: define a TOM register to report the
> base of PCI
> 
> > -----Original Message-----
> > From: qemu-devel-bounces+xudong.hao=intel.com@nongnu.org
> > [mailto:qemu-devel-bounces+xudong.hao=intel.com@nongnu.org] On Behalf
> > Of Stefano Stabellini
> > Sent: Tuesday, February 26, 2013 12:06 AM
> > To: Hao, Xudong
> > Cc: aliguori@us.ibm.com; Stefano Stabellini; mst@redhat.com;
> > qemu-devel@nongnu.org; xen-devel@lists.xen.org; JBeulich@suse.com;
> > afaerber@suse.de
> > Subject: Re: [Qemu-devel] [PATCH v2] piix: define a TOM register to report
> the
> > base of PCI
> >
> > On Mon, 25 Feb 2013, Xudong Hao wrote:
> > > v2:
> > > * Use "piix: " in the subject rather than "qemu: "
> > > * Define TOM register as one byte
> > > * Define default TOM value instead of hardcode 0xe0000000 in more that
> one
> > place
> > > * Use API pci_set_byte for pci config access
> > > * Use dev->config instead of the indirect d->dev.config
> > >
> > > Define a TOM(top of memory) register to report the base of PCI memory,
> > update
> > > memory region dynamically. TOM register are defined to one byte in PCI
> > configure
> > > space, because that only upper 4 bit of PCI memory takes effect for Xen, so
> > > it requires bios set TOM with 16M-aligned.
> > >
> > > Signed-off-by: Xudong Hao <xudong.hao@intel.com>
> > > Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
> >
> > The patch is OK from my POV, but I think that Ian raised a valid
> > concern: we are supposed to emulate a real piece of hardware, maybe
> > another mechanism (xenbus? hvmop?) to pass the information from
> > hvmloader to QEMU would be a better fit than this.
> > Otherwise at least we would need to advertize this feature somehow: if
> > hvmloader can use it, the guest kernel can use it too...
> >
> Hi, Ian and Stefano
> 
> Here adding this faking register in bios is a hack way.
> However, what we emulated is not full real piece of hardware at all times, the
> max of TOM for i440fx is 1G, and we already adjust the TOM in qemu.
> The faking register is only effective by bios and device model. This register is
> reserved by host bridge, so guest can't access this register, guest kernel should
> handle well when accessing a reserved region.
> 
> -Thanks
> Xudong
> >
> >
> > >  hw/pc.h       |  7 +++---
> > >  hw/pc_piix.c  | 12 +++-------
> > >  hw/piix_pci.c | 73
> > +++++++++++++++++++++++++++++++++++++++++++----------------
> > >  3 files changed, 59 insertions(+), 33 deletions(-)
> > >
> > > diff --git a/hw/pc.h b/hw/pc.h
> > > index fbcf43d..62adcc5 100644
> > > --- a/hw/pc.h
> > > +++ b/hw/pc.h
> > > @@ -129,15 +129,14 @@ extern int no_hpet;
> > >  struct PCII440FXState;
> > >  typedef struct PCII440FXState PCII440FXState;
> > >
> > > +#define I440FX_TOM     0xe0000000
> > > +#define I440FX_XEN_TOM 0xf0000000
> > > +
> > >  PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix_devfn,
> > >                      ISABus **isa_bus, qemu_irq *pic,
> > >                      MemoryRegion *address_space_mem,
> > >                      MemoryRegion *address_space_io,
> > >                      ram_addr_t ram_size,
> > > -                    hwaddr pci_hole_start,
> > > -                    hwaddr pci_hole_size,
> > > -                    hwaddr pci_hole64_start,
> > > -                    hwaddr pci_hole64_size,
> > >                      MemoryRegion *pci_memory,
> > >                      MemoryRegion *ram_memory);
> > >
> > > diff --git a/hw/pc_piix.c b/hw/pc_piix.c
> > > index 0a6923d..2eef510 100644
> > > --- a/hw/pc_piix.c
> > > +++ b/hw/pc_piix.c
> > > @@ -93,9 +93,9 @@ static void pc_init1(MemoryRegion *system_memory,
> > >          kvmclock_create();
> > >      }
> > >
> > > -    if (ram_size >= 0xe0000000 ) {
> > > -        above_4g_mem_size = ram_size - 0xe0000000;
> > > -        below_4g_mem_size = 0xe0000000;
> > > +    if (ram_size >= I440FX_TOM) {
> > > +        above_4g_mem_size = ram_size - I440FX_TOM;
> > > +        below_4g_mem_size = I440FX_TOM;
> > >      } else {
> > >          above_4g_mem_size = 0;
> > >          below_4g_mem_size = ram_size;
> > > @@ -130,12 +130,6 @@ static void pc_init1(MemoryRegion
> > *system_memory,
> > >      if (pci_enabled) {
> > >          pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, &isa_bus, gsi,
> > >                                system_memory, system_io,
> > ram_size,
> > > -                              below_4g_mem_size,
> > > -                              0x100000000ULL -
> > below_4g_mem_size,
> > > -                              0x100000000ULL +
> > above_4g_mem_size,
> > > -                              (sizeof(hwaddr) == 4
> > > -                               ? 0
> > > -                               : ((uint64_t)1 << 62)),
> > >                                pci_memory, ram_memory);
> > >      } else {
> > >          pci_bus = NULL;
> > > diff --git a/hw/piix_pci.c b/hw/piix_pci.c
> > > index 3d79c73..3e5a25c 100644
> > > --- a/hw/piix_pci.c
> > > +++ b/hw/piix_pci.c
> > > @@ -86,6 +86,14 @@ struct PCII440FXState {
> > >  #define I440FX_PAM_SIZE 7
> > >  #define I440FX_SMRAM    0x72
> > >
> > > +/* The maximum vaule of TOM(top of memory) register in I440FX
> > > + * is 1G, so it doesn't meet any popular virutal machines, so
> > > + * define another register to report the base of PCI memory.
> > > + * Use one byte 0xb0 for the upper 8 bit, they are originally
> > > + * resevered for host bridge.
> > > + * */
> > > +#define I440FX_PCI_HOLE_BASE 0xb0
> > > +
> > >  static void piix3_set_irq(void *opaque, int pirq, int level);
> > >  static PCIINTxRoute piix3_route_intx_pin_to_irq(void *opaque, int
> > pci_intx);
> > >  static void piix3_write_config_xen(PCIDevice *dev,
> > > @@ -101,6 +109,43 @@ static int pci_slot_get_pirq(PCIDevice *pci_dev, int
> > pci_intx)
> > >      return (pci_intx + slot_addend) & 3;
> > >  }
> > >
> > > +
> > > +static void i440fx_update_pci_mem_hole(PCII440FXState *f, bool del)
> > > +{
> > > +    ram_addr_t above_4g_mem_size;
> > > +    hwaddr pci_hole_start, pci_hole_size, pci_hole64_start,
> > pci_hole64_size;
> > > +
> > > +    pci_hole_start = pci_default_read_config(&f->dev,
> > I440FX_PCI_HOLE_BASE, 1) << 24;
> > > +    pci_hole_size = 0x100000000ULL - pci_hole_start;
> > > +
> > > +    if (ram_size >= pci_hole_start) {
> > > +        above_4g_mem_size = ram_size - pci_hole_start;
> > > +    } else {
> > > +        above_4g_mem_size = 0;
> > > +    }
> > > +    pci_hole64_start = 0x100000000ULL + above_4g_mem_size;
> > > +    pci_hole64_size = sizeof(hwaddr) == 4 ? 0 : ((uint64_t)1 << 62);
> > > +
> > > +    if (del) {
> > > +        memory_region_del_subregion(f->system_memory,
> > &f->pci_hole);
> > > +        if (pci_hole64_size) {
> > > +            memory_region_del_subregion(f->system_memory,
> > &f->pci_hole_64bit);
> > > +        }
> > > +    }
> > > +
> > > +    memory_region_init_alias(&f->pci_hole, "pci-hole",
> > f->pci_address_space,
> > > +                             pci_hole_start, pci_hole_size);
> > > +    memory_region_add_subregion(f->system_memory, pci_hole_start,
> > &f->pci_hole);
> > > +    memory_region_init_alias(&f->pci_hole_64bit, "pci-hole64",
> > > +                             f->pci_address_space,
> > > +                             pci_hole64_start, pci_hole64_size);
> > > +    if (pci_hole64_size) {
> > > +        memory_region_add_subregion(f->system_memory,
> > pci_hole64_start,
> > > +                                    &f->pci_hole_64bit);
> > > +    }
> > > +}
> > > +
> > > +
> > >  static void i440fx_update_memory_mappings(PCII440FXState *d)
> > >  {
> > >      int i;
> > > @@ -136,6 +181,9 @@ static void i440fx_write_config(PCIDevice *dev,
> > >          range_covers_byte(address, len, I440FX_SMRAM)) {
> > >          i440fx_update_memory_mappings(d);
> > >      }
> > > +    if (range_covers_byte(address, len, I440FX_PCI_HOLE_BASE)) {
> > > +        i440fx_update_pci_mem_hole(d, true);
> > > +    }
> > >  }
> > >
> > >  static int i440fx_load_old(QEMUFile* f, void *opaque, int version_id)
> > > @@ -203,6 +251,10 @@ static int i440fx_initfn(PCIDevice *dev)
> > >
> > >      d->dev.config[I440FX_SMRAM] = 0x02;
> > >
> > > +    /* Emulate top of memory, here use 0xe0000000 as default val*/
> > > +    uint32_t addr = xen_enabled() ? I440FX_XEN_TOM : I440FX_TOM;
> > > +    pci_set_byte(dev->config + I440FX_PCI_HOLE_BASE, (uint8_t)(addr >>
> > 24));
> > > +
> > >      cpu_smm_register(&i440fx_set_smm, d);
> > >      return 0;
> > >  }
> > > @@ -214,10 +266,6 @@ static PCIBus *i440fx_common_init(const char
> > *device_name,
> > >                                    MemoryRegion
> > *address_space_mem,
> > >                                    MemoryRegion
> > *address_space_io,
> > >                                    ram_addr_t ram_size,
> > > -                                  hwaddr pci_hole_start,
> > > -                                  hwaddr pci_hole_size,
> > > -                                  hwaddr pci_hole64_start,
> > > -                                  hwaddr pci_hole64_size,
> > >                                    MemoryRegion
> > *pci_address_space,
> > >                                    MemoryRegion *ram_memory)
> > >  {
> > > @@ -244,16 +292,6 @@ static PCIBus *i440fx_common_init(const char
> > *device_name,
> > >      f->system_memory = address_space_mem;
> > >      f->pci_address_space = pci_address_space;
> > >      f->ram_memory = ram_memory;
> > > -    memory_region_init_alias(&f->pci_hole, "pci-hole",
> > f->pci_address_space,
> > > -                             pci_hole_start, pci_hole_size);
> > > -    memory_region_add_subregion(f->system_memory, pci_hole_start,
> > &f->pci_hole);
> > > -    memory_region_init_alias(&f->pci_hole_64bit, "pci-hole64",
> > > -                             f->pci_address_space,
> > > -                             pci_hole64_start, pci_hole64_size);
> > > -    if (pci_hole64_size) {
> > > -        memory_region_add_subregion(f->system_memory,
> > pci_hole64_start,
> > > -                                    &f->pci_hole_64bit);
> > > -    }
> > >      memory_region_init_alias(&f->smram_region, "smram-region",
> > >                               f->pci_address_space, 0xa0000,
> > 0x20000);
> > >      memory_region_add_subregion_overlap(f->system_memory,
> > 0xa0000,
> > > @@ -295,6 +333,7 @@ static PCIBus *i440fx_common_init(const char
> > *device_name,
> > >      (*pi440fx_state)->dev.config[0x57]=ram_size;
> > >
> > >      i440fx_update_memory_mappings(f);
> > > +    i440fx_update_pci_mem_hole(f, false);
> > >
> > >      return b;
> > >  }
> > > @@ -304,10 +343,6 @@ PCIBus *i440fx_init(PCII440FXState
> > **pi440fx_state, int *piix3_devfn,
> > >                      MemoryRegion *address_space_mem,
> > >                      MemoryRegion *address_space_io,
> > >                      ram_addr_t ram_size,
> > > -                    hwaddr pci_hole_start,
> > > -                    hwaddr pci_hole_size,
> > > -                    hwaddr pci_hole64_start,
> > > -                    hwaddr pci_hole64_size,
> > >                      MemoryRegion *pci_memory, MemoryRegion
> > *ram_memory)
> > >
> > >  {
> > > @@ -315,8 +350,6 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state,
> > int *piix3_devfn,
> > >
> > >      b = i440fx_common_init("i440FX", pi440fx_state, piix3_devfn, isa_bus,
> > pic,
> > >                             address_space_mem, address_space_io,
> > ram_size,
> > > -                           pci_hole_start, pci_hole_size,
> > > -                           pci_hole64_start, pci_hole64_size,
> > >                             pci_memory, ram_memory);
> > >      return b;
> > >  }
> > > --
> > > 1.7.12.1
> > >
>
Stefano Stabellini - Feb. 26, 2013, 3:43 p.m.
Right, I think that reading as 0xff and write once would be important
improvements for this patch.

I would like to see a document somewhere (maybe just a text file under
xen-unstable/docs/misc) with a list of deviations of the i440fx we
emulate and the real one.

Other than that, it would be OK for me.

On Tue, 26 Feb 2013, Zhang, Xiantao wrote:
> For real i440fx, its TOM is fixed to 1G, I think Xen or other VMMs playing with Qemu should break this hardware rule.  Maybe we can implement this register as a write-only one, so that OS can't see its existence.   If OS reads this register, Qemu always return 0xff, and for any write operations to this register, it may impact hardware's behavior.  This conforms to the behavior of OS accessing a reserved register.
> Xiantao
> 
> > -----Original Message-----
> > From: qemu-devel-bounces+xiantao.zhang=intel.com@nongnu.org
> > [mailto:qemu-devel-bounces+xiantao.zhang=intel.com@nongnu.org] On Behalf
> > Of Hao, Xudong
> > Sent: Tuesday, February 26, 2013 11:33 AM
> > To: Stefano Stabellini; Ian Campbell
> > Cc: aliguori@us.ibm.com; mst@redhat.com; qemu-devel@nongnu.org; xen-
> > devel@lists.xen.org; JBeulich@suse.com; afaerber@suse.de
> > Subject: Re: [Qemu-devel] [PATCH v2] piix: define a TOM register to report the
> > base of PCI
> >
> > > -----Original Message-----
> > > From: qemu-devel-bounces+xudong.hao=intel.com@nongnu.org
> > > [mailto:qemu-devel-bounces+xudong.hao=intel.com@nongnu.org] On Behalf
> > > Of Stefano Stabellini
> > > Sent: Tuesday, February 26, 2013 12:06 AM
> > > To: Hao, Xudong
> > > Cc: aliguori@us.ibm.com; Stefano Stabellini; mst@redhat.com;
> > > qemu-devel@nongnu.org; xen-devel@lists.xen.org; JBeulich@suse.com;
> > > afaerber@suse.de
> > > Subject: Re: [Qemu-devel] [PATCH v2] piix: define a TOM register to report
> > the
> > > base of PCI
> > >
> > > On Mon, 25 Feb 2013, Xudong Hao wrote:
> > > > v2:
> > > > * Use "piix: " in the subject rather than "qemu: "
> > > > * Define TOM register as one byte
> > > > * Define default TOM value instead of hardcode 0xe0000000 in more that
> > one
> > > place
> > > > * Use API pci_set_byte for pci config access
> > > > * Use dev->config instead of the indirect d->dev.config
> > > >
> > > > Define a TOM(top of memory) register to report the base of PCI memory,
> > > update
> > > > memory region dynamically. TOM register are defined to one byte in PCI
> > > configure
> > > > space, because that only upper 4 bit of PCI memory takes effect for Xen, so
> > > > it requires bios set TOM with 16M-aligned.
> > > >
> > > > Signed-off-by: Xudong Hao <xudong.hao@intel.com>
> > > > Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
> > >
> > > The patch is OK from my POV, but I think that Ian raised a valid
> > > concern: we are supposed to emulate a real piece of hardware, maybe
> > > another mechanism (xenbus? hvmop?) to pass the information from
> > > hvmloader to QEMU would be a better fit than this.
> > > Otherwise at least we would need to advertize this feature somehow: if
> > > hvmloader can use it, the guest kernel can use it too...
> > >
> > Hi, Ian and Stefano
> >
> > Here adding this faking register in bios is a hack way.
> > However, what we emulated is not full real piece of hardware at all times, the
> > max of TOM for i440fx is 1G, and we already adjust the TOM in qemu.
> > The faking register is only effective by bios and device model. This register is
> > reserved by host bridge, so guest can't access this register, guest kernel should
> > handle well when accessing a reserved region.
> >
> > -Thanks
> > Xudong
> > >
> > >
> > > >  hw/pc.h       |  7 +++---
> > > >  hw/pc_piix.c  | 12 +++-------
> > > >  hw/piix_pci.c | 73
> > > +++++++++++++++++++++++++++++++++++++++++++----------------
> > > >  3 files changed, 59 insertions(+), 33 deletions(-)
> > > >
> > > > diff --git a/hw/pc.h b/hw/pc.h
> > > > index fbcf43d..62adcc5 100644
> > > > --- a/hw/pc.h
> > > > +++ b/hw/pc.h
> > > > @@ -129,15 +129,14 @@ extern int no_hpet;
> > > >  struct PCII440FXState;
> > > >  typedef struct PCII440FXState PCII440FXState;
> > > >
> > > > +#define I440FX_TOM     0xe0000000
> > > > +#define I440FX_XEN_TOM 0xf0000000
> > > > +
> > > >  PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix_devfn,
> > > >                      ISABus **isa_bus, qemu_irq *pic,
> > > >                      MemoryRegion *address_space_mem,
> > > >                      MemoryRegion *address_space_io,
> > > >                      ram_addr_t ram_size,
> > > > -                    hwaddr pci_hole_start,
> > > > -                    hwaddr pci_hole_size,
> > > > -                    hwaddr pci_hole64_start,
> > > > -                    hwaddr pci_hole64_size,
> > > >                      MemoryRegion *pci_memory,
> > > >                      MemoryRegion *ram_memory);
> > > >
> > > > diff --git a/hw/pc_piix.c b/hw/pc_piix.c
> > > > index 0a6923d..2eef510 100644
> > > > --- a/hw/pc_piix.c
> > > > +++ b/hw/pc_piix.c
> > > > @@ -93,9 +93,9 @@ static void pc_init1(MemoryRegion *system_memory,
> > > >          kvmclock_create();
> > > >      }
> > > >
> > > > -    if (ram_size >= 0xe0000000 ) {
> > > > -        above_4g_mem_size = ram_size - 0xe0000000;
> > > > -        below_4g_mem_size = 0xe0000000;
> > > > +    if (ram_size >= I440FX_TOM) {
> > > > +        above_4g_mem_size = ram_size - I440FX_TOM;
> > > > +        below_4g_mem_size = I440FX_TOM;
> > > >      } else {
> > > >          above_4g_mem_size = 0;
> > > >          below_4g_mem_size = ram_size;
> > > > @@ -130,12 +130,6 @@ static void pc_init1(MemoryRegion
> > > *system_memory,
> > > >      if (pci_enabled) {
> > > >          pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, &isa_bus, gsi,
> > > >                                system_memory, system_io,
> > > ram_size,
> > > > -                              below_4g_mem_size,
> > > > -                              0x100000000ULL -
> > > below_4g_mem_size,
> > > > -                              0x100000000ULL +
> > > above_4g_mem_size,
> > > > -                              (sizeof(hwaddr) == 4
> > > > -                               ? 0
> > > > -                               : ((uint64_t)1 << 62)),
> > > >                                pci_memory, ram_memory);
> > > >      } else {
> > > >          pci_bus = NULL;
> > > > diff --git a/hw/piix_pci.c b/hw/piix_pci.c
> > > > index 3d79c73..3e5a25c 100644
> > > > --- a/hw/piix_pci.c
> > > > +++ b/hw/piix_pci.c
> > > > @@ -86,6 +86,14 @@ struct PCII440FXState {
> > > >  #define I440FX_PAM_SIZE 7
> > > >  #define I440FX_SMRAM    0x72
> > > >
> > > > +/* The maximum vaule of TOM(top of memory) register in I440FX
> > > > + * is 1G, so it doesn't meet any popular virutal machines, so
> > > > + * define another register to report the base of PCI memory.
> > > > + * Use one byte 0xb0 for the upper 8 bit, they are originally
> > > > + * resevered for host bridge.
> > > > + * */
> > > > +#define I440FX_PCI_HOLE_BASE 0xb0
> > > > +
> > > >  static void piix3_set_irq(void *opaque, int pirq, int level);
> > > >  static PCIINTxRoute piix3_route_intx_pin_to_irq(void *opaque, int
> > > pci_intx);
> > > >  static void piix3_write_config_xen(PCIDevice *dev,
> > > > @@ -101,6 +109,43 @@ static int pci_slot_get_pirq(PCIDevice *pci_dev, int
> > > pci_intx)
> > > >      return (pci_intx + slot_addend) & 3;
> > > >  }
> > > >
> > > > +
> > > > +static void i440fx_update_pci_mem_hole(PCII440FXState *f, bool del)
> > > > +{
> > > > +    ram_addr_t above_4g_mem_size;
> > > > +    hwaddr pci_hole_start, pci_hole_size, pci_hole64_start,
> > > pci_hole64_size;
> > > > +
> > > > +    pci_hole_start = pci_default_read_config(&f->dev,
> > > I440FX_PCI_HOLE_BASE, 1) << 24;
> > > > +    pci_hole_size = 0x100000000ULL - pci_hole_start;
> > > > +
> > > > +    if (ram_size >= pci_hole_start) {
> > > > +        above_4g_mem_size = ram_size - pci_hole_start;
> > > > +    } else {
> > > > +        above_4g_mem_size = 0;
> > > > +    }
> > > > +    pci_hole64_start = 0x100000000ULL + above_4g_mem_size;
> > > > +    pci_hole64_size = sizeof(hwaddr) == 4 ? 0 : ((uint64_t)1 << 62);
> > > > +
> > > > +    if (del) {
> > > > +        memory_region_del_subregion(f->system_memory,
> > > &f->pci_hole);
> > > > +        if (pci_hole64_size) {
> > > > +            memory_region_del_subregion(f->system_memory,
> > > &f->pci_hole_64bit);
> > > > +        }
> > > > +    }
> > > > +
> > > > +    memory_region_init_alias(&f->pci_hole, "pci-hole",
> > > f->pci_address_space,
> > > > +                             pci_hole_start, pci_hole_size);
> > > > +    memory_region_add_subregion(f->system_memory, pci_hole_start,
> > > &f->pci_hole);
> > > > +    memory_region_init_alias(&f->pci_hole_64bit, "pci-hole64",
> > > > +                             f->pci_address_space,
> > > > +                             pci_hole64_start, pci_hole64_size);
> > > > +    if (pci_hole64_size) {
> > > > +        memory_region_add_subregion(f->system_memory,
> > > pci_hole64_start,
> > > > +                                    &f->pci_hole_64bit);
> > > > +    }
> > > > +}
> > > > +
> > > > +
> > > >  static void i440fx_update_memory_mappings(PCII440FXState *d)
> > > >  {
> > > >      int i;
> > > > @@ -136,6 +181,9 @@ static void i440fx_write_config(PCIDevice *dev,
> > > >          range_covers_byte(address, len, I440FX_SMRAM)) {
> > > >          i440fx_update_memory_mappings(d);
> > > >      }
> > > > +    if (range_covers_byte(address, len, I440FX_PCI_HOLE_BASE)) {
> > > > +        i440fx_update_pci_mem_hole(d, true);
> > > > +    }
> > > >  }
> > > >
> > > >  static int i440fx_load_old(QEMUFile* f, void *opaque, int version_id)
> > > > @@ -203,6 +251,10 @@ static int i440fx_initfn(PCIDevice *dev)
> > > >
> > > >      d->dev.config[I440FX_SMRAM] = 0x02;
> > > >
> > > > +    /* Emulate top of memory, here use 0xe0000000 as default val*/
> > > > +    uint32_t addr = xen_enabled() ? I440FX_XEN_TOM : I440FX_TOM;
> > > > +    pci_set_byte(dev->config + I440FX_PCI_HOLE_BASE, (uint8_t)(addr >>
> > > 24));
> > > > +
> > > >      cpu_smm_register(&i440fx_set_smm, d);
> > > >      return 0;
> > > >  }
> > > > @@ -214,10 +266,6 @@ static PCIBus *i440fx_common_init(const char
> > > *device_name,
> > > >                                    MemoryRegion
> > > *address_space_mem,
> > > >                                    MemoryRegion
> > > *address_space_io,
> > > >                                    ram_addr_t ram_size,
> > > > -                                  hwaddr pci_hole_start,
> > > > -                                  hwaddr pci_hole_size,
> > > > -                                  hwaddr pci_hole64_start,
> > > > -                                  hwaddr pci_hole64_size,
> > > >                                    MemoryRegion
> > > *pci_address_space,
> > > >                                    MemoryRegion *ram_memory)
> > > >  {
> > > > @@ -244,16 +292,6 @@ static PCIBus *i440fx_common_init(const char
> > > *device_name,
> > > >      f->system_memory = address_space_mem;
> > > >      f->pci_address_space = pci_address_space;
> > > >      f->ram_memory = ram_memory;
> > > > -    memory_region_init_alias(&f->pci_hole, "pci-hole",
> > > f->pci_address_space,
> > > > -                             pci_hole_start, pci_hole_size);
> > > > -    memory_region_add_subregion(f->system_memory, pci_hole_start,
> > > &f->pci_hole);
> > > > -    memory_region_init_alias(&f->pci_hole_64bit, "pci-hole64",
> > > > -                             f->pci_address_space,
> > > > -                             pci_hole64_start, pci_hole64_size);
> > > > -    if (pci_hole64_size) {
> > > > -        memory_region_add_subregion(f->system_memory,
> > > pci_hole64_start,
> > > > -                                    &f->pci_hole_64bit);
> > > > -    }
> > > >      memory_region_init_alias(&f->smram_region, "smram-region",
> > > >                               f->pci_address_space, 0xa0000,
> > > 0x20000);
> > > >      memory_region_add_subregion_overlap(f->system_memory,
> > > 0xa0000,
> > > > @@ -295,6 +333,7 @@ static PCIBus *i440fx_common_init(const char
> > > *device_name,
> > > >      (*pi440fx_state)->dev.config[0x57]=ram_size;
> > > >
> > > >      i440fx_update_memory_mappings(f);
> > > > +    i440fx_update_pci_mem_hole(f, false);
> > > >
> > > >      return b;
> > > >  }
> > > > @@ -304,10 +343,6 @@ PCIBus *i440fx_init(PCII440FXState
> > > **pi440fx_state, int *piix3_devfn,
> > > >                      MemoryRegion *address_space_mem,
> > > >                      MemoryRegion *address_space_io,
> > > >                      ram_addr_t ram_size,
> > > > -                    hwaddr pci_hole_start,
> > > > -                    hwaddr pci_hole_size,
> > > > -                    hwaddr pci_hole64_start,
> > > > -                    hwaddr pci_hole64_size,
> > > >                      MemoryRegion *pci_memory, MemoryRegion
> > > *ram_memory)
> > > >
> > > >  {
> > > > @@ -315,8 +350,6 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state,
> > > int *piix3_devfn,
> > > >
> > > >      b = i440fx_common_init("i440FX", pi440fx_state, piix3_devfn, isa_bus,
> > > pic,
> > > >                             address_space_mem, address_space_io,
> > > ram_size,
> > > > -                           pci_hole_start, pci_hole_size,
> > > > -                           pci_hole64_start, pci_hole64_size,
> > > >                             pci_memory, ram_memory);
> > > >      return b;
> > > >  }
> > > > --
> > > > 1.7.12.1
> > > >
> >
>
Ian Campbell - Feb. 26, 2013, 3:50 p.m.
Given that Xen has at least two other mechanisms (xenstore and
hvmparams) for passing this sort of information around I'm not sure why
hacking the emulated i440fx device should be the preferred option.

On Tue, 2013-02-26 at 15:43 +0000, Stefano Stabellini wrote:
> Right, I think that reading as 0xff and write once would be important
> improvements for this patch.
> 
> I would like to see a document somewhere (maybe just a text file under
> xen-unstable/docs/misc) with a list of deviations of the i440fx we
> emulate and the real one.
> 
> Other than that, it would be OK for me.
> 
> On Tue, 26 Feb 2013, Zhang, Xiantao wrote:
> > For real i440fx, its TOM is fixed to 1G, I think Xen or other VMMs playing with Qemu should break this hardware rule.  Maybe we can implement this register as a write-only one, so that OS can't see its existence.   If OS reads this register, Qemu always return 0xff, and for any write operations to this register, it may impact hardware's behavior.  This conforms to the behavior of OS accessing a reserved register.
> > Xiantao
> >
> > > -----Original Message-----
> > > From: qemu-devel-bounces+xiantao.zhang=intel.com@nongnu.org
> > > [mailto:qemu-devel-bounces+xiantao.zhang=intel.com@nongnu.org] On Behalf
> > > Of Hao, Xudong
> > > Sent: Tuesday, February 26, 2013 11:33 AM
> > > To: Stefano Stabellini; Ian Campbell
> > > Cc: aliguori@us.ibm.com; mst@redhat.com; qemu-devel@nongnu.org; xen-
> > > devel@lists.xen.org; JBeulich@suse.com; afaerber@suse.de
> > > Subject: Re: [Qemu-devel] [PATCH v2] piix: define a TOM register to report the
> > > base of PCI
> > >
> > > > -----Original Message-----
> > > > From: qemu-devel-bounces+xudong.hao=intel.com@nongnu.org
> > > > [mailto:qemu-devel-bounces+xudong.hao=intel.com@nongnu.org] On Behalf
> > > > Of Stefano Stabellini
> > > > Sent: Tuesday, February 26, 2013 12:06 AM
> > > > To: Hao, Xudong
> > > > Cc: aliguori@us.ibm.com; Stefano Stabellini; mst@redhat.com;
> > > > qemu-devel@nongnu.org; xen-devel@lists.xen.org; JBeulich@suse.com;
> > > > afaerber@suse.de
> > > > Subject: Re: [Qemu-devel] [PATCH v2] piix: define a TOM register to report
> > > the
> > > > base of PCI
> > > >
> > > > On Mon, 25 Feb 2013, Xudong Hao wrote:
> > > > > v2:
> > > > > * Use "piix: " in the subject rather than "qemu: "
> > > > > * Define TOM register as one byte
> > > > > * Define default TOM value instead of hardcode 0xe0000000 in more that
> > > one
> > > > place
> > > > > * Use API pci_set_byte for pci config access
> > > > > * Use dev->config instead of the indirect d->dev.config
> > > > >
> > > > > Define a TOM(top of memory) register to report the base of PCI memory,
> > > > update
> > > > > memory region dynamically. TOM register are defined to one byte in PCI
> > > > configure
> > > > > space, because that only upper 4 bit of PCI memory takes effect for Xen, so
> > > > > it requires bios set TOM with 16M-aligned.
> > > > >
> > > > > Signed-off-by: Xudong Hao <xudong.hao@intel.com>
> > > > > Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
> > > >
> > > > The patch is OK from my POV, but I think that Ian raised a valid
> > > > concern: we are supposed to emulate a real piece of hardware, maybe
> > > > another mechanism (xenbus? hvmop?) to pass the information from
> > > > hvmloader to QEMU would be a better fit than this.
> > > > Otherwise at least we would need to advertize this feature somehow: if
> > > > hvmloader can use it, the guest kernel can use it too...
> > > >
> > > Hi, Ian and Stefano
> > >
> > > Here adding this faking register in bios is a hack way.
> > > However, what we emulated is not full real piece of hardware at all times, the
> > > max of TOM for i440fx is 1G, and we already adjust the TOM in qemu.
> > > The faking register is only effective by bios and device model. This register is
> > > reserved by host bridge, so guest can't access this register, guest kernel should
> > > handle well when accessing a reserved region.
> > >
> > > -Thanks
> > > Xudong
> > > >
> > > >
> > > > >  hw/pc.h       |  7 +++---
> > > > >  hw/pc_piix.c  | 12 +++-------
> > > > >  hw/piix_pci.c | 73
> > > > +++++++++++++++++++++++++++++++++++++++++++----------------
> > > > >  3 files changed, 59 insertions(+), 33 deletions(-)
> > > > >
> > > > > diff --git a/hw/pc.h b/hw/pc.h
> > > > > index fbcf43d..62adcc5 100644
> > > > > --- a/hw/pc.h
> > > > > +++ b/hw/pc.h
> > > > > @@ -129,15 +129,14 @@ extern int no_hpet;
> > > > >  struct PCII440FXState;
> > > > >  typedef struct PCII440FXState PCII440FXState;
> > > > >
> > > > > +#define I440FX_TOM     0xe0000000
> > > > > +#define I440FX_XEN_TOM 0xf0000000
> > > > > +
> > > > >  PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix_devfn,
> > > > >                      ISABus **isa_bus, qemu_irq *pic,
> > > > >                      MemoryRegion *address_space_mem,
> > > > >                      MemoryRegion *address_space_io,
> > > > >                      ram_addr_t ram_size,
> > > > > -                    hwaddr pci_hole_start,
> > > > > -                    hwaddr pci_hole_size,
> > > > > -                    hwaddr pci_hole64_start,
> > > > > -                    hwaddr pci_hole64_size,
> > > > >                      MemoryRegion *pci_memory,
> > > > >                      MemoryRegion *ram_memory);
> > > > >
> > > > > diff --git a/hw/pc_piix.c b/hw/pc_piix.c
> > > > > index 0a6923d..2eef510 100644
> > > > > --- a/hw/pc_piix.c
> > > > > +++ b/hw/pc_piix.c
> > > > > @@ -93,9 +93,9 @@ static void pc_init1(MemoryRegion *system_memory,
> > > > >          kvmclock_create();
> > > > >      }
> > > > >
> > > > > -    if (ram_size >= 0xe0000000 ) {
> > > > > -        above_4g_mem_size = ram_size - 0xe0000000;
> > > > > -        below_4g_mem_size = 0xe0000000;
> > > > > +    if (ram_size >= I440FX_TOM) {
> > > > > +        above_4g_mem_size = ram_size - I440FX_TOM;
> > > > > +        below_4g_mem_size = I440FX_TOM;
> > > > >      } else {
> > > > >          above_4g_mem_size = 0;
> > > > >          below_4g_mem_size = ram_size;
> > > > > @@ -130,12 +130,6 @@ static void pc_init1(MemoryRegion
> > > > *system_memory,
> > > > >      if (pci_enabled) {
> > > > >          pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, &isa_bus, gsi,
> > > > >                                system_memory, system_io,
> > > > ram_size,
> > > > > -                              below_4g_mem_size,
> > > > > -                              0x100000000ULL -
> > > > below_4g_mem_size,
> > > > > -                              0x100000000ULL +
> > > > above_4g_mem_size,
> > > > > -                              (sizeof(hwaddr) == 4
> > > > > -                               ? 0
> > > > > -                               : ((uint64_t)1 << 62)),
> > > > >                                pci_memory, ram_memory);
> > > > >      } else {
> > > > >          pci_bus = NULL;
> > > > > diff --git a/hw/piix_pci.c b/hw/piix_pci.c
> > > > > index 3d79c73..3e5a25c 100644
> > > > > --- a/hw/piix_pci.c
> > > > > +++ b/hw/piix_pci.c
> > > > > @@ -86,6 +86,14 @@ struct PCII440FXState {
> > > > >  #define I440FX_PAM_SIZE 7
> > > > >  #define I440FX_SMRAM    0x72
> > > > >
> > > > > +/* The maximum vaule of TOM(top of memory) register in I440FX
> > > > > + * is 1G, so it doesn't meet any popular virutal machines, so
> > > > > + * define another register to report the base of PCI memory.
> > > > > + * Use one byte 0xb0 for the upper 8 bit, they are originally
> > > > > + * resevered for host bridge.
> > > > > + * */
> > > > > +#define I440FX_PCI_HOLE_BASE 0xb0
> > > > > +
> > > > >  static void piix3_set_irq(void *opaque, int pirq, int level);
> > > > >  static PCIINTxRoute piix3_route_intx_pin_to_irq(void *opaque, int
> > > > pci_intx);
> > > > >  static void piix3_write_config_xen(PCIDevice *dev,
> > > > > @@ -101,6 +109,43 @@ static int pci_slot_get_pirq(PCIDevice *pci_dev, int
> > > > pci_intx)
> > > > >      return (pci_intx + slot_addend) & 3;
> > > > >  }
> > > > >
> > > > > +
> > > > > +static void i440fx_update_pci_mem_hole(PCII440FXState *f, bool del)
> > > > > +{
> > > > > +    ram_addr_t above_4g_mem_size;
> > > > > +    hwaddr pci_hole_start, pci_hole_size, pci_hole64_start,
> > > > pci_hole64_size;
> > > > > +
> > > > > +    pci_hole_start = pci_default_read_config(&f->dev,
> > > > I440FX_PCI_HOLE_BASE, 1) << 24;
> > > > > +    pci_hole_size = 0x100000000ULL - pci_hole_start;
> > > > > +
> > > > > +    if (ram_size >= pci_hole_start) {
> > > > > +        above_4g_mem_size = ram_size - pci_hole_start;
> > > > > +    } else {
> > > > > +        above_4g_mem_size = 0;
> > > > > +    }
> > > > > +    pci_hole64_start = 0x100000000ULL + above_4g_mem_size;
> > > > > +    pci_hole64_size = sizeof(hwaddr) == 4 ? 0 : ((uint64_t)1 << 62);
> > > > > +
> > > > > +    if (del) {
> > > > > +        memory_region_del_subregion(f->system_memory,
> > > > &f->pci_hole);
> > > > > +        if (pci_hole64_size) {
> > > > > +            memory_region_del_subregion(f->system_memory,
> > > > &f->pci_hole_64bit);
> > > > > +        }
> > > > > +    }
> > > > > +
> > > > > +    memory_region_init_alias(&f->pci_hole, "pci-hole",
> > > > f->pci_address_space,
> > > > > +                             pci_hole_start, pci_hole_size);
> > > > > +    memory_region_add_subregion(f->system_memory, pci_hole_start,
> > > > &f->pci_hole);
> > > > > +    memory_region_init_alias(&f->pci_hole_64bit, "pci-hole64",
> > > > > +                             f->pci_address_space,
> > > > > +                             pci_hole64_start, pci_hole64_size);
> > > > > +    if (pci_hole64_size) {
> > > > > +        memory_region_add_subregion(f->system_memory,
> > > > pci_hole64_start,
> > > > > +                                    &f->pci_hole_64bit);
> > > > > +    }
> > > > > +}
> > > > > +
> > > > > +
> > > > >  static void i440fx_update_memory_mappings(PCII440FXState *d)
> > > > >  {
> > > > >      int i;
> > > > > @@ -136,6 +181,9 @@ static void i440fx_write_config(PCIDevice *dev,
> > > > >          range_covers_byte(address, len, I440FX_SMRAM)) {
> > > > >          i440fx_update_memory_mappings(d);
> > > > >      }
> > > > > +    if (range_covers_byte(address, len, I440FX_PCI_HOLE_BASE)) {
> > > > > +        i440fx_update_pci_mem_hole(d, true);
> > > > > +    }
> > > > >  }
> > > > >
> > > > >  static int i440fx_load_old(QEMUFile* f, void *opaque, int version_id)
> > > > > @@ -203,6 +251,10 @@ static int i440fx_initfn(PCIDevice *dev)
> > > > >
> > > > >      d->dev.config[I440FX_SMRAM] = 0x02;
> > > > >
> > > > > +    /* Emulate top of memory, here use 0xe0000000 as default val*/
> > > > > +    uint32_t addr = xen_enabled() ? I440FX_XEN_TOM : I440FX_TOM;
> > > > > +    pci_set_byte(dev->config + I440FX_PCI_HOLE_BASE, (uint8_t)(addr >>
> > > > 24));
> > > > > +
> > > > >      cpu_smm_register(&i440fx_set_smm, d);
> > > > >      return 0;
> > > > >  }
> > > > > @@ -214,10 +266,6 @@ static PCIBus *i440fx_common_init(const char
> > > > *device_name,
> > > > >                                    MemoryRegion
> > > > *address_space_mem,
> > > > >                                    MemoryRegion
> > > > *address_space_io,
> > > > >                                    ram_addr_t ram_size,
> > > > > -                                  hwaddr pci_hole_start,
> > > > > -                                  hwaddr pci_hole_size,
> > > > > -                                  hwaddr pci_hole64_start,
> > > > > -                                  hwaddr pci_hole64_size,
> > > > >                                    MemoryRegion
> > > > *pci_address_space,
> > > > >                                    MemoryRegion *ram_memory)
> > > > >  {
> > > > > @@ -244,16 +292,6 @@ static PCIBus *i440fx_common_init(const char
> > > > *device_name,
> > > > >      f->system_memory = address_space_mem;
> > > > >      f->pci_address_space = pci_address_space;
> > > > >      f->ram_memory = ram_memory;
> > > > > -    memory_region_init_alias(&f->pci_hole, "pci-hole",
> > > > f->pci_address_space,
> > > > > -                             pci_hole_start, pci_hole_size);
> > > > > -    memory_region_add_subregion(f->system_memory, pci_hole_start,
> > > > &f->pci_hole);
> > > > > -    memory_region_init_alias(&f->pci_hole_64bit, "pci-hole64",
> > > > > -                             f->pci_address_space,
> > > > > -                             pci_hole64_start, pci_hole64_size);
> > > > > -    if (pci_hole64_size) {
> > > > > -        memory_region_add_subregion(f->system_memory,
> > > > pci_hole64_start,
> > > > > -                                    &f->pci_hole_64bit);
> > > > > -    }
> > > > >      memory_region_init_alias(&f->smram_region, "smram-region",
> > > > >                               f->pci_address_space, 0xa0000,
> > > > 0x20000);
> > > > >      memory_region_add_subregion_overlap(f->system_memory,
> > > > 0xa0000,
> > > > > @@ -295,6 +333,7 @@ static PCIBus *i440fx_common_init(const char
> > > > *device_name,
> > > > >      (*pi440fx_state)->dev.config[0x57]=ram_size;
> > > > >
> > > > >      i440fx_update_memory_mappings(f);
> > > > > +    i440fx_update_pci_mem_hole(f, false);
> > > > >
> > > > >      return b;
> > > > >  }
> > > > > @@ -304,10 +343,6 @@ PCIBus *i440fx_init(PCII440FXState
> > > > **pi440fx_state, int *piix3_devfn,
> > > > >                      MemoryRegion *address_space_mem,
> > > > >                      MemoryRegion *address_space_io,
> > > > >                      ram_addr_t ram_size,
> > > > > -                    hwaddr pci_hole_start,
> > > > > -                    hwaddr pci_hole_size,
> > > > > -                    hwaddr pci_hole64_start,
> > > > > -                    hwaddr pci_hole64_size,
> > > > >                      MemoryRegion *pci_memory, MemoryRegion
> > > > *ram_memory)
> > > > >
> > > > >  {
> > > > > @@ -315,8 +350,6 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state,
> > > > int *piix3_devfn,
> > > > >
> > > > >      b = i440fx_common_init("i440FX", pi440fx_state, piix3_devfn, isa_bus,
> > > > pic,
> > > > >                             address_space_mem, address_space_io,
> > > > ram_size,
> > > > > -                           pci_hole_start, pci_hole_size,
> > > > > -                           pci_hole64_start, pci_hole64_size,
> > > > >                             pci_memory, ram_memory);
> > > > >      return b;
> > > > >  }
> > > > > --
> > > > > 1.7.12.1
> > > > >
> > >
> >
Anthony Liguori - Feb. 26, 2013, 7:32 p.m.
Ian Campbell <Ian.Campbell@citrix.com> writes:

> Given that Xen has at least two other mechanisms (xenstore and
> hvmparams) for passing this sort of information around I'm not sure why
> hacking the emulated i440fx device should be the preferred option.

And QEMU also provides the fw_cfg interface with this information.

Regards,

Anthony Liguori

>
> On Tue, 2013-02-26 at 15:43 +0000, Stefano Stabellini wrote:
>> Right, I think that reading as 0xff and write once would be important
>> improvements for this patch.
>> 
>> I would like to see a document somewhere (maybe just a text file under
>> xen-unstable/docs/misc) with a list of deviations of the i440fx we
>> emulate and the real one.
>> 
>> Other than that, it would be OK for me.
>> 
>> On Tue, 26 Feb 2013, Zhang, Xiantao wrote:
>> > For real i440fx, its TOM is fixed to 1G, I think Xen or other VMMs playing with Qemu should break this hardware rule.  Maybe we can implement this register as a write-only one, so that OS can't see its existence.   If OS reads this register, Qemu always return 0xff, and for any write operations to this register, it may impact hardware's behavior.  This conforms to the behavior of OS accessing a reserved register.
>> > Xiantao
>> >
>> > > -----Original Message-----
>> > > From: qemu-devel-bounces+xiantao.zhang=intel.com@nongnu.org
>> > > [mailto:qemu-devel-bounces+xiantao.zhang=intel.com@nongnu.org] On Behalf
>> > > Of Hao, Xudong
>> > > Sent: Tuesday, February 26, 2013 11:33 AM
>> > > To: Stefano Stabellini; Ian Campbell
>> > > Cc: aliguori@us.ibm.com; mst@redhat.com; qemu-devel@nongnu.org; xen-
>> > > devel@lists.xen.org; JBeulich@suse.com; afaerber@suse.de
>> > > Subject: Re: [Qemu-devel] [PATCH v2] piix: define a TOM register to report the
>> > > base of PCI
>> > >
>> > > > -----Original Message-----
>> > > > From: qemu-devel-bounces+xudong.hao=intel.com@nongnu.org
>> > > > [mailto:qemu-devel-bounces+xudong.hao=intel.com@nongnu.org] On Behalf
>> > > > Of Stefano Stabellini
>> > > > Sent: Tuesday, February 26, 2013 12:06 AM
>> > > > To: Hao, Xudong
>> > > > Cc: aliguori@us.ibm.com; Stefano Stabellini; mst@redhat.com;
>> > > > qemu-devel@nongnu.org; xen-devel@lists.xen.org; JBeulich@suse.com;
>> > > > afaerber@suse.de
>> > > > Subject: Re: [Qemu-devel] [PATCH v2] piix: define a TOM register to report
>> > > the
>> > > > base of PCI
>> > > >
>> > > > On Mon, 25 Feb 2013, Xudong Hao wrote:
>> > > > > v2:
>> > > > > * Use "piix: " in the subject rather than "qemu: "
>> > > > > * Define TOM register as one byte
>> > > > > * Define default TOM value instead of hardcode 0xe0000000 in more that
>> > > one
>> > > > place
>> > > > > * Use API pci_set_byte for pci config access
>> > > > > * Use dev->config instead of the indirect d->dev.config
>> > > > >
>> > > > > Define a TOM(top of memory) register to report the base of PCI memory,
>> > > > update
>> > > > > memory region dynamically. TOM register are defined to one byte in PCI
>> > > > configure
>> > > > > space, because that only upper 4 bit of PCI memory takes effect for Xen, so
>> > > > > it requires bios set TOM with 16M-aligned.
>> > > > >
>> > > > > Signed-off-by: Xudong Hao <xudong.hao@intel.com>
>> > > > > Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
>> > > >
>> > > > The patch is OK from my POV, but I think that Ian raised a valid
>> > > > concern: we are supposed to emulate a real piece of hardware, maybe
>> > > > another mechanism (xenbus? hvmop?) to pass the information from
>> > > > hvmloader to QEMU would be a better fit than this.
>> > > > Otherwise at least we would need to advertize this feature somehow: if
>> > > > hvmloader can use it, the guest kernel can use it too...
>> > > >
>> > > Hi, Ian and Stefano
>> > >
>> > > Here adding this faking register in bios is a hack way.
>> > > However, what we emulated is not full real piece of hardware at all times, the
>> > > max of TOM for i440fx is 1G, and we already adjust the TOM in qemu.
>> > > The faking register is only effective by bios and device model. This register is
>> > > reserved by host bridge, so guest can't access this register, guest kernel should
>> > > handle well when accessing a reserved region.
>> > >
>> > > -Thanks
>> > > Xudong
>> > > >
>> > > >
>> > > > >  hw/pc.h       |  7 +++---
>> > > > >  hw/pc_piix.c  | 12 +++-------
>> > > > >  hw/piix_pci.c | 73
>> > > > +++++++++++++++++++++++++++++++++++++++++++----------------
>> > > > >  3 files changed, 59 insertions(+), 33 deletions(-)
>> > > > >
>> > > > > diff --git a/hw/pc.h b/hw/pc.h
>> > > > > index fbcf43d..62adcc5 100644
>> > > > > --- a/hw/pc.h
>> > > > > +++ b/hw/pc.h
>> > > > > @@ -129,15 +129,14 @@ extern int no_hpet;
>> > > > >  struct PCII440FXState;
>> > > > >  typedef struct PCII440FXState PCII440FXState;
>> > > > >
>> > > > > +#define I440FX_TOM     0xe0000000
>> > > > > +#define I440FX_XEN_TOM 0xf0000000
>> > > > > +
>> > > > >  PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix_devfn,
>> > > > >                      ISABus **isa_bus, qemu_irq *pic,
>> > > > >                      MemoryRegion *address_space_mem,
>> > > > >                      MemoryRegion *address_space_io,
>> > > > >                      ram_addr_t ram_size,
>> > > > > -                    hwaddr pci_hole_start,
>> > > > > -                    hwaddr pci_hole_size,
>> > > > > -                    hwaddr pci_hole64_start,
>> > > > > -                    hwaddr pci_hole64_size,
>> > > > >                      MemoryRegion *pci_memory,
>> > > > >                      MemoryRegion *ram_memory);
>> > > > >
>> > > > > diff --git a/hw/pc_piix.c b/hw/pc_piix.c
>> > > > > index 0a6923d..2eef510 100644
>> > > > > --- a/hw/pc_piix.c
>> > > > > +++ b/hw/pc_piix.c
>> > > > > @@ -93,9 +93,9 @@ static void pc_init1(MemoryRegion *system_memory,
>> > > > >          kvmclock_create();
>> > > > >      }
>> > > > >
>> > > > > -    if (ram_size >= 0xe0000000 ) {
>> > > > > -        above_4g_mem_size = ram_size - 0xe0000000;
>> > > > > -        below_4g_mem_size = 0xe0000000;
>> > > > > +    if (ram_size >= I440FX_TOM) {
>> > > > > +        above_4g_mem_size = ram_size - I440FX_TOM;
>> > > > > +        below_4g_mem_size = I440FX_TOM;
>> > > > >      } else {
>> > > > >          above_4g_mem_size = 0;
>> > > > >          below_4g_mem_size = ram_size;
>> > > > > @@ -130,12 +130,6 @@ static void pc_init1(MemoryRegion
>> > > > *system_memory,
>> > > > >      if (pci_enabled) {
>> > > > >          pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, &isa_bus, gsi,
>> > > > >                                system_memory, system_io,
>> > > > ram_size,
>> > > > > -                              below_4g_mem_size,
>> > > > > -                              0x100000000ULL -
>> > > > below_4g_mem_size,
>> > > > > -                              0x100000000ULL +
>> > > > above_4g_mem_size,
>> > > > > -                              (sizeof(hwaddr) == 4
>> > > > > -                               ? 0
>> > > > > -                               : ((uint64_t)1 << 62)),
>> > > > >                                pci_memory, ram_memory);
>> > > > >      } else {
>> > > > >          pci_bus = NULL;
>> > > > > diff --git a/hw/piix_pci.c b/hw/piix_pci.c
>> > > > > index 3d79c73..3e5a25c 100644
>> > > > > --- a/hw/piix_pci.c
>> > > > > +++ b/hw/piix_pci.c
>> > > > > @@ -86,6 +86,14 @@ struct PCII440FXState {
>> > > > >  #define I440FX_PAM_SIZE 7
>> > > > >  #define I440FX_SMRAM    0x72
>> > > > >
>> > > > > +/* The maximum vaule of TOM(top of memory) register in I440FX
>> > > > > + * is 1G, so it doesn't meet any popular virutal machines, so
>> > > > > + * define another register to report the base of PCI memory.
>> > > > > + * Use one byte 0xb0 for the upper 8 bit, they are originally
>> > > > > + * resevered for host bridge.
>> > > > > + * */
>> > > > > +#define I440FX_PCI_HOLE_BASE 0xb0
>> > > > > +
>> > > > >  static void piix3_set_irq(void *opaque, int pirq, int level);
>> > > > >  static PCIINTxRoute piix3_route_intx_pin_to_irq(void *opaque, int
>> > > > pci_intx);
>> > > > >  static void piix3_write_config_xen(PCIDevice *dev,
>> > > > > @@ -101,6 +109,43 @@ static int pci_slot_get_pirq(PCIDevice *pci_dev, int
>> > > > pci_intx)
>> > > > >      return (pci_intx + slot_addend) & 3;
>> > > > >  }
>> > > > >
>> > > > > +
>> > > > > +static void i440fx_update_pci_mem_hole(PCII440FXState *f, bool del)
>> > > > > +{
>> > > > > +    ram_addr_t above_4g_mem_size;
>> > > > > +    hwaddr pci_hole_start, pci_hole_size, pci_hole64_start,
>> > > > pci_hole64_size;
>> > > > > +
>> > > > > +    pci_hole_start = pci_default_read_config(&f->dev,
>> > > > I440FX_PCI_HOLE_BASE, 1) << 24;
>> > > > > +    pci_hole_size = 0x100000000ULL - pci_hole_start;
>> > > > > +
>> > > > > +    if (ram_size >= pci_hole_start) {
>> > > > > +        above_4g_mem_size = ram_size - pci_hole_start;
>> > > > > +    } else {
>> > > > > +        above_4g_mem_size = 0;
>> > > > > +    }
>> > > > > +    pci_hole64_start = 0x100000000ULL + above_4g_mem_size;
>> > > > > +    pci_hole64_size = sizeof(hwaddr) == 4 ? 0 : ((uint64_t)1 << 62);
>> > > > > +
>> > > > > +    if (del) {
>> > > > > +        memory_region_del_subregion(f->system_memory,
>> > > > &f->pci_hole);
>> > > > > +        if (pci_hole64_size) {
>> > > > > +            memory_region_del_subregion(f->system_memory,
>> > > > &f->pci_hole_64bit);
>> > > > > +        }
>> > > > > +    }
>> > > > > +
>> > > > > +    memory_region_init_alias(&f->pci_hole, "pci-hole",
>> > > > f->pci_address_space,
>> > > > > +                             pci_hole_start, pci_hole_size);
>> > > > > +    memory_region_add_subregion(f->system_memory, pci_hole_start,
>> > > > &f->pci_hole);
>> > > > > +    memory_region_init_alias(&f->pci_hole_64bit, "pci-hole64",
>> > > > > +                             f->pci_address_space,
>> > > > > +                             pci_hole64_start, pci_hole64_size);
>> > > > > +    if (pci_hole64_size) {
>> > > > > +        memory_region_add_subregion(f->system_memory,
>> > > > pci_hole64_start,
>> > > > > +                                    &f->pci_hole_64bit);
>> > > > > +    }
>> > > > > +}
>> > > > > +
>> > > > > +
>> > > > >  static void i440fx_update_memory_mappings(PCII440FXState *d)
>> > > > >  {
>> > > > >      int i;
>> > > > > @@ -136,6 +181,9 @@ static void i440fx_write_config(PCIDevice *dev,
>> > > > >          range_covers_byte(address, len, I440FX_SMRAM)) {
>> > > > >          i440fx_update_memory_mappings(d);
>> > > > >      }
>> > > > > +    if (range_covers_byte(address, len, I440FX_PCI_HOLE_BASE)) {
>> > > > > +        i440fx_update_pci_mem_hole(d, true);
>> > > > > +    }
>> > > > >  }
>> > > > >
>> > > > >  static int i440fx_load_old(QEMUFile* f, void *opaque, int version_id)
>> > > > > @@ -203,6 +251,10 @@ static int i440fx_initfn(PCIDevice *dev)
>> > > > >
>> > > > >      d->dev.config[I440FX_SMRAM] = 0x02;
>> > > > >
>> > > > > +    /* Emulate top of memory, here use 0xe0000000 as default val*/
>> > > > > +    uint32_t addr = xen_enabled() ? I440FX_XEN_TOM : I440FX_TOM;
>> > > > > +    pci_set_byte(dev->config + I440FX_PCI_HOLE_BASE, (uint8_t)(addr >>
>> > > > 24));
>> > > > > +
>> > > > >      cpu_smm_register(&i440fx_set_smm, d);
>> > > > >      return 0;
>> > > > >  }
>> > > > > @@ -214,10 +266,6 @@ static PCIBus *i440fx_common_init(const char
>> > > > *device_name,
>> > > > >                                    MemoryRegion
>> > > > *address_space_mem,
>> > > > >                                    MemoryRegion
>> > > > *address_space_io,
>> > > > >                                    ram_addr_t ram_size,
>> > > > > -                                  hwaddr pci_hole_start,
>> > > > > -                                  hwaddr pci_hole_size,
>> > > > > -                                  hwaddr pci_hole64_start,
>> > > > > -                                  hwaddr pci_hole64_size,
>> > > > >                                    MemoryRegion
>> > > > *pci_address_space,
>> > > > >                                    MemoryRegion *ram_memory)
>> > > > >  {
>> > > > > @@ -244,16 +292,6 @@ static PCIBus *i440fx_common_init(const char
>> > > > *device_name,
>> > > > >      f->system_memory = address_space_mem;
>> > > > >      f->pci_address_space = pci_address_space;
>> > > > >      f->ram_memory = ram_memory;
>> > > > > -    memory_region_init_alias(&f->pci_hole, "pci-hole",
>> > > > f->pci_address_space,
>> > > > > -                             pci_hole_start, pci_hole_size);
>> > > > > -    memory_region_add_subregion(f->system_memory, pci_hole_start,
>> > > > &f->pci_hole);
>> > > > > -    memory_region_init_alias(&f->pci_hole_64bit, "pci-hole64",
>> > > > > -                             f->pci_address_space,
>> > > > > -                             pci_hole64_start, pci_hole64_size);
>> > > > > -    if (pci_hole64_size) {
>> > > > > -        memory_region_add_subregion(f->system_memory,
>> > > > pci_hole64_start,
>> > > > > -                                    &f->pci_hole_64bit);
>> > > > > -    }
>> > > > >      memory_region_init_alias(&f->smram_region, "smram-region",
>> > > > >                               f->pci_address_space, 0xa0000,
>> > > > 0x20000);
>> > > > >      memory_region_add_subregion_overlap(f->system_memory,
>> > > > 0xa0000,
>> > > > > @@ -295,6 +333,7 @@ static PCIBus *i440fx_common_init(const char
>> > > > *device_name,
>> > > > >      (*pi440fx_state)->dev.config[0x57]=ram_size;
>> > > > >
>> > > > >      i440fx_update_memory_mappings(f);
>> > > > > +    i440fx_update_pci_mem_hole(f, false);
>> > > > >
>> > > > >      return b;
>> > > > >  }
>> > > > > @@ -304,10 +343,6 @@ PCIBus *i440fx_init(PCII440FXState
>> > > > **pi440fx_state, int *piix3_devfn,
>> > > > >                      MemoryRegion *address_space_mem,
>> > > > >                      MemoryRegion *address_space_io,
>> > > > >                      ram_addr_t ram_size,
>> > > > > -                    hwaddr pci_hole_start,
>> > > > > -                    hwaddr pci_hole_size,
>> > > > > -                    hwaddr pci_hole64_start,
>> > > > > -                    hwaddr pci_hole64_size,
>> > > > >                      MemoryRegion *pci_memory, MemoryRegion
>> > > > *ram_memory)
>> > > > >
>> > > > >  {
>> > > > > @@ -315,8 +350,6 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state,
>> > > > int *piix3_devfn,
>> > > > >
>> > > > >      b = i440fx_common_init("i440FX", pi440fx_state, piix3_devfn, isa_bus,
>> > > > pic,
>> > > > >                             address_space_mem, address_space_io,
>> > > > ram_size,
>> > > > > -                           pci_hole_start, pci_hole_size,
>> > > > > -                           pci_hole64_start, pci_hole64_size,
>> > > > >                             pci_memory, ram_memory);
>> > > > >      return b;
>> > > > >  }
>> > > > > --
>> > > > > 1.7.12.1
>> > > > >
>> > >
>> >
Zhang, Xiantao - Feb. 27, 2013, 9:49 a.m.
> Given that Xen has at least two other mechanisms (xenstore and

> hvmparams) for passing this sort of information around I'm not sure why

> hacking the emulated i440fx device should be the preferred option.


Actually, even in hardware,  I believe there are many registers which are implemented with write-once attributes, and they are only used by firmware and reserved for OS.   In addition,  with this change,  it can benefit all VMMs (not just Xen) which use Qemu as device model.  If adopt xenstore way, perhaps other VMMs also have to write similar and duplicate logic for the same purpose.  
Xiantao 


> On Tue, 2013-02-26 at 15:43 +0000, Stefano Stabellini wrote:

> > Right, I think that reading as 0xff and write once would be important

> > improvements for this patch.

> >

> > I would like to see a document somewhere (maybe just a text file under

> > xen-unstable/docs/misc) with a list of deviations of the i440fx we

> > emulate and the real one.


> > Other than that, it would be OK for me.

> >

> > On Tue, 26 Feb 2013, Zhang, Xiantao wrote:

> > > For real i440fx, its TOM is fixed to 1G, I think Xen or other VMMs playing

> with Qemu should break this hardware rule.  Maybe we can implement this

> register as a write-only one, so that OS can't see its existence.   If OS reads this

> register, Qemu always return 0xff, and for any write operations to this register,

> it may impact hardware's behavior.  This conforms to the behavior of OS

> accessing a reserved register.

> > > Xiantao

> > >

> > > > -----Original Message-----

> > > > From: qemu-devel-bounces+xiantao.zhang=intel.com@nongnu.org

> > > > [mailto:qemu-devel-bounces+xiantao.zhang=intel.com@nongnu.org] On

> Behalf

> > > > Of Hao, Xudong

> > > > Sent: Tuesday, February 26, 2013 11:33 AM

> > > > To: Stefano Stabellini; Ian Campbell

> > > > Cc: aliguori@us.ibm.com; mst@redhat.com; qemu-devel@nongnu.org;

> xen-

> > > > devel@lists.xen.org; JBeulich@suse.com; afaerber@suse.de

> > > > Subject: Re: [Qemu-devel] [PATCH v2] piix: define a TOM register to

> report the

> > > > base of PCI

> > > >

> > > > > -----Original Message-----

> > > > > From: qemu-devel-bounces+xudong.hao=intel.com@nongnu.org

> > > > > [mailto:qemu-devel-bounces+xudong.hao=intel.com@nongnu.org] On

> Behalf

> > > > > Of Stefano Stabellini

> > > > > Sent: Tuesday, February 26, 2013 12:06 AM

> > > > > To: Hao, Xudong

> > > > > Cc: aliguori@us.ibm.com; Stefano Stabellini; mst@redhat.com;

> > > > > qemu-devel@nongnu.org; xen-devel@lists.xen.org; JBeulich@suse.com;

> > > > > afaerber@suse.de

> > > > > Subject: Re: [Qemu-devel] [PATCH v2] piix: define a TOM register to

> report

> > > > the

> > > > > base of PCI

> > > > >

> > > > > On Mon, 25 Feb 2013, Xudong Hao wrote:

> > > > > > v2:

> > > > > > * Use "piix: " in the subject rather than "qemu: "

> > > > > > * Define TOM register as one byte

> > > > > > * Define default TOM value instead of hardcode 0xe0000000 in more

> that

> > > > one

> > > > > place

> > > > > > * Use API pci_set_byte for pci config access

> > > > > > * Use dev->config instead of the indirect d->dev.config

> > > > > >

> > > > > > Define a TOM(top of memory) register to report the base of PCI

> memory,

> > > > > update

> > > > > > memory region dynamically. TOM register are defined to one byte in

> PCI

> > > > > configure

> > > > > > space, because that only upper 4 bit of PCI memory takes effect for

> Xen, so

> > > > > > it requires bios set TOM with 16M-aligned.

> > > > > >

> > > > > > Signed-off-by: Xudong Hao <xudong.hao@intel.com>

> > > > > > Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>

> > > > >

> > > > > The patch is OK from my POV, but I think that Ian raised a valid

> > > > > concern: we are supposed to emulate a real piece of hardware, maybe

> > > > > another mechanism (xenbus? hvmop?) to pass the information from

> > > > > hvmloader to QEMU would be a better fit than this.

> > > > > Otherwise at least we would need to advertize this feature somehow: if

> > > > > hvmloader can use it, the guest kernel can use it too...

> > > > >

> > > > Hi, Ian and Stefano

> > > >

> > > > Here adding this faking register in bios is a hack way.

> > > > However, what we emulated is not full real piece of hardware at all times,

> the

> > > > max of TOM for i440fx is 1G, and we already adjust the TOM in qemu.

> > > > The faking register is only effective by bios and device model. This

> register is

> > > > reserved by host bridge, so guest can't access this register, guest kernel

> should

> > > > handle well when accessing a reserved region.

> > > >

> > > > -Thanks

> > > > Xudong

> > > > >

> > > > >

> > > > > >  hw/pc.h       |  7 +++---

> > > > > >  hw/pc_piix.c  | 12 +++-------

> > > > > >  hw/piix_pci.c | 73

> > > > > +++++++++++++++++++++++++++++++++++++++++++----------------

> > > > > >  3 files changed, 59 insertions(+), 33 deletions(-)

> > > > > >

> > > > > > diff --git a/hw/pc.h b/hw/pc.h

> > > > > > index fbcf43d..62adcc5 100644

> > > > > > --- a/hw/pc.h

> > > > > > +++ b/hw/pc.h

> > > > > > @@ -129,15 +129,14 @@ extern int no_hpet;

> > > > > >  struct PCII440FXState;

> > > > > >  typedef struct PCII440FXState PCII440FXState;

> > > > > >

> > > > > > +#define I440FX_TOM     0xe0000000

> > > > > > +#define I440FX_XEN_TOM 0xf0000000

> > > > > > +

> > > > > >  PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix_devfn,

> > > > > >                      ISABus **isa_bus, qemu_irq *pic,

> > > > > >                      MemoryRegion *address_space_mem,

> > > > > >                      MemoryRegion *address_space_io,

> > > > > >                      ram_addr_t ram_size,

> > > > > > -                    hwaddr pci_hole_start,

> > > > > > -                    hwaddr pci_hole_size,

> > > > > > -                    hwaddr pci_hole64_start,

> > > > > > -                    hwaddr pci_hole64_size,

> > > > > >                      MemoryRegion *pci_memory,

> > > > > >                      MemoryRegion *ram_memory);

> > > > > >

> > > > > > diff --git a/hw/pc_piix.c b/hw/pc_piix.c

> > > > > > index 0a6923d..2eef510 100644

> > > > > > --- a/hw/pc_piix.c

> > > > > > +++ b/hw/pc_piix.c

> > > > > > @@ -93,9 +93,9 @@ static void pc_init1(MemoryRegion

> *system_memory,

> > > > > >          kvmclock_create();

> > > > > >      }

> > > > > >

> > > > > > -    if (ram_size >= 0xe0000000 ) {

> > > > > > -        above_4g_mem_size = ram_size - 0xe0000000;

> > > > > > -        below_4g_mem_size = 0xe0000000;

> > > > > > +    if (ram_size >= I440FX_TOM) {

> > > > > > +        above_4g_mem_size = ram_size - I440FX_TOM;

> > > > > > +        below_4g_mem_size = I440FX_TOM;

> > > > > >      } else {

> > > > > >          above_4g_mem_size = 0;

> > > > > >          below_4g_mem_size = ram_size;

> > > > > > @@ -130,12 +130,6 @@ static void pc_init1(MemoryRegion

> > > > > *system_memory,

> > > > > >      if (pci_enabled) {

> > > > > >          pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, &isa_bus, gsi,

> > > > > >                                system_memory, system_io,

> > > > > ram_size,

> > > > > > -                              below_4g_mem_size,

> > > > > > -                              0x100000000ULL -

> > > > > below_4g_mem_size,

> > > > > > -                              0x100000000ULL +

> > > > > above_4g_mem_size,

> > > > > > -                              (sizeof(hwaddr) == 4

> > > > > > -                               ? 0

> > > > > > -                               : ((uint64_t)1 << 62)),

> > > > > >                                pci_memory, ram_memory);

> > > > > >      } else {

> > > > > >          pci_bus = NULL;

> > > > > > diff --git a/hw/piix_pci.c b/hw/piix_pci.c

> > > > > > index 3d79c73..3e5a25c 100644

> > > > > > --- a/hw/piix_pci.c

> > > > > > +++ b/hw/piix_pci.c

> > > > > > @@ -86,6 +86,14 @@ struct PCII440FXState {

> > > > > >  #define I440FX_PAM_SIZE 7

> > > > > >  #define I440FX_SMRAM    0x72

> > > > > >

> > > > > > +/* The maximum vaule of TOM(top of memory) register in I440FX

> > > > > > + * is 1G, so it doesn't meet any popular virutal machines, so

> > > > > > + * define another register to report the base of PCI memory.

> > > > > > + * Use one byte 0xb0 for the upper 8 bit, they are originally

> > > > > > + * resevered for host bridge.

> > > > > > + * */

> > > > > > +#define I440FX_PCI_HOLE_BASE 0xb0

> > > > > > +

> > > > > >  static void piix3_set_irq(void *opaque, int pirq, int level);

> > > > > >  static PCIINTxRoute piix3_route_intx_pin_to_irq(void *opaque, int

> > > > > pci_intx);

> > > > > >  static void piix3_write_config_xen(PCIDevice *dev,

> > > > > > @@ -101,6 +109,43 @@ static int pci_slot_get_pirq(PCIDevice

> *pci_dev, int

> > > > > pci_intx)

> > > > > >      return (pci_intx + slot_addend) & 3;

> > > > > >  }

> > > > > >

> > > > > > +

> > > > > > +static void i440fx_update_pci_mem_hole(PCII440FXState *f, bool del)

> > > > > > +{

> > > > > > +    ram_addr_t above_4g_mem_size;

> > > > > > +    hwaddr pci_hole_start, pci_hole_size, pci_hole64_start,

> > > > > pci_hole64_size;

> > > > > > +

> > > > > > +    pci_hole_start = pci_default_read_config(&f->dev,

> > > > > I440FX_PCI_HOLE_BASE, 1) << 24;

> > > > > > +    pci_hole_size = 0x100000000ULL - pci_hole_start;

> > > > > > +

> > > > > > +    if (ram_size >= pci_hole_start) {

> > > > > > +        above_4g_mem_size = ram_size - pci_hole_start;

> > > > > > +    } else {

> > > > > > +        above_4g_mem_size = 0;

> > > > > > +    }

> > > > > > +    pci_hole64_start = 0x100000000ULL + above_4g_mem_size;

> > > > > > +    pci_hole64_size = sizeof(hwaddr) == 4 ? 0 : ((uint64_t)1 << 62);

> > > > > > +

> > > > > > +    if (del) {

> > > > > > +        memory_region_del_subregion(f->system_memory,

> > > > > &f->pci_hole);

> > > > > > +        if (pci_hole64_size) {

> > > > > > +            memory_region_del_subregion(f->system_memory,

> > > > > &f->pci_hole_64bit);

> > > > > > +        }

> > > > > > +    }

> > > > > > +

> > > > > > +    memory_region_init_alias(&f->pci_hole, "pci-hole",

> > > > > f->pci_address_space,

> > > > > > +                             pci_hole_start, pci_hole_size);

> > > > > > +    memory_region_add_subregion(f->system_memory,

> pci_hole_start,

> > > > > &f->pci_hole);

> > > > > > +    memory_region_init_alias(&f->pci_hole_64bit, "pci-hole64",

> > > > > > +                             f->pci_address_space,

> > > > > > +                             pci_hole64_start, pci_hole64_size);

> > > > > > +    if (pci_hole64_size) {

> > > > > > +        memory_region_add_subregion(f->system_memory,

> > > > > pci_hole64_start,

> > > > > > +                                    &f->pci_hole_64bit);

> > > > > > +    }

> > > > > > +}

> > > > > > +

> > > > > > +

> > > > > >  static void i440fx_update_memory_mappings(PCII440FXState *d)

> > > > > >  {

> > > > > >      int i;

> > > > > > @@ -136,6 +181,9 @@ static void i440fx_write_config(PCIDevice

> *dev,

> > > > > >          range_covers_byte(address, len, I440FX_SMRAM)) {

> > > > > >          i440fx_update_memory_mappings(d);

> > > > > >      }

> > > > > > +    if (range_covers_byte(address, len, I440FX_PCI_HOLE_BASE)) {

> > > > > > +        i440fx_update_pci_mem_hole(d, true);

> > > > > > +    }

> > > > > >  }

> > > > > >

> > > > > >  static int i440fx_load_old(QEMUFile* f, void *opaque, int version_id)

> > > > > > @@ -203,6 +251,10 @@ static int i440fx_initfn(PCIDevice *dev)

> > > > > >

> > > > > >      d->dev.config[I440FX_SMRAM] = 0x02;

> > > > > >

> > > > > > +    /* Emulate top of memory, here use 0xe0000000 as default val*/

> > > > > > +    uint32_t addr = xen_enabled() ? I440FX_XEN_TOM : I440FX_TOM;

> > > > > > +    pci_set_byte(dev->config + I440FX_PCI_HOLE_BASE,

> (uint8_t)(addr >>

> > > > > 24));

> > > > > > +

> > > > > >      cpu_smm_register(&i440fx_set_smm, d);

> > > > > >      return 0;

> > > > > >  }

> > > > > > @@ -214,10 +266,6 @@ static PCIBus *i440fx_common_init(const

> char

> > > > > *device_name,

> > > > > >                                    MemoryRegion

> > > > > *address_space_mem,

> > > > > >                                    MemoryRegion

> > > > > *address_space_io,

> > > > > >                                    ram_addr_t ram_size,

> > > > > > -                                  hwaddr pci_hole_start,

> > > > > > -                                  hwaddr pci_hole_size,

> > > > > > -                                  hwaddr pci_hole64_start,

> > > > > > -                                  hwaddr pci_hole64_size,

> > > > > >                                    MemoryRegion

> > > > > *pci_address_space,

> > > > > >                                    MemoryRegion *ram_memory)

> > > > > >  {

> > > > > > @@ -244,16 +292,6 @@ static PCIBus *i440fx_common_init(const

> char

> > > > > *device_name,

> > > > > >      f->system_memory = address_space_mem;

> > > > > >      f->pci_address_space = pci_address_space;

> > > > > >      f->ram_memory = ram_memory;

> > > > > > -    memory_region_init_alias(&f->pci_hole, "pci-hole",

> > > > > f->pci_address_space,

> > > > > > -                             pci_hole_start, pci_hole_size);

> > > > > > -    memory_region_add_subregion(f->system_memory,

> pci_hole_start,

> > > > > &f->pci_hole);

> > > > > > -    memory_region_init_alias(&f->pci_hole_64bit, "pci-hole64",

> > > > > > -                             f->pci_address_space,

> > > > > > -                             pci_hole64_start, pci_hole64_size);

> > > > > > -    if (pci_hole64_size) {

> > > > > > -        memory_region_add_subregion(f->system_memory,

> > > > > pci_hole64_start,

> > > > > > -                                    &f->pci_hole_64bit);

> > > > > > -    }

> > > > > >      memory_region_init_alias(&f->smram_region, "smram-region",

> > > > > >                               f->pci_address_space, 0xa0000,

> > > > > 0x20000);

> > > > > >      memory_region_add_subregion_overlap(f->system_memory,

> > > > > 0xa0000,

> > > > > > @@ -295,6 +333,7 @@ static PCIBus *i440fx_common_init(const char

> > > > > *device_name,

> > > > > >      (*pi440fx_state)->dev.config[0x57]=ram_size;

> > > > > >

> > > > > >      i440fx_update_memory_mappings(f);

> > > > > > +    i440fx_update_pci_mem_hole(f, false);

> > > > > >

> > > > > >      return b;

> > > > > >  }

> > > > > > @@ -304,10 +343,6 @@ PCIBus *i440fx_init(PCII440FXState

> > > > > **pi440fx_state, int *piix3_devfn,

> > > > > >                      MemoryRegion *address_space_mem,

> > > > > >                      MemoryRegion *address_space_io,

> > > > > >                      ram_addr_t ram_size,

> > > > > > -                    hwaddr pci_hole_start,

> > > > > > -                    hwaddr pci_hole_size,

> > > > > > -                    hwaddr pci_hole64_start,

> > > > > > -                    hwaddr pci_hole64_size,

> > > > > >                      MemoryRegion *pci_memory, MemoryRegion

> > > > > *ram_memory)

> > > > > >

> > > > > >  {

> > > > > > @@ -315,8 +350,6 @@ PCIBus *i440fx_init(PCII440FXState

> **pi440fx_state,

> > > > > int *piix3_devfn,

> > > > > >

> > > > > >      b = i440fx_common_init("i440FX", pi440fx_state, piix3_devfn,

> isa_bus,

> > > > > pic,

> > > > > >                             address_space_mem, address_space_io,

> > > > > ram_size,

> > > > > > -                           pci_hole_start, pci_hole_size,

> > > > > > -                           pci_hole64_start, pci_hole64_size,

> > > > > >                             pci_memory, ram_memory);

> > > > > >      return b;

> > > > > >  }

> > > > > > --

> > > > > > 1.7.12.1

> > > > > >

> > > >

> > >

>
Michael S. Tsirkin - Feb. 27, 2013, 10:50 a.m.
On Mon, Feb 25, 2013 at 02:53:37PM +0800, Xudong Hao wrote:
> v2:
> * Use "piix: " in the subject rather than "qemu: "
> * Define TOM register as one byte
> * Define default TOM value instead of hardcode 0xe0000000 in more that one place
> * Use API pci_set_byte for pci config access
> * Use dev->config instead of the indirect d->dev.config
> 
> Define a TOM(top of memory) register to report the base of PCI memory, update
> memory region dynamically. TOM register are defined to one byte in PCI configure
> space, because that only upper 4 bit of PCI memory takes effect for Xen, so
> it requires bios set TOM with 16M-aligned.
> 
> Signed-off-by: Xudong Hao <xudong.hao@intel.com>
> Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>

Could you supply some motivation for this patch?

> ---
>  hw/pc.h       |  7 +++---
>  hw/pc_piix.c  | 12 +++-------
>  hw/piix_pci.c | 73 +++++++++++++++++++++++++++++++++++++++++++----------------
>  3 files changed, 59 insertions(+), 33 deletions(-)
> 
> diff --git a/hw/pc.h b/hw/pc.h
> index fbcf43d..62adcc5 100644
> --- a/hw/pc.h
> +++ b/hw/pc.h
> @@ -129,15 +129,14 @@ extern int no_hpet;
>  struct PCII440FXState;
>  typedef struct PCII440FXState PCII440FXState;
>  
> +#define I440FX_TOM     0xe0000000
> +#define I440FX_XEN_TOM 0xf0000000
> +
>  PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix_devfn,
>                      ISABus **isa_bus, qemu_irq *pic,
>                      MemoryRegion *address_space_mem,
>                      MemoryRegion *address_space_io,
>                      ram_addr_t ram_size,
> -                    hwaddr pci_hole_start,
> -                    hwaddr pci_hole_size,
> -                    hwaddr pci_hole64_start,
> -                    hwaddr pci_hole64_size,
>                      MemoryRegion *pci_memory,
>                      MemoryRegion *ram_memory);
>  
> diff --git a/hw/pc_piix.c b/hw/pc_piix.c
> index 0a6923d..2eef510 100644
> --- a/hw/pc_piix.c
> +++ b/hw/pc_piix.c
> @@ -93,9 +93,9 @@ static void pc_init1(MemoryRegion *system_memory,
>          kvmclock_create();
>      }
>  
> -    if (ram_size >= 0xe0000000 ) {
> -        above_4g_mem_size = ram_size - 0xe0000000;
> -        below_4g_mem_size = 0xe0000000;
> +    if (ram_size >= I440FX_TOM) {
> +        above_4g_mem_size = ram_size - I440FX_TOM;
> +        below_4g_mem_size = I440FX_TOM;
>      } else {
>          above_4g_mem_size = 0;
>          below_4g_mem_size = ram_size;
> @@ -130,12 +130,6 @@ static void pc_init1(MemoryRegion *system_memory,
>      if (pci_enabled) {
>          pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, &isa_bus, gsi,
>                                system_memory, system_io, ram_size,
> -                              below_4g_mem_size,
> -                              0x100000000ULL - below_4g_mem_size,
> -                              0x100000000ULL + above_4g_mem_size,
> -                              (sizeof(hwaddr) == 4
> -                               ? 0
> -                               : ((uint64_t)1 << 62)),
>                                pci_memory, ram_memory);
>      } else {
>          pci_bus = NULL;
> diff --git a/hw/piix_pci.c b/hw/piix_pci.c
> index 3d79c73..3e5a25c 100644
> --- a/hw/piix_pci.c
> +++ b/hw/piix_pci.c
> @@ -86,6 +86,14 @@ struct PCII440FXState {
>  #define I440FX_PAM_SIZE 7
>  #define I440FX_SMRAM    0x72
>  
> +/* The maximum vaule of TOM(top of memory) register in I440FX
> + * is 1G, so it doesn't meet any popular virutal machines, so
> + * define another register to report the base of PCI memory.
> + * Use one byte 0xb0 for the upper 8 bit, they are originally
> + * resevered for host bridge.
> + * */
> +#define I440FX_PCI_HOLE_BASE 0xb0

Do you have to use a fixed address? As others said, it's a hack.
How about adding a special device for this hackery?

> +
>  static void piix3_set_irq(void *opaque, int pirq, int level);
>  static PCIINTxRoute piix3_route_intx_pin_to_irq(void *opaque, int pci_intx);
>  static void piix3_write_config_xen(PCIDevice *dev,
> @@ -101,6 +109,43 @@ static int pci_slot_get_pirq(PCIDevice *pci_dev, int pci_intx)
>      return (pci_intx + slot_addend) & 3;
>  }
>  
> +
> +static void i440fx_update_pci_mem_hole(PCII440FXState *f, bool del)
> +{
> +    ram_addr_t above_4g_mem_size;
> +    hwaddr pci_hole_start, pci_hole_size, pci_hole64_start, pci_hole64_size;
> +
> +    pci_hole_start = pci_default_read_config(&f->dev, I440FX_PCI_HOLE_BASE, 1) << 24;
> +    pci_hole_size = 0x100000000ULL - pci_hole_start;
> +
> +    if (ram_size >= pci_hole_start) {
> +        above_4g_mem_size = ram_size - pci_hole_start;
> +    } else {
> +        above_4g_mem_size = 0;
> +    }
> +    pci_hole64_start = 0x100000000ULL + above_4g_mem_size;
> +    pci_hole64_size = sizeof(hwaddr) == 4 ? 0 : ((uint64_t)1 << 62);
> +
> +    if (del) {
> +        memory_region_del_subregion(f->system_memory, &f->pci_hole);
> +        if (pci_hole64_size) {
> +            memory_region_del_subregion(f->system_memory, &f->pci_hole_64bit);
> +        }
> +    }
> +
> +    memory_region_init_alias(&f->pci_hole, "pci-hole", f->pci_address_space,
> +                             pci_hole_start, pci_hole_size);
> +    memory_region_add_subregion(f->system_memory, pci_hole_start, &f->pci_hole);
> +    memory_region_init_alias(&f->pci_hole_64bit, "pci-hole64",
> +                             f->pci_address_space,
> +                             pci_hole64_start, pci_hole64_size);
> +    if (pci_hole64_size) {
> +        memory_region_add_subregion(f->system_memory, pci_hole64_start,
> +                                    &f->pci_hole_64bit);
> +    }
> +}
> +
> +

If you do things like this, won't ACPI tables become incorrect?
Ian Campbell - Feb. 27, 2013, 11:06 a.m.
I'm not sure about qemu-devel but on xen-devel the policy is not to top
post so please could you avoid doping so.

On Wed, 2013-02-27 at 09:49 +0000, Zhang, Xiantao wrote:
> > Given that Xen has at least two other mechanisms (xenstore and
> > hvmparams) for passing this sort of information around I'm not sure why
> > hacking the emulated i440fx device should be the preferred option.
> 
> Actually, even in hardware,  I believe there are many registers which
> are implemented with write-once attributes, and they are only used by
> firmware and reserved for OS.

The i440fx does not have this register (be it write once or otherwise),
which is my actual point -- you are adding a magic property to the
emulation of this device which the real hardware doesn't have. It isn't
really relevant that other hardware could implement write once
registers, that's obviously the case.

> In addition,  with this change,  it can benefit all VMMs (not just
> Xen) which use Qemu as device model.  If adopt xenstore way, perhaps
> other VMMs also have to write similar and duplicate logic for the same
> purpose.

Which other VMM? AIUI qemu/kvm doesn't have a requirement to communicate
this information from the VMM to qemu because qemu is the VMM and
controls all of the hardware resources.

Ian.
Hao, Xudong - March 18, 2013, 3:21 p.m.
> -----Original Message-----
> From: Michael S. Tsirkin [mailto:mst@redhat.com]
> Sent: Wednesday, February 27, 2013 6:50 PM
> To: Hao, Xudong
> Cc: aliguori@us.ibm.com; qemu-devel@nongnu.org;
> stefano.stabellini@eu.citrix.com; xen-devel@lists.xen.org; afaerber@suse.de;
> JBeulich@suse.com; Zhang, Xiantao
> Subject: Re: [PATCH v2] piix: define a TOM register to report the base of PCI
> 
> On Mon, Feb 25, 2013 at 02:53:37PM +0800, Xudong Hao wrote:
> > v2:
> > * Use "piix: " in the subject rather than "qemu: "
> > * Define TOM register as one byte
> > * Define default TOM value instead of hardcode 0xe0000000 in more that one
> place
> > * Use API pci_set_byte for pci config access
> > * Use dev->config instead of the indirect d->dev.config
> >
> > Define a TOM(top of memory) register to report the base of PCI memory,
> update
> > memory region dynamically. TOM register are defined to one byte in PCI
> configure
> > space, because that only upper 4 bit of PCI memory takes effect for Xen, so
> > it requires bios set TOM with 16M-aligned.
> >
> > Signed-off-by: Xudong Hao <xudong.hao@intel.com>
> > Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
> 
> Could you supply some motivation for this patch?
> 

It's a fix for Xen. Qemu want more information from Xen, copy Stefano's comments:

QEMU needs to know where the end of the guest's RAM is (because there is
where it allocates the videoram and other stuff), so at least the size
of the MMIO hole is important.

Thanks,
-Xudong
> > ---
> >  hw/pc.h       |  7 +++---
> >  hw/pc_piix.c  | 12 +++-------
> >  hw/piix_pci.c | 73
> +++++++++++++++++++++++++++++++++++++++++++----------------
> >  3 files changed, 59 insertions(+), 33 deletions(-)
> >
> > diff --git a/hw/pc.h b/hw/pc.h
> > index fbcf43d..62adcc5 100644
> > --- a/hw/pc.h
> > +++ b/hw/pc.h
> > @@ -129,15 +129,14 @@ extern int no_hpet;
> >  struct PCII440FXState;
> >  typedef struct PCII440FXState PCII440FXState;
> >
> > +#define I440FX_TOM     0xe0000000
> > +#define I440FX_XEN_TOM 0xf0000000
> > +
> >  PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix_devfn,
> >                      ISABus **isa_bus, qemu_irq *pic,
> >                      MemoryRegion *address_space_mem,
> >                      MemoryRegion *address_space_io,
> >                      ram_addr_t ram_size,
> > -                    hwaddr pci_hole_start,
> > -                    hwaddr pci_hole_size,
> > -                    hwaddr pci_hole64_start,
> > -                    hwaddr pci_hole64_size,
> >                      MemoryRegion *pci_memory,
> >                      MemoryRegion *ram_memory);
> >
> > diff --git a/hw/pc_piix.c b/hw/pc_piix.c
> > index 0a6923d..2eef510 100644
> > --- a/hw/pc_piix.c
> > +++ b/hw/pc_piix.c
> > @@ -93,9 +93,9 @@ static void pc_init1(MemoryRegion *system_memory,
> >          kvmclock_create();
> >      }
> >
> > -    if (ram_size >= 0xe0000000 ) {
> > -        above_4g_mem_size = ram_size - 0xe0000000;
> > -        below_4g_mem_size = 0xe0000000;
> > +    if (ram_size >= I440FX_TOM) {
> > +        above_4g_mem_size = ram_size - I440FX_TOM;
> > +        below_4g_mem_size = I440FX_TOM;
> >      } else {
> >          above_4g_mem_size = 0;
> >          below_4g_mem_size = ram_size;
> > @@ -130,12 +130,6 @@ static void pc_init1(MemoryRegion
> *system_memory,
> >      if (pci_enabled) {
> >          pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, &isa_bus, gsi,
> >                                system_memory, system_io,
> ram_size,
> > -                              below_4g_mem_size,
> > -                              0x100000000ULL -
> below_4g_mem_size,
> > -                              0x100000000ULL +
> above_4g_mem_size,
> > -                              (sizeof(hwaddr) == 4
> > -                               ? 0
> > -                               : ((uint64_t)1 << 62)),
> >                                pci_memory, ram_memory);
> >      } else {
> >          pci_bus = NULL;
> > diff --git a/hw/piix_pci.c b/hw/piix_pci.c
> > index 3d79c73..3e5a25c 100644
> > --- a/hw/piix_pci.c
> > +++ b/hw/piix_pci.c
> > @@ -86,6 +86,14 @@ struct PCII440FXState {
> >  #define I440FX_PAM_SIZE 7
> >  #define I440FX_SMRAM    0x72
> >
> > +/* The maximum vaule of TOM(top of memory) register in I440FX
> > + * is 1G, so it doesn't meet any popular virutal machines, so
> > + * define another register to report the base of PCI memory.
> > + * Use one byte 0xb0 for the upper 8 bit, they are originally
> > + * resevered for host bridge.
> > + * */
> > +#define I440FX_PCI_HOLE_BASE 0xb0
> 
> Do you have to use a fixed address? As others said, it's a hack.
> How about adding a special device for this hackery?
> 
> > +
> >  static void piix3_set_irq(void *opaque, int pirq, int level);
> >  static PCIINTxRoute piix3_route_intx_pin_to_irq(void *opaque, int
> pci_intx);
> >  static void piix3_write_config_xen(PCIDevice *dev,
> > @@ -101,6 +109,43 @@ static int pci_slot_get_pirq(PCIDevice *pci_dev, int
> pci_intx)
> >      return (pci_intx + slot_addend) & 3;
> >  }
> >
> > +
> > +static void i440fx_update_pci_mem_hole(PCII440FXState *f, bool del)
> > +{
> > +    ram_addr_t above_4g_mem_size;
> > +    hwaddr pci_hole_start, pci_hole_size, pci_hole64_start,
> pci_hole64_size;
> > +
> > +    pci_hole_start = pci_default_read_config(&f->dev,
> I440FX_PCI_HOLE_BASE, 1) << 24;
> > +    pci_hole_size = 0x100000000ULL - pci_hole_start;
> > +
> > +    if (ram_size >= pci_hole_start) {
> > +        above_4g_mem_size = ram_size - pci_hole_start;
> > +    } else {
> > +        above_4g_mem_size = 0;
> > +    }
> > +    pci_hole64_start = 0x100000000ULL + above_4g_mem_size;
> > +    pci_hole64_size = sizeof(hwaddr) == 4 ? 0 : ((uint64_t)1 << 62);
> > +
> > +    if (del) {
> > +        memory_region_del_subregion(f->system_memory,
> &f->pci_hole);
> > +        if (pci_hole64_size) {
> > +            memory_region_del_subregion(f->system_memory,
> &f->pci_hole_64bit);
> > +        }
> > +    }
> > +
> > +    memory_region_init_alias(&f->pci_hole, "pci-hole",
> f->pci_address_space,
> > +                             pci_hole_start, pci_hole_size);
> > +    memory_region_add_subregion(f->system_memory, pci_hole_start,
> &f->pci_hole);
> > +    memory_region_init_alias(&f->pci_hole_64bit, "pci-hole64",
> > +                             f->pci_address_space,
> > +                             pci_hole64_start, pci_hole64_size);
> > +    if (pci_hole64_size) {
> > +        memory_region_add_subregion(f->system_memory,
> pci_hole64_start,
> > +                                    &f->pci_hole_64bit);
> > +    }
> > +}
> > +
> > +
> 
> If you do things like this, won't ACPI tables become incorrect?
> 
ACPI is built in guest firmware, and these things is reported from firmware by this faking register.
Hao, Xudong - March 18, 2013, 3:25 p.m.
> -----Original Message-----

> From: qemu-devel-bounces+xudong.hao=intel.com@nongnu.org

> [mailto:qemu-devel-bounces+xudong.hao=intel.com@nongnu.org] On Behalf

> Of Ian Campbell

> Sent: Wednesday, February 27, 2013 7:07 PM

> To: Zhang, Xiantao

> Cc: aliguori@us.ibm.com; mst@redhat.com; Stefano Stabellini; Hao, Xudong;

> qemu-devel@nongnu.org; xen-devel@lists.xen.org; JBeulich@suse.com;

> afaerber@suse.de

> Subject: Re: [Qemu-devel] [PATCH v2] piix: define a TOM register to report the

> base of PCI

> 

> I'm not sure about qemu-devel but on xen-devel the policy is not to top

> post so please could you avoid doping so.

> 

> On Wed, 2013-02-27 at 09:49 +0000, Zhang, Xiantao wrote:

> > > Given that Xen has at least two other mechanisms (xenstore and

> > > hvmparams) for passing this sort of information around I'm not sure why

> > > hacking the emulated i440fx device should be the preferred option.

> >

> > Actually, even in hardware,  I believe there are many registers which

> > are implemented with write-once attributes, and they are only used by

> > firmware and reserved for OS.

> 

> The i440fx does not have this register (be it write once or otherwise),

> which is my actual point -- you are adding a magic property to the

> emulation of this device which the real hardware doesn't have. 


Sorry to response later.
But why faking a register that the real hardware doesn't have is not acceptant? I440fx device don't need this register for native environment, but for virtualization, adding such a register can simplify things.

> It isn't

> really relevant that other hardware could implement write once

> registers, that's obviously the case.

> 

> > In addition,  with this change,  it can benefit all VMMs (not just

> > Xen) which use Qemu as device model.  If adopt xenstore way, perhaps

> > other VMMs also have to write similar and duplicate logic for the same

> > purpose.

> 

> Which other VMM? AIUI qemu/kvm doesn't have a requirement to

> communicate

> this information from the VMM to qemu because qemu is the VMM and

> controls all of the hardware resources.

> 

I think our changes have better compatibility, we can't predict qemu will not be used for other VMMs, although changes only benefit xen till now.
Andreas Färber - March 18, 2013, 4:55 p.m.
Am 18.03.2013 16:21, schrieb Hao, Xudong:
>> -----Original Message-----
>> From: Michael S. Tsirkin [mailto:mst@redhat.com]
>> Sent: Wednesday, February 27, 2013 6:50 PM
>> To: Hao, Xudong
>> Cc: aliguori@us.ibm.com; qemu-devel@nongnu.org;
>> stefano.stabellini@eu.citrix.com; xen-devel@lists.xen.org; afaerber@suse.de;
>> JBeulich@suse.com; Zhang, Xiantao
>> Subject: Re: [PATCH v2] piix: define a TOM register to report the base of PCI
>>
>> On Mon, Feb 25, 2013 at 02:53:37PM +0800, Xudong Hao wrote:
>>> v2:
>>> * Use "piix: " in the subject rather than "qemu: "
>>> * Define TOM register as one byte
>>> * Define default TOM value instead of hardcode 0xe0000000 in more that one
>> place
>>> * Use API pci_set_byte for pci config access
>>> * Use dev->config instead of the indirect d->dev.config
>>>
>>> Define a TOM(top of memory) register to report the base of PCI memory,
>> update
>>> memory region dynamically. TOM register are defined to one byte in PCI
>> configure
>>> space, because that only upper 4 bit of PCI memory takes effect for Xen, so
>>> it requires bios set TOM with 16M-aligned.
>>>
>>> Signed-off-by: Xudong Hao <xudong.hao@intel.com>
>>> Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
>>
>> Could you supply some motivation for this patch?
>>
> 
> It's a fix for Xen. Qemu want more information from Xen, copy Stefano's comments:
> 
> QEMU needs to know where the end of the guest's RAM is (because there is
> where it allocates the videoram and other stuff), so at least the size
> of the MMIO hole is important.

Could you please reply to Anthony's comment that this information is
already available via fw_cfg interface? hw/fw_cfg.h is designed so that
it can be embedded elsewhere (e.g., in SeaBIOS and OpenBIOS). Reusing
any information available through that interface would seem much easier
than fiddling with reserved registers on emulated hardware.

Regards,
Andreas

Patch

diff --git a/hw/pc.h b/hw/pc.h
index fbcf43d..62adcc5 100644
--- a/hw/pc.h
+++ b/hw/pc.h
@@ -129,15 +129,14 @@  extern int no_hpet;
 struct PCII440FXState;
 typedef struct PCII440FXState PCII440FXState;
 
+#define I440FX_TOM     0xe0000000
+#define I440FX_XEN_TOM 0xf0000000
+
 PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix_devfn,
                     ISABus **isa_bus, qemu_irq *pic,
                     MemoryRegion *address_space_mem,
                     MemoryRegion *address_space_io,
                     ram_addr_t ram_size,
-                    hwaddr pci_hole_start,
-                    hwaddr pci_hole_size,
-                    hwaddr pci_hole64_start,
-                    hwaddr pci_hole64_size,
                     MemoryRegion *pci_memory,
                     MemoryRegion *ram_memory);
 
diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index 0a6923d..2eef510 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -93,9 +93,9 @@  static void pc_init1(MemoryRegion *system_memory,
         kvmclock_create();
     }
 
-    if (ram_size >= 0xe0000000 ) {
-        above_4g_mem_size = ram_size - 0xe0000000;
-        below_4g_mem_size = 0xe0000000;
+    if (ram_size >= I440FX_TOM) {
+        above_4g_mem_size = ram_size - I440FX_TOM;
+        below_4g_mem_size = I440FX_TOM;
     } else {
         above_4g_mem_size = 0;
         below_4g_mem_size = ram_size;
@@ -130,12 +130,6 @@  static void pc_init1(MemoryRegion *system_memory,
     if (pci_enabled) {
         pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, &isa_bus, gsi,
                               system_memory, system_io, ram_size,
-                              below_4g_mem_size,
-                              0x100000000ULL - below_4g_mem_size,
-                              0x100000000ULL + above_4g_mem_size,
-                              (sizeof(hwaddr) == 4
-                               ? 0
-                               : ((uint64_t)1 << 62)),
                               pci_memory, ram_memory);
     } else {
         pci_bus = NULL;
diff --git a/hw/piix_pci.c b/hw/piix_pci.c
index 3d79c73..3e5a25c 100644
--- a/hw/piix_pci.c
+++ b/hw/piix_pci.c
@@ -86,6 +86,14 @@  struct PCII440FXState {
 #define I440FX_PAM_SIZE 7
 #define I440FX_SMRAM    0x72
 
+/* The maximum vaule of TOM(top of memory) register in I440FX
+ * is 1G, so it doesn't meet any popular virutal machines, so
+ * define another register to report the base of PCI memory.
+ * Use one byte 0xb0 for the upper 8 bit, they are originally
+ * resevered for host bridge.
+ * */
+#define I440FX_PCI_HOLE_BASE 0xb0
+
 static void piix3_set_irq(void *opaque, int pirq, int level);
 static PCIINTxRoute piix3_route_intx_pin_to_irq(void *opaque, int pci_intx);
 static void piix3_write_config_xen(PCIDevice *dev,
@@ -101,6 +109,43 @@  static int pci_slot_get_pirq(PCIDevice *pci_dev, int pci_intx)
     return (pci_intx + slot_addend) & 3;
 }
 
+
+static void i440fx_update_pci_mem_hole(PCII440FXState *f, bool del)
+{
+    ram_addr_t above_4g_mem_size;
+    hwaddr pci_hole_start, pci_hole_size, pci_hole64_start, pci_hole64_size;
+
+    pci_hole_start = pci_default_read_config(&f->dev, I440FX_PCI_HOLE_BASE, 1) << 24;
+    pci_hole_size = 0x100000000ULL - pci_hole_start;
+
+    if (ram_size >= pci_hole_start) {
+        above_4g_mem_size = ram_size - pci_hole_start;
+    } else {
+        above_4g_mem_size = 0;
+    }
+    pci_hole64_start = 0x100000000ULL + above_4g_mem_size;
+    pci_hole64_size = sizeof(hwaddr) == 4 ? 0 : ((uint64_t)1 << 62);
+
+    if (del) {
+        memory_region_del_subregion(f->system_memory, &f->pci_hole);
+        if (pci_hole64_size) {
+            memory_region_del_subregion(f->system_memory, &f->pci_hole_64bit);
+        }
+    }
+
+    memory_region_init_alias(&f->pci_hole, "pci-hole", f->pci_address_space,
+                             pci_hole_start, pci_hole_size);
+    memory_region_add_subregion(f->system_memory, pci_hole_start, &f->pci_hole);
+    memory_region_init_alias(&f->pci_hole_64bit, "pci-hole64",
+                             f->pci_address_space,
+                             pci_hole64_start, pci_hole64_size);
+    if (pci_hole64_size) {
+        memory_region_add_subregion(f->system_memory, pci_hole64_start,
+                                    &f->pci_hole_64bit);
+    }
+}
+
+
 static void i440fx_update_memory_mappings(PCII440FXState *d)
 {
     int i;
@@ -136,6 +181,9 @@  static void i440fx_write_config(PCIDevice *dev,
         range_covers_byte(address, len, I440FX_SMRAM)) {
         i440fx_update_memory_mappings(d);
     }
+    if (range_covers_byte(address, len, I440FX_PCI_HOLE_BASE)) {
+        i440fx_update_pci_mem_hole(d, true);
+    }
 }
 
 static int i440fx_load_old(QEMUFile* f, void *opaque, int version_id)
@@ -203,6 +251,10 @@  static int i440fx_initfn(PCIDevice *dev)
 
     d->dev.config[I440FX_SMRAM] = 0x02;
 
+    /* Emulate top of memory, here use 0xe0000000 as default val*/
+    uint32_t addr = xen_enabled() ? I440FX_XEN_TOM : I440FX_TOM;
+    pci_set_byte(dev->config + I440FX_PCI_HOLE_BASE, (uint8_t)(addr >> 24));
+
     cpu_smm_register(&i440fx_set_smm, d);
     return 0;
 }
@@ -214,10 +266,6 @@  static PCIBus *i440fx_common_init(const char *device_name,
                                   MemoryRegion *address_space_mem,
                                   MemoryRegion *address_space_io,
                                   ram_addr_t ram_size,
-                                  hwaddr pci_hole_start,
-                                  hwaddr pci_hole_size,
-                                  hwaddr pci_hole64_start,
-                                  hwaddr pci_hole64_size,
                                   MemoryRegion *pci_address_space,
                                   MemoryRegion *ram_memory)
 {
@@ -244,16 +292,6 @@  static PCIBus *i440fx_common_init(const char *device_name,
     f->system_memory = address_space_mem;
     f->pci_address_space = pci_address_space;
     f->ram_memory = ram_memory;
-    memory_region_init_alias(&f->pci_hole, "pci-hole", f->pci_address_space,
-                             pci_hole_start, pci_hole_size);
-    memory_region_add_subregion(f->system_memory, pci_hole_start, &f->pci_hole);
-    memory_region_init_alias(&f->pci_hole_64bit, "pci-hole64",
-                             f->pci_address_space,
-                             pci_hole64_start, pci_hole64_size);
-    if (pci_hole64_size) {
-        memory_region_add_subregion(f->system_memory, pci_hole64_start,
-                                    &f->pci_hole_64bit);
-    }
     memory_region_init_alias(&f->smram_region, "smram-region",
                              f->pci_address_space, 0xa0000, 0x20000);
     memory_region_add_subregion_overlap(f->system_memory, 0xa0000,
@@ -295,6 +333,7 @@  static PCIBus *i440fx_common_init(const char *device_name,
     (*pi440fx_state)->dev.config[0x57]=ram_size;
 
     i440fx_update_memory_mappings(f);
+    i440fx_update_pci_mem_hole(f, false);
 
     return b;
 }
@@ -304,10 +343,6 @@  PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix3_devfn,
                     MemoryRegion *address_space_mem,
                     MemoryRegion *address_space_io,
                     ram_addr_t ram_size,
-                    hwaddr pci_hole_start,
-                    hwaddr pci_hole_size,
-                    hwaddr pci_hole64_start,
-                    hwaddr pci_hole64_size,
                     MemoryRegion *pci_memory, MemoryRegion *ram_memory)
 
 {
@@ -315,8 +350,6 @@  PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix3_devfn,
 
     b = i440fx_common_init("i440FX", pi440fx_state, piix3_devfn, isa_bus, pic,
                            address_space_mem, address_space_io, ram_size,
-                           pci_hole_start, pci_hole_size,
-                           pci_hole64_start, pci_hole64_size,
                            pci_memory, ram_memory);
     return b;
 }