diff mbox

[2/2] pc: add 'etc/reserved-memory-end' fw_cfg interface for SeaBIOS

Message ID 1384264707-9947-3-git-send-email-imammedo@redhat.com
State New
Headers show

Commit Message

Igor Mammedov Nov. 12, 2013, 1:58 p.m. UTC
'etc/reserved-memory-end' will allow QEMU to tell BIOS where PCI
BARs mapping could safely start in high memory.

This allows the BIOS to start mapping 64-bit PCI BARs at an address where
they won't conflict with other mappings QEMU might place before it.

That permits QEMU to reserve extra address space before
64-bit PCI hole for memory hotplug.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
---
 hw/i386/pc.c         | 14 +++++++++++++-
 hw/pci-host/piix.c   |  3 ++-
 hw/pci-host/q35.c    |  3 ++-
 include/hw/i386/pc.h |  3 ++-
 4 files changed, 19 insertions(+), 4 deletions(-)

Comments

Paolo Bonzini Nov. 12, 2013, 6:26 p.m. UTC | #1
Il 12/11/2013 14:58, Igor Mammedov ha scritto:
> 'etc/reserved-memory-end' will allow QEMU to tell BIOS where PCI
> BARs mapping could safely start in high memory.
> 
> Allowing BIOS to start mapping 64-bit PCI BARs at address where it
> wouldn't conflict with other mappings QEMU might place before it.
> 
> That permits QEMU to reserve extra address space before
> 64-bit PCI hole for memory hotplug.

I may be royally wrong, but I think the new file should only be added to
new machine types.  Otherwise, after migrating old machine types from
new QEMU to old QEMU, you may end up with PCI BARs mapped outside the
"PCI windows" that exist until before patch 1/2 of this series.

Does this make sense?

Also, would it make sense to use the e820 interface that Gerd has added,
instead of a new fw_cfg file?

Thanks,

Paolo


> Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> ---
>  hw/i386/pc.c         | 14 +++++++++++++-
>  hw/pci-host/piix.c   |  3 ++-
>  hw/pci-host/q35.c    |  3 ++-
>  include/hw/i386/pc.h |  3 ++-
>  4 files changed, 19 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 6c82ada..b504047 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -1095,11 +1095,23 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
>  
>  /* setup pci memory address space mapping into system address space */
>  void pc_pci_as_mapping_init(Object *owner, MemoryRegion *system_memory,
> -                            MemoryRegion *pci_address_space)
> +                            MemoryRegion *pci_address_space,
> +                            uint64_t reserved_memory_end)
>  {
> +    uint64_t *val;
> +    FWCfgState *fw_cfg = fw_cfg_find();
> +
>      /* Set to lower priority than RAM */
>      memory_region_add_subregion_overlap(system_memory, 0x0,
>                                          pci_address_space, -1);
> +    g_assert(fw_cfg);
> +    /*
> +     *  Align address at 1G, this makes sure it can be exactly covered
> +     *  with a PAT entry even when using huge pages.
> +     */
> +    val = g_malloc(sizeof(*val));
> +    *val = cpu_to_le64(ROUND_UP(reserved_memory_end, 0x1ULL << 30));
> +    fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val));
>  }
>  
>  void pc_acpi_init(const char *default_dsdt)
> diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c
> index 5d4e290..16205e7 100644
> --- a/hw/pci-host/piix.c
> +++ b/hw/pci-host/piix.c
> @@ -351,7 +351,8 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state,
>  
>      /* setup pci memory mapping */
>      pc_pci_as_mapping_init(OBJECT(f), f->system_memory,
> -                           f->pci_address_space);
> +                           f->pci_address_space,
> +                           0x100000000ULL + above_4g_mem_size);
>  
>      memory_region_init_alias(&f->smram_region, OBJECT(d), "smram-region",
>                               f->pci_address_space, 0xa0000, 0x20000);
> diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
> index d1792de..1293353 100644
> --- a/hw/pci-host/q35.c
> +++ b/hw/pci-host/q35.c
> @@ -353,7 +353,8 @@ static int mch_init(PCIDevice *d)
>  
>      /* setup pci memory mapping */
>      pc_pci_as_mapping_init(OBJECT(mch), mch->system_memory,
> -                           mch->pci_address_space);
> +                           mch->pci_address_space,
> +                           0x100000000ULL + mch->above_4g_mem_size);
>  
>      /* smram */
>      cpu_smm_register(&mch_set_smm, mch);
> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> index 8b3be3c..2663046 100644
> --- a/include/hw/i386/pc.h
> +++ b/include/hw/i386/pc.h
> @@ -130,7 +130,8 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
>  
>  
>  void pc_pci_as_mapping_init(Object *owner, MemoryRegion *system_memory,
> -                            MemoryRegion *pci_address_space);
> +                            MemoryRegion *pci_address_space,
> +                            uint64_t reserved_memory_end);
>  
>  FWCfgState *pc_memory_init(MemoryRegion *system_memory,
>                             const char *kernel_filename,
>
Igor Mammedov Nov. 12, 2013, 8:17 p.m. UTC | #2
On Tue, 12 Nov 2013 19:26:02 +0100
Paolo Bonzini <pbonzini@redhat.com> wrote:

> Il 12/11/2013 14:58, Igor Mammedov ha scritto:
> > 'etc/reserved-memory-end' will allow QEMU to tell BIOS where PCI
> > BARs mapping could safely start in high memory.
> > 
> > Allowing BIOS to start mapping 64-bit PCI BARs at address where it
> > wouldn't conflict with other mappings QEMU might place before it.
> > 
> > That permits QEMU to reserve extra address space before
> > 64-bit PCI hole for memory hotplug.
> 
> I may be royally wrong, but I think the new file should only be added to
> new machine types.  Otherwise, after migrating old machine types from
> new QEMU to old QEMU, you may end up with PCI BARs mapped outside the
> "PCI windows" that exist until before patch 1/2 of this series.
> 
> Does this make sense?
It will shift BARs only when 'etc/reserved-memory-end' != above4gram_end;
presently 'etc/reserved-memory-end' == above4gram_end, so it doesn't affect
new->old or old->new migration.
Just to be sure, I've done new->old and old->new migration testing, and it
looks like nothing is broken by the patch.

When 'etc/reserved-memory-end' > above4gram_end, new->old migration is
unlikely to break in any realistic setup, because the default 64-bit PCI hole
size in old QEMU is 1ULL << 62; to break it, the guest would need a BAR that
extends beyond the 1ULL << 62 boundary.

But your question has reminded me to make sure that memory hotplug is
disabled for old machine types, to avoid a clash between the memory hotplug
region and the old 64-bit PCI window.

> 
> Also, would it make sense to use the e820 interface that Gerd has added,
> instead of a new fw_cfg file?
Gerd suggested it before, but we came to an agreement that a dedicated file
is a cleaner approach than extending the standard e820 table with QEMU-specific
extensions. http://www.mail-archive.com/qemu-devel@nongnu.org/msg200359.html

> 
> Thanks,
> 
> Paolo
> 
> 
> > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > ---
> >  hw/i386/pc.c         | 14 +++++++++++++-
> >  hw/pci-host/piix.c   |  3 ++-
> >  hw/pci-host/q35.c    |  3 ++-
> >  include/hw/i386/pc.h |  3 ++-
> >  4 files changed, 19 insertions(+), 4 deletions(-)
> > 
> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> > index 6c82ada..b504047 100644
> > --- a/hw/i386/pc.c
> > +++ b/hw/i386/pc.c
> > @@ -1095,11 +1095,23 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
> >  
> >  /* setup pci memory address space mapping into system address space */
> >  void pc_pci_as_mapping_init(Object *owner, MemoryRegion *system_memory,
> > -                            MemoryRegion *pci_address_space)
> > +                            MemoryRegion *pci_address_space,
> > +                            uint64_t reserved_memory_end)
> >  {
> > +    uint64_t *val;
> > +    FWCfgState *fw_cfg = fw_cfg_find();
> > +
> >      /* Set to lower priority than RAM */
> >      memory_region_add_subregion_overlap(system_memory, 0x0,
> >                                          pci_address_space, -1);
> > +    g_assert(fw_cfg);
> > +    /*
> > +     *  Align address at 1G, this makes sure it can be exactly covered
> > +     *  with a PAT entry even when using huge pages.
> > +     */
> > +    val = g_malloc(sizeof(*val));
> > +    *val = cpu_to_le64(ROUND_UP(reserved_memory_end, 0x1ULL << 30));
> > +    fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val));
> >  }
> >  
> >  void pc_acpi_init(const char *default_dsdt)
> > diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c
> > index 5d4e290..16205e7 100644
> > --- a/hw/pci-host/piix.c
> > +++ b/hw/pci-host/piix.c
> > @@ -351,7 +351,8 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state,
> >  
> >      /* setup pci memory mapping */
> >      pc_pci_as_mapping_init(OBJECT(f), f->system_memory,
> > -                           f->pci_address_space);
> > +                           f->pci_address_space,
> > +                           0x100000000ULL + above_4g_mem_size);
> >  
> >      memory_region_init_alias(&f->smram_region, OBJECT(d), "smram-region",
> >                               f->pci_address_space, 0xa0000, 0x20000);
> > diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
> > index d1792de..1293353 100644
> > --- a/hw/pci-host/q35.c
> > +++ b/hw/pci-host/q35.c
> > @@ -353,7 +353,8 @@ static int mch_init(PCIDevice *d)
> >  
> >      /* setup pci memory mapping */
> >      pc_pci_as_mapping_init(OBJECT(mch), mch->system_memory,
> > -                           mch->pci_address_space);
> > +                           mch->pci_address_space,
> > +                           0x100000000ULL + mch->above_4g_mem_size);
> >  
> >      /* smram */
> >      cpu_smm_register(&mch_set_smm, mch);
> > diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> > index 8b3be3c..2663046 100644
> > --- a/include/hw/i386/pc.h
> > +++ b/include/hw/i386/pc.h
> > @@ -130,7 +130,8 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
> >  
> >  
> >  void pc_pci_as_mapping_init(Object *owner, MemoryRegion *system_memory,
> > -                            MemoryRegion *pci_address_space);
> > +                            MemoryRegion *pci_address_space,
> > +                            uint64_t reserved_memory_end);
> >  
> >  FWCfgState *pc_memory_init(MemoryRegion *system_memory,
> >                             const char *kernel_filename,
> > 
>
Michael S. Tsirkin Nov. 12, 2013, 10:10 p.m. UTC | #3
On Tue, Nov 12, 2013 at 07:26:02PM +0100, Paolo Bonzini wrote:
> Il 12/11/2013 14:58, Igor Mammedov ha scritto:
> > 'etc/reserved-memory-end' will allow QEMU to tell BIOS where PCI
> > BARs mapping could safely start in high memory.
> > 
> > Allowing BIOS to start mapping 64-bit PCI BARs at address where it
> > wouldn't conflict with other mappings QEMU might place before it.
> > 
> > That permits QEMU to reserve extra address space before
> > 64-bit PCI hole for memory hotplug.
> 
> I may be royally wrong, but I think the new file should only be added to
> new machine types.  Otherwise, after migrating old machine types from
> new QEMU to old QEMU, you may end up with PCI BARs mapped outside the
> "PCI windows" that exist until before patch 1/2 of this series.
> 
> Does this make sense?

Yes.
Generally, FW CFG entries must not be added/removed for a given machine type,
otherwise a guest that is migrated while reading them will
get a corrupted result: half old and half new.

> Also, would it make sense to use the e820 interface that Gerd has added,
> instead of a new fw_cfg file?
> 
> Thanks,
> 
> Paolo


This was already discussed on previous revisions of this patch.
Have you seen that discussion?

> 
> > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > ---
> >  hw/i386/pc.c         | 14 +++++++++++++-
> >  hw/pci-host/piix.c   |  3 ++-
> >  hw/pci-host/q35.c    |  3 ++-
> >  include/hw/i386/pc.h |  3 ++-
> >  4 files changed, 19 insertions(+), 4 deletions(-)
> > 
> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> > index 6c82ada..b504047 100644
> > --- a/hw/i386/pc.c
> > +++ b/hw/i386/pc.c
> > @@ -1095,11 +1095,23 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
> >  
> >  /* setup pci memory address space mapping into system address space */
> >  void pc_pci_as_mapping_init(Object *owner, MemoryRegion *system_memory,
> > -                            MemoryRegion *pci_address_space)
> > +                            MemoryRegion *pci_address_space,
> > +                            uint64_t reserved_memory_end)
> >  {
> > +    uint64_t *val;
> > +    FWCfgState *fw_cfg = fw_cfg_find();
> > +
> >      /* Set to lower priority than RAM */
> >      memory_region_add_subregion_overlap(system_memory, 0x0,
> >                                          pci_address_space, -1);
> > +    g_assert(fw_cfg);
> > +    /*
> > +     *  Align address at 1G, this makes sure it can be exactly covered
> > +     *  with a PAT entry even when using huge pages.
> > +     */
> > +    val = g_malloc(sizeof(*val));
> > +    *val = cpu_to_le64(ROUND_UP(reserved_memory_end, 0x1ULL << 30));
> > +    fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val));
> >  }
> >  
> >  void pc_acpi_init(const char *default_dsdt)
> > diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c
> > index 5d4e290..16205e7 100644
> > --- a/hw/pci-host/piix.c
> > +++ b/hw/pci-host/piix.c
> > @@ -351,7 +351,8 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state,
> >  
> >      /* setup pci memory mapping */
> >      pc_pci_as_mapping_init(OBJECT(f), f->system_memory,
> > -                           f->pci_address_space);
> > +                           f->pci_address_space,
> > +                           0x100000000ULL + above_4g_mem_size);
> >  
> >      memory_region_init_alias(&f->smram_region, OBJECT(d), "smram-region",
> >                               f->pci_address_space, 0xa0000, 0x20000);
> > diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
> > index d1792de..1293353 100644
> > --- a/hw/pci-host/q35.c
> > +++ b/hw/pci-host/q35.c
> > @@ -353,7 +353,8 @@ static int mch_init(PCIDevice *d)
> >  
> >      /* setup pci memory mapping */
> >      pc_pci_as_mapping_init(OBJECT(mch), mch->system_memory,
> > -                           mch->pci_address_space);
> > +                           mch->pci_address_space,
> > +                           0x100000000ULL + mch->above_4g_mem_size);
> >  
> >      /* smram */
> >      cpu_smm_register(&mch_set_smm, mch);
> > diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> > index 8b3be3c..2663046 100644
> > --- a/include/hw/i386/pc.h
> > +++ b/include/hw/i386/pc.h
> > @@ -130,7 +130,8 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
> >  
> >  
> >  void pc_pci_as_mapping_init(Object *owner, MemoryRegion *system_memory,
> > -                            MemoryRegion *pci_address_space);
> > +                            MemoryRegion *pci_address_space,
> > +                            uint64_t reserved_memory_end);
> >  
> >  FWCfgState *pc_memory_init(MemoryRegion *system_memory,
> >                             const char *kernel_filename,
> >
Paolo Bonzini Nov. 12, 2013, 11:03 p.m. UTC | #4
Il 12/11/2013 23:10, Michael S. Tsirkin ha scritto:
> This was already discussed on previous revisions of this patch.
> Have you seen that discussion?

Obviously not. :)

Paolo
Igor Mammedov Nov. 13, 2013, 12:04 p.m. UTC | #5
On Wed, 13 Nov 2013 00:10:39 +0200
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> On Tue, Nov 12, 2013 at 07:26:02PM +0100, Paolo Bonzini wrote:
> > Il 12/11/2013 14:58, Igor Mammedov ha scritto:
> > > 'etc/reserved-memory-end' will allow QEMU to tell BIOS where PCI
> > > BARs mapping could safely start in high memory.
> > > 
> > > Allowing BIOS to start mapping 64-bit PCI BARs at address where it
> > > wouldn't conflict with other mappings QEMU might place before it.
> > > 
> > > That permits QEMU to reserve extra address space before
> > > 64-bit PCI hole for memory hotplug.
> > 
> > I may be royally wrong, but I think the new file should only be added to
> > new machine types.  Otherwise, after migrating old machine types from
> > new QEMU to old QEMU, you may end up with PCI BARs mapped outside the
> > "PCI windows" that exist until before patch 1/2 of this series.
> > 
> > Does this make sense?
> 
> Yes.
> Generally FW CFG must not be added/removed for a given machine types,
> otherwise guest that is migrated while reading it will
> get a corrupted result: half old and half new.

Is it true for a file 'etc/reserved-memory-end' though?

I've debugged SeaBIOS to learn more about it, and new->old migration followed
by a reboot showed that the file is not found by SeaBIOS (since old
QEMU doesn't have it); as a result SeaBIOS falls back to the old behavior,
placing 64-bit PCI BARs right above ram_over_4G as intended.

And with 'etc/reserved-memory-end' == ram_over_4G_end, as it is in this
patch, there isn't any issue whatsoever.

It looks like there are no migration issues with files, provided SeaBIOS knows
how to deal with a missing file.
Michael S. Tsirkin Nov. 14, 2013, 7:40 a.m. UTC | #6
On Wed, Nov 13, 2013 at 01:04:10PM +0100, Igor Mammedov wrote:
> On Wed, 13 Nov 2013 00:10:39 +0200
> "Michael S. Tsirkin" <mst@redhat.com> wrote:
> 
> > On Tue, Nov 12, 2013 at 07:26:02PM +0100, Paolo Bonzini wrote:
> > > Il 12/11/2013 14:58, Igor Mammedov ha scritto:
> > > > 'etc/reserved-memory-end' will allow QEMU to tell BIOS where PCI
> > > > BARs mapping could safely start in high memory.
> > > > 
> > > > Allowing BIOS to start mapping 64-bit PCI BARs at address where it
> > > > wouldn't conflict with other mappings QEMU might place before it.
> > > > 
> > > > That permits QEMU to reserve extra address space before
> > > > 64-bit PCI hole for memory hotplug.
> > > 
> > > I may be royally wrong, but I think the new file should only be added to
> > > new machine types.  Otherwise, after migrating old machine types from
> > > new QEMU to old QEMU, you may end up with PCI BARs mapped outside the
> > > "PCI windows" that exist until before patch 1/2 of this series.
> > > 
> > > Does this make sense?
> > 
> > Yes.
> > Generally FW CFG must not be added/removed for a given machine types,
> > otherwise guest that is migrated while reading it will
> > get a corrupted result: half old and half new.
> 
> Is it true for a file 'etc/reserved-memory-end' though?

It's true for any FW CFG entry.

> I've debugged SeaBIOS to learn more about it, and new->old migration with
> following reboot, showed that file is not found by SeaBIOS (well since old
> QEMU doesn't have it), as result SeaBIOS fallbacks to the old behavior 
> placing 64-PCI bars right above ram_over_4G as it was intended.
> 
> And with 'etc/reserved-memory-end' == ram_over_4G_end as it is in this
> patch, there isn't issue whatsoever.
> 
> Looks like there is no migrations issues with files, provided SeaBIOS knows
> how to deal with a missing file.

Here's an example of the issue:

    qemu_cfg_read_entry(&count, QEMU_CFG_FILE_DIR, sizeof(count));

1. <<<<<<

    count = be32_to_cpu(count);

    u32 e;
    for (e = 0; e < count; e++) {
        struct QemuCfgFile qfile;
        qemu_cfg_read(&qfile, sizeof(qfile));
        qemu_romfile_add(qfile.name, be16_to_cpu(qfile.select)
                         , 0, be32_to_cpu(qfile.size));
    }

2. <<<<<<<


If migration happens during qemu_cfg_read_entry before point 1, you can
get a byte from old count and a byte from new, resulting in a corrupt
count.

If migration happens at point 2, you will get incorrect
mapping between file selector and name. System will likely
fail to boot.

There are probably other issues.

Bottom line: FW CFG is guest visible state. Migration must not change
it.

> -- 
> Regards,
>   Igor
Igor Mammedov Nov. 14, 2013, 1:37 p.m. UTC | #7
On Thu, 14 Nov 2013 09:40:12 +0200
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> On Wed, Nov 13, 2013 at 01:04:10PM +0100, Igor Mammedov wrote:
> > On Wed, 13 Nov 2013 00:10:39 +0200
> > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > 
> > > On Tue, Nov 12, 2013 at 07:26:02PM +0100, Paolo Bonzini wrote:
> > > > Il 12/11/2013 14:58, Igor Mammedov ha scritto:
> > > > > 'etc/reserved-memory-end' will allow QEMU to tell BIOS where PCI
> > > > > BARs mapping could safely start in high memory.
> > > > > 
> > > > > Allowing BIOS to start mapping 64-bit PCI BARs at address where it
> > > > > wouldn't conflict with other mappings QEMU might place before it.
> > > > > 
> > > > > That permits QEMU to reserve extra address space before
> > > > > 64-bit PCI hole for memory hotplug.
> > > > 
> > > > I may be royally wrong, but I think the new file should only be added to
> > > > new machine types.  Otherwise, after migrating old machine types from
> > > > new QEMU to old QEMU, you may end up with PCI BARs mapped outside the
> > > > "PCI windows" that exist until before patch 1/2 of this series.
> > > > 
> > > > Does this make sense?
> > > 
> > > Yes.
> > > Generally FW CFG must not be added/removed for a given machine types,
> > > otherwise guest that is migrated while reading it will
> > > get a corrupted result: half old and half new.
> > 
> > Is it true for a file 'etc/reserved-memory-end' though?
> 
> It's true for any FW CFG entry.
> 
> > I've debugged SeaBIOS to learn more about it, and new->old migration with
> > following reboot, showed that file is not found by SeaBIOS (well since old
> > QEMU doesn't have it), as result SeaBIOS fallbacks to the old behavior 
> > placing 64-PCI bars right above ram_over_4G as it was intended.
> > 
> > And with 'etc/reserved-memory-end' == ram_over_4G_end as it is in this
> > patch, there isn't issue whatsoever.
> > 
> > Looks like there is no migrations issues with files, provided SeaBIOS knows
> > how to deal with a missing file.
> 
> Here's an example of the issue:
> 
>     qemu_cfg_read_entry(&count, QEMU_CFG_FILE_DIR, sizeof(count));
> 
> 1. <<<<<<
> 
>     count = be32_to_cpu(count);
> 
>     u32 e;
>     for (e = 0; e < count; e++) {
>         struct QemuCfgFile qfile;
>         qemu_cfg_read(&qfile, sizeof(qfile));
>         qemu_romfile_add(qfile.name, be16_to_cpu(qfile.select)
>                          , 0, be32_to_cpu(qfile.size));
>     }
> 
> 2. <<<<<<<
> 
> 
> If migration happens during qemu_cfg_read_entry before point 1, you can
> get a byte from old count and a byte from new, resulting in a corrupt
> count.
> 
> If migration happens at point 2, you will get incorrect
> mapping between file selector and name. System will likely
> fail to boot.
> 
> There are probably other issues.
> 
> Bottom line: FW CFG is guest visible state. Migration must not change
> it.
Thanks for the explanation,
I'll resend this patch with machine compatibility changes.

> 
> > -- 
> > Regards,
> >   Igor
diff mbox

Patch

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 6c82ada..b504047 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1095,11 +1095,23 @@  PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
 
 /* setup pci memory address space mapping into system address space */
 void pc_pci_as_mapping_init(Object *owner, MemoryRegion *system_memory,
-                            MemoryRegion *pci_address_space)
+                            MemoryRegion *pci_address_space,
+                            uint64_t reserved_memory_end)
 {
+    uint64_t *val;
+    FWCfgState *fw_cfg = fw_cfg_find();
+
     /* Set to lower priority than RAM */
     memory_region_add_subregion_overlap(system_memory, 0x0,
                                         pci_address_space, -1);
+    g_assert(fw_cfg);
+    /*
+     *  Align address at 1G, this makes sure it can be exactly covered
+     *  with a PAT entry even when using huge pages.
+     */
+    val = g_malloc(sizeof(*val));
+    *val = cpu_to_le64(ROUND_UP(reserved_memory_end, 0x1ULL << 30));
+    fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val));
 }
 
 void pc_acpi_init(const char *default_dsdt)
diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c
index 5d4e290..16205e7 100644
--- a/hw/pci-host/piix.c
+++ b/hw/pci-host/piix.c
@@ -351,7 +351,8 @@  PCIBus *i440fx_init(PCII440FXState **pi440fx_state,
 
     /* setup pci memory mapping */
     pc_pci_as_mapping_init(OBJECT(f), f->system_memory,
-                           f->pci_address_space);
+                           f->pci_address_space,
+                           0x100000000ULL + above_4g_mem_size);
 
     memory_region_init_alias(&f->smram_region, OBJECT(d), "smram-region",
                              f->pci_address_space, 0xa0000, 0x20000);
diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index d1792de..1293353 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -353,7 +353,8 @@  static int mch_init(PCIDevice *d)
 
     /* setup pci memory mapping */
     pc_pci_as_mapping_init(OBJECT(mch), mch->system_memory,
-                           mch->pci_address_space);
+                           mch->pci_address_space,
+                           0x100000000ULL + mch->above_4g_mem_size);
 
     /* smram */
     cpu_smm_register(&mch_set_smm, mch);
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 8b3be3c..2663046 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -130,7 +130,8 @@  PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
 
 
 void pc_pci_as_mapping_init(Object *owner, MemoryRegion *system_memory,
-                            MemoryRegion *pci_address_space);
+                            MemoryRegion *pci_address_space,
+                            uint64_t reserved_memory_end);
 
 FWCfgState *pc_memory_init(MemoryRegion *system_memory,
                            const char *kernel_filename,