diff mbox

[v2] map 64-bit PCI BARs at location provided by emulator

Message ID 1381666424-25250-1-git-send-email-imammedo@redhat.com
State New
Headers show

Commit Message

Igor Mammedov Oct. 13, 2013, 12:13 p.m. UTC
Currently 64-bit PCI BARs are unconditionally mapped by BIOS right
over 4G + RamSizeOver4G location, which doesn't allow to reserve
extra space before 64-bit PCI window. For memory hotplug an extra
RAM space might be reserved after present 64-bit RAM end and BIOS
should map 64-bit PCI BARs after it.

Introduce "etc/pcimem64-start" romfile to provide BIOS a hint
where it should start mapping of 64-bit PCI BARs. If romfile is
missing, BIOS reverts to legacy behavior and starts mapping right
after high memory.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
---
v2:
  * place 64-bit window behind high RAM end if "etc/pcimem64-start"
    points below it.
---
 src/fw/pciinit.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

Comments

Michael S. Tsirkin Oct. 13, 2013, 12:31 p.m. UTC | #1
On Sun, Oct 13, 2013 at 02:13:44PM +0200, Igor Mammedov wrote:
> Currently 64-bit PCI BARs are unconditionally mapped by BIOS right
> over 4G + RamSizeOver4G location, which doesn't allow to reserve
> extra space before 64-bit PCI window. For memory hotplug an extra
> RAM space might be reserved after present 64-bit RAM end and BIOS
> should map 64-bit PCI BARs after it.
> 
> Introduce "etc/pcimem64-start" romfile to provide BIOS a hint
> where it should start mapping of 64-bit PCI BARs. If romfile is
> missing, BIOS reverts to legacy behavior and starts mapping right
> after high memory.
> 
> Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> ---
> v2:
>   * place 64-bit window behind high RAM end if "etc/pcimem64-start"
>     points below it.

Hmm I had an alternative suggestion of passing smbios
tables in from QEMU (seabios already has a mechanism for this).
We could then simply increase the existing RamSize variable.

What do you think about this idea?


> ---
>  src/fw/pciinit.c | 13 ++++++++++++-
>  1 file changed, 12 insertions(+), 1 deletion(-)
> 
> diff --git a/src/fw/pciinit.c b/src/fw/pciinit.c
> index b29db99..798309b 100644
> --- a/src/fw/pciinit.c
> +++ b/src/fw/pciinit.c
> @@ -18,6 +18,8 @@
>  #include "paravirt.h" // RamSize
>  #include "string.h" // memset
>  #include "util.h" // pci_setup
> +#include "byteorder.h" // le64_to_cpu
> +#include "romfile.h" // romfile_loadint
>  
>  #define PCI_DEVICE_MEM_MIN     0x1000
>  #define PCI_BRIDGE_IO_MIN      0x1000
> @@ -764,6 +766,15 @@ static void pci_bios_map_devices(struct pci_bus *busses)
>  {
>      if (pci_bios_init_root_regions(busses)) {
>          struct pci_region r64_mem, r64_pref;
> +        u64 ram64_end = 0x100000000ULL + RamSizeOver4G;
> +        u64 base64 = le64_to_cpu(romfile_loadint("etc/pcimem64-start",
> +                                 ram64_end));
> +        if (base64 < ram64_end) {
> +            dprintf(1, "ignorig etc/pcimem64-start [0x%llx] below present RAM, "
> +                       "placing 64-bit PCI window behind RAM end: %llx",
> +                        base64, ram64_end);
> +            base64 = ram64_end;
> +        }
>          r64_mem.list.first = NULL;
>          r64_pref.list.first = NULL;
>          pci_region_migrate_64bit_entries(&busses[0].r[PCI_REGION_TYPE_MEM],
> @@ -779,7 +790,7 @@ static void pci_bios_map_devices(struct pci_bus *busses)
>          u64 align_mem = pci_region_align(&r64_mem);
>          u64 align_pref = pci_region_align(&r64_pref);
>  
> -        r64_mem.base = ALIGN(0x100000000LL + RamSizeOver4G, align_mem);
> +        r64_mem.base = ALIGN(base64, align_mem);
>          r64_pref.base = ALIGN(r64_mem.base + sum_mem, align_pref);
>          pcimem64_start = r64_mem.base;
>          pcimem64_end = r64_pref.base + sum_pref;
> -- 
> 1.8.3.1
Igor Mammedov Oct. 13, 2013, 3:11 p.m. UTC | #2
On Sun, 13 Oct 2013 15:31:23 +0300
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> On Sun, Oct 13, 2013 at 02:13:44PM +0200, Igor Mammedov wrote:
> > Currently 64-bit PCI BARs are unconditionally mapped by BIOS right
> > over 4G + RamSizeOver4G location, which doesn't allow to reserve
> > extra space before 64-bit PCI window. For memory hotplug an extra
> > RAM space might be reserved after present 64-bit RAM end and BIOS
> > should map 64-bit PCI BARs after it.
> > 
> > Introduce "etc/pcimem64-start" romfile to provide BIOS a hint
> > where it should start mapping of 64-bit PCI BARs. If romfile is
> > missing, BIOS reverts to legacy behavior and starts mapping right
> > after high memory.
> > 
> > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > ---
> > v2:
> >   * place 64-bit window behind high RAM end if "etc/pcimem64-start"
> >     points below it.
> 
> Hmm I had an alternative suggestion of passing smbios
> tables in from QEMU (seabios already has a mechanism for this).
> We could then simply increase the existing RamSize variable.
> 
> What do you think about this idea?
There is several issues with RamSizeOver4G I see:
1. As Gerd pointed out it is also used to setup a part of e820 tables.
   It possibly could be also changed see "[Qemu-devel] [RfC PATCH] e820: pass
   high memory too" but it will be more complicated to keep seabios
   backward/forward compatible if we use RamSizeOver4G for passing.

2. It doesn't scale if we are going to use more than 1Tb future.
   Extending RamSizeOver4G value via additional CMOS port wasn't welcomed
   either, see
http://www.seabios.org/pipermail/seabios/2010-September/000911.html

It looks like fw_cfg interface is preferred one and "etc/pcimem64-start"
approach doesn't have backward/forward compatibility issues.

> 
> 
> > ---
> >  src/fw/pciinit.c | 13 ++++++++++++-
> >  1 file changed, 12 insertions(+), 1 deletion(-)
> > 
> > diff --git a/src/fw/pciinit.c b/src/fw/pciinit.c
> > index b29db99..798309b 100644
> > --- a/src/fw/pciinit.c
> > +++ b/src/fw/pciinit.c
> > @@ -18,6 +18,8 @@
> >  #include "paravirt.h" // RamSize
> >  #include "string.h" // memset
> >  #include "util.h" // pci_setup
> > +#include "byteorder.h" // le64_to_cpu
> > +#include "romfile.h" // romfile_loadint
> >  
> >  #define PCI_DEVICE_MEM_MIN     0x1000
> >  #define PCI_BRIDGE_IO_MIN      0x1000
> > @@ -764,6 +766,15 @@ static void pci_bios_map_devices(struct pci_bus *busses)
> >  {
> >      if (pci_bios_init_root_regions(busses)) {
> >          struct pci_region r64_mem, r64_pref;
> > +        u64 ram64_end = 0x100000000ULL + RamSizeOver4G;
> > +        u64 base64 = le64_to_cpu(romfile_loadint("etc/pcimem64-start",
> > +                                 ram64_end));
> > +        if (base64 < ram64_end) {
> > +            dprintf(1, "ignorig etc/pcimem64-start [0x%llx] below present RAM, "
> > +                       "placing 64-bit PCI window behind RAM end: %llx",
> > +                        base64, ram64_end);
> > +            base64 = ram64_end;
> > +        }
> >          r64_mem.list.first = NULL;
> >          r64_pref.list.first = NULL;
> >          pci_region_migrate_64bit_entries(&busses[0].r[PCI_REGION_TYPE_MEM],
> > @@ -779,7 +790,7 @@ static void pci_bios_map_devices(struct pci_bus *busses)
> >          u64 align_mem = pci_region_align(&r64_mem);
> >          u64 align_pref = pci_region_align(&r64_pref);
> >  
> > -        r64_mem.base = ALIGN(0x100000000LL + RamSizeOver4G, align_mem);
> > +        r64_mem.base = ALIGN(base64, align_mem);
> >          r64_pref.base = ALIGN(r64_mem.base + sum_mem, align_pref);
> >          pcimem64_start = r64_mem.base;
> >          pcimem64_end = r64_pref.base + sum_pref;
> > -- 
> > 1.8.3.1
>
Kevin O'Connor Oct. 13, 2013, 3:15 p.m. UTC | #3
On Sun, Oct 13, 2013 at 03:31:23PM +0300, Michael S. Tsirkin wrote:
> On Sun, Oct 13, 2013 at 02:13:44PM +0200, Igor Mammedov wrote:
> > Currently 64-bit PCI BARs are unconditionally mapped by BIOS right
> > over 4G + RamSizeOver4G location, which doesn't allow to reserve
> > extra space before 64-bit PCI window. For memory hotplug an extra
> > RAM space might be reserved after present 64-bit RAM end and BIOS
> > should map 64-bit PCI BARs after it.
> > 
> > Introduce "etc/pcimem64-start" romfile to provide BIOS a hint
> > where it should start mapping of 64-bit PCI BARs. If romfile is
> > missing, BIOS reverts to legacy behavior and starts mapping right
> > after high memory.
> > 
> > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > ---
> > v2:
> >   * place 64-bit window behind high RAM end if "etc/pcimem64-start"
> >     points below it.
> 
> Hmm I had an alternative suggestion of passing smbios
> tables in from QEMU (seabios already has a mechanism for this).

Irrespective of this discussion, I do think the SMBIOS tables should
be passed in from QEMU.

-Kevin
Michael S. Tsirkin Oct. 13, 2013, 3:59 p.m. UTC | #4
On Sun, Oct 13, 2013 at 05:11:54PM +0200, Igor Mammedov wrote:
> On Sun, 13 Oct 2013 15:31:23 +0300
> "Michael S. Tsirkin" <mst@redhat.com> wrote:
> 
> > On Sun, Oct 13, 2013 at 02:13:44PM +0200, Igor Mammedov wrote:
> > > Currently 64-bit PCI BARs are unconditionally mapped by BIOS right
> > > over 4G + RamSizeOver4G location, which doesn't allow to reserve
> > > extra space before 64-bit PCI window. For memory hotplug an extra
> > > RAM space might be reserved after present 64-bit RAM end and BIOS
> > > should map 64-bit PCI BARs after it.
> > > 
> > > Introduce "etc/pcimem64-start" romfile to provide BIOS a hint
> > > where it should start mapping of 64-bit PCI BARs. If romfile is
> > > missing, BIOS reverts to legacy behavior and starts mapping right
> > > after high memory.
> > > 
> > > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > > ---
> > > v2:
> > >   * place 64-bit window behind high RAM end if "etc/pcimem64-start"
> > >     points below it.
> > 
> > Hmm I had an alternative suggestion of passing smbios
> > tables in from QEMU (seabios already has a mechanism for this).
> > We could then simply increase the existing RamSize variable.
> > 
> > What do you think about this idea?
> There is several issues with RamSizeOver4G I see:
> 1. As Gerd pointed out it is also used to setup a part of e820 tables.
>    It possibly could be also changed see "[Qemu-devel] [RfC PATCH] e820: pass
>    high memory too" but it will be more complicated to keep seabios
>    backward/forward compatible if we use RamSizeOver4G for passing.

What are the compatibility worries, exactly?

The way I see it, if QEMU wants to be compatible it can't
use memory hotplug. In that case RamSizeOver4G will have
the old meaning which should work without issues.

> 2. It doesn't scale if we are going to use more than 1Tb future.
>    Extending RamSizeOver4G value via additional CMOS port wasn't welcomed
>    either, see
> http://www.seabios.org/pipermail/seabios/2010-September/000911.html
> 
> It looks like fw_cfg interface is preferred one and "etc/pcimem64-start"
> approach doesn't have backward/forward compatibility issues.

Hmm but you aren't passing in memory size - pcimem64-start
merely tells bios where to put PCI devices.
So how would it help?

> > 
> > 
> > > ---
> > >  src/fw/pciinit.c | 13 ++++++++++++-
> > >  1 file changed, 12 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/src/fw/pciinit.c b/src/fw/pciinit.c
> > > index b29db99..798309b 100644
> > > --- a/src/fw/pciinit.c
> > > +++ b/src/fw/pciinit.c
> > > @@ -18,6 +18,8 @@
> > >  #include "paravirt.h" // RamSize
> > >  #include "string.h" // memset
> > >  #include "util.h" // pci_setup
> > > +#include "byteorder.h" // le64_to_cpu
> > > +#include "romfile.h" // romfile_loadint
> > >  
> > >  #define PCI_DEVICE_MEM_MIN     0x1000
> > >  #define PCI_BRIDGE_IO_MIN      0x1000
> > > @@ -764,6 +766,15 @@ static void pci_bios_map_devices(struct pci_bus *busses)
> > >  {
> > >      if (pci_bios_init_root_regions(busses)) {
> > >          struct pci_region r64_mem, r64_pref;
> > > +        u64 ram64_end = 0x100000000ULL + RamSizeOver4G;
> > > +        u64 base64 = le64_to_cpu(romfile_loadint("etc/pcimem64-start",
> > > +                                 ram64_end));
> > > +        if (base64 < ram64_end) {
> > > +            dprintf(1, "ignorig etc/pcimem64-start [0x%llx] below present RAM, "
> > > +                       "placing 64-bit PCI window behind RAM end: %llx",
> > > +                        base64, ram64_end);
> > > +            base64 = ram64_end;
> > > +        }
> > >          r64_mem.list.first = NULL;
> > >          r64_pref.list.first = NULL;
> > >          pci_region_migrate_64bit_entries(&busses[0].r[PCI_REGION_TYPE_MEM],
> > > @@ -779,7 +790,7 @@ static void pci_bios_map_devices(struct pci_bus *busses)
> > >          u64 align_mem = pci_region_align(&r64_mem);
> > >          u64 align_pref = pci_region_align(&r64_pref);
> > >  
> > > -        r64_mem.base = ALIGN(0x100000000LL + RamSizeOver4G, align_mem);
> > > +        r64_mem.base = ALIGN(base64, align_mem);
> > >          r64_pref.base = ALIGN(r64_mem.base + sum_mem, align_pref);
> > >          pcimem64_start = r64_mem.base;
> > >          pcimem64_end = r64_pref.base + sum_pref;
> > > -- 
> > > 1.8.3.1
> > 
> 
> 
> -- 
> Regards,
>   Igor
Igor Mammedov Oct. 13, 2013, 4:23 p.m. UTC | #5
On Sun, 13 Oct 2013 18:59:20 +0300
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> On Sun, Oct 13, 2013 at 05:11:54PM +0200, Igor Mammedov wrote:
> > On Sun, 13 Oct 2013 15:31:23 +0300
> > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > 
> > > On Sun, Oct 13, 2013 at 02:13:44PM +0200, Igor Mammedov wrote:
> > > > Currently 64-bit PCI BARs are unconditionally mapped by BIOS right
> > > > over 4G + RamSizeOver4G location, which doesn't allow to reserve
> > > > extra space before 64-bit PCI window. For memory hotplug an extra
> > > > RAM space might be reserved after present 64-bit RAM end and BIOS
> > > > should map 64-bit PCI BARs after it.
> > > > 
> > > > Introduce "etc/pcimem64-start" romfile to provide BIOS a hint
> > > > where it should start mapping of 64-bit PCI BARs. If romfile is
> > > > missing, BIOS reverts to legacy behavior and starts mapping right
> > > > after high memory.
> > > > 
> > > > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > > > ---
> > > > v2:
> > > >   * place 64-bit window behind high RAM end if "etc/pcimem64-start"
> > > >     points below it.
> > > 
> > > Hmm I had an alternative suggestion of passing smbios
> > > tables in from QEMU (seabios already has a mechanism for this).
> > > We could then simply increase the existing RamSize variable.
> > > 
> > > What do you think about this idea?
> > There is several issues with RamSizeOver4G I see:
> > 1. As Gerd pointed out it is also used to setup a part of e820 tables.
> >    It possibly could be also changed see "[Qemu-devel] [RfC PATCH] e820: pass
> >    high memory too" but it will be more complicated to keep seabios
> >    backward/forward compatible if we use RamSizeOver4G for passing.
> 
> What are the compatibility worries, exactly?
> 
> The way I see it, if QEMU wants to be compatible it can't
> use memory hotplug. In that case RamSizeOver4G will have
> the old meaning which should work without issues.
QEMU can't know what BIOS version is used and error out in wrong case, and that
could cause hard to guess what going wrong bugs in the guest.

> 
> > 2. It doesn't scale if we are going to use more than 1Tb future.
> >    Extending RamSizeOver4G value via additional CMOS port wasn't welcomed
> >    either, see
> > http://www.seabios.org/pipermail/seabios/2010-September/000911.html
> > 
> > It looks like fw_cfg interface is preferred one and "etc/pcimem64-start"
> > approach doesn't have backward/forward compatibility issues.
> 
> Hmm but you aren't passing in memory size - pcimem64-start
> merely tells bios where to put PCI devices.
> So how would it help?
If a lot of memory is reserved for memory hotplug (terabytes), that could
put window behind what RamSizeOver4G could address it.

> > > 
> > > 
> > > > ---
> > > >  src/fw/pciinit.c | 13 ++++++++++++-
> > > >  1 file changed, 12 insertions(+), 1 deletion(-)
> > > > 
> > > > diff --git a/src/fw/pciinit.c b/src/fw/pciinit.c
> > > > index b29db99..798309b 100644
> > > > --- a/src/fw/pciinit.c
> > > > +++ b/src/fw/pciinit.c
> > > > @@ -18,6 +18,8 @@
> > > >  #include "paravirt.h" // RamSize
> > > >  #include "string.h" // memset
> > > >  #include "util.h" // pci_setup
> > > > +#include "byteorder.h" // le64_to_cpu
> > > > +#include "romfile.h" // romfile_loadint
> > > >  
> > > >  #define PCI_DEVICE_MEM_MIN     0x1000
> > > >  #define PCI_BRIDGE_IO_MIN      0x1000
> > > > @@ -764,6 +766,15 @@ static void pci_bios_map_devices(struct pci_bus *busses)
> > > >  {
> > > >      if (pci_bios_init_root_regions(busses)) {
> > > >          struct pci_region r64_mem, r64_pref;
> > > > +        u64 ram64_end = 0x100000000ULL + RamSizeOver4G;
> > > > +        u64 base64 = le64_to_cpu(romfile_loadint("etc/pcimem64-start",
> > > > +                                 ram64_end));
> > > > +        if (base64 < ram64_end) {
> > > > +            dprintf(1, "ignorig etc/pcimem64-start [0x%llx] below present RAM, "
> > > > +                       "placing 64-bit PCI window behind RAM end: %llx",
> > > > +                        base64, ram64_end);
> > > > +            base64 = ram64_end;
> > > > +        }
> > > >          r64_mem.list.first = NULL;
> > > >          r64_pref.list.first = NULL;
> > > >          pci_region_migrate_64bit_entries(&busses[0].r[PCI_REGION_TYPE_MEM],
> > > > @@ -779,7 +790,7 @@ static void pci_bios_map_devices(struct pci_bus *busses)
> > > >          u64 align_mem = pci_region_align(&r64_mem);
> > > >          u64 align_pref = pci_region_align(&r64_pref);
> > > >  
> > > > -        r64_mem.base = ALIGN(0x100000000LL + RamSizeOver4G, align_mem);
> > > > +        r64_mem.base = ALIGN(base64, align_mem);
> > > >          r64_pref.base = ALIGN(r64_mem.base + sum_mem, align_pref);
> > > >          pcimem64_start = r64_mem.base;
> > > >          pcimem64_end = r64_pref.base + sum_pref;
> > > > -- 
> > > > 1.8.3.1
> > > 
> > 
> > 
> > -- 
> > Regards,
> >   Igor
Michael S. Tsirkin Oct. 13, 2013, 4:46 p.m. UTC | #6
On Sun, Oct 13, 2013 at 06:23:28PM +0200, Igor Mammedov wrote:
> On Sun, 13 Oct 2013 18:59:20 +0300
> "Michael S. Tsirkin" <mst@redhat.com> wrote:
> 
> > On Sun, Oct 13, 2013 at 05:11:54PM +0200, Igor Mammedov wrote:
> > > On Sun, 13 Oct 2013 15:31:23 +0300
> > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > 
> > > > On Sun, Oct 13, 2013 at 02:13:44PM +0200, Igor Mammedov wrote:
> > > > > Currently 64-bit PCI BARs are unconditionally mapped by BIOS right
> > > > > over 4G + RamSizeOver4G location, which doesn't allow to reserve
> > > > > extra space before 64-bit PCI window. For memory hotplug an extra
> > > > > RAM space might be reserved after present 64-bit RAM end and BIOS
> > > > > should map 64-bit PCI BARs after it.
> > > > > 
> > > > > Introduce "etc/pcimem64-start" romfile to provide BIOS a hint
> > > > > where it should start mapping of 64-bit PCI BARs. If romfile is
> > > > > missing, BIOS reverts to legacy behavior and starts mapping right
> > > > > after high memory.
> > > > > 
> > > > > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > > > > ---
> > > > > v2:
> > > > >   * place 64-bit window behind high RAM end if "etc/pcimem64-start"
> > > > >     points below it.
> > > > 
> > > > Hmm I had an alternative suggestion of passing smbios
> > > > tables in from QEMU (seabios already has a mechanism for this).
> > > > We could then simply increase the existing RamSize variable.
> > > > 
> > > > What do you think about this idea?
> > > There is several issues with RamSizeOver4G I see:
> > > 1. As Gerd pointed out it is also used to setup a part of e820 tables.
> > >    It possibly could be also changed see "[Qemu-devel] [RfC PATCH] e820: pass
> > >    high memory too" but it will be more complicated to keep seabios
> > >    backward/forward compatible if we use RamSizeOver4G for passing.
> > 
> > What are the compatibility worries, exactly?
> > 
> > The way I see it, if QEMU wants to be compatible it can't
> > use memory hotplug. In that case RamSizeOver4G will have
> > the old meaning which should work without issues.
> QEMU can't know what BIOS version is used and error out in wrong case, and that
> could cause hard to guess what going wrong bugs in the guest.
> 
> > 
> > > 2. It doesn't scale if we are going to use more than 1Tb future.
> > >    Extending RamSizeOver4G value via additional CMOS port wasn't welcomed
> > >    either, see
> > > http://www.seabios.org/pipermail/seabios/2010-September/000911.html
> > > 
> > > It looks like fw_cfg interface is preferred one and "etc/pcimem64-start"
> > > approach doesn't have backward/forward compatibility issues.
> > 
> > Hmm but you aren't passing in memory size - pcimem64-start
> > merely tells bios where to put PCI devices.
> > So how would it help?
> If a lot of memory is reserved for memory hotplug (terabytes), that could
> put window behind what RamSizeOver4G could address it.

All we care about for a compatible bios is that it
keep working in old hardware configurations.
IMO there's no real need for old BIOS to work
with e.g. memory hotplug - it's reasonable
to ask users to update fitmware if they want
to use this feature.

> > > > 
> > > > 
> > > > > ---
> > > > >  src/fw/pciinit.c | 13 ++++++++++++-
> > > > >  1 file changed, 12 insertions(+), 1 deletion(-)
> > > > > 
> > > > > diff --git a/src/fw/pciinit.c b/src/fw/pciinit.c
> > > > > index b29db99..798309b 100644
> > > > > --- a/src/fw/pciinit.c
> > > > > +++ b/src/fw/pciinit.c
> > > > > @@ -18,6 +18,8 @@
> > > > >  #include "paravirt.h" // RamSize
> > > > >  #include "string.h" // memset
> > > > >  #include "util.h" // pci_setup
> > > > > +#include "byteorder.h" // le64_to_cpu
> > > > > +#include "romfile.h" // romfile_loadint
> > > > >  
> > > > >  #define PCI_DEVICE_MEM_MIN     0x1000
> > > > >  #define PCI_BRIDGE_IO_MIN      0x1000
> > > > > @@ -764,6 +766,15 @@ static void pci_bios_map_devices(struct pci_bus *busses)
> > > > >  {
> > > > >      if (pci_bios_init_root_regions(busses)) {
> > > > >          struct pci_region r64_mem, r64_pref;
> > > > > +        u64 ram64_end = 0x100000000ULL + RamSizeOver4G;
> > > > > +        u64 base64 = le64_to_cpu(romfile_loadint("etc/pcimem64-start",
> > > > > +                                 ram64_end));
> > > > > +        if (base64 < ram64_end) {
> > > > > +            dprintf(1, "ignorig etc/pcimem64-start [0x%llx] below present RAM, "
> > > > > +                       "placing 64-bit PCI window behind RAM end: %llx",
> > > > > +                        base64, ram64_end);
> > > > > +            base64 = ram64_end;
> > > > > +        }
> > > > >          r64_mem.list.first = NULL;
> > > > >          r64_pref.list.first = NULL;
> > > > >          pci_region_migrate_64bit_entries(&busses[0].r[PCI_REGION_TYPE_MEM],
> > > > > @@ -779,7 +790,7 @@ static void pci_bios_map_devices(struct pci_bus *busses)
> > > > >          u64 align_mem = pci_region_align(&r64_mem);
> > > > >          u64 align_pref = pci_region_align(&r64_pref);
> > > > >  
> > > > > -        r64_mem.base = ALIGN(0x100000000LL + RamSizeOver4G, align_mem);
> > > > > +        r64_mem.base = ALIGN(base64, align_mem);
> > > > >          r64_pref.base = ALIGN(r64_mem.base + sum_mem, align_pref);
> > > > >          pcimem64_start = r64_mem.base;
> > > > >          pcimem64_end = r64_pref.base + sum_pref;
> > > > > -- 
> > > > > 1.8.3.1
> > > > 
> > > 
> > > 
> > > -- 
> > > Regards,
> > >   Igor
> 
> 
> -- 
> Regards,
>   Igor
Igor Mammedov Oct. 13, 2013, 5:33 p.m. UTC | #7
On Sun, 13 Oct 2013 19:46:09 +0300
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> On Sun, Oct 13, 2013 at 06:23:28PM +0200, Igor Mammedov wrote:
> > On Sun, 13 Oct 2013 18:59:20 +0300
> > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > 
> > > On Sun, Oct 13, 2013 at 05:11:54PM +0200, Igor Mammedov wrote:
> > > > On Sun, 13 Oct 2013 15:31:23 +0300
> > > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > 
> > > > > On Sun, Oct 13, 2013 at 02:13:44PM +0200, Igor Mammedov wrote:
> > > > > > Currently 64-bit PCI BARs are unconditionally mapped by BIOS right
> > > > > > over 4G + RamSizeOver4G location, which doesn't allow to reserve
> > > > > > extra space before 64-bit PCI window. For memory hotplug an extra
> > > > > > RAM space might be reserved after present 64-bit RAM end and BIOS
> > > > > > should map 64-bit PCI BARs after it.
> > > > > > 
> > > > > > Introduce "etc/pcimem64-start" romfile to provide BIOS a hint
> > > > > > where it should start mapping of 64-bit PCI BARs. If romfile is
> > > > > > missing, BIOS reverts to legacy behavior and starts mapping right
> > > > > > after high memory.
> > > > > > 
> > > > > > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > > > > > ---
> > > > > > v2:
> > > > > >   * place 64-bit window behind high RAM end if "etc/pcimem64-start"
> > > > > >     points below it.
> > > > > 
> > > > > Hmm I had an alternative suggestion of passing smbios
> > > > > tables in from QEMU (seabios already has a mechanism for this).
> > > > > We could then simply increase the existing RamSize variable.
> > > > > 
> > > > > What do you think about this idea?
> > > > There is several issues with RamSizeOver4G I see:
> > > > 1. As Gerd pointed out it is also used to setup a part of e820 tables.
> > > >    It possibly could be also changed see "[Qemu-devel] [RfC PATCH] e820: pass
> > > >    high memory too" but it will be more complicated to keep seabios
> > > >    backward/forward compatible if we use RamSizeOver4G for passing.
> > > 
> > > What are the compatibility worries, exactly?
> > > 
> > > The way I see it, if QEMU wants to be compatible it can't
> > > use memory hotplug. In that case RamSizeOver4G will have
> > > the old meaning which should work without issues.
> > QEMU can't know what BIOS version is used and error out in wrong case, and that
> > could cause hard to guess what going wrong bugs in the guest.
> > 
> > > 
> > > > 2. It doesn't scale if we are going to use more than 1Tb future.
> > > >    Extending RamSizeOver4G value via additional CMOS port wasn't welcomed
> > > >    either, see
> > > > http://www.seabios.org/pipermail/seabios/2010-September/000911.html
> > > > 
> > > > It looks like fw_cfg interface is preferred one and "etc/pcimem64-start"
> > > > approach doesn't have backward/forward compatibility issues.
> > > 
> > > Hmm but you aren't passing in memory size - pcimem64-start
> > > merely tells bios where to put PCI devices.
> > > So how would it help?
> > If a lot of memory is reserved for memory hotplug (terabytes), that could
> > put window behind what RamSizeOver4G could address it.
> 
> All we care about for a compatible bios is that it
> keep working in old hardware configurations.
> IMO there's no real need for old BIOS to work
> with e.g. memory hotplug - it's reasonable
> to ask users to update fitmware if they want
> to use this feature.
Fine with me if we don't care about strange hangs if something misused.

But RamSizeOver4G still doesn't allow correctly place PCI window if large
amount of memory present/reserved.

> 
> > > > > 
> > > > > 
> > > > > > ---
> > > > > >  src/fw/pciinit.c | 13 ++++++++++++-
> > > > > >  1 file changed, 12 insertions(+), 1 deletion(-)
> > > > > > 
> > > > > > diff --git a/src/fw/pciinit.c b/src/fw/pciinit.c
> > > > > > index b29db99..798309b 100644
> > > > > > --- a/src/fw/pciinit.c
> > > > > > +++ b/src/fw/pciinit.c
> > > > > > @@ -18,6 +18,8 @@
> > > > > >  #include "paravirt.h" // RamSize
> > > > > >  #include "string.h" // memset
> > > > > >  #include "util.h" // pci_setup
> > > > > > +#include "byteorder.h" // le64_to_cpu
> > > > > > +#include "romfile.h" // romfile_loadint
> > > > > >  
> > > > > >  #define PCI_DEVICE_MEM_MIN     0x1000
> > > > > >  #define PCI_BRIDGE_IO_MIN      0x1000
> > > > > > @@ -764,6 +766,15 @@ static void pci_bios_map_devices(struct pci_bus *busses)
> > > > > >  {
> > > > > >      if (pci_bios_init_root_regions(busses)) {
> > > > > >          struct pci_region r64_mem, r64_pref;
> > > > > > +        u64 ram64_end = 0x100000000ULL + RamSizeOver4G;
> > > > > > +        u64 base64 = le64_to_cpu(romfile_loadint("etc/pcimem64-start",
> > > > > > +                                 ram64_end));
> > > > > > +        if (base64 < ram64_end) {
> > > > > > +            dprintf(1, "ignorig etc/pcimem64-start [0x%llx] below present RAM, "
> > > > > > +                       "placing 64-bit PCI window behind RAM end: %llx",
> > > > > > +                        base64, ram64_end);
> > > > > > +            base64 = ram64_end;
> > > > > > +        }
> > > > > >          r64_mem.list.first = NULL;
> > > > > >          r64_pref.list.first = NULL;
> > > > > >          pci_region_migrate_64bit_entries(&busses[0].r[PCI_REGION_TYPE_MEM],
> > > > > > @@ -779,7 +790,7 @@ static void pci_bios_map_devices(struct pci_bus *busses)
> > > > > >          u64 align_mem = pci_region_align(&r64_mem);
> > > > > >          u64 align_pref = pci_region_align(&r64_pref);
> > > > > >  
> > > > > > -        r64_mem.base = ALIGN(0x100000000LL + RamSizeOver4G, align_mem);
> > > > > > +        r64_mem.base = ALIGN(base64, align_mem);
> > > > > >          r64_pref.base = ALIGN(r64_mem.base + sum_mem, align_pref);
> > > > > >          pcimem64_start = r64_mem.base;
> > > > > >          pcimem64_end = r64_pref.base + sum_pref;
> > > > > > -- 
> > > > > > 1.8.3.1
> > > > > 
> > > > 
> > > > 
> > > > -- 
> > > > Regards,
> > > >   Igor
> > 
> > 
> > -- 
> > Regards,
> >   Igor
Igor Mammedov Oct. 13, 2013, 6:19 p.m. UTC | #8
On Sun, 13 Oct 2013 19:33:19 +0200
Igor Mammedov <imammedo@redhat.com> wrote:

> On Sun, 13 Oct 2013 19:46:09 +0300
> "Michael S. Tsirkin" <mst@redhat.com> wrote:
> 
> > On Sun, Oct 13, 2013 at 06:23:28PM +0200, Igor Mammedov wrote:
> > > On Sun, 13 Oct 2013 18:59:20 +0300
> > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > 
> > > > On Sun, Oct 13, 2013 at 05:11:54PM +0200, Igor Mammedov wrote:
> > > > > On Sun, 13 Oct 2013 15:31:23 +0300
> > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > > 
> > > > > > On Sun, Oct 13, 2013 at 02:13:44PM +0200, Igor Mammedov wrote:
> > > > > > > Currently 64-bit PCI BARs are unconditionally mapped by BIOS right
> > > > > > > over 4G + RamSizeOver4G location, which doesn't allow to reserve
> > > > > > > extra space before 64-bit PCI window. For memory hotplug an extra
> > > > > > > RAM space might be reserved after present 64-bit RAM end and BIOS
> > > > > > > should map 64-bit PCI BARs after it.
> > > > > > > 
> > > > > > > Introduce "etc/pcimem64-start" romfile to provide BIOS a hint
> > > > > > > where it should start mapping of 64-bit PCI BARs. If romfile is
> > > > > > > missing, BIOS reverts to legacy behavior and starts mapping right
> > > > > > > after high memory.
> > > > > > > 
> > > > > > > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > > > > > > ---
> > > > > > > v2:
> > > > > > >   * place 64-bit window behind high RAM end if "etc/pcimem64-start"
> > > > > > >     points below it.
> > > > > > 
> > > > > > Hmm I had an alternative suggestion of passing smbios
> > > > > > tables in from QEMU (seabios already has a mechanism for this).
> > > > > > We could then simply increase the existing RamSize variable.
> > > > > > 
> > > > > > What do you think about this idea?
> > > > > There is several issues with RamSizeOver4G I see:
> > > > > 1. As Gerd pointed out it is also used to setup a part of e820 tables.
> > > > >    It possibly could be also changed see "[Qemu-devel] [RfC PATCH] e820: pass
> > > > >    high memory too" but it will be more complicated to keep seabios
> > > > >    backward/forward compatible if we use RamSizeOver4G for passing.
> > > > 
> > > > What are the compatibility worries, exactly?
> > > > 
> > > > The way I see it, if QEMU wants to be compatible it can't
> > > > use memory hotplug. In that case RamSizeOver4G will have
> > > > the old meaning which should work without issues.
> > > QEMU can't know what BIOS version is used and error out in wrong case, and that
> > > could cause hard to guess what going wrong bugs in the guest.
> > > 
> > > > 
> > > > > 2. It doesn't scale if we are going to use more than 1Tb future.
> > > > >    Extending RamSizeOver4G value via additional CMOS port wasn't welcomed
> > > > >    either, see
> > > > > http://www.seabios.org/pipermail/seabios/2010-September/000911.html
> > > > > 
> > > > > It looks like fw_cfg interface is preferred one and "etc/pcimem64-start"
> > > > > approach doesn't have backward/forward compatibility issues.
> > > > 
> > > > Hmm but you aren't passing in memory size - pcimem64-start
> > > > merely tells bios where to put PCI devices.
> > > > So how would it help?
> > > If a lot of memory is reserved for memory hotplug (terabytes), that could
> > > put window behind what RamSizeOver4G could address it.
> > 
> > All we care about for a compatible bios is that it
> > keep working in old hardware configurations.
> > IMO there's no real need for old BIOS to work
> > with e.g. memory hotplug - it's reasonable
> > to ask users to update fitmware if they want
> > to use this feature.
> Fine with me if we don't care about strange hangs if something misused.
> 
> But RamSizeOver4G still doesn't allow correctly place PCI window if large
> amount of memory present/reserved.
BTW: I don't think it's good practice to change semantics of an old interface
in general. It's less confusing to leave old interface as is (obsoleting it
eventually) and use a new one.

> > 
> > > > > > 
> > > > > > 
> > > > > > > ---
> > > > > > >  src/fw/pciinit.c | 13 ++++++++++++-
> > > > > > >  1 file changed, 12 insertions(+), 1 deletion(-)
> > > > > > > 
> > > > > > > diff --git a/src/fw/pciinit.c b/src/fw/pciinit.c
> > > > > > > index b29db99..798309b 100644
> > > > > > > --- a/src/fw/pciinit.c
> > > > > > > +++ b/src/fw/pciinit.c
> > > > > > > @@ -18,6 +18,8 @@
> > > > > > >  #include "paravirt.h" // RamSize
> > > > > > >  #include "string.h" // memset
> > > > > > >  #include "util.h" // pci_setup
> > > > > > > +#include "byteorder.h" // le64_to_cpu
> > > > > > > +#include "romfile.h" // romfile_loadint
> > > > > > >  
> > > > > > >  #define PCI_DEVICE_MEM_MIN     0x1000
> > > > > > >  #define PCI_BRIDGE_IO_MIN      0x1000
> > > > > > > @@ -764,6 +766,15 @@ static void pci_bios_map_devices(struct pci_bus *busses)
> > > > > > >  {
> > > > > > >      if (pci_bios_init_root_regions(busses)) {
> > > > > > >          struct pci_region r64_mem, r64_pref;
> > > > > > > +        u64 ram64_end = 0x100000000ULL + RamSizeOver4G;
> > > > > > > +        u64 base64 = le64_to_cpu(romfile_loadint("etc/pcimem64-start",
> > > > > > > +                                 ram64_end));
> > > > > > > +        if (base64 < ram64_end) {
> > > > > > > +            dprintf(1, "ignorig etc/pcimem64-start [0x%llx] below present RAM, "
> > > > > > > +                       "placing 64-bit PCI window behind RAM end: %llx",
> > > > > > > +                        base64, ram64_end);
> > > > > > > +            base64 = ram64_end;
> > > > > > > +        }
> > > > > > >          r64_mem.list.first = NULL;
> > > > > > >          r64_pref.list.first = NULL;
> > > > > > >          pci_region_migrate_64bit_entries(&busses[0].r[PCI_REGION_TYPE_MEM],
> > > > > > > @@ -779,7 +790,7 @@ static void pci_bios_map_devices(struct pci_bus *busses)
> > > > > > >          u64 align_mem = pci_region_align(&r64_mem);
> > > > > > >          u64 align_pref = pci_region_align(&r64_pref);
> > > > > > >  
> > > > > > > -        r64_mem.base = ALIGN(0x100000000LL + RamSizeOver4G, align_mem);
> > > > > > > +        r64_mem.base = ALIGN(base64, align_mem);
> > > > > > >          r64_pref.base = ALIGN(r64_mem.base + sum_mem, align_pref);
> > > > > > >          pcimem64_start = r64_mem.base;
> > > > > > >          pcimem64_end = r64_pref.base + sum_pref;
> > > > > > > -- 
> > > > > > > 1.8.3.1
> > > > > > 
> > > > > 
> > > > > 
> > > > > -- 
> > > > > Regards,
> > > > >   Igor
> > > 
> > > 
> > > -- 
> > > Regards,
> > >   Igor
> 
> 
> -- 
> Regards,
>   Igor
> 
> _______________________________________________
> SeaBIOS mailing list
> SeaBIOS@seabios.org
> http://www.seabios.org/mailman/listinfo/seabios
Kevin O'Connor Oct. 13, 2013, 7:53 p.m. UTC | #9
On Sun, Oct 13, 2013 at 08:19:16PM +0200, Igor Mammedov wrote:
> On Sun, 13 Oct 2013 19:33:19 +0200
> Igor Mammedov <imammedo@redhat.com> wrote:
> > On Sun, 13 Oct 2013 19:46:09 +0300
> > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > On Sun, Oct 13, 2013 at 06:23:28PM +0200, Igor Mammedov wrote:
> > > > On Sun, 13 Oct 2013 18:59:20 +0300
> > > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > All we care about for a compatible bios is that it
> > > keep working in old hardware configurations.
> > > IMO there's no real need for old BIOS to work
> > > with e.g. memory hotplug - it's reasonable
> > > to ask users to update fitmware if they want
> > > to use this feature.
> > Fine with me if we don't care about strange hangs if something misused.
> > 
> > But RamSizeOver4G still doesn't allow correctly place PCI window if large
> > amount of memory present/reserved.
> BTW: I don't think it's good practice to change semantics of an old interface
> in general. It's less confusing to leave old interface as is (obsoleting it
> eventually) and use a new one.

I agree.  The RamSizeOver4G (as passed via three cmos bytes) is ugly -
lets not make it more complex.  Any new info can be passed via fw_cfg.
Indeed, I'd happily accept patches that migrated the existing cmos
parameters to new fw_cfg entries.

-Kevin
Michael S. Tsirkin Oct. 13, 2013, 8:28 p.m. UTC | #10
On Sun, Oct 13, 2013 at 07:33:19PM +0200, Igor Mammedov wrote:
> On Sun, 13 Oct 2013 19:46:09 +0300
> "Michael S. Tsirkin" <mst@redhat.com> wrote:
> 
> > On Sun, Oct 13, 2013 at 06:23:28PM +0200, Igor Mammedov wrote:
> > > On Sun, 13 Oct 2013 18:59:20 +0300
> > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > 
> > > > On Sun, Oct 13, 2013 at 05:11:54PM +0200, Igor Mammedov wrote:
> > > > > On Sun, 13 Oct 2013 15:31:23 +0300
> > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > > 
> > > > > > On Sun, Oct 13, 2013 at 02:13:44PM +0200, Igor Mammedov wrote:
> > > > > > > Currently 64-bit PCI BARs are unconditionally mapped by BIOS right
> > > > > > > over 4G + RamSizeOver4G location, which doesn't allow to reserve
> > > > > > > extra space before 64-bit PCI window. For memory hotplug an extra
> > > > > > > RAM space might be reserved after present 64-bit RAM end and BIOS
> > > > > > > should map 64-bit PCI BARs after it.
> > > > > > > 
> > > > > > > Introduce "etc/pcimem64-start" romfile to provide BIOS a hint
> > > > > > > where it should start mapping of 64-bit PCI BARs. If romfile is
> > > > > > > missing, BIOS reverts to legacy behavior and starts mapping right
> > > > > > > after high memory.
> > > > > > > 
> > > > > > > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > > > > > > ---
> > > > > > > v2:
> > > > > > >   * place 64-bit window behind high RAM end if "etc/pcimem64-start"
> > > > > > >     points below it.
> > > > > > 
> > > > > > Hmm I had an alternative suggestion of passing smbios
> > > > > > tables in from QEMU (seabios already has a mechanism for this).
> > > > > > We could then simply increase the existing RamSize variable.
> > > > > > 
> > > > > > What do you think about this idea?
> > > > > There is several issues with RamSizeOver4G I see:
> > > > > 1. As Gerd pointed out it is also used to setup a part of e820 tables.
> > > > >    It possibly could be also changed see "[Qemu-devel] [RfC PATCH] e820: pass
> > > > >    high memory too" but it will be more complicated to keep seabios
> > > > >    backward/forward compatible if we use RamSizeOver4G for passing.
> > > > 
> > > > What are the compatibility worries, exactly?
> > > > 
> > > > The way I see it, if QEMU wants to be compatible it can't
> > > > use memory hotplug. In that case RamSizeOver4G will have
> > > > the old meaning which should work without issues.
> > > QEMU can't know what BIOS version is used and error out in wrong case, and that
> > > could cause hard to guess what going wrong bugs in the guest.
> > > 
> > > > 
> > > > > 2. It doesn't scale if we are going to use more than 1Tb future.
> > > > >    Extending RamSizeOver4G value via additional CMOS port wasn't welcomed
> > > > >    either, see
> > > > > http://www.seabios.org/pipermail/seabios/2010-September/000911.html
> > > > > 
> > > > > It looks like fw_cfg interface is preferred one and "etc/pcimem64-start"
> > > > > approach doesn't have backward/forward compatibility issues.
> > > > 
> > > > Hmm but you aren't passing in memory size - pcimem64-start
> > > > merely tells bios where to put PCI devices.
> > > > So how would it help?
> > > If a lot of memory is reserved for memory hotplug (terabytes), that could
> > > put window behind what RamSizeOver4G could address it.
> > 
> > All we care about for a compatible bios is that it
> > keep working in old hardware configurations.
> > IMO there's no real need for old BIOS to work
> > with e.g. memory hotplug - it's reasonable
> > to ask users to update fitmware if they want
> > to use this feature.
> Fine with me if we don't care about strange hangs if something misused.
> 
> But RamSizeOver4G still doesn't allow correctly place PCI window if large
> amount of memory present/reserved.

I'm fine with adding a new fw cfg really.
I am however not sure we should have a single new base64 flag.
Hardware has a set of ranges where guest can
put PCI devices - would we be better off with
an interface that lists all these ranges?

I'm not saying we should but this needs a bit of thought.

> > 
> > > > > > 
> > > > > > 
> > > > > > > ---
> > > > > > >  src/fw/pciinit.c | 13 ++++++++++++-
> > > > > > >  1 file changed, 12 insertions(+), 1 deletion(-)
> > > > > > > 
> > > > > > > diff --git a/src/fw/pciinit.c b/src/fw/pciinit.c
> > > > > > > index b29db99..798309b 100644
> > > > > > > --- a/src/fw/pciinit.c
> > > > > > > +++ b/src/fw/pciinit.c
> > > > > > > @@ -18,6 +18,8 @@
> > > > > > >  #include "paravirt.h" // RamSize
> > > > > > >  #include "string.h" // memset
> > > > > > >  #include "util.h" // pci_setup
> > > > > > > +#include "byteorder.h" // le64_to_cpu
> > > > > > > +#include "romfile.h" // romfile_loadint
> > > > > > >  
> > > > > > >  #define PCI_DEVICE_MEM_MIN     0x1000
> > > > > > >  #define PCI_BRIDGE_IO_MIN      0x1000
> > > > > > > @@ -764,6 +766,15 @@ static void pci_bios_map_devices(struct pci_bus *busses)
> > > > > > >  {
> > > > > > >      if (pci_bios_init_root_regions(busses)) {
> > > > > > >          struct pci_region r64_mem, r64_pref;
> > > > > > > +        u64 ram64_end = 0x100000000ULL + RamSizeOver4G;
> > > > > > > +        u64 base64 = le64_to_cpu(romfile_loadint("etc/pcimem64-start",
> > > > > > > +                                 ram64_end));
> > > > > > > +        if (base64 < ram64_end) {
> > > > > > > +            dprintf(1, "ignorig etc/pcimem64-start [0x%llx] below present RAM, "
> > > > > > > +                       "placing 64-bit PCI window behind RAM end: %llx",
> > > > > > > +                        base64, ram64_end);
> > > > > > > +            base64 = ram64_end;
> > > > > > > +        }
> > > > > > >          r64_mem.list.first = NULL;
> > > > > > >          r64_pref.list.first = NULL;
> > > > > > >          pci_region_migrate_64bit_entries(&busses[0].r[PCI_REGION_TYPE_MEM],
> > > > > > > @@ -779,7 +790,7 @@ static void pci_bios_map_devices(struct pci_bus *busses)
> > > > > > >          u64 align_mem = pci_region_align(&r64_mem);
> > > > > > >          u64 align_pref = pci_region_align(&r64_pref);
> > > > > > >  
> > > > > > > -        r64_mem.base = ALIGN(0x100000000LL + RamSizeOver4G, align_mem);
> > > > > > > +        r64_mem.base = ALIGN(base64, align_mem);
> > > > > > >          r64_pref.base = ALIGN(r64_mem.base + sum_mem, align_pref);
> > > > > > >          pcimem64_start = r64_mem.base;
> > > > > > >          pcimem64_end = r64_pref.base + sum_pref;
> > > > > > > -- 
> > > > > > > 1.8.3.1
> > > > > > 
> > > > > 
> > > > > 
> > > > > -- 
> > > > > Regards,
> > > > >   Igor
> > > 
> > > 
> > > -- 
> > > Regards,
> > >   Igor
> 
> 
> -- 
> Regards,
>   Igor
Gerd Hoffmann Oct. 14, 2013, 8:01 a.m. UTC | #11
Hi,

> > > But RamSizeOver4G still doesn't allow correctly place PCI window if large
> > > amount of memory present/reserved.
> > BTW: I don't think it's good practice to change semantics of an old interface
> > in general. It's less confusing to leave old interface as is (obsoleting it
> > eventually) and use a new one.

> I agree.  The RamSizeOver4G (as passed via three cmos bytes) is ugly -
> lets not make it more complex.

Also it would mix up two different (although related) things:  Actual
memory and reserving address space for memory (which then might be
hotplugged).

> Any new info can be passed via fw_cfg.

Plan is to extend the existing e820 table passed from qemu.  Today it
carries reservations only.  We can stick ram ranges in there too.

cheers,
  Gerd
Igor Mammedov Oct. 14, 2013, 10:27 a.m. UTC | #12
On Sun, 13 Oct 2013 23:28:47 +0300
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> On Sun, Oct 13, 2013 at 07:33:19PM +0200, Igor Mammedov wrote:
> > On Sun, 13 Oct 2013 19:46:09 +0300
> > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > 
> > > On Sun, Oct 13, 2013 at 06:23:28PM +0200, Igor Mammedov wrote:
> > > > On Sun, 13 Oct 2013 18:59:20 +0300
> > > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > 
> > > > > On Sun, Oct 13, 2013 at 05:11:54PM +0200, Igor Mammedov wrote:
> > > > > > On Sun, 13 Oct 2013 15:31:23 +0300
> > > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > > > 
> > > > > > > On Sun, Oct 13, 2013 at 02:13:44PM +0200, Igor Mammedov wrote:
> > > > > > > > Currently 64-bit PCI BARs are unconditionally mapped by BIOS right
> > > > > > > > over 4G + RamSizeOver4G location, which doesn't allow to reserve
> > > > > > > > extra space before 64-bit PCI window. For memory hotplug an extra
> > > > > > > > RAM space might be reserved after present 64-bit RAM end and BIOS
> > > > > > > > should map 64-bit PCI BARs after it.
> > > > > > > > 
> > > > > > > > Introduce "etc/pcimem64-start" romfile to provide BIOS a hint
> > > > > > > > where it should start mapping of 64-bit PCI BARs. If romfile is
> > > > > > > > missing, BIOS reverts to legacy behavior and starts mapping right
> > > > > > > > after high memory.
> > > > > > > > 
> > > > > > > > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > > > > > > > ---
> > > > > > > > v2:
> > > > > > > >   * place 64-bit window behind high RAM end if "etc/pcimem64-start"
> > > > > > > >     points below it.
> > > > > > > 
> > > > > > > Hmm I had an alternative suggestion of passing smbios
> > > > > > > tables in from QEMU (seabios already has a mechanism for this).
> > > > > > > We could then simply increase the existing RamSize variable.
> > > > > > > 
> > > > > > > What do you think about this idea?
> > > > > > There is several issues with RamSizeOver4G I see:
> > > > > > 1. As Gerd pointed out it is also used to setup a part of e820 tables.
> > > > > >    It possibly could be also changed see "[Qemu-devel] [RfC PATCH] e820: pass
> > > > > >    high memory too" but it will be more complicated to keep seabios
> > > > > >    backward/forward compatible if we use RamSizeOver4G for passing.
> > > > > 
> > > > > What are the compatibility worries, exactly?
> > > > > 
> > > > > The way I see it, if QEMU wants to be compatible it can't
> > > > > use memory hotplug. In that case RamSizeOver4G will have
> > > > > the old meaning which should work without issues.
> > > > QEMU can't know what BIOS version is used and error out in wrong case, and that
> > > > could cause hard to guess what going wrong bugs in the guest.
> > > > 
> > > > > 
> > > > > > 2. It doesn't scale if we are going to use more than 1Tb future.
> > > > > >    Extending RamSizeOver4G value via additional CMOS port wasn't welcomed
> > > > > >    either, see
> > > > > > http://www.seabios.org/pipermail/seabios/2010-September/000911.html
> > > > > > 
> > > > > > It looks like fw_cfg interface is preferred one and "etc/pcimem64-start"
> > > > > > approach doesn't have backward/forward compatibility issues.
> > > > > 
> > > > > Hmm but you aren't passing in memory size - pcimem64-start
> > > > > merely tells bios where to put PCI devices.
> > > > > So how would it help?
> > > > If a lot of memory is reserved for memory hotplug (terabytes), that could
> > > > put window behind what RamSizeOver4G could address it.
> > > 
> > > All we care about for a compatible bios is that it
> > > keep working in old hardware configurations.
> > > IMO there's no real need for old BIOS to work
> > > with e.g. memory hotplug - it's reasonable
> > > to ask users to update fitmware if they want
> > > to use this feature.
> > Fine with me if we don't care about strange hangs if something misused.
> > 
> > But RamSizeOver4G still doesn't allow correctly place PCI window if large
> > amount of memory present/reserved.
> 
> I'm fine with adding a new fw cfg really.
> I am however not sure we should have a single new base64 flag.
> Hardware has a set of ranges where guest can
> put PCI devices - would we be better off with
> an interface that lists all these ranges?
>
> I'm not saying we should but this needs a bit of thought.
I've offered to reuse your "etc/pci-info" fw_cfg that were rejected earlier,
but it still seems no go: http://permalink.gmane.org/gmane.comp.emulators.qemu/237307

And there is slight difference between PCI holes and PCI address space mappings
represented by MemoryRegion-s in QEMU.

Basically we only need to inform BIOS where to PCI address spaces start and
simple "etc/pcimem64-start" + "etc/pcimem32-start" are just fine for that.
And for now (memory hotplug) we need only the first one, the second one is
very well hardcoded in Seabios/QEMU.

It's less complicated than maintaining structure interface (which would be
difficult to keep compatible if it will be changing in future).

A more flexible approach would be a separate table (like e820 table, but only
for PCI ranges), but it seems a bit of overkill for now (I can't picture
a need for more than 2 PCI address space mappings).
 
> > > 
> > > > > > > 
> > > > > > > 
> > > > > > > > ---
> > > > > > > >  src/fw/pciinit.c | 13 ++++++++++++-
> > > > > > > >  1 file changed, 12 insertions(+), 1 deletion(-)
> > > > > > > > 
> > > > > > > > diff --git a/src/fw/pciinit.c b/src/fw/pciinit.c
> > > > > > > > index b29db99..798309b 100644
> > > > > > > > --- a/src/fw/pciinit.c
> > > > > > > > +++ b/src/fw/pciinit.c
> > > > > > > > @@ -18,6 +18,8 @@
> > > > > > > >  #include "paravirt.h" // RamSize
> > > > > > > >  #include "string.h" // memset
> > > > > > > >  #include "util.h" // pci_setup
> > > > > > > > +#include "byteorder.h" // le64_to_cpu
> > > > > > > > +#include "romfile.h" // romfile_loadint
> > > > > > > >  
> > > > > > > >  #define PCI_DEVICE_MEM_MIN     0x1000
> > > > > > > >  #define PCI_BRIDGE_IO_MIN      0x1000
> > > > > > > > @@ -764,6 +766,15 @@ static void pci_bios_map_devices(struct pci_bus *busses)
> > > > > > > >  {
> > > > > > > >      if (pci_bios_init_root_regions(busses)) {
> > > > > > > >          struct pci_region r64_mem, r64_pref;
> > > > > > > > +        u64 ram64_end = 0x100000000ULL + RamSizeOver4G;
> > > > > > > > +        u64 base64 = le64_to_cpu(romfile_loadint("etc/pcimem64-start",
> > > > > > > > +                                 ram64_end));
> > > > > > > > +        if (base64 < ram64_end) {
> > > > > > > > +            dprintf(1, "ignorig etc/pcimem64-start [0x%llx] below present RAM, "
> > > > > > > > +                       "placing 64-bit PCI window behind RAM end: %llx",
> > > > > > > > +                        base64, ram64_end);
> > > > > > > > +            base64 = ram64_end;
> > > > > > > > +        }
> > > > > > > >          r64_mem.list.first = NULL;
> > > > > > > >          r64_pref.list.first = NULL;
> > > > > > > >          pci_region_migrate_64bit_entries(&busses[0].r[PCI_REGION_TYPE_MEM],
> > > > > > > > @@ -779,7 +790,7 @@ static void pci_bios_map_devices(struct pci_bus *busses)
> > > > > > > >          u64 align_mem = pci_region_align(&r64_mem);
> > > > > > > >          u64 align_pref = pci_region_align(&r64_pref);
> > > > > > > >  
> > > > > > > > -        r64_mem.base = ALIGN(0x100000000LL + RamSizeOver4G, align_mem);
> > > > > > > > +        r64_mem.base = ALIGN(base64, align_mem);
> > > > > > > >          r64_pref.base = ALIGN(r64_mem.base + sum_mem, align_pref);
> > > > > > > >          pcimem64_start = r64_mem.base;
> > > > > > > >          pcimem64_end = r64_pref.base + sum_pref;
> > > > > > > > -- 
> > > > > > > > 1.8.3.1
> > > > > > > 
> > > > > > 
> > > > > > 
> > > > > > -- 
> > > > > > Regards,
> > > > > >   Igor
> > > > 
> > > > 
> > > > -- 
> > > > Regards,
> > > >   Igor
> > 
> > 
> > -- 
> > Regards,
> >   Igor
Michael S. Tsirkin Oct. 14, 2013, 11 a.m. UTC | #13
On Mon, Oct 14, 2013 at 12:27:26PM +0200, Igor Mammedov wrote:
> On Sun, 13 Oct 2013 23:28:47 +0300
> "Michael S. Tsirkin" <mst@redhat.com> wrote:
> 
> > On Sun, Oct 13, 2013 at 07:33:19PM +0200, Igor Mammedov wrote:
> > > On Sun, 13 Oct 2013 19:46:09 +0300
> > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > 
> > > > On Sun, Oct 13, 2013 at 06:23:28PM +0200, Igor Mammedov wrote:
> > > > > On Sun, 13 Oct 2013 18:59:20 +0300
> > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > > 
> > > > > > On Sun, Oct 13, 2013 at 05:11:54PM +0200, Igor Mammedov wrote:
> > > > > > > On Sun, 13 Oct 2013 15:31:23 +0300
> > > > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > > > > 
> > > > > > > > On Sun, Oct 13, 2013 at 02:13:44PM +0200, Igor Mammedov wrote:
> > > > > > > > > Currently 64-bit PCI BARs are unconditionally mapped by BIOS right
> > > > > > > > > over 4G + RamSizeOver4G location, which doesn't allow to reserve
> > > > > > > > > extra space before 64-bit PCI window. For memory hotplug an extra
> > > > > > > > > RAM space might be reserved after present 64-bit RAM end and BIOS
> > > > > > > > > should map 64-bit PCI BARs after it.
> > > > > > > > > 
> > > > > > > > > Introduce "etc/pcimem64-start" romfile to provide BIOS a hint
> > > > > > > > > where it should start mapping of 64-bit PCI BARs. If romfile is
> > > > > > > > > missing, BIOS reverts to legacy behavior and starts mapping right
> > > > > > > > > after high memory.
> > > > > > > > > 
> > > > > > > > > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > > > > > > > > ---
> > > > > > > > > v2:
> > > > > > > > >   * place 64-bit window behind high RAM end if "etc/pcimem64-start"
> > > > > > > > >     points below it.
> > > > > > > > 
> > > > > > > > Hmm I had an alternative suggestion of passing smbios
> > > > > > > > tables in from QEMU (seabios already has a mechanism for this).
> > > > > > > > We could then simply increase the existing RamSize variable.
> > > > > > > > 
> > > > > > > > What do you think about this idea?
> > > > > > > There is several issues with RamSizeOver4G I see:
> > > > > > > 1. As Gerd pointed out it is also used to setup a part of e820 tables.
> > > > > > >    It possibly could be also changed see "[Qemu-devel] [RfC PATCH] e820: pass
> > > > > > >    high memory too" but it will be more complicated to keep seabios
> > > > > > >    backward/forward compatible if we use RamSizeOver4G for passing.
> > > > > > 
> > > > > > What are the compatibility worries, exactly?
> > > > > > 
> > > > > > The way I see it, if QEMU wants to be compatible it can't
> > > > > > use memory hotplug. In that case RamSizeOver4G will have
> > > > > > the old meaning which should work without issues.
> > > > > QEMU can't know what BIOS version is used and error out in wrong case, and that
> > > > > could cause hard to guess what going wrong bugs in the guest.
> > > > > 
> > > > > > 
> > > > > > > 2. It doesn't scale if we are going to use more than 1Tb future.
> > > > > > >    Extending RamSizeOver4G value via additional CMOS port wasn't welcomed
> > > > > > >    either, see
> > > > > > > http://www.seabios.org/pipermail/seabios/2010-September/000911.html
> > > > > > > 
> > > > > > > It looks like fw_cfg interface is preferred one and "etc/pcimem64-start"
> > > > > > > approach doesn't have backward/forward compatibility issues.
> > > > > > 
> > > > > > Hmm but you aren't passing in memory size - pcimem64-start
> > > > > > merely tells bios where to put PCI devices.
> > > > > > So how would it help?
> > > > > If a lot of memory is reserved for memory hotplug (terabytes), that could
> > > > > put window behind what RamSizeOver4G could address it.
> > > > 
> > > > All we care about for a compatible bios is that it
> > > > keep working in old hardware configurations.
> > > > IMO there's no real need for old BIOS to work
> > > > with e.g. memory hotplug - it's reasonable
> > > > to ask users to update fitmware if they want
> > > > to use this feature.
> > > Fine with me if we don't care about strange hangs if something misused.
> > > 
> > > But RamSizeOver4G still doesn't allow correctly place PCI window if large
> > > amount of memory present/reserved.
> > 
> > I'm fine with adding a new fw cfg really.
> > I am however not sure we should have a single new base64 flag.
> > Hardware has a set of ranges where guest can
> > put PCI devices - would we be better off with
> > an interface that lists all these ranges?
> >
> > I'm not saying we should but this needs a bit of thought.
> I've offered to reuse your "etc/pci-info" fw_cfg that were rejected earlier,
> but it still seems no go: http://permalink.gmane.org/gmane.comp.emulators.qemu/237307
> 
> And there is slight difference between PCI holes and PCI address space mappings
> represented by MemoryRegion-s in QEMU.
> 
> Basically we only need to inform BIOS where to PCI address spaces start and
> simple "etc/pcimem64-start" + "etc/pcimem32-start" are just fine for that.
> And for now (memory hotplug) we need only the first one, the second one is
> very well hardcoded in Seabios/QEMU.

Hmm okay but the only reason we want to control this is
because it conflicts with memory hotplug, no?
I vaguely recall we discussed just passing amount of
hot-pluggable memory in, in the past, but I don't
remember the reason we decided against it.
Could you refresh my memory?

> It's less complicated than maintaining structure interface (which would be
> difficult to keep compatible if it will be changing in future).
> 
> A more flexible approach would be a separate table (like e820 table, but only
> for PCI ranges), but it seems a bit of overkill for now (I can't picture
> a need for more than 2 PCI address space mappings).

It's not unthinkable.
Multiple ECAM regions (for multi-root systems) can make holes in the address space.
Also, we just ignore everything above the ioapic, but that's
not a must, we could maybe use address space above ioapic.

> > > > 
> > > > > > > > 
> > > > > > > > 
> > > > > > > > > ---
> > > > > > > > >  src/fw/pciinit.c | 13 ++++++++++++-
> > > > > > > > >  1 file changed, 12 insertions(+), 1 deletion(-)
> > > > > > > > > 
> > > > > > > > > diff --git a/src/fw/pciinit.c b/src/fw/pciinit.c
> > > > > > > > > index b29db99..798309b 100644
> > > > > > > > > --- a/src/fw/pciinit.c
> > > > > > > > > +++ b/src/fw/pciinit.c
> > > > > > > > > @@ -18,6 +18,8 @@
> > > > > > > > >  #include "paravirt.h" // RamSize
> > > > > > > > >  #include "string.h" // memset
> > > > > > > > >  #include "util.h" // pci_setup
> > > > > > > > > +#include "byteorder.h" // le64_to_cpu
> > > > > > > > > +#include "romfile.h" // romfile_loadint
> > > > > > > > >  
> > > > > > > > >  #define PCI_DEVICE_MEM_MIN     0x1000
> > > > > > > > >  #define PCI_BRIDGE_IO_MIN      0x1000
> > > > > > > > > @@ -764,6 +766,15 @@ static void pci_bios_map_devices(struct pci_bus *busses)
> > > > > > > > >  {
> > > > > > > > >      if (pci_bios_init_root_regions(busses)) {
> > > > > > > > >          struct pci_region r64_mem, r64_pref;
> > > > > > > > > +        u64 ram64_end = 0x100000000ULL + RamSizeOver4G;
> > > > > > > > > +        u64 base64 = le64_to_cpu(romfile_loadint("etc/pcimem64-start",
> > > > > > > > > +                                 ram64_end));
> > > > > > > > > +        if (base64 < ram64_end) {
> > > > > > > > > +            dprintf(1, "ignorig etc/pcimem64-start [0x%llx] below present RAM, "
> > > > > > > > > +                       "placing 64-bit PCI window behind RAM end: %llx",
> > > > > > > > > +                        base64, ram64_end);
> > > > > > > > > +            base64 = ram64_end;
> > > > > > > > > +        }
> > > > > > > > >          r64_mem.list.first = NULL;
> > > > > > > > >          r64_pref.list.first = NULL;
> > > > > > > > >          pci_region_migrate_64bit_entries(&busses[0].r[PCI_REGION_TYPE_MEM],
> > > > > > > > > @@ -779,7 +790,7 @@ static void pci_bios_map_devices(struct pci_bus *busses)
> > > > > > > > >          u64 align_mem = pci_region_align(&r64_mem);
> > > > > > > > >          u64 align_pref = pci_region_align(&r64_pref);
> > > > > > > > >  
> > > > > > > > > -        r64_mem.base = ALIGN(0x100000000LL + RamSizeOver4G, align_mem);
> > > > > > > > > +        r64_mem.base = ALIGN(base64, align_mem);
> > > > > > > > >          r64_pref.base = ALIGN(r64_mem.base + sum_mem, align_pref);
> > > > > > > > >          pcimem64_start = r64_mem.base;
> > > > > > > > >          pcimem64_end = r64_pref.base + sum_pref;
> > > > > > > > > -- 
> > > > > > > > > 1.8.3.1
> > > > > > > > 
> > > > > > > 
> > > > > > > 
> > > > > > > -- 
> > > > > > > Regards,
> > > > > > >   Igor
> > > > > 
> > > > > 
> > > > > -- 
> > > > > Regards,
> > > > >   Igor
> > > 
> > > 
> > > -- 
> > > Regards,
> > >   Igor
Gerd Hoffmann Oct. 14, 2013, 12:16 p.m. UTC | #14
Hi,

> > And there is slight difference between PCI holes and PCI address space mappings
> > represented by MemoryRegion-s in QEMU.
> > 
> > Basically we only need to inform BIOS where to PCI address spaces start and
> > simple "etc/pcimem64-start" + "etc/pcimem32-start" are just fine for that.
> > And for now (memory hotplug) we need only the first one, the second one is
> > very well hardcoded in Seabios/QEMU.

Yes, 32bit hole is basically the [ end-of-memory -> ioapic ] range.
Can't see any reason to make that configurable, it is quite unlikely
that this ever changes.

For the 64bit window it makes sense to configure it, to give qemu a
little more control about the address space layout.

> Hmm okay but the only reason we want to control this is
> because it conflicts with memory hotplug, no?

> I vaguely recall we discussed just passing amount of
> hot-pluggable memory in, in the past, but I don't
> remember the reason we decided against it.
> Could you refresh my memory?

To me it makes more sense to just go the direct route and say "please
put the 64bit bars at this location" rather than indirect "we might want
hotplug $thatmuch memory" and then expect the bios to leave that much
room.

If we ever want reserve more address space for other reasons it'll be
easier with the direct variant as the bios doesn't need to be aware that
we'll need some address space for $newfeature too.

> > A more flexible approach would be a separate table (like e820 table, but only
> > for PCI ranges), but it seems a bit of overkill for now (I can't picture
> > a need for more than 2 PCI address space mappings).
> 
> It's not unthinkable.
> Multiple ECAM regions (for multi-root systems) can make holes in the address space.

Sounds pretty theoretic ...

> Also, we just ignore everything above the ioapic, but that's
> not a must, we could maybe use address space above ioapic.

Any reason why we should that?

cheers,
  Gerd
Igor Mammedov Oct. 14, 2013, 12:28 p.m. UTC | #15
On Mon, 14 Oct 2013 14:00:12 +0300
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> On Mon, Oct 14, 2013 at 12:27:26PM +0200, Igor Mammedov wrote:
> > On Sun, 13 Oct 2013 23:28:47 +0300
> > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > 
> > > On Sun, Oct 13, 2013 at 07:33:19PM +0200, Igor Mammedov wrote:
> > > > On Sun, 13 Oct 2013 19:46:09 +0300
> > > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > 
> > > > > On Sun, Oct 13, 2013 at 06:23:28PM +0200, Igor Mammedov wrote:
> > > > > > On Sun, 13 Oct 2013 18:59:20 +0300
> > > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > > > 
> > > > > > > On Sun, Oct 13, 2013 at 05:11:54PM +0200, Igor Mammedov wrote:
> > > > > > > > On Sun, 13 Oct 2013 15:31:23 +0300
> > > > > > > > "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > > > > > 
> > > > > > > > > On Sun, Oct 13, 2013 at 02:13:44PM +0200, Igor Mammedov wrote:
> > > > > > > > > > Currently 64-bit PCI BARs are unconditionally mapped by BIOS right
> > > > > > > > > > over 4G + RamSizeOver4G location, which doesn't allow to reserve
> > > > > > > > > > extra space before 64-bit PCI window. For memory hotplug an extra
> > > > > > > > > > RAM space might be reserved after present 64-bit RAM end and BIOS
> > > > > > > > > > should map 64-bit PCI BARs after it.
> > > > > > > > > > 
> > > > > > > > > > Introduce "etc/pcimem64-start" romfile to provide BIOS a hint
> > > > > > > > > > where it should start mapping of 64-bit PCI BARs. If romfile is
> > > > > > > > > > missing, BIOS reverts to legacy behavior and starts mapping right
> > > > > > > > > > after high memory.
> > > > > > > > > > 
> > > > > > > > > > Signed-off-by: Igor Mammedov <imammedo@redhat.com>
> > > > > > > > > > ---
> > > > > > > > > > v2:
> > > > > > > > > >   * place 64-bit window behind high RAM end if "etc/pcimem64-start"
> > > > > > > > > >     points below it.
> > > > > > > > > 
> > > > > > > > > Hmm I had an alternative suggestion of passing smbios
> > > > > > > > > tables in from QEMU (seabios already has a mechanism for this).
> > > > > > > > > We could then simply increase the existing RamSize variable.
> > > > > > > > > 
> > > > > > > > > What do you think about this idea?
> > > > > > > > There is several issues with RamSizeOver4G I see:
> > > > > > > > 1. As Gerd pointed out it is also used to setup a part of e820 tables.
> > > > > > > >    It possibly could be also changed see "[Qemu-devel] [RfC PATCH] e820: pass
> > > > > > > >    high memory too" but it will be more complicated to keep seabios
> > > > > > > >    backward/forward compatible if we use RamSizeOver4G for passing.
> > > > > > > 
> > > > > > > What are the compatibility worries, exactly?
> > > > > > > 
> > > > > > > The way I see it, if QEMU wants to be compatible it can't
> > > > > > > use memory hotplug. In that case RamSizeOver4G will have
> > > > > > > the old meaning which should work without issues.
> > > > > > QEMU can't know what BIOS version is used and error out in wrong case, and that
> > > > > > could cause hard to guess what going wrong bugs in the guest.
> > > > > > 
> > > > > > > 
> > > > > > > > 2. It doesn't scale if we are going to use more than 1Tb future.
> > > > > > > >    Extending RamSizeOver4G value via additional CMOS port wasn't welcomed
> > > > > > > >    either, see
> > > > > > > > http://www.seabios.org/pipermail/seabios/2010-September/000911.html
> > > > > > > > 
> > > > > > > > It looks like fw_cfg interface is preferred one and "etc/pcimem64-start"
> > > > > > > > approach doesn't have backward/forward compatibility issues.
> > > > > > > 
> > > > > > > Hmm but you aren't passing in memory size - pcimem64-start
> > > > > > > merely tells bios where to put PCI devices.
> > > > > > > So how would it help?
> > > > > > If a lot of memory is reserved for memory hotplug (terabytes), that could
> > > > > > put window behind what RamSizeOver4G could address it.
> > > > > 
> > > > > All we care about for a compatible bios is that it
> > > > > keep working in old hardware configurations.
> > > > > IMO there's no real need for old BIOS to work
> > > > > with e.g. memory hotplug - it's reasonable
> > > > > to ask users to update fitmware if they want
> > > > > to use this feature.
> > > > Fine with me if we don't care about strange hangs if something misused.
> > > > 
> > > > But RamSizeOver4G still doesn't allow correctly place PCI window if large
> > > > amount of memory present/reserved.
> > > 
> > > I'm fine with adding a new fw cfg really.
> > > I am however not sure we should have a single new base64 flag.
> > > Hardware has a set of ranges where guest can
> > > put PCI devices - would we be better off with
> > > an interface that lists all these ranges?
> > >
> > > I'm not saying we should but this needs a bit of thought.
> > I've offered to reuse your "etc/pci-info" fw_cfg that were rejected earlier,
> > but it still seems no go: http://permalink.gmane.org/gmane.comp.emulators.qemu/237307
> > 
> > And there is slight difference between PCI holes and PCI address space mappings
> > represented by MemoryRegion-s in QEMU.
> > 
> > Basically we only need to inform BIOS where to PCI address spaces start and
> > simple "etc/pcimem64-start" + "etc/pcimem32-start" are just fine for that.
> > And for now (memory hotplug) we need only the first one, the second one is
> > very well hardcoded in Seabios/QEMU.
> 
> Hmm okay but the only reason we want to control this is
> because it conflicts with memory hotplug, no?
yes.

> I vaguely recall we discussed just passing amount of
> hot-pluggable memory in, in the past, but I don't
> remember the reason we decided against it.
> Could you refresh my memory?

"etc/pcimem64-start" is explicitly passes absolute location where mapping
should start and BIOS doesn't have to make any hidden assumptions/calculations
to get it right.

> 
> > It's less complicated than maintaining structure interface (which would be
> > difficult to keep compatible if it will be changing in future).
> > 
> > A more flexible approach would be a separate table (like e820 table, but only
> > for PCI ranges), but it seems a bit of overkill for now (I can't picture
> > a need for more than 2 PCI address space mappings).
> 
> It's not unthinkable.
> Multiple ECAM regions (for multi-root systems) can make holes in the address space.
> Also, we just ignore everything above the ioapic, but that's
> not a must, we could maybe use address space above ioapic.
the space above ioapic is full of holes,
question is if we really have to use multiple PCI holes as opposed to
a simpler 2 region setup?


> 
> > > > > 
> > > > > > > > > 
> > > > > > > > > 
> > > > > > > > > > ---
> > > > > > > > > >  src/fw/pciinit.c | 13 ++++++++++++-
> > > > > > > > > >  1 file changed, 12 insertions(+), 1 deletion(-)
> > > > > > > > > > 
> > > > > > > > > > diff --git a/src/fw/pciinit.c b/src/fw/pciinit.c
> > > > > > > > > > index b29db99..798309b 100644
> > > > > > > > > > --- a/src/fw/pciinit.c
> > > > > > > > > > +++ b/src/fw/pciinit.c
> > > > > > > > > > @@ -18,6 +18,8 @@
> > > > > > > > > >  #include "paravirt.h" // RamSize
> > > > > > > > > >  #include "string.h" // memset
> > > > > > > > > >  #include "util.h" // pci_setup
> > > > > > > > > > +#include "byteorder.h" // le64_to_cpu
> > > > > > > > > > +#include "romfile.h" // romfile_loadint
> > > > > > > > > >  
> > > > > > > > > >  #define PCI_DEVICE_MEM_MIN     0x1000
> > > > > > > > > >  #define PCI_BRIDGE_IO_MIN      0x1000
> > > > > > > > > > @@ -764,6 +766,15 @@ static void pci_bios_map_devices(struct pci_bus *busses)
> > > > > > > > > >  {
> > > > > > > > > >      if (pci_bios_init_root_regions(busses)) {
> > > > > > > > > >          struct pci_region r64_mem, r64_pref;
> > > > > > > > > > +        u64 ram64_end = 0x100000000ULL + RamSizeOver4G;
> > > > > > > > > > +        u64 base64 = le64_to_cpu(romfile_loadint("etc/pcimem64-start",
> > > > > > > > > > +                                 ram64_end));
> > > > > > > > > > +        if (base64 < ram64_end) {
> > > > > > > > > > +            dprintf(1, "ignorig etc/pcimem64-start [0x%llx] below present RAM, "
> > > > > > > > > > +                       "placing 64-bit PCI window behind RAM end: %llx",
> > > > > > > > > > +                        base64, ram64_end);
> > > > > > > > > > +            base64 = ram64_end;
> > > > > > > > > > +        }
> > > > > > > > > >          r64_mem.list.first = NULL;
> > > > > > > > > >          r64_pref.list.first = NULL;
> > > > > > > > > >          pci_region_migrate_64bit_entries(&busses[0].r[PCI_REGION_TYPE_MEM],
> > > > > > > > > > @@ -779,7 +790,7 @@ static void pci_bios_map_devices(struct pci_bus *busses)
> > > > > > > > > >          u64 align_mem = pci_region_align(&r64_mem);
> > > > > > > > > >          u64 align_pref = pci_region_align(&r64_pref);
> > > > > > > > > >  
> > > > > > > > > > -        r64_mem.base = ALIGN(0x100000000LL + RamSizeOver4G, align_mem);
> > > > > > > > > > +        r64_mem.base = ALIGN(base64, align_mem);
> > > > > > > > > >          r64_pref.base = ALIGN(r64_mem.base + sum_mem, align_pref);
> > > > > > > > > >          pcimem64_start = r64_mem.base;
> > > > > > > > > >          pcimem64_end = r64_pref.base + sum_pref;
> > > > > > > > > > -- 
> > > > > > > > > > 1.8.3.1
> > > > > > > > > 
> > > > > > > > 
> > > > > > > > 
> > > > > > > > -- 
> > > > > > > > Regards,
> > > > > > > >   Igor
> > > > > > 
> > > > > > 
> > > > > > -- 
> > > > > > Regards,
> > > > > >   Igor
> > > > 
> > > > 
> > > > -- 
> > > > Regards,
> > > >   Igor
>
Michael S. Tsirkin Oct. 14, 2013, 12:38 p.m. UTC | #16
On Mon, Oct 14, 2013 at 02:16:23PM +0200, Gerd Hoffmann wrote:
>   Hi,
> 
> > > And there is slight difference between PCI holes and PCI address space mappings
> > > represented by MemoryRegion-s in QEMU.
> > > 
> > > Basically we only need to inform BIOS where to PCI address spaces start and
> > > simple "etc/pcimem64-start" + "etc/pcimem32-start" are just fine for that.
> > > And for now (memory hotplug) we need only the first one, the second one is
> > > very well hardcoded in Seabios/QEMU.
> 
> Yes, 32bit hole is basically the [ end-of-memory -> ioapic ] range.
> Can't see any reason to make that configurable, it is quite unlikely
> that this ever changes.
> 
> For the 64bit window it makes sense to configure it, to give qemu a
> little more control about the address space layout.
> 
> > Hmm okay but the only reason we want to control this is
> > because it conflicts with memory hotplug, no?
> 
> > I vaguely recall we discussed just passing amount of
> > hot-pluggable memory in, in the past, but I don't
> > remember the reason we decided against it.
> > Could you refresh my memory?
> 
> To me it makes more sense to just go the direct route and say "please
> put the 64bit bars at this location" rather than indirect "we might want
> hotplug $thatmuch memory" and then expect the bios to leave that much
> room.

Only if the newfeature address is not under bios control.  I know that
bios is simplistic so all it cares about ATM is pci window, but can't
shake the impression that we are better off telling the guest what's
going on rather than what it should do.

In particular the issue that was discussed (what to do
if pci start is set by host to below ram end)
will simply go away if we pass in an incremental
value: there will be no invalid configurations.

> If we ever want reserve more address space for other reasons it'll be
> easier with the direct variant as the bios doesn't need to be aware that
> we'll need some address space for $newfeature too.

We can call it "reserved memory space" rather than
hotplug memory if you prefer.

> > > A more flexible approach would be a separate table (like e820 table, but only
> > > for PCI ranges), but it seems a bit of overkill for now (I can't picture
> > > a need for more than 2 PCI address space mappings).
> > 
> > It's not unthinkable.
> > Multiple ECAM regions (for multi-root systems) can make holes in the address space.
> 
> Sounds pretty theoretic ...

What? Multiple PCI roots?

> > Also, we just ignore everything above the ioapic, but that's
> > not a must, we could maybe use address space above ioapic.
> 
> Any reason why we should that?
> 
> cheers,
>   Gerd
> 

32 bit address space is contrained, using it is preferable for
32 bit guests ...
Gerd Hoffmann Oct. 14, 2013, 1:04 p.m. UTC | #17
Hi,

> > 
> > To me it makes more sense to just go the direct route and say "please
> > put the 64bit bars at this location" rather than indirect "we might want
> > hotplug $thatmuch memory" and then expect the bios to leave that much
> > room.
> 
> Only if the newfeature address is not under bios control.  I know that
> bios is simplistic so all it cares about ATM is pci window, but can't
> shake the impression that we are better off telling the guest what's
> going on rather than what it should do.
> 
> In particular the issue that was discussed (what to do
> if pci start is set by host to below ram end)
> will simply go away if we pass in an incremental
> value: there will be no invalid configurations.

The "what is going on" might need updates in both qemu and seabios if
something new goes on.  For example qemu getting support non-contignous
memory.  The "leave that much address space free above memory" suddenly
is ambiguous as there are two (or more) memory blocks above 4g.  "please
place 64bit pci bars there" continues to work just fine.

> > > It's not unthinkable.
> > > Multiple ECAM regions (for multi-root systems) can make holes in the address space.
> > 
> > Sounds pretty theoretic ...
> 
> What? Multiple PCI roots?

That we'll need pass multiple pci windows because of that.  I expect
we'll need seabios support for the new multi-root hardware anyway, and
seabios will probably be aware of the hardware constrains then ...

Also as far I know nobody is working on such a chipset.

I simply wouldn't worry about that today.  Designing a interface when
you don't know the exact needs for the it has a high chance to go wrong.

> > > Also, we just ignore everything above the ioapic, but that's
> > > not a must, we could maybe use address space above ioapic.
> > 
> > Any reason why we should that?
> > 
> 
> 32 bit address space is contrained, using it is preferable for
> 32 bit guests ...

This wouldn't help in the cases I've seen in practice.  We have no
problems at all to fit in mmio pci bars, even lots of them, they are
small enough.  We can run out of address space if we have pci devices
with larger chunks of memory on them, such as qxl or ivshmem.  And these
memory bars are too big to fit into any of the small holes above the
ioapic.

cheers,
  Gerd
Michael S. Tsirkin Oct. 14, 2013, 2 p.m. UTC | #18
On Mon, Oct 14, 2013 at 03:04:45PM +0200, Gerd Hoffmann wrote:
>   Hi,
> 
> > > 
> > > To me it makes more sense to just go the direct route and say "please
> > > put the 64bit bars at this location" rather than indirect "we might want
> > > hotplug $thatmuch memory" and then expect the bios to leave that much
> > > room.
> > 
> > Only if the newfeature address is not under bios control.  I know that
> > bios is simplistic so all it cares about ATM is pci window, but can't
> > shake the impression that we are better off telling the guest what's
> > going on rather than what it should do.
> > 
> > In particular the issue that was discussed (what to do
> > if pci start is set by host to below ram end)
> > will simply go away if we pass in an incremental
> > value: there will be no invalid configurations.
> 
> The "what is going on" might need updates in both qemu and seabios if
> something new goes on.  For example qemu getting support non-contignous
> memory.  The "leave that much address space free above memory" suddenly
> is ambiguous as there are two (or more) memory blocks above 4g.  "please
> place 64bit pci bars there" continues to work just fine.

Yes but at the cost of overspecifying it.
I think it's down to the name: it's called pcimem64-start
but it can actually be less than 4G and we need to worry what to
do then. Also, 64 doesn't really mean >4G.

So how about "reserve-memory-over-4g"?
bios then does 1ull << 32 + reserve-memory-over-4g
to figure out how much to skip.


> > > > It's not unthinkable.
> > > > Multiple ECAM regions (for multi-root systems) can make holes in the address space.
> > > 
> > > Sounds pretty theoretic ...
> > 
> > What? Multiple PCI roots?
> 
> That we'll need pass multiple pci windows because of that.  I expect
> we'll need seabios support for the new multi-root hardware anyway, and
> seabios will probably be aware of the hardware constrains then ...
> 
> Also as far I know nobody is working on such a chipset.
> 
> I simply wouldn't worry about that today.  Designing a interface when
> you don't know the exact needs for the it has a high chance to go wrong.

Well you were the one that asked about that :)

> > > > Also, we just ignore everything above the ioapic, but that's
> > > > not a must, we could maybe use address space above ioapic.
> > > 
> > > Any reason why we should that?
> > > 
> > 
> > 32 bit address space is contrained, using it is preferable for
> > 32 bit guests ...
> 
> This wouldn't help in the cases I've seen in practice.  We have no
> problems at all to fit in mmio pci bars, even lots of them, they are
> small enough.  We can run out of address space if we have pci devices
> with larger chunks of memory on them, such as qxl or ivshmem.  And these
> memory bars are too big to fit into any of the small holes above the
> ioapic.
> 
> cheers,
>   Gerd
>
Igor Mammedov Oct. 14, 2013, 4:15 p.m. UTC | #19
On Mon, 14 Oct 2013 17:00:47 +0300
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> On Mon, Oct 14, 2013 at 03:04:45PM +0200, Gerd Hoffmann wrote:
> >   Hi,
> > 
> > > > 
> > > > To me it makes more sense to just go the direct route and say "please
> > > > put the 64bit bars at this location" rather than indirect "we might want
> > > > hotplug $thatmuch memory" and then expect the bios to leave that much
> > > > room.
> > > 
> > > Only if the newfeature address is not under bios control.  I know that
> > > bios is simplistic so all it cares about ATM is pci window, but can't
> > > shake the impression that we are better off telling the guest what's
> > > going on rather than what it should do.
> > > 
> > > In particular the issue that was discussed (what to do
> > > if pci start is set by host to below ram end)
> > > will simply go away if we pass in an incremental
> > > value: there will be no invalid configurations.
> > 
> > The "what is going on" might need updates in both qemu and seabios if
> > something new goes on.  For example qemu getting support non-contignous
> > memory.  The "leave that much address space free above memory" suddenly
> > is ambiguous as there are two (or more) memory blocks above 4g.  "please
> > place 64bit pci bars there" continues to work just fine.
> 
> Yes but at the cost of overspecifying it.
> I think it's down to the name: it's called pcimem64-start
> but it can actually be less than 4G and we need to worry what to
> do then. Also, 64 doesn't really mean >4G.
Why 64bit PCI window can't start below 4G, is there a SPEC that forbids it?
If hardware says it has window start below 4G, than it's build this way. 

I'd use v1 of this patch, i.e. if QEMU tells that window starts below 4G,
then it supports such configuration.

> 
> So how about "reserve-memory-over-4g"?
> bios then does 1ull << 32 + reserve-memory-over-4g
> to figure out how much to skip.

That name means making implicit assumption that PCI windows start after
"reserve-memory-over-4g". I'd rather have an explicit interface that tells
where window starts, without making any assumptions. Alternatively how about
something like "pcimem64-start-high-dword" and making it 32 bit value
passing 32-63 bits?
Michael S. Tsirkin Oct. 14, 2013, 4:37 p.m. UTC | #20
On Mon, Oct 14, 2013 at 06:15:25PM +0200, Igor Mammedov wrote:
> On Mon, 14 Oct 2013 17:00:47 +0300
> "Michael S. Tsirkin" <mst@redhat.com> wrote:
> 
> > On Mon, Oct 14, 2013 at 03:04:45PM +0200, Gerd Hoffmann wrote:
> > >   Hi,
> > > 
> > > > > 
> > > > > To me it makes more sense to just go the direct route and say "please
> > > > > put the 64bit bars at this location" rather than indirect "we might want
> > > > > hotplug $thatmuch memory" and then expect the bios to leave that much
> > > > > room.
> > > > 
> > > > Only if the newfeature address is not under bios control.  I know that
> > > > bios is simplistic so all it cares about ATM is pci window, but can't
> > > > shake the impression that we are better off telling the guest what's
> > > > going on rather than what it should do.
> > > > 
> > > > In particular the issue that was discussed (what to do
> > > > if pci start is set by host to below ram end)
> > > > will simply go away if we pass in an incremental
> > > > value: there will be no invalid configurations.
> > > 
> > > The "what is going on" might need updates in both qemu and seabios if
> > > something new goes on.  For example qemu getting support non-contignous
> > > memory.  The "leave that much address space free above memory" suddenly
> > > is ambiguous as there are two (or more) memory blocks above 4g.  "please
> > > place 64bit pci bars there" continues to work just fine.
> > 
> > Yes but at the cost of overspecifying it.
> > I think it's down to the name: it's called pcimem64-start
> > but it can actually be less than 4G and we need to worry what to
> > do then. Also, 64 doesn't really mean >4G.
> Why 64bit PCI window can't start below 4G, is there a SPEC that forbids it?
> If hardware says it has window start below 4G, than it's build this way. 

That's the issue.
There's no such thing as a 64 bit window in spec or any
real hardware.
On PIIX RAM had priority so where you put RAM
you could't put PCI devices.
This is what creates the "64 bit window".


> I'd use v1 of this patch, i.e. if QEMU tells that window starts below 4G,
> then it supports such configuration.

How does this work exactly?
"64 bit window" is simply BIOS putting devices
above 4G if it can't put them all below 4G.

It seems that if you create this config you will
just get a broken system.

> > 
> > So how about "reserve-memory-over-4g"?
> > bios then does 1ull << 32 + reserve-memory-over-4g
> > to figure out how much to skip.
> 
> That name means making implicit assumption that PCI windows start after
> "reserve-memory-over-4g". I'd rather have an explicit interface that tells
> where window starts, without making any assumptions.

Not at all. It allows bios to put pci where it wants.
QEMU just tells bios where it *can't* go.

> Alternatively how about
> something like "pcimem64-start-high-dword" and making it 32 bit value
> passing 32-63 bits?
> 
> 
> 

It's an option ...
Gerd Hoffmann Oct. 15, 2013, 8:01 a.m. UTC | #21
Hi,

> Yes but at the cost of overspecifying it.
> I think it's down to the name: it's called pcimem64-start
> but it can actually be less than 4G and we need to worry what to
> do then. Also, 64 doesn't really mean >4G.
> 
> So how about "reserve-memory-over-4g"?
> bios then does 1ull << 32 + reserve-memory-over-4g
> to figure out how much to skip.

We are reaching the point where it becomes pointless bikeshedding ...

I want a interface which is clearly defined and which doesn't break if
the way we use the address space above 4g changes (hotplug,
non-contignous memory, whatever).  So make it depend on the memory
deployed isn't a clever idea.

So at the end of the day it comes down to specify an address, either
relative to 4g (your reserve-memory-over-4g suggestion) or relative to
zero (Igors pcimem64-start patch).  Both will do the job.  In both cases
the bios has to check it has no conflicts with known ram regions (i.e.
compare against 1<<32 + RamSizeAbove4G).

I personally don't see the point in having the address relative to 4g
and prefer the pcimem64-start approach.  We could rename it to
pcimem64-minimum-address to make more clear this is about keeping some
space free rather than specifyng a fixed address where the 64bit pci
bars should be mapped to.  But at the end of the day I don't care too
much, how we are going to name the baby is just a matter of taste and
not really critical for the interface ...

What is the state of the qemu side patches btw?

cheers,
  Gerd
Igor Mammedov Oct. 15, 2013, 9:05 a.m. UTC | #22
On Tue, 15 Oct 2013 10:01:01 +0200
Gerd Hoffmann <kraxel@redhat.com> wrote:

>   Hi,
> 
> > Yes but at the cost of overspecifying it.
> > I think it's down to the name: it's called pcimem64-start
> > but it can actually be less than 4G and we need to worry what to
> > do then. Also, 64 doesn't really mean >4G.
> > 
> > So how about "reserve-memory-over-4g"?
> > bios then does 1ull << 32 + reserve-memory-over-4g
> > to figure out how much to skip.
> 
> We are reaching the point where it becomes pointless bikeshedding ...
> 
> I want a interface which is clearly defined and which doesn't break if
> the way we use the address space above 4g changes (hotplug,
> non-contignous memory, whatever).  So make it depend on the memory
> deployed isn't a clever idea.
> 
> So at the end of the day it comes down to specify an address, either
> relative to 4g (your reserve-memory-over-4g suggestion) or relative to
> zero (Igors pcimem64-start patch).  Both will do the job.  In both cases
> the bios has to check it has no conflicts with known ram regions (i.e.
> compare against 1<<32 + RamSizeAbove4G).
> 
> I personally don't see the point in having the address relative to 4g
> and prefer the pcimem64-start approach.  We could rename it to
> pcimem64-minimum-address to make more clear this is about keeping some
> space free rather than specifyng a fixed address where the 64bit pci
> bars should be mapped to.  But at the end of the day I don't care too
> much, how we are going to name the baby is just a matter of taste and
> not really critical for the interface ...
Michael,

My preference is the same as Gerd's.
Though if you NACK this approach, I'm fine with relative to 4g approach
as you suggest, the only change I'd like to see in naming is memory
reservation to be replaced with pcimem64, i.e. something like:
 pcimem64-4gb-offset
to reflect value we are actually passing in.

> 
> What is the state of the qemu side patches btw?
I've them ready but they conflict with you 1Tb in e820 RFC,
I can post relevant patches as soon as we agree on this topic.
May I pick up your patch and post it along with pcimem64-start patches?
 
> 
> cheers,
>   Gerd
> 
> 
>
Michael S. Tsirkin Oct. 15, 2013, 9:08 a.m. UTC | #23
On Tue, Oct 15, 2013 at 10:01:01AM +0200, Gerd Hoffmann wrote:
>   Hi,
> 
> > Yes but at the cost of overspecifying it.
> > I think it's down to the name: it's called pcimem64-start
> > but it can actually be less than 4G and we need to worry what to
> > do then. Also, 64 doesn't really mean >4G.
> > 
> > So how about "reserve-memory-over-4g"?
> > bios then does 1ull << 32 + reserve-memory-over-4g
> > to figure out how much to skip.
> 
> We are reaching the point where it becomes pointless bikeshedding ...
> 
> I want a interface which is clearly defined and which doesn't break if
> the way we use the address space above 4g changes (hotplug,
> non-contignous memory, whatever).  So make it depend on the memory
> deployed isn't a clever idea.
> 
> So at the end of the day it comes down to specify an address, either
> relative to 4g (your reserve-memory-over-4g suggestion) or relative to
> zero (Igors pcimem64-start patch).  Both will do the job.  In both cases
> the bios has to check it has no conflicts with known ram regions (i.e.
> compare against 1<<32 + RamSizeAbove4G).

Actually it doesn't: bios doesn't use RAM above 4G value.
It passes it to guest but ignores it itself.
So you can likely boot guest and let it figure it out.


> 
> I personally don't see the point in having the address relative to 4g
> and prefer the pcimem64-start approach.  We could rename it to
> pcimem64-minimum-address to make more clear this is about keeping some
> space free rather than specifyng a fixed address where the 64bit pci
> bars should be mapped to.  But at the end of the day I don't care too
> much, how we are going to name the baby is just a matter of taste and
> not really critical for the interface ...

I agree with this last claim.
Finding a nice name

> What is the state of the qemu side patches btw?
> 
> cheers,
>   Gerd
>
Gerd Hoffmann Oct. 15, 2013, 9:14 a.m. UTC | #24
Hi,

> > What is the state of the qemu side patches btw?
> I've them ready but they conflict with you 1Tb in e820 RFC,
> I can post relevant patches as soon as we agree on this topic.
> May I pick up your patch and post it along with pcimem64-start patches?

Yes, makes sense it just pick it into the series.

cheers,
  Gerd
Michael S. Tsirkin Oct. 15, 2013, 9:16 a.m. UTC | #25
On Tue, Oct 15, 2013 at 11:05:48AM +0200, Igor Mammedov wrote:
> On Tue, 15 Oct 2013 10:01:01 +0200
> Gerd Hoffmann <kraxel@redhat.com> wrote:
> 
> >   Hi,
> > 
> > > Yes but at the cost of overspecifying it.
> > > I think it's down to the name: it's called pcimem64-start
> > > but it can actually be less than 4G and we need to worry what to
> > > do then. Also, 64 doesn't really mean >4G.
> > > 
> > > So how about "reserve-memory-over-4g"?
> > > bios then does 1ull << 32 + reserve-memory-over-4g
> > > to figure out how much to skip.
> > 
> > We are reaching the point where it becomes pointless bikeshedding ...
> > 
> > I want a interface which is clearly defined and which doesn't break if
> > the way we use the address space above 4g changes (hotplug,
> > non-contignous memory, whatever).  So make it depend on the memory
> > deployed isn't a clever idea.
> > 
> > So at the end of the day it comes down to specify an address, either
> > relative to 4g (your reserve-memory-over-4g suggestion) or relative to
> > zero (Igors pcimem64-start patch).  Both will do the job.  In both cases
> > the bios has to check it has no conflicts with known ram regions (i.e.
> > compare against 1<<32 + RamSizeAbove4G).
> > 
> > I personally don't see the point in having the address relative to 4g
> > and prefer the pcimem64-start approach.  We could rename it to
> > pcimem64-minimum-address to make more clear this is about keeping some
> > space free rather than specifyng a fixed address where the 64bit pci
> > bars should be mapped to.  But at the end of the day I don't care too
> > much, how we are going to name the baby is just a matter of taste and
> > not really critical for the interface ...
> Michael,
> 
> My preference is the same as Gerd's.
> Though if you NACK this approach, I'm fine with relative to 4g approach
> as you suggest, the only change I'd like to see in naming is memory
> reservation to be replaced with pcimem64, i.e. something like:
>  pcimem64-4gb-offset
> to reflect value we are actually passing in.

I'm not going to nack.

> > 
> > What is the state of the qemu side patches btw?
> I've them ready but they conflict with you 1Tb in e820 RFC,
> I can post relevant patches as soon as we agree on this topic.
> May I pick up your patch and post it along with pcimem64-start patches?

So for qemu we really need to merge them together with
memory hotplug I think.  It's not a big patch correct?
If it's small there's no need to merge just this interface
first, let's merge it all together.

> > 
> > cheers,
> >   Gerd
> > 
> > 
> >
Gerd Hoffmann Oct. 15, 2013, 9:24 a.m. UTC | #26
Hi,

> > > What is the state of the qemu side patches btw?
> > I've them ready but they conflict with you 1Tb in e820 RFC,
> > I can post relevant patches as soon as we agree on this topic.
> > May I pick up your patch and post it along with pcimem64-start patches?
> 
> So for qemu we really need to merge them together with
> memory hotplug I think.

I'd prefer to not delay them until the full memory hotplug is ready to
go though, for release planning reasons.

Once the pcimem64-$whatever patch is in (+apci-from-qemu) no additional
support is needed in seabios to handle memory hotplug, correct?

So we can go prepare a seabios release when the interfaces are settled
and merged, and continuing the work on memory hotplug can happen in
parallel.

cheers,
  Gerd
Igor Mammedov Oct. 15, 2013, 9:47 a.m. UTC | #27
On Tue, 15 Oct 2013 12:16:43 +0300
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> On Tue, Oct 15, 2013 at 11:05:48AM +0200, Igor Mammedov wrote:
> > On Tue, 15 Oct 2013 10:01:01 +0200
> > Gerd Hoffmann <kraxel@redhat.com> wrote:
> > 
> > >   Hi,
> > > 
> > > > Yes but at the cost of overspecifying it.
> > > > I think it's down to the name: it's called pcimem64-start
> > > > but it can actually be less than 4G and we need to worry what to
> > > > do then. Also, 64 doesn't really mean >4G.
> > > > 
> > > > So how about "reserve-memory-over-4g"?
> > > > bios then does 1ull << 32 + reserve-memory-over-4g
> > > > to figure out how much to skip.
> > > 
> > > We are reaching the point where it becomes pointless bikeshedding ...
> > > 
> > > I want a interface which is clearly defined and which doesn't break if
> > > the way we use the address space above 4g changes (hotplug,
> > > non-contignous memory, whatever).  So make it depend on the memory
> > > deployed isn't a clever idea.
> > > 
> > > So at the end of the day it comes down to specify an address, either
> > > relative to 4g (your reserve-memory-over-4g suggestion) or relative to
> > > zero (Igors pcimem64-start patch).  Both will do the job.  In both cases
> > > the bios has to check it has no conflicts with known ram regions (i.e.
> > > compare against 1<<32 + RamSizeAbove4G).
> > > 
> > > I personally don't see the point in having the address relative to 4g
> > > and prefer the pcimem64-start approach.  We could rename it to
> > > pcimem64-minimum-address to make more clear this is about keeping some
> > > space free rather than specifyng a fixed address where the 64bit pci
> > > bars should be mapped to.  But at the end of the day I don't care too
> > > much, how we are going to name the baby is just a matter of taste and
> > > not really critical for the interface ...
> > Michael,
> > 
> > My preference is the same as Gerd's.
> > Though if you NACK this approach, I'm fine with relative to 4g approach
> > as you suggest, the only change I'd like to see in naming is memory
> > reservation to be replaced with pcimem64, i.e. something like:
> >  pcimem64-4gb-offset
> > to reflect value we are actually passing in.
> 
> I'm not going to nack.
Ok, then I'll repost with suggested "pcimem64-minimum-address" but no other
changes.

> 
> > > 
> > > What is the state of the qemu side patches btw?
> > I've them ready but they conflict with you 1Tb in e820 RFC,
> > I can post relevant patches as soon as we agree on this topic.
> > May I pick up your patch and post it along with pcimem64-start patches?
> 
> So for qemu we really need to merge them together with
> memory hotplug I think.  It's not a big patch correct?
> If it's small there's no need to merge just this interface
> first, let's merge it all together.
It's quite independent from memhotplug so I'll just cherry-pick ~3-4 patches
and post them.

> > > 
> > > cheers,
> > >   Gerd
> > > 
> > > 
> > > 
>
Igor Mammedov Oct. 15, 2013, 9:53 a.m. UTC | #28
On Tue, 15 Oct 2013 11:24:19 +0200
Gerd Hoffmann <kraxel@redhat.com> wrote:

>   Hi,
> 
> > > > What is the state of the qemu side patches btw?
> > > I've them ready but they conflict with you 1Tb in e820 RFC,
> > > I can post relevant patches as soon as we agree on this topic.
> > > May I pick up your patch and post it along with pcimem64-start patches?
> > 
> > So for qemu we really need to merge them together with
> > memory hotplug I think.
> 
> I'd prefer to not delay them until the full memory hotplug is ready to
> go though, for release planning reasons.
> 
> Once the pcimem64-$whatever patch is in (+apci-from-qemu) no additional
> support is needed in seabios to handle memory hotplug, correct?
almost,
according our latest discussions it will make SMBIOS reflect incorrect
memory devices info after reboot, but that could go before mem hotplug or even
as follow up to make SMBIOS work with non-contiguous memory ranges.

> 
> So we can go prepare a seabios release when the interfaces are settled
> and merged, and continuing the work on memory hotplug can happen in
> parallel.
true, and that will make resulting memhotplug series smaller.

> cheers,
>   Gerd
> 
> 
>
Igor Mammedov Oct. 15, 2013, 12:36 p.m. UTC | #29
On Tue, 15 Oct 2013 11:14:36 +0200
Gerd Hoffmann <kraxel@redhat.com> wrote:

>   Hi,
> 
> > > What is the state of the qemu side patches btw?
> > I've them ready but they conflict with you 1Tb in e820 RFC,
> > I can post relevant patches as soon as we agree on this topic.
> > May I pick up your patch and post it along with pcimem64-start patches?
> 
> Yes, makes sense it just pick it into the series.
it conflicted with one of memory hotplug patches and since I'm not posting
whole series there is no need to resubmit it.

> cheers,
>   Gerd
> 
> 
>
diff mbox

Patch

diff --git a/src/fw/pciinit.c b/src/fw/pciinit.c
index b29db99..798309b 100644
--- a/src/fw/pciinit.c
+++ b/src/fw/pciinit.c
@@ -18,6 +18,8 @@ 
 #include "paravirt.h" // RamSize
 #include "string.h" // memset
 #include "util.h" // pci_setup
+#include "byteorder.h" // le64_to_cpu
+#include "romfile.h" // romfile_loadint
 
 #define PCI_DEVICE_MEM_MIN     0x1000
 #define PCI_BRIDGE_IO_MIN      0x1000
@@ -764,6 +766,15 @@  static void pci_bios_map_devices(struct pci_bus *busses)
 {
     if (pci_bios_init_root_regions(busses)) {
         struct pci_region r64_mem, r64_pref;
+        u64 ram64_end = 0x100000000ULL + RamSizeOver4G;
+        u64 base64 = le64_to_cpu(romfile_loadint("etc/pcimem64-start",
+                                 ram64_end));
+        if (base64 < ram64_end) {
+            dprintf(1, "ignorig etc/pcimem64-start [0x%llx] below present RAM, "
+                       "placing 64-bit PCI window behind RAM end: %llx",
+                        base64, ram64_end);
+            base64 = ram64_end;
+        }
         r64_mem.list.first = NULL;
         r64_pref.list.first = NULL;
         pci_region_migrate_64bit_entries(&busses[0].r[PCI_REGION_TYPE_MEM],
@@ -779,7 +790,7 @@  static void pci_bios_map_devices(struct pci_bus *busses)
         u64 align_mem = pci_region_align(&r64_mem);
         u64 align_pref = pci_region_align(&r64_pref);
 
-        r64_mem.base = ALIGN(0x100000000LL + RamSizeOver4G, align_mem);
+        r64_mem.base = ALIGN(base64, align_mem);
         r64_pref.base = ALIGN(r64_mem.base + sum_mem, align_pref);
         pcimem64_start = r64_mem.base;
         pcimem64_end = r64_pref.base + sum_pref;