diff mbox

[v2] pc: allow raising low memory via max-ram-below-4g option

Message ID 1452257883-19549-1-git-send-email-kraxel@redhat.com
State New
Headers show

Commit Message

Gerd Hoffmann Jan. 8, 2016, 12:58 p.m. UTC
This patch extends the functionality of the max-ram-below-4g option
to also allow increasing lowmem.  Use case: Give as much memory as
possible to legacy non-PAE guests.

While at it, also rework the lowmem calculation logic and add a
longish comment describing how it works and what the compatibility
constraints are.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 hw/i386/pc.c      |  2 +-
 hw/i386/pc_piix.c | 61 +++++++++++++++++++++++++++++++++++--------------------
 2 files changed, 40 insertions(+), 23 deletions(-)

Comments

Igor Mammedov Jan. 8, 2016, 5:45 p.m. UTC | #1
On Fri,  8 Jan 2016 13:58:03 +0100
Gerd Hoffmann <kraxel@redhat.com> wrote:

> This patch extends the functionality of the max-ram-below-4g option
> to also allow increasing lowmem.  Use case: Give as much memory as
> possible to legacy non-PAE guests.
> 
> While being at it also rework the lowmem calculation logic and add a
> longish comment describing how it works and what the compatibility
> constrains are.
CCing Laszlo as it might affect OVMF

> 
> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
> ---
>  hw/i386/pc.c      |  2 +-
>  hw/i386/pc_piix.c | 61 +++++++++++++++++++++++++++++++++++--------------------
>  2 files changed, 40 insertions(+), 23 deletions(-)
> 
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 459260b..1332269 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -1887,7 +1887,7 @@ static void pc_machine_initfn(Object *obj)
>                          pc_machine_get_hotplug_memory_region_size,
>                          NULL, NULL, NULL, &error_abort);
>  
> -    pcms->max_ram_below_4g = 1ULL << 32; /* 4G */
> +    pcms->max_ram_below_4g = 0xe0000000; /* 3.5G */
>      object_property_add(obj, PC_MACHINE_MAX_RAM_BELOW_4G, "size",
>                          pc_machine_get_max_ram_below_4g,
>                          pc_machine_set_max_ram_below_4g,
> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> index 438cdae..3743736 100644
> --- a/hw/i386/pc_piix.c
> +++ b/hw/i386/pc_piix.c
> @@ -87,29 +87,46 @@ static void pc_init1(MachineState *machine,
>      PcGuestInfo *guest_info;
>      ram_addr_t lowmem;
>  
> -    /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory).
> -     * If it doesn't, we need to split it in chunks below and above 4G.
> -     * In any case, try to make sure that guest addresses aligned at
> -     * 1G boundaries get mapped to host addresses aligned at 1G boundaries.
> -     * For old machine types, use whatever split we used historically to avoid
> -     * breaking migration.
> +    /*
> +     * Calculate ram split, for memory below and above 4G.  It's a bit
> +     * complicated for backward compatibility reasons ...
> +     *
> +     *  - Traditional split is 3.5G (lowmem = 0xe0000000).  This is the
> +     *    default value for max_ram_below_4g now.
> +     *
> +     *  - Then, to gigabyte align the memory, we move the split to 3G
> +     *    (lowmem = 0xc0000000).  But only in case we have to split in
> +     *    the first place, i.e. ram_size is larger than (traditional)
> +     *    lowmem.  And for new machine types (gigabyte_align = true)
> +     *    only, for live migration compatibility reasons.
> +     *
> +     *  - Next the max-ram-below-4g option was added, which allowed to
> +     *    reduce lowmem to a smaller value, to allow a larger PCI I/O
> +     *    window below 4G.  qemu doesn't enforce gigabyte alignment here,
> +     *    but prints a warning.
> +     *
> +     *  - Finally max-ram-below-4g got updated to also allow raising lowmem,
> +     *    so legacy non-PAE guests can get as much memory as possible in
> +     *    the 32bit address space below 4G.
> +     *
> +     * Examples:
> +     *    qemu -M pc-1.7 -m 4G    (old default)    -> 3584M low,  512M high
> +     *    qemu -M pc -m 4G        (new default)    -> 3072M low, 1024M high
> +     *    qemu -M pc,max-ram-below-4g=2G -m 4G     -> 2048M low, 2048M high
> +     *    qemu -M pc,max-ram-below-4g=4G -m 3968M  -> 3968M low (=4G-128M)
>       */
> -    if (machine->ram_size >= 0xe0000000) {
> -        lowmem = pcmc->gigabyte_align ? 0xc0000000 : 0xe0000000;
> -    } else {
> -        lowmem = 0xe0000000;
> -    }
> -
> -    /* Handle the machine opt max-ram-below-4g.  It is basically doing
> -     * min(qemu limit, user limit).
> -     */
> -    if (lowmem > pcms->max_ram_below_4g) {
> -        lowmem = pcms->max_ram_below_4g;
> -        if (machine->ram_size - lowmem > lowmem &&
> -            lowmem & ((1ULL << 30) - 1)) {
> -            error_report("Warning: Large machine and max_ram_below_4g(%"PRIu64
> -                         ") not a multiple of 1G; possible bad performance.",
> -                         pcms->max_ram_below_4g);
> +    lowmem = pcms->max_ram_below_4g;
> +    if (machine->ram_size >= pcms->max_ram_below_4g) {
> +        if (pcmc->gigabyte_align) {
> +            if (lowmem > 0xc0000000) {
> +                lowmem = 0xc0000000;
> +            }
> +            if (lowmem & ((1ULL << 30) - 1)) {
> +                error_report("Warning: Large machine and max_ram_below_4g "
> +                             "(%" PRIu64 ") not a multiple of 1G; "
> +                             "possible bad performance.",
> +                             pcms->max_ram_below_4g);
> +            }
>          }
>      }
>
Laszlo Ersek Jan. 8, 2016, 6:32 p.m. UTC | #2
On 01/08/16 18:45, Igor Mammedov wrote:
> On Fri,  8 Jan 2016 13:58:03 +0100
> Gerd Hoffmann <kraxel@redhat.com> wrote:
> 
>> This patch extends the functionality of the max-ram-below-4g option
>> to also allow increasing lowmem.  Use case: Give as much memory as
>> possible to legacy non-PAE guests.
>>
>> While being at it also rework the lowmem calculation logic and add a
>> longish comment describing how it works and what the compatibility
>> constrains are.
> CCing Laszlo as it might affect OVMF

Thanks a lot for the CC, Igor!

So I have to investigate this separately for i440fx and Q35.

(1) For i440fx, OVMF determines the base of the 32-bit PCI hole like this:

      PciBase = (TopOfLowRam < BASE_2GB) ? BASE_2GB : TopOfLowRam;

where TopOfLowRam is calculated from the CMOS registers 0x34 and 0x35.

*If* QEMU is still sticking with the idea of git commit ddaaefb4dd, that
is, the 32-bit PCI hole still starts immediately after the end of low
RAM, then this change should be fine for i440fx.

(The problem used to be the (TopOfLowRam > BASE_2GB) case, when OVMF
allowed BAR allocation right above the end of low RAM, but QEMU didn't
actually start the PCI hole until higher up.)

Gerd, can you confirm that this new logic for the lowmem/highmem split
doesn't affect the above?

In other words, as long as there is no "void" left between the top of
low RAM and the base of the PCI hole, it doesn't matter where exactly
the split is.

(2) For Q35, the OVMF code is different:

//
// A 3GB base will always fall into Q35's 32-bit PCI host aperture,
// regardless of the Q35 MMCONFIG BAR. Correspondingly, QEMU never lets
// the RAM below 4 GB exceed it.
//
PciBase = BASE_2GB + BASE_1GB;
ASSERT (TopOfLowRam <= PciBase);


(This is based on pc_q35_init() in QEMU.)

This patch doesn't change "hw/i386/pc_q35.c", so that looks fine.

The patch does change "hw/i386/pc.c", which I believe might still affect
Q35...

... Hm, as far as I understand pc_q35_init(), the change in
"hw/i386/pc.c" will only cause the default user limit to move *down*
half a gig. The previous default user limit was 4G (i.e., not a limit at
all), and the new default is 3.5 GB.

And, in any case, the user limit continues to *lower* the split only,
from the initial 0x80000000 (2GB) or 0xb0000000 (2.75GB). So Q35 looks good
too.

Bottom line, I think the patch should be fine -- famous last words -- as
long as the idea of git commit ddaaefb4dd is still intact in QEMU:
- in Q35 the split cannot be raised
- in i440fx the split *can* be raised, but OVMF deals with that, as
  long as QEMU's 32-bit PCI hole still starts right after the split.

... I propose to replace the "pc:" prefix in the subject with "piix:" or
"i440fx:".

Thanks
Laszlo


> 
>>
>> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
>> ---
>>  hw/i386/pc.c      |  2 +-
>>  hw/i386/pc_piix.c | 61 +++++++++++++++++++++++++++++++++++--------------------
>>  2 files changed, 40 insertions(+), 23 deletions(-)
>>
>> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
>> index 459260b..1332269 100644
>> --- a/hw/i386/pc.c
>> +++ b/hw/i386/pc.c
>> @@ -1887,7 +1887,7 @@ static void pc_machine_initfn(Object *obj)
>>                          pc_machine_get_hotplug_memory_region_size,
>>                          NULL, NULL, NULL, &error_abort);
>>  
>> -    pcms->max_ram_below_4g = 1ULL << 32; /* 4G */
>> +    pcms->max_ram_below_4g = 0xe0000000; /* 3.5G */
>>      object_property_add(obj, PC_MACHINE_MAX_RAM_BELOW_4G, "size",
>>                          pc_machine_get_max_ram_below_4g,
>>                          pc_machine_set_max_ram_below_4g,
>> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
>> index 438cdae..3743736 100644
>> --- a/hw/i386/pc_piix.c
>> +++ b/hw/i386/pc_piix.c
>> @@ -87,29 +87,46 @@ static void pc_init1(MachineState *machine,
>>      PcGuestInfo *guest_info;
>>      ram_addr_t lowmem;
>>  
>> -    /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory).
>> -     * If it doesn't, we need to split it in chunks below and above 4G.
>> -     * In any case, try to make sure that guest addresses aligned at
>> -     * 1G boundaries get mapped to host addresses aligned at 1G boundaries.
>> -     * For old machine types, use whatever split we used historically to avoid
>> -     * breaking migration.
>> +    /*
>> +     * Calculate ram split, for memory below and above 4G.  It's a bit
>> +     * complicated for backward compatibility reasons ...
>> +     *
>> +     *  - Traditional split is 3.5G (lowmem = 0xe0000000).  This is the
>> +     *    default value for max_ram_below_4g now.
>> +     *
>> +     *  - Then, to gigabyte align the memory, we move the split to 3G
>> +     *    (lowmem = 0xc0000000).  But only in case we have to split in
>> +     *    the first place, i.e. ram_size is larger than (traditional)
>> +     *    lowmem.  And for new machine types (gigabyte_align = true)
>> +     *    only, for live migration compatibility reasons.
>> +     *
>> +     *  - Next the max-ram-below-4g option was added, which allowed to
>> +     *    reduce lowmem to a smaller value, to allow a larger PCI I/O
>> +     *    window below 4G.  qemu doesn't enforce gigabyte alignment here,
>> +     *    but prints a warning.
>> +     *
>> +     *  - Finally max-ram-below-4g got updated to also allow raising lowmem,
>> +     *    so legacy non-PAE guests can get as much memory as possible in
>> +     *    the 32bit address space below 4G.
>> +     *
>> +     * Examples:
>> +     *    qemu -M pc-1.7 -m 4G    (old default)    -> 3584M low,  512M high
>> +     *    qemu -M pc -m 4G        (new default)    -> 3072M low, 1024M high
>> +     *    qemu -M pc,max-ram-below-4g=2G -m 4G     -> 2048M low, 2048M high
>> +     *    qemu -M pc,max-ram-below-4g=4G -m 3968M  -> 3968M low (=4G-128M)
>>       */
>> -    if (machine->ram_size >= 0xe0000000) {
>> -        lowmem = pcmc->gigabyte_align ? 0xc0000000 : 0xe0000000;
>> -    } else {
>> -        lowmem = 0xe0000000;
>> -    }
>> -
>> -    /* Handle the machine opt max-ram-below-4g.  It is basically doing
>> -     * min(qemu limit, user limit).
>> -     */
>> -    if (lowmem > pcms->max_ram_below_4g) {
>> -        lowmem = pcms->max_ram_below_4g;
>> -        if (machine->ram_size - lowmem > lowmem &&
>> -            lowmem & ((1ULL << 30) - 1)) {
>> -            error_report("Warning: Large machine and max_ram_below_4g(%"PRIu64
>> -                         ") not a multiple of 1G; possible bad performance.",
>> -                         pcms->max_ram_below_4g);
>> +    lowmem = pcms->max_ram_below_4g;
>> +    if (machine->ram_size >= pcms->max_ram_below_4g) {
>> +        if (pcmc->gigabyte_align) {
>> +            if (lowmem > 0xc0000000) {
>> +                lowmem = 0xc0000000;
>> +            }
>> +            if (lowmem & ((1ULL << 30) - 1)) {
>> +                error_report("Warning: Large machine and max_ram_below_4g "
>> +                             "(%" PRIu64 ") not a multiple of 1G; "
>> +                             "possible bad performance.",
>> +                             pcms->max_ram_below_4g);
>> +            }
>>          }
>>      }
>>  
> 
>
Gerd Hoffmann Jan. 11, 2016, 8:26 a.m. UTC | #3
On Fr, 2016-01-08 at 19:32 +0100, Laszlo Ersek wrote:
> On 01/08/16 18:45, Igor Mammedov wrote:
> > On Fri,  8 Jan 2016 13:58:03 +0100
> > Gerd Hoffmann <kraxel@redhat.com> wrote:
> > 
> >> This patch extends the functionality of the max-ram-below-4g option
> >> to also allow increasing lowmem.  Use case: Give as much memory as
> >> possible to legacy non-PAE guests.
> >>
> >> While being at it also rework the lowmem calculation logic and add a
> >> longish comment describing how it works and what the compatibility
> >> constrains are.
> > CCing Laszlo as it might affect OVMF
> 
> Thanks a lot for the CC, Igor!
> 
> So I have to investigate this separately for i440fx and Q35.
> 
> (1) For i440fx, OVMF determines the base of the 32-bit PCI hole like this:
> 
>       PciBase = (TopOfLowRam < BASE_2GB) ? BASE_2GB : TopOfLowRam;
> 
> where TopOfLowRam is calculated from the CMOS registers 0x34 and 0x35.
> 
> *If* QEMU is still sticking with the idea of git commit ddaaefb4dd, that
> is, the 32-bit PCI hole still starts immediately after the end of low
> RAM, then this change should be fine for i440fx.

Good.

> Gerd, can you confirm that this new logic for the lowmem/highmem split
> doesn't affect the above?
> 
> In other words, as long as there is no "void" left between the top of
> low RAM and the base of the PCI hole, it doesn't matter where exactly
> the split is.

Yes, the logic is the same as before.  Anything above ram is pci i/o.

> (2) For Q35, the OVMF code is different:

The patch doesn't change q35 behavior.

cheers,
  Gerd
Laszlo Ersek Jan. 11, 2016, 12:16 p.m. UTC | #4
On 01/11/16 09:26, Gerd Hoffmann wrote:
> On Fr, 2016-01-08 at 19:32 +0100, Laszlo Ersek wrote:
>> On 01/08/16 18:45, Igor Mammedov wrote:
>>> On Fri,  8 Jan 2016 13:58:03 +0100
>>> Gerd Hoffmann <kraxel@redhat.com> wrote:
>>>
>>>> This patch extends the functionality of the max-ram-below-4g option
>>>> to also allow increasing lowmem.  Use case: Give as much memory as
>>>> possible to legacy non-PAE guests.
>>>>
>>>> While being at it also rework the lowmem calculation logic and add a
>>>> longish comment describing how it works and what the compatibility
>>>> constrains are.
>>> CCing Laszlo as it might affect OVMF
>>
>> Thanks a lot for the CC, Igor!
>>
>> So I have to investigate this separately for i440fx and Q35.
>>
>> (1) For i440fx, OVMF determines the base of the 32-bit PCI hole like this:
>>
>>       PciBase = (TopOfLowRam < BASE_2GB) ? BASE_2GB : TopOfLowRam;
>>
>> where TopOfLowRam is calculated from the CMOS registers 0x34 and 0x35.
>>
>> *If* QEMU is still sticking with the idea of git commit ddaaefb4dd, that
>> is, the 32-bit PCI hole still starts immediately after the end of low
>> RAM, then this change should be fine for i440fx.
> 
> Good.
> 
>> Gerd, can you confirm that this new logic for the lowmem/highmem split
>> doesn't affect the above?
>>
>> In other words, as long as there is no "void" left between the top of
>> low RAM and the base of the PCI hole, it doesn't matter where exactly
>> the split is.
> 
> Yes, the logic is the same as before.  Anything above ram is pci i/o.
> 
>> (2) For Q35, the OVMF code is different:
> 
> The patch doesn't change q35 behavior.

Thanks for confirming!

Acked-by: Laszlo Ersek <lersek@redhat.com>
Marcel Apfelbaum Jan. 11, 2016, 12:26 p.m. UTC | #5
On 01/08/2016 02:58 PM, Gerd Hoffmann wrote:
> This patch extends the functionality of the max-ram-below-4g option
> to also allow increasing lowmem.  Use case: Give as much memory as
> possible to legacy non-PAE guests.
>
> While being at it also rework the lowmem calculation logic and add a
> longish comment describing how it works and what the compatibility
> constrains are.
>
> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
> ---
>   hw/i386/pc.c      |  2 +-
>   hw/i386/pc_piix.c | 61 +++++++++++++++++++++++++++++++++++--------------------
>   2 files changed, 40 insertions(+), 23 deletions(-)
>
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 459260b..1332269 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -1887,7 +1887,7 @@ static void pc_machine_initfn(Object *obj)
>                           pc_machine_get_hotplug_memory_region_size,
>                           NULL, NULL, NULL, &error_abort);
>
> -    pcms->max_ram_below_4g = 1ULL << 32; /* 4G */
> +    pcms->max_ram_below_4g = 0xe0000000; /* 3.5G */
>       object_property_add(obj, PC_MACHINE_MAX_RAM_BELOW_4G, "size",
>                           pc_machine_get_max_ram_below_4g,
>                           pc_machine_set_max_ram_below_4g,
> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> index 438cdae..3743736 100644
> --- a/hw/i386/pc_piix.c
> +++ b/hw/i386/pc_piix.c
> @@ -87,29 +87,46 @@ static void pc_init1(MachineState *machine,
>       PcGuestInfo *guest_info;
>       ram_addr_t lowmem;
>
> -    /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory).
> -     * If it doesn't, we need to split it in chunks below and above 4G.
> -     * In any case, try to make sure that guest addresses aligned at
> -     * 1G boundaries get mapped to host addresses aligned at 1G boundaries.
> -     * For old machine types, use whatever split we used historically to avoid
> -     * breaking migration.
> +    /*
> +     * Calculate ram split, for memory below and above 4G.  It's a bit
> +     * complicated for backward compatibility reasons ...
> +     *
> +     *  - Traditional split is 3.5G (lowmem = 0xe0000000).  This is the
> +     *    default value for max_ram_below_4g now.
> +     *
> +     *  - Then, to gigabyte align the memory, we move the split to 3G
> +     *    (lowmem = 0xc0000000).  But only in case we have to split in
> +     *    the first place, i.e. ram_size is larger than (traditional)
> +     *    lowmem.  And for new machine types (gigabyte_align = true)
> +     *    only, for live migration compatibility reasons.
> +     *
> +     *  - Next the max-ram-below-4g option was added, which allowed to
> +     *    reduce lowmem to a smaller value, to allow a larger PCI I/O
> +     *    window below 4G.  qemu doesn't enforce gigabyte alignment here,
> +     *    but prints a warning.
> +     *
> +     *  - Finally max-ram-below-4g got updated to also allow raising lowmem,
> +     *    so legacy non-PAE guests can get as much memory as possible in
> +     *    the 32bit address space below 4G.
> +     *
> +     * Examples:
> +     *    qemu -M pc-1.7 -m 4G    (old default)    -> 3584M low,  512M high
> +     *    qemu -M pc -m 4G        (new default)    -> 3072M low, 1024M high
> +     *    qemu -M pc,max-ram-below-4g=2G -m 4G     -> 2048M low, 2048M high
> +     *    qemu -M pc,max-ram-below-4g=4G -m 3968M  -> 3968M low (=4G-128M)
>        */
> -    if (machine->ram_size >= 0xe0000000) {
> -        lowmem = pcmc->gigabyte_align ? 0xc0000000 : 0xe0000000;
> -    } else {
> -        lowmem = 0xe0000000;
> -    }
> -
> -    /* Handle the machine opt max-ram-below-4g.  It is basically doing
> -     * min(qemu limit, user limit).
> -     */
> -    if (lowmem > pcms->max_ram_below_4g) {
> -        lowmem = pcms->max_ram_below_4g;
> -        if (machine->ram_size - lowmem > lowmem &&
> -            lowmem & ((1ULL << 30) - 1)) {
> -            error_report("Warning: Large machine and max_ram_below_4g(%"PRIu64
> -                         ") not a multiple of 1G; possible bad performance.",
> -                         pcms->max_ram_below_4g);
> +    lowmem = pcms->max_ram_below_4g;
> +    if (machine->ram_size >= pcms->max_ram_below_4g) {
> +        if (pcmc->gigabyte_align) {
> +            if (lowmem > 0xc0000000) {
> +                lowmem = 0xc0000000;
> +            }
> +            if (lowmem & ((1ULL << 30) - 1)) {
> +                error_report("Warning: Large machine and max_ram_below_4g "
> +                             "(%" PRIu64 ") not a multiple of 1G; "
> +                             "possible bad performance.",
> +                             pcms->max_ram_below_4g);
> +            }
>           }
>       }
>
>

Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>

Thank you for the detailed explanation!
Marcel
Eric Blake Jan. 14, 2016, 11:45 p.m. UTC | #6
On 01/08/2016 05:58 AM, Gerd Hoffmann wrote:
> This patch extends the functionality of the max-ram-below-4g option
> to also allow increasing lowmem.  Use case: Give as much memory as
> possible to legacy non-PAE guests.
> 
> While being at it also rework the lowmem calculation logic and add a
> longish comment describing how it works and what the compatibility
> constrains are.

s/constrains/constraints/

> 
> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
> ---

> +            if (lowmem & ((1ULL << 30) - 1)) {
> +                error_report("Warning: Large machine and max_ram_below_4g "
> +                             "(%" PRIu64 ") not a multiple of 1G; "
> +                             "possible bad performance.",

No trailing '.' in error_report(), please.
Eduardo Habkost Jan. 19, 2016, 12:37 p.m. UTC | #7
On Fri, Jan 08, 2016 at 01:58:03PM +0100, Gerd Hoffmann wrote:
> This patch extends the functionality of the max-ram-below-4g option
> to also allow increasing lowmem.  Use case: Give as much memory as
> possible to legacy non-PAE guests.
> 
> While being at it also rework the lowmem calculation logic and add a
> longish comment describing how it works and what the compatibility
> constrains are.
> 
> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
> ---
>  hw/i386/pc.c      |  2 +-
>  hw/i386/pc_piix.c | 61 +++++++++++++++++++++++++++++++++++--------------------
>  2 files changed, 40 insertions(+), 23 deletions(-)
> 
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 459260b..1332269 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -1887,7 +1887,7 @@ static void pc_machine_initfn(Object *obj)
>                          pc_machine_get_hotplug_memory_region_size,
>                          NULL, NULL, NULL, &error_abort);
>  
> -    pcms->max_ram_below_4g = 1ULL << 32; /* 4G */
> +    pcms->max_ram_below_4g = 0xe0000000; /* 3.5G */
>      object_property_add(obj, PC_MACHINE_MAX_RAM_BELOW_4G, "size",
>                          pc_machine_get_max_ram_below_4g,
>                          pc_machine_set_max_ram_below_4g,
> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> index 438cdae..3743736 100644
> --- a/hw/i386/pc_piix.c
> +++ b/hw/i386/pc_piix.c
> @@ -87,29 +87,46 @@ static void pc_init1(MachineState *machine,
>      PcGuestInfo *guest_info;
>      ram_addr_t lowmem;
>  
> -    /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory).
> -     * If it doesn't, we need to split it in chunks below and above 4G.
> -     * In any case, try to make sure that guest addresses aligned at
> -     * 1G boundaries get mapped to host addresses aligned at 1G boundaries.
> -     * For old machine types, use whatever split we used historically to avoid
> -     * breaking migration.
> +    /*
> +     * Calculate ram split, for memory below and above 4G.  It's a bit
> +     * complicated for backward compatibility reasons ...
> +     *
> +     *  - Traditional split is 3.5G (lowmem = 0xe0000000).  This is the
> +     *    default value for max_ram_below_4g now.
> +     *
> +     *  - Then, to gigabyte align the memory, we move the split to 3G
> +     *    (lowmem = 0xc0000000).  But only in case we have to split in
> +     *    the first place, i.e. ram_size is larger than (traditional)
> +     *    lowmem.  And for new machine types (gigabyte_align = true)
> +     *    only, for live migration compatibility reasons.
> +     *
> +     *  - Next the max-ram-below-4g option was added, which allowed to
> +     *    reduce lowmem to a smaller value, to allow a larger PCI I/O
> +     *    window below 4G.  qemu doesn't enforce gigabyte alignment here,
> +     *    but prints a warning.
> +     *
> +     *  - Finally max-ram-below-4g got updated to also allow raising lowmem,
> +     *    so legacy non-PAE guests can get as much memory as possible in
> +     *    the 32bit address space below 4G.
> +     *
> +     * Examples:
> +     *    qemu -M pc-1.7 -m 4G    (old default)    -> 3584M low,  512M high
> +     *    qemu -M pc -m 4G        (new default)    -> 3072M low, 1024M high
> +     *    qemu -M pc,max-ram-below-4g=2G -m 4G     -> 2048M low, 2048M high
> +     *    qemu -M pc,max-ram-below-4g=4G -m 3968M  -> 3968M low (=4G-128M)

I assume max-ram-below-4g > 3.5G was unsupported before, and we
are breaking compatibility intentionally.

But what exactly is the set of values for max-ram-below-4g we
must keep working without breaking? Because this patch also
changes the resulting memory layout when
  3G < max_ram_below_4g < ram_size < 3.5G
e.g.:
  qemu -M pc,max-ram-below-4g=3200M -m 3328M
Gerd Hoffmann Jan. 20, 2016, 2:55 p.m. UTC | #8
Hi,

> > +     *    qemu -M pc,max-ram-below-4g=2G -m 4G     -> 2048M low, 2048M high

> I assume max-ram-below-4g > 3.5G was unsupported before, and we
> are breaking compatibility intentionally.

max-ram-below-4g did only reduce memory, so max-ram-below-4g > 3.5G (or
max-ram-below-4g > 3G with gigabyte align) had no effect and therefore
is something pretty pointless.

I'd expect the one case quoted above to be the only case relevant in
practice (i.e. move split from 3G to 2G for more PCI I/O space),
especially given that the option was added after gigabyte alignment
support.

> Because this patch also
> changes the resulting memory layout when
>   3G < max_ram_below_4g < ram_size < 3.5G
> e.g.:
>   qemu -M pc,max-ram-below-4g=3200M -m 3328M

Ah, I see.  With the patch applied, the gigabyte-align option takes
precedence, so qemu wouldn't give you 3200M lowmem.  I would be highly
surprised to see such a configuration in the wild ...

cheers,
  Gerd
Eduardo Habkost Jan. 20, 2016, 3:34 p.m. UTC | #9
On Wed, Jan 20, 2016 at 03:55:33PM +0100, Gerd Hoffmann wrote:
>   Hi,
> 
> > > +     *    qemu -M pc,max-ram-below-4g=2G -m 4G     -> 2048M low, 2048M high
> 
> > I assume max-ram-below-4g > 3.5G was unsupported before, and we
> > are breaking compatibility intentionally.
> 
> max-ram-below-4g did only reduce memory, so max-ram-below-4g > 3.5G (or
> max-ram-below-4g > 3G with gigabyte align) had no effect and therefore
> is something pretty pointless.

I see. So, if max-ram-below-4g > 3G with gigabyte align was
unsupported too, my example below is also not relevant regarding
compatibility.

> 
> I'd expect the one case quoted above to be the only case relevant in
> practice (i.e. move split from 3G to 2G for more PCI I/O space),
> especially given that the option was added after gigabyte alignment
> support.
> 
> > Because this patch also
> > changes the resulting memory layout when
> >   3G < max_ram_below_4g < ram_size < 3.5G
> > e.g.:
> >   qemu -M pc,max-ram-below-4g=3200M -m 3328M
> 
> Ah, I see.  With the patch applied the gigabyte align option is weighed
> higher, so qemu wouldn't give you 3200M lowmem.  I would be highly
> surprised to see such a configuration in the wild ...

The new behavior looks OK, considering that the option is just
"_max_ RAM below 4G", not "split lowmem at this exact address".
But it is inconsistent with the behavior when
max-ram-below-4g < 3G (where gigabyte_align is ignored), and
something we will need to keep compatibility for a long time.

Considering that we never supported
  gigabyte_align && max_ram_below_4g > 3G ||
  max_ram_below_4g > 3.5G
before, we could simply remove the MachineClass::gigabyte_align
field from pc_piix, and just do the following:

* pc > 1.7: max_ram_below_4g = 3G
  (equivalent to gigabyte_align=true)
* pc <= 1.7: max_ram_below_4g = 3.5G
  (equivalent to gigabyte_align=false)
Eduardo Habkost Jan. 20, 2016, 5:15 p.m. UTC | #10
On Wed, Jan 20, 2016 at 01:34:29PM -0200, Eduardo Habkost wrote:
[...]
> Considering that we never supported
>   gigabyte_align && max_ram_below_4g > 3G ||
>   max_ram_below_4g > 3.5G
> before, we could simply remove the MachineClass::gigabyte_align
> field from pc_piix, and just do the following:
> 
> * pc > 1.7: max_ram_below_4g = 3G
>   (equivalent to gigabyte_align=true)
> * pc <= 1.7: max_ram_below_4g = 3.5G
>   (equivalent to gigabyte_align=false)

Ignore the suggestion above. I forgot that gigabyte_align applies
only if ram_size > 3.5GB (so setting max_ram_below_4g = 3G on
pc > 1.7 wouldn't work). So, unless somebody has a suggestion
that makes this logic simpler:

Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Michael S. Tsirkin Jan. 20, 2016, 5:25 p.m. UTC | #11
On Wed, Jan 20, 2016 at 03:15:04PM -0200, Eduardo Habkost wrote:
> On Wed, Jan 20, 2016 at 01:34:29PM -0200, Eduardo Habkost wrote:
> [...]
> > Considering that we never supported
> >   gigabyte_align && max_ram_below_4g > 3G ||
> >   max_ram_below_4g > 3.5G
> > before, we could simply remove the MachineClass::gigabyte_align
> > field from pc_piix, and just do the following:
> > 
> > * pc > 1.7: max_ram_below_4g = 3G
> >   (equivalent to gigabyte_align=true)
> > * pc <= 1.7: max_ram_below_4g = 3.5G
> >   (equivalent to gigabyte_align=false)
> 
> Ignore the suggestion above. I forgot that gigabyte_align applies
> only if ram_size > 3.5GB (so setting max_ram_below_4g = 3G on
> pc > 1.7 wouldn't work). So, unless somebody has a suggestion
> that makes this logic simpler:

I wonder whether we should just bite the bullet and ask management to
maintain the physical memory map for us, instead of trying to give us
hints.

Thoughts?


> Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
>
> -- 
> Eduardo
Gerd Hoffmann Jan. 21, 2016, 7:48 a.m. UTC | #12
On Mi, 2016-01-20 at 19:25 +0200, Michael S. Tsirkin wrote:
> On Wed, Jan 20, 2016 at 03:15:04PM -0200, Eduardo Habkost wrote:
> > On Wed, Jan 20, 2016 at 01:34:29PM -0200, Eduardo Habkost wrote:
> > [...]
> > > Considering that we never supported
> > >   gigabyte_align && max_ram_below_4g > 3G ||
> > >   max_ram_below_4g > 3.5G
> > > before, we could simply remove the MachineClass::gigabyte_align
> > > field from pc_piix, and just do the following:
> > > 
> > > * pc > 1.7: max_ram_below_4g = 3G
> > >   (equivalent to gigabyte_align=true)
> > > * pc <= 1.7: max_ram_below_4g = 3.5G
> > >   (equivalent to gigabyte_align=false)
> > 
> > Ignore the suggestion above. I forgot that gigabyte_align applies
> > only if ram_size > 3.5GB (so setting max_ram_below_4g = 3G on
> > pc > 1.7 wouldn't work). So, unless somebody has a suggestion
> > that makes this logic simpler:
> 
> I wonder whether we should just bite the bullet and ask management to
> maintain the physical memory map for us, instead of trying to give us
> hints.

I doubt this would simplify things, given the backward compatibility
constraints we have.

cheers,
  Gerd
Michael S. Tsirkin Jan. 21, 2016, 9:37 a.m. UTC | #13
On Thu, Jan 21, 2016 at 08:48:53AM +0100, Gerd Hoffmann wrote:
> On Mi, 2016-01-20 at 19:25 +0200, Michael S. Tsirkin wrote:
> > On Wed, Jan 20, 2016 at 03:15:04PM -0200, Eduardo Habkost wrote:
> > > On Wed, Jan 20, 2016 at 01:34:29PM -0200, Eduardo Habkost wrote:
> > > [...]
> > > > Considering that we never supported
> > > >   gigabyte_align && max_ram_below_4g > 3G ||
> > > >   max_ram_below_4g > 3.5G
> > > > before, we could simply remove the MachineClass::gigabyte_align
> > > > field from pc_piix, and just do the following:
> > > > 
> > > > * pc > 1.7: max_ram_below_4g = 3G
> > > >   (equivalent to gigabyte_align=true)
> > > > * pc <= 1.7: max_ram_below_4g = 3.5G
> > > >   (equivalent to gigabyte_align=false)
> > > 
> > > Ignore the suggestion above. I forgot that gigabyte_align applies
> > > only if ram_size > 3.5GB (so setting max_ram_below_4g = 3G on
> > > pc > 1.7 wouldn't work). So, unless somebody has a suggestion
> > > that makes this logic simpler:
> > 
> > I wonder whether we should just bite the bullet and ask management to
> > maintain the physical memory map for us, instead of trying to give us
> > hints.
> 
> I doubt this simplified things, given the backward compatibility
> constrains we have.
> 
> cheers,
>   Gerd

That's exactly what would become simple.
For backwards compatibility we would leave things alone
if the new flags for the memory map aren't specified.

This would allow people to e.g. allocate phy address
ranges for things like nvdimm which has been
problematic in the past.


The issue as I see it is not compatibility, but rather creating a way
for both management and command line users to figure out valid address
ranges. And, it all might just be too complex for users.
Gerd Hoffmann Jan. 22, 2016, 10:51 a.m. UTC | #14
Hi,

> > > I wonder whether we should just bite the bullet and ask management to
> > > maintain the physical memory map for us, instead of trying to give us
> > > hints.
> > 
> > I doubt this simplified things, given the backward compatibility
> > constrains we have.
> > 
> > cheers,
> >   Gerd
> 
> That's exactly what would become simple.
> For backwards compatibility we would leave things alone
> if the new flags for the memory map aren't specified.

But we'll add a bunch of new code for the new config mode which allows
management to maintain the physical memory map.  And we'll expect
management to know about a bunch of machine type internals.  That isn't a
simplification.

> This would allow people to e.g. allocate phy address
> ranges for things like nvdimm which has been
> problematic in the past.

Didn't follow nvdimm discussions.  If you think we really need that
anyway to solve certain issues, sure, go ahead and I happily adjust this
patch to use the new infrastructure.

cheers,
  Gerd
Michael S. Tsirkin Jan. 24, 2016, 6:37 a.m. UTC | #15
On Fri, Jan 22, 2016 at 11:51:54AM +0100, Gerd Hoffmann wrote:
>   Hi,
> 
> > > > I wonder whether we should just bite the bullet and ask management to
> > > > maintain the physical memory map for us, instead of trying to give us
> > > > hints.
> > > 
> > > I doubt this simplified things, given the backward compatibility
> > > constrains we have.
> > > 
> > > cheers,
> > >   Gerd
> > 
> > That's exactly what would become simple.
> > For backwards compatibility we would leave things alone
> > if the new flags for the memory map aren't specified.
> 
> But we'll add a bunch of new code for the new config mode which allows
> management to maintain the physical memory map.  And we'll expect
> management know about a bunch of machine type internals.


Yes, we don't want that. I was vaguely thinking of some kind
of query that reports the required info, so management
just has to maintain that.


>  That isn't a
> simplification.
> 
> > This would allow people to e.g. allocate phy address
> > ranges for things like nvdimm which has been
> > problematic in the past.
> 
> Didn't follow nvdimm discussions.  If you think we really need that
> anyway to solve certain issues, sure, go ahead and I happily adjust this
> patch to use the new infrastructure.
> 
> cheers,
>   Gerd


I'd like to gather some feedback from management folk first.
diff mbox

Patch

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 459260b..1332269 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1887,7 +1887,7 @@  static void pc_machine_initfn(Object *obj)
                         pc_machine_get_hotplug_memory_region_size,
                         NULL, NULL, NULL, &error_abort);
 
-    pcms->max_ram_below_4g = 1ULL << 32; /* 4G */
+    pcms->max_ram_below_4g = 0xe0000000; /* 3.5G */
     object_property_add(obj, PC_MACHINE_MAX_RAM_BELOW_4G, "size",
                         pc_machine_get_max_ram_below_4g,
                         pc_machine_set_max_ram_below_4g,
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 438cdae..3743736 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -87,29 +87,46 @@  static void pc_init1(MachineState *machine,
     PcGuestInfo *guest_info;
     ram_addr_t lowmem;
 
-    /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory).
-     * If it doesn't, we need to split it in chunks below and above 4G.
-     * In any case, try to make sure that guest addresses aligned at
-     * 1G boundaries get mapped to host addresses aligned at 1G boundaries.
-     * For old machine types, use whatever split we used historically to avoid
-     * breaking migration.
+    /*
+     * Calculate ram split, for memory below and above 4G.  It's a bit
+     * complicated for backward compatibility reasons ...
+     *
+     *  - Traditional split is 3.5G (lowmem = 0xe0000000).  This is the
+     *    default value for max_ram_below_4g now.
+     *
+     *  - Then, to gigabyte align the memory, we move the split to 3G
+     *    (lowmem = 0xc0000000).  But only in case we have to split in
+     *    the first place, i.e. ram_size is larger than (traditional)
+     *    lowmem.  And for new machine types (gigabyte_align = true)
+     *    only, for live migration compatibility reasons.
+     *
+     *  - Next the max-ram-below-4g option was added, which allowed to
+     *    reduce lowmem to a smaller value, to allow a larger PCI I/O
+     *    window below 4G.  qemu doesn't enforce gigabyte alignment here,
+     *    but prints a warning.
+     *
+     *  - Finally max-ram-below-4g got updated to also allow raising lowmem,
+     *    so legacy non-PAE guests can get as much memory as possible in
+     *    the 32bit address space below 4G.
+     *
+     * Examples:
+     *    qemu -M pc-1.7 -m 4G    (old default)    -> 3584M low,  512M high
+     *    qemu -M pc -m 4G        (new default)    -> 3072M low, 1024M high
+     *    qemu -M pc,max-ram-below-4g=2G -m 4G     -> 2048M low, 2048M high
+     *    qemu -M pc,max-ram-below-4g=4G -m 3968M  -> 3968M low (=4G-128M)
      */
-    if (machine->ram_size >= 0xe0000000) {
-        lowmem = pcmc->gigabyte_align ? 0xc0000000 : 0xe0000000;
-    } else {
-        lowmem = 0xe0000000;
-    }
-
-    /* Handle the machine opt max-ram-below-4g.  It is basically doing
-     * min(qemu limit, user limit).
-     */
-    if (lowmem > pcms->max_ram_below_4g) {
-        lowmem = pcms->max_ram_below_4g;
-        if (machine->ram_size - lowmem > lowmem &&
-            lowmem & ((1ULL << 30) - 1)) {
-            error_report("Warning: Large machine and max_ram_below_4g(%"PRIu64
-                         ") not a multiple of 1G; possible bad performance.",
-                         pcms->max_ram_below_4g);
+    lowmem = pcms->max_ram_below_4g;
+    if (machine->ram_size >= pcms->max_ram_below_4g) {
+        if (pcmc->gigabyte_align) {
+            if (lowmem > 0xc0000000) {
+                lowmem = 0xc0000000;
+            }
+            if (lowmem & ((1ULL << 30) - 1)) {
+                error_report("Warning: Large machine and max_ram_below_4g "
+                             "(%" PRIu64 ") not a multiple of 1G; "
+                             "possible bad performance.",
+                             pcms->max_ram_below_4g);
+            }
         }
     }