diff mbox

[v5,2/3] pc & q35: Add new machine opt max-ram-below-4g

Message ID 1402077126-17799-3-git-send-email-dslutz@verizon.com
State New
Headers show

Commit Message

Don Slutz June 6, 2014, 5:52 p.m. UTC
This is a pc & q35 only machine opt.  One use is to allow for more
ram in a 32bit guest, for example:

-machine pc,max-ram-below-4g=3.75G

If you add enough PCI devices then all mmio for them will not fit
below 4G which may not be the layout the user wanted. This allows
you to increase the below 4G address space that PCI devices can use
(aka decrease ram below 4G) and therefore in more cases not have any
mmio that is above 4G.

For example using "-machine pc,max-ram-below-4g=2G" on the command
line will limit the amount of ram that is below 4G to 2G.

Signed-off-by: Don Slutz <dslutz@verizon.com>
---
v5:
  Re-work based on:

  https://github.com/imammedo/qemu/commits/memory-hotplug-v11


 hw/i386/pc.c         | 38 ++++++++++++++++++++++++++++++++++++++
 hw/i386/pc_piix.c    | 15 ++++++++++++---
 hw/i386/pc_q35.c     | 15 ++++++++++++---
 include/hw/i386/pc.h |  3 +++
 vl.c                 |  4 ++++
 5 files changed, 69 insertions(+), 6 deletions(-)

Comments

Michael S. Tsirkin June 8, 2014, 3:40 p.m. UTC | #1
On Fri, Jun 06, 2014 at 01:52:05PM -0400, Don Slutz wrote:
> This is a pc & q35 only machine opt.  One use is to allow for more
> ram in a 32bit guest for example:
> 
> -machine pc,max-ram-below-4g=3.75G
> 
> If you add enough PCI devices then all mmio for them will not fit
> below 4G which may not be the layout the user wanted. This allows
> you to increase the below 4G address space that PCI devices can use
> (aka decrease ram below 4G) and therefore in more cases not have any
> mmio that is above 4G.
> 
> For example using "-machine pc,max-ram-below-4g=2G" on the command
> line will limit the amount of ram that is below 4G to 2G.
> 
> Signed-off-by: Don Slutz <dslutz@verizon.com>
> ---
> v5:
>   Re-work based on:
> 
>   https://github.com/imammedo/qemu/commits/memory-hotplug-v11
> 
> 
>  hw/i386/pc.c         | 38 ++++++++++++++++++++++++++++++++++++++
>  hw/i386/pc_piix.c    | 15 ++++++++++++---
>  hw/i386/pc_q35.c     | 15 ++++++++++++---
>  include/hw/i386/pc.h |  3 +++
>  vl.c                 |  4 ++++
>  5 files changed, 69 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 7cdba10..bccb746 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -1644,11 +1644,49 @@ pc_machine_get_hotplug_memory_region_size(Object *obj, Visitor *v, void *opaque,
>      visit_type_int(v, &value, name, errp);
>  }
>  
> +static void pc_machine_get_max_ram_below_4g(Object *obj, Visitor *v,
> +                                         void *opaque, const char *name,
> +                                         Error **errp)
> +{
> +    PCMachineState *pcms = PC_MACHINE(obj);
> +    uint64_t value = pcms->max_ram_below_4g;
> +
> +    visit_type_size(v, &value, name, errp);
> +}
> +
> +static void pc_machine_set_max_ram_below_4g(Object *obj, Visitor *v,
> +                                         void *opaque, const char *name,
> +                                         Error **errp)
> +{
> +    PCMachineState *pcms = PC_MACHINE(obj);
> +    Error *error = NULL;
> +    uint64_t value;
> +
> +    visit_type_size(v, &value, name, &error);
> +    if (error) {
> +        error_propagate(errp, error);
> +        return;
> +    }
> +    if (value > (1ULL << 32)) {
> +        error_set(&error, ERROR_CLASS_GENERIC_ERROR,
> +                  "Machine option 'max-ram-below-4g=%"PRIu64
> +                  "' expects size less then or equal to 4G", value);

less than

> +        error_propagate(errp, error);
> +        return;
> +    }
> +
> +    pcms->max_ram_below_4g = value;
> +}
> +
>  static void pc_machine_initfn(Object *obj)
>  {
>      object_property_add(obj, PC_MACHINE_MEMHP_REGION_SIZE, "int",
>                          pc_machine_get_hotplug_memory_region_size,
>                          NULL, NULL, NULL, NULL);
> +    object_property_add(obj, PC_MACHINE_MAX_RAM_BELOW_4G,  "size",
> +                        pc_machine_get_max_ram_below_4g,
> +                        pc_machine_set_max_ram_below_4g,
> +                        NULL, NULL, NULL);
>  }
>  
>  static void pc_machine_class_init(ObjectClass *oc, void *data)
> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> index 40f6eaf..25f4727 100644
> --- a/hw/i386/pc_piix.c
> +++ b/hw/i386/pc_piix.c
> @@ -98,6 +98,13 @@ static void pc_init1(MachineState *machine,
>      DeviceState *icc_bridge;
>      FWCfgState *fw_cfg = NULL;
>      PcGuestInfo *guest_info;
> +    Object *mo = qdev_get_machine();
> +    PCMachineState *pcms = PC_MACHINE(mo);
> +    ram_addr_t lowmem = 0xe0000000;
> +
> +    if (pcms && pcms->max_ram_below_4g) {

Is pcms ever NULL? If yes why?

> +        lowmem = pcms->max_ram_below_4g;
> +    }
>  
>      /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory).
>       * If it doesn't, we need to split it in chunks below and above 4G.
> @@ -106,8 +113,10 @@ static void pc_init1(MachineState *machine,
>       * For old machine types, use whatever split we used historically to avoid
>       * breaking migration.
>       */
> -    if (machine->ram_size >= 0xe0000000) {
> -        ram_addr_t lowmem = gigabyte_align ? 0xc0000000 : 0xe0000000;
> +    if (machine->ram_size >= lowmem) {
> +        if (!(pcms && pcms->max_ram_below_4g) && gigabyte_align) {
> +            lowmem = 0xc0000000;
> +        }
>          above_4g_mem_size = machine->ram_size - lowmem;
>          below_4g_mem_size = lowmem;
>      } else {


So why do we need gigabyte_align anymore?
Can't we set property to 0xc0000000 by default, and
override for old machine types?

Also, a value that isn't a multiple of 1G will lead to bad
performance for large machines which do have above_4g_mem_size.
Let's detect and print a warning.



> @@ -122,7 +131,7 @@ static void pc_init1(MachineState *machine,
>      }
>  
>      icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
> -    object_property_add_child(qdev_get_machine(), "icc-bridge",
> +    object_property_add_child(mo, "icc-bridge",
>                                OBJECT(icc_bridge), NULL);
>  
>      pc_cpus_init(machine->cpu_model, icc_bridge);
> diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
> index e28ce40..155cdf1 100644
> --- a/hw/i386/pc_q35.c
> +++ b/hw/i386/pc_q35.c
> @@ -85,6 +85,13 @@ static void pc_q35_init(MachineState *machine)
>      PCIDevice *ahci;
>      DeviceState *icc_bridge;
>      PcGuestInfo *guest_info;
> +    Object *mo = qdev_get_machine();
> +    PCMachineState *pcms = PC_MACHINE(mo);
> +    ram_addr_t lowmem = 0xb0000000;
> +
> +    if (pcms && pcms->max_ram_below_4g) {
> +        lowmem = pcms->max_ram_below_4g;
> +    }
>  
>      /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory
>       * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping
> @@ -95,8 +102,10 @@ static void pc_q35_init(MachineState *machine)
>       * For old machine types, use whatever split we used historically to avoid
>       * breaking migration.
>       */
> -    if (machine->ram_size >= 0xb0000000) {
> -        ram_addr_t lowmem = gigabyte_align ? 0x80000000 : 0xb0000000;
> +    if (machine->ram_size >= lowmem) {
> +        if (!(pcms && pcms->max_ram_below_4g) && gigabyte_align) {
> +            lowmem = 0x800000000;
> +        }
>          above_4g_mem_size = machine->ram_size - lowmem;
>          below_4g_mem_size = lowmem;
>      } else {
> @@ -111,7 +120,7 @@ static void pc_q35_init(MachineState *machine)
>      }
>  
>      icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
> -    object_property_add_child(qdev_get_machine(), "icc-bridge",
> +    object_property_add_child(mo, "icc-bridge",
>                                OBJECT(icc_bridge), NULL);
>  
>      pc_cpus_init(machine->cpu_model, icc_bridge);
> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> index 19530bd..2d8b562 100644
> --- a/include/hw/i386/pc.h
> +++ b/include/hw/i386/pc.h
> @@ -32,10 +32,13 @@ struct PCMachineState {
>      MemoryRegion hotplug_memory;
>  
>      HotplugHandler *acpi_dev;
> +
> +    uint64_t max_ram_below_4g;
>  };
>  
>  #define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device"
>  #define PC_MACHINE_MEMHP_REGION_SIZE "hotplug-memory-region-size"
> +#define PC_MACHINE_MAX_RAM_BELOW_4G "max-ram-below-4g"
>  
>  /**
>   * PCMachineClass:
> diff --git a/vl.c b/vl.c
> index 5e77a27..cffb9c5 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -382,6 +382,10 @@ static QemuOptsList qemu_machine_opts = {
>              .name = "kvm-type",
>              .type = QEMU_OPT_STRING,
>              .help = "Specifies the KVM virtualization mode (HV, PR)",
> +        },{
> +            .name = PC_MACHINE_MAX_RAM_BELOW_4G,
> +            .type = QEMU_OPT_SIZE,
> +            .help = "maximum ram below the 4G boundary (32bit boundary)",
>          },
>          { /* End of list */ }
>      },
> -- 
> 1.8.4
Don Slutz June 9, 2014, 2:20 p.m. UTC | #2
On 06/08/14 11:40, Michael S. Tsirkin wrote:
> On Fri, Jun 06, 2014 at 01:52:05PM -0400, Don Slutz wrote:
>> This is a pc & q35 only machine opt.  One use is to allow for more
>> ram in a 32bit guest for example:
>>
>> -machine pc,max-ram-below-4g=3.75G
>>
>> If you add enough PCI devices then all mmio for them will not fit
>> below 4G which may not be the layout the user wanted. This allows
>> you to increase the below 4G address space that PCI devices can use
>> (aka decrease ram below 4G) and therefore in more cases not have any
>> mmio that is above 4G.
>>
>> For example using "-machine pc,max-ram-below-4g=2G" on the command
>> line will limit the amount of ram that is below 4G to 2G.
>>
>> Signed-off-by: Don Slutz <dslutz@verizon.com>
>> ---
>> v5:
>>    Re-work based on:
>>
>>    https://github.com/imammedo/qemu/commits/memory-hotplug-v11
>>
>>
>>   hw/i386/pc.c         | 38 ++++++++++++++++++++++++++++++++++++++
>>   hw/i386/pc_piix.c    | 15 ++++++++++++---
>>   hw/i386/pc_q35.c     | 15 ++++++++++++---
>>   include/hw/i386/pc.h |  3 +++
>>   vl.c                 |  4 ++++
>>   5 files changed, 69 insertions(+), 6 deletions(-)
>>
>> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
>> index 7cdba10..bccb746 100644
>> --- a/hw/i386/pc.c
>> +++ b/hw/i386/pc.c
>> @@ -1644,11 +1644,49 @@ pc_machine_get_hotplug_memory_region_size(Object *obj, Visitor *v, void *opaque,
>>       visit_type_int(v, &value, name, errp);
>>   }
>>   
>> +static void pc_machine_get_max_ram_below_4g(Object *obj, Visitor *v,
>> +                                         void *opaque, const char *name,
>> +                                         Error **errp)
>> +{
>> +    PCMachineState *pcms = PC_MACHINE(obj);
>> +    uint64_t value = pcms->max_ram_below_4g;
>> +
>> +    visit_type_size(v, &value, name, errp);
>> +}
>> +
>> +static void pc_machine_set_max_ram_below_4g(Object *obj, Visitor *v,
>> +                                         void *opaque, const char *name,
>> +                                         Error **errp)
>> +{
>> +    PCMachineState *pcms = PC_MACHINE(obj);
>> +    Error *error = NULL;
>> +    uint64_t value;
>> +
>> +    visit_type_size(v, &value, name, &error);
>> +    if (error) {
>> +        error_propagate(errp, error);
>> +        return;
>> +    }
>> +    if (value > (1ULL << 32)) {
>> +        error_set(&error, ERROR_CLASS_GENERIC_ERROR,
>> +                  "Machine option 'max-ram-below-4g=%"PRIu64
>> +                  "' expects size less then or equal to 4G", value);
> less than

But the test is greater then.  So "not greater then" is "less then or equal".
Or did you want the test changed?

>> +        error_propagate(errp, error);
>> +        return;
>> +    }
>> +
>> +    pcms->max_ram_below_4g = value;
>> +}
>> +
>>   static void pc_machine_initfn(Object *obj)
>>   {
>>       object_property_add(obj, PC_MACHINE_MEMHP_REGION_SIZE, "int",
>>                           pc_machine_get_hotplug_memory_region_size,
>>                           NULL, NULL, NULL, NULL);
>> +    object_property_add(obj, PC_MACHINE_MAX_RAM_BELOW_4G,  "size",
>> +                        pc_machine_get_max_ram_below_4g,
>> +                        pc_machine_set_max_ram_below_4g,
>> +                        NULL, NULL, NULL);
>>   }
>>   
>>   static void pc_machine_class_init(ObjectClass *oc, void *data)
>> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
>> index 40f6eaf..25f4727 100644
>> --- a/hw/i386/pc_piix.c
>> +++ b/hw/i386/pc_piix.c
>> @@ -98,6 +98,13 @@ static void pc_init1(MachineState *machine,
>>       DeviceState *icc_bridge;
>>       FWCfgState *fw_cfg = NULL;
>>       PcGuestInfo *guest_info;
>> +    Object *mo = qdev_get_machine();
>> +    PCMachineState *pcms = PC_MACHINE(mo);
>> +    ram_addr_t lowmem = 0xe0000000;
>> +
>> +    if (pcms && pcms->max_ram_below_4g) {
> Is pcms ever NULL? If yes why?

Not that I know of.  I would be happy to convert this to an assert(pcms).

>> +        lowmem = pcms->max_ram_below_4g;
>> +    }
>>   
>>       /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory).
>>        * If it doesn't, we need to split it in chunks below and above 4G.
>> @@ -106,8 +113,10 @@ static void pc_init1(MachineState *machine,
>>        * For old machine types, use whatever split we used historically to avoid
>>        * breaking migration.
>>        */
>> -    if (machine->ram_size >= 0xe0000000) {
>> -        ram_addr_t lowmem = gigabyte_align ? 0xc0000000 : 0xe0000000;
>> +    if (machine->ram_size >= lowmem) {
>> +        if (!(pcms && pcms->max_ram_below_4g) && gigabyte_align) {
>> +            lowmem = 0xc0000000;
>> +        }
>>           above_4g_mem_size = machine->ram_size - lowmem;
>>           below_4g_mem_size = lowmem;
>>       } else {
>
> So why do we need gigabyte_align anymore?

Because of qemu 2.0 and the user is not required to specify this option.

> Can't we set property to 0xc0000000 by default, and
> override for old machine types?

There is a strange compatibility issue here, since this code includes ram_size (see:

http://lists.gnu.org/archive/html/qemu-devel/2014-02/msg05146.html

) and xen has a different default.


> Also, a value that isn't a multiple of 1G will lead to bad
> performance for large machines which do have above_4g_mem_size.
> Let's detect and print a warning.

Will Do.

    -Don Slutz

>
>
>> @@ -122,7 +131,7 @@ static void pc_init1(MachineState *machine,
>>       }
>>   
>>       icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
>> -    object_property_add_child(qdev_get_machine(), "icc-bridge",
>> +    object_property_add_child(mo, "icc-bridge",
>>                                 OBJECT(icc_bridge), NULL);
>>   
>>       pc_cpus_init(machine->cpu_model, icc_bridge);
>> diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
>> index e28ce40..155cdf1 100644
>> --- a/hw/i386/pc_q35.c
>> +++ b/hw/i386/pc_q35.c
>> @@ -85,6 +85,13 @@ static void pc_q35_init(MachineState *machine)
>>       PCIDevice *ahci;
>>       DeviceState *icc_bridge;
>>       PcGuestInfo *guest_info;
>> +    Object *mo = qdev_get_machine();
>> +    PCMachineState *pcms = PC_MACHINE(mo);
>> +    ram_addr_t lowmem = 0xb0000000;
>> +
>> +    if (pcms && pcms->max_ram_below_4g) {
>> +        lowmem = pcms->max_ram_below_4g;
>> +    }
>>   
>>       /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory
>>        * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping
>> @@ -95,8 +102,10 @@ static void pc_q35_init(MachineState *machine)
>>        * For old machine types, use whatever split we used historically to avoid
>>        * breaking migration.
>>        */
>> -    if (machine->ram_size >= 0xb0000000) {
>> -        ram_addr_t lowmem = gigabyte_align ? 0x80000000 : 0xb0000000;
>> +    if (machine->ram_size >= lowmem) {
>> +        if (!(pcms && pcms->max_ram_below_4g) && gigabyte_align) {
>> +            lowmem = 0x800000000;
>> +        }
>>           above_4g_mem_size = machine->ram_size - lowmem;
>>           below_4g_mem_size = lowmem;
>>       } else {
>> @@ -111,7 +120,7 @@ static void pc_q35_init(MachineState *machine)
>>       }
>>   
>>       icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
>> -    object_property_add_child(qdev_get_machine(), "icc-bridge",
>> +    object_property_add_child(mo, "icc-bridge",
>>                                 OBJECT(icc_bridge), NULL);
>>   
>>       pc_cpus_init(machine->cpu_model, icc_bridge);
>> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
>> index 19530bd..2d8b562 100644
>> --- a/include/hw/i386/pc.h
>> +++ b/include/hw/i386/pc.h
>> @@ -32,10 +32,13 @@ struct PCMachineState {
>>       MemoryRegion hotplug_memory;
>>   
>>       HotplugHandler *acpi_dev;
>> +
>> +    uint64_t max_ram_below_4g;
>>   };
>>   
>>   #define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device"
>>   #define PC_MACHINE_MEMHP_REGION_SIZE "hotplug-memory-region-size"
>> +#define PC_MACHINE_MAX_RAM_BELOW_4G "max-ram-below-4g"
>>   
>>   /**
>>    * PCMachineClass:
>> diff --git a/vl.c b/vl.c
>> index 5e77a27..cffb9c5 100644
>> --- a/vl.c
>> +++ b/vl.c
>> @@ -382,6 +382,10 @@ static QemuOptsList qemu_machine_opts = {
>>               .name = "kvm-type",
>>               .type = QEMU_OPT_STRING,
>>               .help = "Specifies the KVM virtualization mode (HV, PR)",
>> +        },{
>> +            .name = PC_MACHINE_MAX_RAM_BELOW_4G,
>> +            .type = QEMU_OPT_SIZE,
>> +            .help = "maximum ram below the 4G boundary (32bit boundary)",
>>           },
>>           { /* End of list */ }
>>       },
>> -- 
>> 1.8.4
Michael S. Tsirkin June 9, 2014, 2:38 p.m. UTC | #3
On Mon, Jun 09, 2014 at 10:20:57AM -0400, Don Slutz wrote:
> On 06/08/14 11:40, Michael S. Tsirkin wrote:
> >On Fri, Jun 06, 2014 at 01:52:05PM -0400, Don Slutz wrote:
> >>This is a pc & q35 only machine opt.  One use is to allow for more
> >>ram in a 32bit guest for example:
> >>
> >>-machine pc,max-ram-below-4g=3.75G
> >>
> >>If you add enough PCI devices then all mmio for them will not fit
> >>below 4G which may not be the layout the user wanted. This allows
> >>you to increase the below 4G address space that PCI devices can use
> >>(aka decrease ram below 4G) and therefore in more cases not have any
> >>mmio that is above 4G.
> >>
> >>For example using "-machine pc,max-ram-below-4g=2G" on the command
> >>line will limit the amount of ram that is below 4G to 2G.
> >>
> >>Signed-off-by: Don Slutz <dslutz@verizon.com>
> >>---
> >>v5:
> >>   Re-work based on:
> >>
> >>   https://github.com/imammedo/qemu/commits/memory-hotplug-v11
> >>
> >>
> >>  hw/i386/pc.c         | 38 ++++++++++++++++++++++++++++++++++++++
> >>  hw/i386/pc_piix.c    | 15 ++++++++++++---
> >>  hw/i386/pc_q35.c     | 15 ++++++++++++---
> >>  include/hw/i386/pc.h |  3 +++
> >>  vl.c                 |  4 ++++
> >>  5 files changed, 69 insertions(+), 6 deletions(-)
> >>
> >>diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> >>index 7cdba10..bccb746 100644
> >>--- a/hw/i386/pc.c
> >>+++ b/hw/i386/pc.c
> >>@@ -1644,11 +1644,49 @@ pc_machine_get_hotplug_memory_region_size(Object *obj, Visitor *v, void *opaque,
> >>      visit_type_int(v, &value, name, errp);
> >>  }
> >>+static void pc_machine_get_max_ram_below_4g(Object *obj, Visitor *v,
> >>+                                         void *opaque, const char *name,
> >>+                                         Error **errp)
> >>+{
> >>+    PCMachineState *pcms = PC_MACHINE(obj);
> >>+    uint64_t value = pcms->max_ram_below_4g;
> >>+
> >>+    visit_type_size(v, &value, name, errp);
> >>+}
> >>+
> >>+static void pc_machine_set_max_ram_below_4g(Object *obj, Visitor *v,
> >>+                                         void *opaque, const char *name,
> >>+                                         Error **errp)
> >>+{
> >>+    PCMachineState *pcms = PC_MACHINE(obj);
> >>+    Error *error = NULL;
> >>+    uint64_t value;
> >>+
> >>+    visit_type_size(v, &value, name, &error);
> >>+    if (error) {
> >>+        error_propagate(errp, error);
> >>+        return;
> >>+    }
> >>+    if (value > (1ULL << 32)) {
> >>+        error_set(&error, ERROR_CLASS_GENERIC_ERROR,
> >>+                  "Machine option 'max-ram-below-4g=%"PRIu64
> >>+                  "' expects size less then or equal to 4G", value);
> >less than
> 
> But the test is greater then.  So "not greater then" is "less then or equal".
> Or did you want the test changed?

No, just correcting English: less than, not less then :)

> >>+        error_propagate(errp, error);
> >>+        return;
> >>+    }
> >>+
> >>+    pcms->max_ram_below_4g = value;
> >>+}
> >>+
> >>  static void pc_machine_initfn(Object *obj)
> >>  {
> >>      object_property_add(obj, PC_MACHINE_MEMHP_REGION_SIZE, "int",
> >>                          pc_machine_get_hotplug_memory_region_size,
> >>                          NULL, NULL, NULL, NULL);
> >>+    object_property_add(obj, PC_MACHINE_MAX_RAM_BELOW_4G,  "size",
> >>+                        pc_machine_get_max_ram_below_4g,
> >>+                        pc_machine_set_max_ram_below_4g,
> >>+                        NULL, NULL, NULL);
> >>  }
> >>  static void pc_machine_class_init(ObjectClass *oc, void *data)
> >>diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> >>index 40f6eaf..25f4727 100644
> >>--- a/hw/i386/pc_piix.c
> >>+++ b/hw/i386/pc_piix.c
> >>@@ -98,6 +98,13 @@ static void pc_init1(MachineState *machine,
> >>      DeviceState *icc_bridge;
> >>      FWCfgState *fw_cfg = NULL;
> >>      PcGuestInfo *guest_info;
> >>+    Object *mo = qdev_get_machine();
> >>+    PCMachineState *pcms = PC_MACHINE(mo);
> >>+    ram_addr_t lowmem = 0xe0000000;
> >>+
> >>+    if (pcms && pcms->max_ram_below_4g) {
> >Is pcms ever NULL? If yes why?
> 
> Not that I know of.  I would be happy to convert this to an assert(pcms).

In fact, PC_MACHINE already includes an assert doesn't it?
So no need to check it everywhere.

> >>+        lowmem = pcms->max_ram_below_4g;
> >>+    }
> >>      /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory).
> >>       * If it doesn't, we need to split it in chunks below and above 4G.
> >>@@ -106,8 +113,10 @@ static void pc_init1(MachineState *machine,
> >>       * For old machine types, use whatever split we used historically to avoid
> >>       * breaking migration.
> >>       */
> >>-    if (machine->ram_size >= 0xe0000000) {
> >>-        ram_addr_t lowmem = gigabyte_align ? 0xc0000000 : 0xe0000000;
> >>+    if (machine->ram_size >= lowmem) {
> >>+        if (!(pcms && pcms->max_ram_below_4g) && gigabyte_align) {
> >>+            lowmem = 0xc0000000;
> >>+        }
> >>          above_4g_mem_size = machine->ram_size - lowmem;
> >>          below_4g_mem_size = lowmem;
> >>      } else {
> >
> >So why do we need gigabyte_align anymore?
> 
> Because of qemu 2.0 and the user is not required to specify this option.
> 
> >Can't we set property to 0xc0000000 by default, and
> >override for old machine types?
> 
> There is a strange compatibility part here.  Since this code includes ram_size (see:
> 
> http://lists.gnu.org/archive/html/qemu-devel/2014-02/msg05146.html
> 
> ) and xen has a different default.
> 

So instead of a default of 0, it would be preferable to set the default to the
actual value, and let the user override it.

Or if that's too hard, set max_ram_below_4g instead of setting
gigabyte_align. Having gigabyte_align switches everywhere is messy
enough; adding max_ram_below_4g into the mix is just too messy.



> >Also, a value that isn't a multiple of 1G will lead to bad
> >performance for large machines which do have above_4g_mem_size.
> >Let's detect and print a warning.
> 
> Will Do.
> 
>    -Don Slutz
> 
> >
> >
> >>@@ -122,7 +131,7 @@ static void pc_init1(MachineState *machine,
> >>      }
> >>      icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
> >>-    object_property_add_child(qdev_get_machine(), "icc-bridge",
> >>+    object_property_add_child(mo, "icc-bridge",
> >>                                OBJECT(icc_bridge), NULL);
> >>      pc_cpus_init(machine->cpu_model, icc_bridge);
> >>diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
> >>index e28ce40..155cdf1 100644
> >>--- a/hw/i386/pc_q35.c
> >>+++ b/hw/i386/pc_q35.c
> >>@@ -85,6 +85,13 @@ static void pc_q35_init(MachineState *machine)
> >>      PCIDevice *ahci;
> >>      DeviceState *icc_bridge;
> >>      PcGuestInfo *guest_info;
> >>+    Object *mo = qdev_get_machine();
> >>+    PCMachineState *pcms = PC_MACHINE(mo);
> >>+    ram_addr_t lowmem = 0xb0000000;
> >>+
> >>+    if (pcms && pcms->max_ram_below_4g) {
> >>+        lowmem = pcms->max_ram_below_4g;
> >>+    }
> >>      /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory
> >>       * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping
> >>@@ -95,8 +102,10 @@ static void pc_q35_init(MachineState *machine)
> >>       * For old machine types, use whatever split we used historically to avoid
> >>       * breaking migration.
> >>       */
> >>-    if (machine->ram_size >= 0xb0000000) {
> >>-        ram_addr_t lowmem = gigabyte_align ? 0x80000000 : 0xb0000000;
> >>+    if (machine->ram_size >= lowmem) {
> >>+        if (!(pcms && pcms->max_ram_below_4g) && gigabyte_align) {
> >>+            lowmem = 0x800000000;
> >>+        }
> >>          above_4g_mem_size = machine->ram_size - lowmem;
> >>          below_4g_mem_size = lowmem;
> >>      } else {
> >>@@ -111,7 +120,7 @@ static void pc_q35_init(MachineState *machine)
> >>      }
> >>      icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
> >>-    object_property_add_child(qdev_get_machine(), "icc-bridge",
> >>+    object_property_add_child(mo, "icc-bridge",
> >>                                OBJECT(icc_bridge), NULL);
> >>      pc_cpus_init(machine->cpu_model, icc_bridge);
> >>diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> >>index 19530bd..2d8b562 100644
> >>--- a/include/hw/i386/pc.h
> >>+++ b/include/hw/i386/pc.h
> >>@@ -32,10 +32,13 @@ struct PCMachineState {
> >>      MemoryRegion hotplug_memory;
> >>      HotplugHandler *acpi_dev;
> >>+
> >>+    uint64_t max_ram_below_4g;
> >>  };
> >>  #define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device"
> >>  #define PC_MACHINE_MEMHP_REGION_SIZE "hotplug-memory-region-size"
> >>+#define PC_MACHINE_MAX_RAM_BELOW_4G "max-ram-below-4g"
> >>  /**
> >>   * PCMachineClass:
> >>diff --git a/vl.c b/vl.c
> >>index 5e77a27..cffb9c5 100644
> >>--- a/vl.c
> >>+++ b/vl.c
> >>@@ -382,6 +382,10 @@ static QemuOptsList qemu_machine_opts = {
> >>              .name = "kvm-type",
> >>              .type = QEMU_OPT_STRING,
> >>              .help = "Specifies the KVM virtualization mode (HV, PR)",
> >>+        },{
> >>+            .name = PC_MACHINE_MAX_RAM_BELOW_4G,
> >>+            .type = QEMU_OPT_SIZE,
> >>+            .help = "maximum ram below the 4G boundary (32bit boundary)",
> >>          },
> >>          { /* End of list */ }
> >>      },
> >>-- 
> >>1.8.4
Marcel Apfelbaum June 9, 2014, 3:10 p.m. UTC | #4
Hi,

On Mon, 2014-06-09 at 17:38 +0300, Michael S. Tsirkin wrote:
> On Mon, Jun 09, 2014 at 10:20:57AM -0400, Don Slutz wrote:
> > On 06/08/14 11:40, Michael S. Tsirkin wrote:
> > >On Fri, Jun 06, 2014 at 01:52:05PM -0400, Don Slutz wrote:
> > >>This is a pc & q35 only machine opt.  One use is to allow for more
> > >>ram in a 32bit guest for example:
> > >>
> > >>-machine pc,max-ram-below-4g=3.75G
> > >>
> > >>If you add enough PCI devices then all mmio for them will not fit
> > >>below 4G which may not be the layout the user wanted. This allows
> > >>you to increase the below 4G address space that PCI devices can use
> > >>(aka decrease ram below 4G) and therefore in more cases not have any
> > >>mmio that is above 4G.
> > >>
> > >>For example using "-machine pc,max-ram-below-4g=2G" on the command
> > >>line will limit the amount of ram that is below 4G to 2G.
> > >>
> > >>Signed-off-by: Don Slutz <dslutz@verizon.com>
> > >>---
> > >>v5:
> > >>   Re-work based on:
> > >>
> > >>   https://github.com/imammedo/qemu/commits/memory-hotplug-v11
> > >>
> > >>
> > >>  hw/i386/pc.c         | 38 ++++++++++++++++++++++++++++++++++++++
> > >>  hw/i386/pc_piix.c    | 15 ++++++++++++---
> > >>  hw/i386/pc_q35.c     | 15 ++++++++++++---
> > >>  include/hw/i386/pc.h |  3 +++
> > >>  vl.c                 |  4 ++++
> > >>  5 files changed, 69 insertions(+), 6 deletions(-)
> > >>
> > >>diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> > >>index 7cdba10..bccb746 100644
> > >>--- a/hw/i386/pc.c
> > >>+++ b/hw/i386/pc.c
> > >>@@ -1644,11 +1644,49 @@ pc_machine_get_hotplug_memory_region_size(Object *obj, Visitor *v, void *opaque,
> > >>      visit_type_int(v, &value, name, errp);
> > >>  }
> > >>+static void pc_machine_get_max_ram_below_4g(Object *obj, Visitor *v,
> > >>+                                         void *opaque, const char *name,
> > >>+                                         Error **errp)
> > >>+{
> > >>+    PCMachineState *pcms = PC_MACHINE(obj);
> > >>+    uint64_t value = pcms->max_ram_below_4g;
> > >>+
> > >>+    visit_type_size(v, &value, name, errp);
> > >>+}
> > >>+
> > >>+static void pc_machine_set_max_ram_below_4g(Object *obj, Visitor *v,
> > >>+                                         void *opaque, const char *name,
> > >>+                                         Error **errp)
> > >>+{
> > >>+    PCMachineState *pcms = PC_MACHINE(obj);
> > >>+    Error *error = NULL;
> > >>+    uint64_t value;
> > >>+
> > >>+    visit_type_size(v, &value, name, &error);
> > >>+    if (error) {
> > >>+        error_propagate(errp, error);
> > >>+        return;
> > >>+    }
> > >>+    if (value > (1ULL << 32)) {
> > >>+        error_set(&error, ERROR_CLASS_GENERIC_ERROR,
> > >>+                  "Machine option 'max-ram-below-4g=%"PRIu64
> > >>+                  "' expects size less then or equal to 4G", value);
> > >less than
> > 
> > But the test is greater then.  So "not greater then" is "less then or equal".
> > Or did you want the test changed?
> 
> No, just correcting English: less than, not less then :)
> 
> > >>+        error_propagate(errp, error);
> > >>+        return;
> > >>+    }
> > >>+
> > >>+    pcms->max_ram_below_4g = value;
> > >>+}
> > >>+
> > >>  static void pc_machine_initfn(Object *obj)
> > >>  {
> > >>      object_property_add(obj, PC_MACHINE_MEMHP_REGION_SIZE, "int",
> > >>                          pc_machine_get_hotplug_memory_region_size,
> > >>                          NULL, NULL, NULL, NULL);
> > >>+    object_property_add(obj, PC_MACHINE_MAX_RAM_BELOW_4G,  "size",
> > >>+                        pc_machine_get_max_ram_below_4g,
> > >>+                        pc_machine_set_max_ram_below_4g,
> > >>+                        NULL, NULL, NULL);
Maybe you can add a sane default here and avoid the comparison with 0
every time you use it.
If you think you need a value per machine type, you can add it to
compat props. I don't see how it is related, so you may not want to do so.

> > >>  }
> > >>  static void pc_machine_class_init(ObjectClass *oc, void *data)
> > >>diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> > >>index 40f6eaf..25f4727 100644
> > >>--- a/hw/i386/pc_piix.c
> > >>+++ b/hw/i386/pc_piix.c
> > >>@@ -98,6 +98,13 @@ static void pc_init1(MachineState *machine,
> > >>      DeviceState *icc_bridge;
> > >>      FWCfgState *fw_cfg = NULL;
> > >>      PcGuestInfo *guest_info;
> > >>+    Object *mo = qdev_get_machine();
> > >>+    PCMachineState *pcms = PC_MACHINE(mo);
> > >>+    ram_addr_t lowmem = 0xe0000000;
> > >>+
> > >>+    if (pcms && pcms->max_ram_below_4g) {
From my QOM understanding, max_ram_below_4g is a private field,
so it should not be used directly.
You can use QOM's object_property_get or add a pc_machine wrapper
for getting/setting the field.

> > >Is pcms ever NULL? If yes why?
> > 
> > Not that I know of.  I would be happy to convert this to an assert(pcms).
> 
> In fact, PC_MACHINE already includes an assert doesn't it?
> So no need to check it everywhere.

+1. No need for an assert here. It is already done by OBJECT_CHECK.

Hope I helped,
Marcel

> 
> > >>+        lowmem = pcms->max_ram_below_4g;
> > >>+    }
> > >>      /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory).
> > >>       * If it doesn't, we need to split it in chunks below and above 4G.
> > >>@@ -106,8 +113,10 @@ static void pc_init1(MachineState *machine,
> > >>       * For old machine types, use whatever split we used historically to avoid
> > >>       * breaking migration.
> > >>       */
> > >>-    if (machine->ram_size >= 0xe0000000) {
> > >>-        ram_addr_t lowmem = gigabyte_align ? 0xc0000000 : 0xe0000000;
> > >>+    if (machine->ram_size >= lowmem) {
> > >>+        if (!(pcms && pcms->max_ram_below_4g) && gigabyte_align) {
> > >>+            lowmem = 0xc0000000;
> > >>+        }
> > >>          above_4g_mem_size = machine->ram_size - lowmem;
> > >>          below_4g_mem_size = lowmem;
> > >>      } else {
> > >
> > >So why do we need gigabyte_align anymore?
> > 
> > Because of qemu 2.0 and the user is not required to specify this option.
> > 
> > >Can't we set property to 0xc0000000 by default, and
> > >override for old machine types?
> > 
> > There is a strange compatibility part here.  Since this code includes ram_size (see:
> > 
> > http://lists.gnu.org/archive/html/qemu-devel/2014-02/msg05146.html
> > 
> > ) and xen has a different default.
> > 
> 
> So instead of default 0, it would be preferable to set the default to the
> actual value, and let user override it.
> 
> Or if that's too hard, set max_ram_below_4g instead of setting
> gigabyte_align. gigabyte_align switches everywhere is messy
> enough, adding max_ram_below_4g into mix is just too messy.
> 
> 
> 
> > >Also, a value that isn't a multiple of 1G will lead to bad
> > >performance for large machines which do have above_4g_mem_size.
> > >Let's detect and print a warning.
> > 
> > Will Do.
> > 
> >    -Don Slutz
> > 
> > >
> > >
> > >>@@ -122,7 +131,7 @@ static void pc_init1(MachineState *machine,
> > >>      }
> > >>      icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
> > >>-    object_property_add_child(qdev_get_machine(), "icc-bridge",
> > >>+    object_property_add_child(mo, "icc-bridge",
> > >>                                OBJECT(icc_bridge), NULL);
> > >>      pc_cpus_init(machine->cpu_model, icc_bridge);
> > >>diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
> > >>index e28ce40..155cdf1 100644
> > >>--- a/hw/i386/pc_q35.c
> > >>+++ b/hw/i386/pc_q35.c
> > >>@@ -85,6 +85,13 @@ static void pc_q35_init(MachineState *machine)
> > >>      PCIDevice *ahci;
> > >>      DeviceState *icc_bridge;
> > >>      PcGuestInfo *guest_info;
> > >>+    Object *mo = qdev_get_machine();
> > >>+    PCMachineState *pcms = PC_MACHINE(mo);
> > >>+    ram_addr_t lowmem = 0xb0000000;
> > >>+
> > >>+    if (pcms && pcms->max_ram_below_4g) {
> > >>+        lowmem = pcms->max_ram_below_4g;
> > >>+    }
> > >>      /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory
> > >>       * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping
> > >>@@ -95,8 +102,10 @@ static void pc_q35_init(MachineState *machine)
> > >>       * For old machine types, use whatever split we used historically to avoid
> > >>       * breaking migration.
> > >>       */
> > >>-    if (machine->ram_size >= 0xb0000000) {
> > >>-        ram_addr_t lowmem = gigabyte_align ? 0x80000000 : 0xb0000000;
> > >>+    if (machine->ram_size >= lowmem) {
> > >>+        if (!(pcms && pcms->max_ram_below_4g) && gigabyte_align) {
> > >>+            lowmem = 0x800000000;
> > >>+        }
> > >>          above_4g_mem_size = machine->ram_size - lowmem;
> > >>          below_4g_mem_size = lowmem;
> > >>      } else {
> > >>@@ -111,7 +120,7 @@ static void pc_q35_init(MachineState *machine)
> > >>      }
> > >>      icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
> > >>-    object_property_add_child(qdev_get_machine(), "icc-bridge",
> > >>+    object_property_add_child(mo, "icc-bridge",
> > >>                                OBJECT(icc_bridge), NULL);
> > >>      pc_cpus_init(machine->cpu_model, icc_bridge);
> > >>diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> > >>index 19530bd..2d8b562 100644
> > >>--- a/include/hw/i386/pc.h
> > >>+++ b/include/hw/i386/pc.h
> > >>@@ -32,10 +32,13 @@ struct PCMachineState {
> > >>      MemoryRegion hotplug_memory;
> > >>      HotplugHandler *acpi_dev;
> > >>+
> > >>+    uint64_t max_ram_below_4g;
> > >>  };
> > >>  #define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device"
> > >>  #define PC_MACHINE_MEMHP_REGION_SIZE "hotplug-memory-region-size"
> > >>+#define PC_MACHINE_MAX_RAM_BELOW_4G "max-ram-below-4g"
> > >>  /**
> > >>   * PCMachineClass:
> > >>diff --git a/vl.c b/vl.c
> > >>index 5e77a27..cffb9c5 100644
> > >>--- a/vl.c
> > >>+++ b/vl.c
> > >>@@ -382,6 +382,10 @@ static QemuOptsList qemu_machine_opts = {
> > >>              .name = "kvm-type",
> > >>              .type = QEMU_OPT_STRING,
> > >>              .help = "Specifies the KVM virtualization mode (HV, PR)",
> > >>+        },{
> > >>+            .name = PC_MACHINE_MAX_RAM_BELOW_4G,
> > >>+            .type = QEMU_OPT_SIZE,
> > >>+            .help = "maximum ram below the 4G boundary (32bit boundary)",
> > >>          },
> > >>          { /* End of list */ }
> > >>      },
> > >>-- 
> > >>1.8.4
>
Igor Mammedov June 9, 2014, 3:37 p.m. UTC | #5
On Mon, 09 Jun 2014 18:10:27 +0300
Marcel Apfelbaum <marcel.a@redhat.com> wrote:

> Hi,
> 
> On Mon, 2014-06-09 at 17:38 +0300, Michael S. Tsirkin wrote:
> > On Mon, Jun 09, 2014 at 10:20:57AM -0400, Don Slutz wrote:
> > > On 06/08/14 11:40, Michael S. Tsirkin wrote:
> > > >On Fri, Jun 06, 2014 at 01:52:05PM -0400, Don Slutz wrote:
> > > >>This is a pc & q35 only machine opt.  One use is to allow for more
> > > >>ram in a 32bit guest for example:
> > > >>
> > > >>-machine pc,max-ram-below-4g=3.75G
> > > >>
> > > >>If you add enough PCI devices then all mmio for them will not fit
> > > >>below 4G which may not be the layout the user wanted. This allows
> > > >>you to increase the below 4G address space that PCI devices can use
> > > >>(aka decrease ram below 4G) and therefore in more cases not have any
> > > >>mmio that is above 4G.
> > > >>
> > > >>For example using "-machine pc,max-ram-below-4g=2G" on the command
> > > >>line will limit the amount of ram that is below 4G to 2G.
> > > >>
> > > >>Signed-off-by: Don Slutz <dslutz@verizon.com>
> > > >>---
> > > >>v5:
> > > >>   Re-work based on:
> > > >>
> > > >>   https://github.com/imammedo/qemu/commits/memory-hotplug-v11
> > > >>
> > > >>
> > > >>  hw/i386/pc.c         | 38 ++++++++++++++++++++++++++++++++++++++
> > > >>  hw/i386/pc_piix.c    | 15 ++++++++++++---
> > > >>  hw/i386/pc_q35.c     | 15 ++++++++++++---
> > > >>  include/hw/i386/pc.h |  3 +++
> > > >>  vl.c                 |  4 ++++
> > > >>  5 files changed, 69 insertions(+), 6 deletions(-)
> > > >>
> > > >>diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> > > >>index 7cdba10..bccb746 100644
> > > >>--- a/hw/i386/pc.c
> > > >>+++ b/hw/i386/pc.c
> > > >>@@ -1644,11 +1644,49 @@ pc_machine_get_hotplug_memory_region_size(Object *obj, Visitor *v, void *opaque,
> > > >>      visit_type_int(v, &value, name, errp);
> > > >>  }
> > > >>+static void pc_machine_get_max_ram_below_4g(Object *obj, Visitor *v,
> > > >>+                                         void *opaque, const char *name,
> > > >>+                                         Error **errp)
> > > >>+{
> > > >>+    PCMachineState *pcms = PC_MACHINE(obj);
> > > >>+    uint64_t value = pcms->max_ram_below_4g;
> > > >>+
> > > >>+    visit_type_size(v, &value, name, errp);
> > > >>+}
> > > >>+
> > > >>+static void pc_machine_set_max_ram_below_4g(Object *obj, Visitor *v,
> > > >>+                                         void *opaque, const char *name,
> > > >>+                                         Error **errp)
> > > >>+{
> > > >>+    PCMachineState *pcms = PC_MACHINE(obj);
> > > >>+    Error *error = NULL;
> > > >>+    uint64_t value;
> > > >>+
> > > >>+    visit_type_size(v, &value, name, &error);
> > > >>+    if (error) {
> > > >>+        error_propagate(errp, error);
> > > >>+        return;
> > > >>+    }
> > > >>+    if (value > (1ULL << 32)) {
> > > >>+        error_set(&error, ERROR_CLASS_GENERIC_ERROR,
> > > >>+                  "Machine option 'max-ram-below-4g=%"PRIu64
> > > >>+                  "' expects size less then or equal to 4G", value);
> > > >less than
> > > 
> > > But the test is greater then.  So "not greater then" is "less then or equal".
> > > Or did you want the test changed?
> > 
> > No, just correcting English: less than, not less then :)
> > 
> > > >>+        error_propagate(errp, error);
> > > >>+        return;
> > > >>+    }
> > > >>+
> > > >>+    pcms->max_ram_below_4g = value;
> > > >>+}
> > > >>+
> > > >>  static void pc_machine_initfn(Object *obj)
> > > >>  {
> > > >>      object_property_add(obj, PC_MACHINE_MEMHP_REGION_SIZE, "int",
> > > >>                          pc_machine_get_hotplug_memory_region_size,
> > > >>                          NULL, NULL, NULL, NULL);
> > > >>+    object_property_add(obj, PC_MACHINE_MAX_RAM_BELOW_4G,  "size",
> > > >>+                        pc_machine_get_max_ram_below_4g,
> > > >>+                        pc_machine_set_max_ram_below_4g,
> > > >>+                        NULL, NULL, NULL);
> Maybe you can add here a sane default and avoid comparison with 0
> any time you use it.
+1

> If you think you need value per machine type, you can add it to
> compat props. I don't see how is related, so you may not want to do so.
Using compat_props would be great; however, does compat_props work for the
machine type itself already?

> 
> > > >>  }
> > > >>  static void pc_machine_class_init(ObjectClass *oc, void *data)
> > > >>diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> > > >>index 40f6eaf..25f4727 100644
> > > >>--- a/hw/i386/pc_piix.c
> > > >>+++ b/hw/i386/pc_piix.c
> > > >>@@ -98,6 +98,13 @@ static void pc_init1(MachineState *machine,
> > > >>      DeviceState *icc_bridge;
> > > >>      FWCfgState *fw_cfg = NULL;
> > > >>      PcGuestInfo *guest_info;
> > > >>+    Object *mo = qdev_get_machine();
> > > >>+    PCMachineState *pcms = PC_MACHINE(mo);
> > > >>+    ram_addr_t lowmem = 0xe0000000;
> > > >>+
> > > >>+    if (pcms && pcms->max_ram_below_4g) {
> From my QOM understanding, max_ram_below_4g is a private field,
> so it not should be used directly.
> You can use QOMs object_property_get or add a pc_machine wrapper
> for getting/setting the field.
pc_init1() is sort of a private function of PCMachine, so there is not much
point in using verbose getters/setters internally unless there are checks
behind the setter.

> 
> > > >Is pcms ever NULL? If yes why?
> > > 
> > > Not that I know of.  I would be happy to convert this to an assert(pcms).
> > 
> > In fact, PC_MACHINE already includes an assert doesn't it?
> > So no need to check it everywhere.
> 
> +1. No need for assert here. Is already done by OBJECT_CHECK.
> 
> Hope I helped,
> Marcel
> 
> > 
> > > >>+        lowmem = pcms->max_ram_below_4g;
> > > >>+    }
> > > >>      /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory).
> > > >>       * If it doesn't, we need to split it in chunks below and above 4G.
> > > >>@@ -106,8 +113,10 @@ static void pc_init1(MachineState *machine,
> > > >>       * For old machine types, use whatever split we used historically to avoid
> > > >>       * breaking migration.
> > > >>       */
> > > >>-    if (machine->ram_size >= 0xe0000000) {
> > > >>-        ram_addr_t lowmem = gigabyte_align ? 0xc0000000 : 0xe0000000;
> > > >>+    if (machine->ram_size >= lowmem) {
> > > >>+        if (!(pcms && pcms->max_ram_below_4g) && gigabyte_align) {
> > > >>+            lowmem = 0xc0000000;
> > > >>+        }
> > > >>          above_4g_mem_size = machine->ram_size - lowmem;
> > > >>          below_4g_mem_size = lowmem;
> > > >>      } else {
> > > >
> > > >So why do we need gigabyte_align anymore?
> > > 
> > > Because of qemu 2.0 and the user is not required to specify this option.
> > > 
> > > >Can't we set property to 0xc0000000 by default, and
> > > >override for old machine types?
> > > 
> > > There is a strange compatibility part here.  Since this code includes ram_size (see:
> > > 
> > > http://lists.gnu.org/archive/html/qemu-devel/2014-02/msg05146.html
> > > 
> > > ) and xen has a different default.
> > > 
> > 
> > So instead of default 0, it would be preferable to set the default to the
> > actual value, and let user override it.
> > 
> > Or if that's too hard, set max_ram_below_4g instead of setting
> > gigabyte_align. gigabyte_align switches everywhere is messy
> > enough, adding max_ram_below_4g into mix is just too messy.
> > 
> > 
> > 
> > > >Also, a value that isn't a multiple of 1G will lead to bad
> > > >performance for large machines which do have above_4g_mem_size.
> > > >Let's detect and print a warning.
> > > 
> > > Will Do.
> > > 
> > >    -Don Slutz
> > > 
> > > >
> > > >
> > > >>@@ -122,7 +131,7 @@ static void pc_init1(MachineState *machine,
> > > >>      }
> > > >>      icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
> > > >>-    object_property_add_child(qdev_get_machine(), "icc-bridge",
> > > >>+    object_property_add_child(mo, "icc-bridge",
> > > >>                                OBJECT(icc_bridge), NULL);
> > > >>      pc_cpus_init(machine->cpu_model, icc_bridge);
> > > >>diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
> > > >>index e28ce40..155cdf1 100644
> > > >>--- a/hw/i386/pc_q35.c
> > > >>+++ b/hw/i386/pc_q35.c
> > > >>@@ -85,6 +85,13 @@ static void pc_q35_init(MachineState *machine)
> > > >>      PCIDevice *ahci;
> > > >>      DeviceState *icc_bridge;
> > > >>      PcGuestInfo *guest_info;
> > > >>+    Object *mo = qdev_get_machine();
> > > >>+    PCMachineState *pcms = PC_MACHINE(mo);
> > > >>+    ram_addr_t lowmem = 0xb0000000;
> > > >>+
> > > >>+    if (pcms && pcms->max_ram_below_4g) {
> > > >>+        lowmem = pcms->max_ram_below_4g;
> > > >>+    }
> > > >>      /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory
> > > >>       * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping
> > > >>@@ -95,8 +102,10 @@ static void pc_q35_init(MachineState *machine)
> > > >>       * For old machine types, use whatever split we used historically to avoid
> > > >>       * breaking migration.
> > > >>       */
> > > >>-    if (machine->ram_size >= 0xb0000000) {
> > > >>-        ram_addr_t lowmem = gigabyte_align ? 0x80000000 : 0xb0000000;
> > > >>+    if (machine->ram_size >= lowmem) {
> > > >>+        if (!(pcms && pcms->max_ram_below_4g) && gigabyte_align) {
> > > >>+            lowmem = 0x800000000;
> > > >>+        }
> > > >>          above_4g_mem_size = machine->ram_size - lowmem;
> > > >>          below_4g_mem_size = lowmem;
> > > >>      } else {
> > > >>@@ -111,7 +120,7 @@ static void pc_q35_init(MachineState *machine)
> > > >>      }
> > > >>      icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
> > > >>-    object_property_add_child(qdev_get_machine(), "icc-bridge",
> > > >>+    object_property_add_child(mo, "icc-bridge",
> > > >>                                OBJECT(icc_bridge), NULL);
> > > >>      pc_cpus_init(machine->cpu_model, icc_bridge);
> > > >>diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> > > >>index 19530bd..2d8b562 100644
> > > >>--- a/include/hw/i386/pc.h
> > > >>+++ b/include/hw/i386/pc.h
> > > >>@@ -32,10 +32,13 @@ struct PCMachineState {
> > > >>      MemoryRegion hotplug_memory;
> > > >>      HotplugHandler *acpi_dev;
> > > >>+
> > > >>+    uint64_t max_ram_below_4g;
> > > >>  };
> > > >>  #define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device"
> > > >>  #define PC_MACHINE_MEMHP_REGION_SIZE "hotplug-memory-region-size"
> > > >>+#define PC_MACHINE_MAX_RAM_BELOW_4G "max-ram-below-4g"
> > > >>  /**
> > > >>   * PCMachineClass:
> > > >>diff --git a/vl.c b/vl.c
> > > >>index 5e77a27..cffb9c5 100644
> > > >>--- a/vl.c
> > > >>+++ b/vl.c
> > > >>@@ -382,6 +382,10 @@ static QemuOptsList qemu_machine_opts = {
> > > >>              .name = "kvm-type",
> > > >>              .type = QEMU_OPT_STRING,
> > > >>              .help = "Specifies the KVM virtualization mode (HV, PR)",
> > > >>+        },{
> > > >>+            .name = PC_MACHINE_MAX_RAM_BELOW_4G,
> > > >>+            .type = QEMU_OPT_SIZE,
> > > >>+            .help = "maximum ram below the 4G boundary (32bit boundary)",
> > > >>          },
> > > >>          { /* End of list */ }
> > > >>      },
> > > >>-- 
> > > >>1.8.4
> > 
> 
> 
>
Marcel Apfelbaum June 9, 2014, 5:33 p.m. UTC | #6
On Mon, 2014-06-09 at 17:37 +0200, Igor Mammedov wrote:
> On Mon, 09 Jun 2014 18:10:27 +0300
> Marcel Apfelbaum <marcel.a@redhat.com> wrote:
> 
> > Hi,
> > 
> > On Mon, 2014-06-09 at 17:38 +0300, Michael S. Tsirkin wrote:
> > > On Mon, Jun 09, 2014 at 10:20:57AM -0400, Don Slutz wrote:
> > > > On 06/08/14 11:40, Michael S. Tsirkin wrote:
> > > > >On Fri, Jun 06, 2014 at 01:52:05PM -0400, Don Slutz wrote:
> > > > >>This is a pc & q35 only machine opt.  One use is to allow for more
> > > > >>ram in a 32bit guest for example:
> > > > >>
> > > > >>-machine pc,max-ram-below-4g=3.75G
> > > > >>
> > > > >>If you add enough PCI devices then all mmio for them will not fit
> > > > >>below 4G which may not be the layout the user wanted. This allows
> > > > >>you to increase the below 4G address space that PCI devices can use
> > > > >>(aka decrease ram below 4G) and therefore in more cases not have any
> > > > >>mmio that is above 4G.
> > > > >>
> > > > >>For example using "-machine pc,max-ram-below-4g=2G" on the command
> > > > >>line will limit the amount of ram that is below 4G to 2G.
> > > > >>
> > > > >>Signed-off-by: Don Slutz <dslutz@verizon.com>
> > > > >>---
> > > > >>v5:
> > > > >>   Re-work based on:
> > > > >>
> > > > >>   https://github.com/imammedo/qemu/commits/memory-hotplug-v11
> > > > >>
> > > > >>
> > > > >>  hw/i386/pc.c         | 38 ++++++++++++++++++++++++++++++++++++++
> > > > >>  hw/i386/pc_piix.c    | 15 ++++++++++++---
> > > > >>  hw/i386/pc_q35.c     | 15 ++++++++++++---
> > > > >>  include/hw/i386/pc.h |  3 +++
> > > > >>  vl.c                 |  4 ++++
> > > > >>  5 files changed, 69 insertions(+), 6 deletions(-)
> > > > >>
> > > > >>diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> > > > >>index 7cdba10..bccb746 100644
> > > > >>--- a/hw/i386/pc.c
> > > > >>+++ b/hw/i386/pc.c
> > > > >>@@ -1644,11 +1644,49 @@ pc_machine_get_hotplug_memory_region_size(Object *obj, Visitor *v, void *opaque,
> > > > >>      visit_type_int(v, &value, name, errp);
> > > > >>  }
> > > > >>+static void pc_machine_get_max_ram_below_4g(Object *obj, Visitor *v,
> > > > >>+                                         void *opaque, const char *name,
> > > > >>+                                         Error **errp)
> > > > >>+{
> > > > >>+    PCMachineState *pcms = PC_MACHINE(obj);
> > > > >>+    uint64_t value = pcms->max_ram_below_4g;
> > > > >>+
> > > > >>+    visit_type_size(v, &value, name, errp);
> > > > >>+}
> > > > >>+
> > > > >>+static void pc_machine_set_max_ram_below_4g(Object *obj, Visitor *v,
> > > > >>+                                         void *opaque, const char *name,
> > > > >>+                                         Error **errp)
> > > > >>+{
> > > > >>+    PCMachineState *pcms = PC_MACHINE(obj);
> > > > >>+    Error *error = NULL;
> > > > >>+    uint64_t value;
> > > > >>+
> > > > >>+    visit_type_size(v, &value, name, &error);
> > > > >>+    if (error) {
> > > > >>+        error_propagate(errp, error);
> > > > >>+        return;
> > > > >>+    }
> > > > >>+    if (value > (1ULL << 32)) {
> > > > >>+        error_set(&error, ERROR_CLASS_GENERIC_ERROR,
> > > > >>+                  "Machine option 'max-ram-below-4g=%"PRIu64
> > > > >>+                  "' expects size less then or equal to 4G", value);
> > > > >less than
> > > > 
> > > > But the test is greater then.  So "not greater then" is "less then or equal".
> > > > Or did you want the test changed?
> > > 
> > > No, just correcting English: less than, not less then :)
> > > 
> > > > >>+        error_propagate(errp, error);
> > > > >>+        return;
> > > > >>+    }
> > > > >>+
> > > > >>+    pcms->max_ram_below_4g = value;
> > > > >>+}
> > > > >>+
> > > > >>  static void pc_machine_initfn(Object *obj)
> > > > >>  {
> > > > >>      object_property_add(obj, PC_MACHINE_MEMHP_REGION_SIZE, "int",
> > > > >>                          pc_machine_get_hotplug_memory_region_size,
> > > > >>                          NULL, NULL, NULL, NULL);
> > > > >>+    object_property_add(obj, PC_MACHINE_MAX_RAM_BELOW_4G,  "size",
> > > > >>+                        pc_machine_get_max_ram_below_4g,
> > > > >>+                        pc_machine_set_max_ram_below_4g,
> > > > >>+                        NULL, NULL, NULL);
> > Maybe you can add here a sane default and avoid comparison with 0
> > any time you use it.
> +1
> 
> > If you think you need value per machine type, you can add it to
> > compat props. I don't see how is related, so you may not want to do so.
> Using compat_props would be great however does compat_props work for machine
> type itself already?
Now that you are mentioning it, I was hoping that compat_props are converted
into QemuOpts or something and then mapped into machine properties.
I'll look into it. 

> 
> > 
> > > > >>  }
> > > > >>  static void pc_machine_class_init(ObjectClass *oc, void *data)
> > > > >>diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> > > > >>index 40f6eaf..25f4727 100644
> > > > >>--- a/hw/i386/pc_piix.c
> > > > >>+++ b/hw/i386/pc_piix.c
> > > > >>@@ -98,6 +98,13 @@ static void pc_init1(MachineState *machine,
> > > > >>      DeviceState *icc_bridge;
> > > > >>      FWCfgState *fw_cfg = NULL;
> > > > >>      PcGuestInfo *guest_info;
> > > > >>+    Object *mo = qdev_get_machine();
> > > > >>+    PCMachineState *pcms = PC_MACHINE(mo);
> > > > >>+    ram_addr_t lowmem = 0xe0000000;
> > > > >>+
> > > > >>+    if (pcms && pcms->max_ram_below_4g) {
> > From my QOM understanding, max_ram_below_4g is a private field,
> > so it not should be used directly.
> > You can use QOMs object_property_get or add a pc_machine wrapper
> > for getting/setting the field.
> pc_init1() is sort of private function of PCMachine, so there is no much
> point to use verbose getters/setters internally unless there is checks behind
> setter.
I was just being QOM's advocate :). That being said, I'll not argue here,
as it is not a major issue.


Thanks,
Marcel

> 
> > 
> > > > >Is pcms ever NULL? If yes why?
> > > > 
> > > > Not that I know of.  I would be happy to convert this to an assert(pcms).
> > > 
> > > In fact, PC_MACHINE already includes an assert doesn't it?
> > > So no need to check it everywhere.
> > 
> > +1. No need for assert here. Is already done by OBJECT_CHECK.
> > 
> > Hope I helped,
> > Marcel
> > 
> > > 
> > > > >>+        lowmem = pcms->max_ram_below_4g;
> > > > >>+    }
> > > > >>      /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory).
> > > > >>       * If it doesn't, we need to split it in chunks below and above 4G.
> > > > >>@@ -106,8 +113,10 @@ static void pc_init1(MachineState *machine,
> > > > >>       * For old machine types, use whatever split we used historically to avoid
> > > > >>       * breaking migration.
> > > > >>       */
> > > > >>-    if (machine->ram_size >= 0xe0000000) {
> > > > >>-        ram_addr_t lowmem = gigabyte_align ? 0xc0000000 : 0xe0000000;
> > > > >>+    if (machine->ram_size >= lowmem) {
> > > > >>+        if (!(pcms && pcms->max_ram_below_4g) && gigabyte_align) {
> > > > >>+            lowmem = 0xc0000000;
> > > > >>+        }
> > > > >>          above_4g_mem_size = machine->ram_size - lowmem;
> > > > >>          below_4g_mem_size = lowmem;
> > > > >>      } else {
> > > > >
> > > > >So why do we need gigabyte_align anymore?
> > > > 
> > > > Because of qemu 2.0 and the user is not required to specify this option.
> > > > 
> > > > >Can't we set property to 0xc0000000 by default, and
> > > > >override for old machine types?
> > > > 
> > > > There is a strange compatibility part here.  Since this code includes ram_size (see:
> > > > 
> > > > http://lists.gnu.org/archive/html/qemu-devel/2014-02/msg05146.html
> > > > 
> > > > ) and xen has a different default.
> > > > 
> > > 
> > > So instead of default 0, it would be preferable to set the default to the
> > > actual value, and let user override it.
> > > 
> > > Or if that's too hard, set max_ram_below_4g instead of setting
> > > gigabyte_align. gigabyte_align switches everywhere is messy
> > > enough, adding max_ram_below_4g into mix is just too messy.
> > > 
> > > 
> > > 
> > > > >Also, a value that isn't a multiple of 1G will lead to bad
> > > > >performance for large machines which do have above_4g_mem_size.
> > > > >Let's detect and print a warning.
> > > > 
> > > > Will Do.
> > > > 
> > > >    -Don Slutz
> > > > 
> > > > >
> > > > >
> > > > >>@@ -122,7 +131,7 @@ static void pc_init1(MachineState *machine,
> > > > >>      }
> > > > >>      icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
> > > > >>-    object_property_add_child(qdev_get_machine(), "icc-bridge",
> > > > >>+    object_property_add_child(mo, "icc-bridge",
> > > > >>                                OBJECT(icc_bridge), NULL);
> > > > >>      pc_cpus_init(machine->cpu_model, icc_bridge);
> > > > >>diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
> > > > >>index e28ce40..155cdf1 100644
> > > > >>--- a/hw/i386/pc_q35.c
> > > > >>+++ b/hw/i386/pc_q35.c
> > > > >>@@ -85,6 +85,13 @@ static void pc_q35_init(MachineState *machine)
> > > > >>      PCIDevice *ahci;
> > > > >>      DeviceState *icc_bridge;
> > > > >>      PcGuestInfo *guest_info;
> > > > >>+    Object *mo = qdev_get_machine();
> > > > >>+    PCMachineState *pcms = PC_MACHINE(mo);
> > > > >>+    ram_addr_t lowmem = 0xb0000000;
> > > > >>+
> > > > >>+    if (pcms && pcms->max_ram_below_4g) {
> > > > >>+        lowmem = pcms->max_ram_below_4g;
> > > > >>+    }
> > > > >>      /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory
> > > > >>       * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping
> > > > >>@@ -95,8 +102,10 @@ static void pc_q35_init(MachineState *machine)
> > > > >>       * For old machine types, use whatever split we used historically to avoid
> > > > >>       * breaking migration.
> > > > >>       */
> > > > >>-    if (machine->ram_size >= 0xb0000000) {
> > > > >>-        ram_addr_t lowmem = gigabyte_align ? 0x80000000 : 0xb0000000;
> > > > >>+    if (machine->ram_size >= lowmem) {
> > > > >>+        if (!(pcms && pcms->max_ram_below_4g) && gigabyte_align) {
> > > > >>+            lowmem = 0x800000000;
> > > > >>+        }
> > > > >>          above_4g_mem_size = machine->ram_size - lowmem;
> > > > >>          below_4g_mem_size = lowmem;
> > > > >>      } else {
> > > > >>@@ -111,7 +120,7 @@ static void pc_q35_init(MachineState *machine)
> > > > >>      }
> > > > >>      icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
> > > > >>-    object_property_add_child(qdev_get_machine(), "icc-bridge",
> > > > >>+    object_property_add_child(mo, "icc-bridge",
> > > > >>                                OBJECT(icc_bridge), NULL);
> > > > >>      pc_cpus_init(machine->cpu_model, icc_bridge);
> > > > >>diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> > > > >>index 19530bd..2d8b562 100644
> > > > >>--- a/include/hw/i386/pc.h
> > > > >>+++ b/include/hw/i386/pc.h
> > > > >>@@ -32,10 +32,13 @@ struct PCMachineState {
> > > > >>      MemoryRegion hotplug_memory;
> > > > >>      HotplugHandler *acpi_dev;
> > > > >>+
> > > > >>+    uint64_t max_ram_below_4g;
> > > > >>  };
> > > > >>  #define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device"
> > > > >>  #define PC_MACHINE_MEMHP_REGION_SIZE "hotplug-memory-region-size"
> > > > >>+#define PC_MACHINE_MAX_RAM_BELOW_4G "max-ram-below-4g"
> > > > >>  /**
> > > > >>   * PCMachineClass:
> > > > >>diff --git a/vl.c b/vl.c
> > > > >>index 5e77a27..cffb9c5 100644
> > > > >>--- a/vl.c
> > > > >>+++ b/vl.c
> > > > >>@@ -382,6 +382,10 @@ static QemuOptsList qemu_machine_opts = {
> > > > >>              .name = "kvm-type",
> > > > >>              .type = QEMU_OPT_STRING,
> > > > >>              .help = "Specifies the KVM virtualization mode (HV, PR)",
> > > > >>+        },{
> > > > >>+            .name = PC_MACHINE_MAX_RAM_BELOW_4G,
> > > > >>+            .type = QEMU_OPT_SIZE,
> > > > >>+            .help = "maximum ram below the 4G boundary (32bit boundary)",
> > > > >>          },
> > > > >>          { /* End of list */ }
> > > > >>      },
> > > > >>-- 
> > > > >>1.8.4
> > > 
> > 
> > 
> > 
> 
>
Don Slutz June 9, 2014, 7:13 p.m. UTC | #7
On 06/09/14 10:38, Michael S. Tsirkin wrote:
> On Mon, Jun 09, 2014 at 10:20:57AM -0400, Don Slutz wrote:
>> On 06/08/14 11:40, Michael S. Tsirkin wrote:
>>> On Fri, Jun 06, 2014 at 01:52:05PM -0400, Don Slutz wrote:
>>>> This is a pc & q35 only machine opt.  One use is to allow for more
>>>> ram in a 32bit guest for example:
>>>>
>>>> -machine pc,max-ram-below-4g=3.75G
>>>>
>>>> If you add enough PCI devices then all mmio for them will not fit
>>>> below 4G which may not be the layout the user wanted. This allows
>>>> you to increase the below 4G address space that PCI devices can use
>>>> (aka decrease ram below 4G) and therefore in more cases not have any
>>>> mmio that is above 4G.
>>>>
>>>> For example using "-machine pc,max-ram-below-4g=2G" on the command
>>>> line will limit the amount of ram that is below 4G to 2G.
>>>>
>>>> Signed-off-by: Don Slutz <dslutz@verizon.com>
>>>> ---
>>>> v5:
>>>>    Re-work based on:
>>>>
>>>>    https://github.com/imammedo/qemu/commits/memory-hotplug-v11
>>>>
>>>>
>>>>   hw/i386/pc.c         | 38 ++++++++++++++++++++++++++++++++++++++
>>>>   hw/i386/pc_piix.c    | 15 ++++++++++++---
>>>>   hw/i386/pc_q35.c     | 15 ++++++++++++---
>>>>   include/hw/i386/pc.h |  3 +++
>>>>   vl.c                 |  4 ++++
>>>>   5 files changed, 69 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
>>>> index 7cdba10..bccb746 100644
>>>> --- a/hw/i386/pc.c
>>>> +++ b/hw/i386/pc.c
>>>> @@ -1644,11 +1644,49 @@ pc_machine_get_hotplug_memory_region_size(Object *obj, Visitor *v, void *opaque,
>>>>       visit_type_int(v, &value, name, errp);
>>>>   }
>>>> +static void pc_machine_get_max_ram_below_4g(Object *obj, Visitor *v,
>>>> +                                         void *opaque, const char *name,
>>>> +                                         Error **errp)
>>>> +{
>>>> +    PCMachineState *pcms = PC_MACHINE(obj);
>>>> +    uint64_t value = pcms->max_ram_below_4g;
>>>> +
>>>> +    visit_type_size(v, &value, name, errp);
>>>> +}
>>>> +
>>>> +static void pc_machine_set_max_ram_below_4g(Object *obj, Visitor *v,
>>>> +                                         void *opaque, const char *name,
>>>> +                                         Error **errp)
>>>> +{
>>>> +    PCMachineState *pcms = PC_MACHINE(obj);
>>>> +    Error *error = NULL;
>>>> +    uint64_t value;
>>>> +
>>>> +    visit_type_size(v, &value, name, &error);
>>>> +    if (error) {
>>>> +        error_propagate(errp, error);
>>>> +        return;
>>>> +    }
>>>> +    if (value > (1ULL << 32)) {
>>>> +        error_set(&error, ERROR_CLASS_GENERIC_ERROR,
>>>> +                  "Machine option 'max-ram-below-4g=%"PRIu64
>>>> +                  "' expects size less then or equal to 4G", value);
>>> less than
>> But the test is greater then.  So "not greater then" is "less then or equal".
>> Or did you want the test changed?
> No, just correcting English: less than, not less then :)
>

Sigh, will fix.


>>>> +        error_propagate(errp, error);
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    pcms->max_ram_below_4g = value;
>>>> +}
>>>> +
>>>>   static void pc_machine_initfn(Object *obj)
>>>>   {
>>>>       object_property_add(obj, PC_MACHINE_MEMHP_REGION_SIZE, "int",
>>>>                           pc_machine_get_hotplug_memory_region_size,
>>>>                           NULL, NULL, NULL, NULL);
>>>> +    object_property_add(obj, PC_MACHINE_MAX_RAM_BELOW_4G,  "size",
>>>> +                        pc_machine_get_max_ram_below_4g,
>>>> +                        pc_machine_set_max_ram_below_4g,
>>>> +                        NULL, NULL, NULL);
>>>>   }
>>>>   static void pc_machine_class_init(ObjectClass *oc, void *data)
>>>> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
>>>> index 40f6eaf..25f4727 100644
>>>> --- a/hw/i386/pc_piix.c
>>>> +++ b/hw/i386/pc_piix.c
>>>> @@ -98,6 +98,13 @@ static void pc_init1(MachineState *machine,
>>>>       DeviceState *icc_bridge;
>>>>       FWCfgState *fw_cfg = NULL;
>>>>       PcGuestInfo *guest_info;
>>>> +    Object *mo = qdev_get_machine();
>>>> +    PCMachineState *pcms = PC_MACHINE(mo);
>>>> +    ram_addr_t lowmem = 0xe0000000;
>>>> +
>>>> +    if (pcms && pcms->max_ram_below_4g) {
>>> Is pcms ever NULL? If yes why?
>> Not that I know of.  I would be happy to convert this to an assert(pcms).
> In fact, PC_MACHINE already includes an assert doesn't it?
> So no need to check it everywhere.

You are right.  And I also found:

static void pc_init1(MachineState *machine,
                      int pci_enabled,
                      int kvmclock_enabled)
{
     PCMachineState *pc_machine = PC_MACHINE(machine);
...
     PCMachineState *pcms = PC_MACHINE(mo);
...

and so I will drop the add of pcms.



>>>> +        lowmem = pcms->max_ram_below_4g;
>>>> +    }
>>>>       /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory).
>>>>        * If it doesn't, we need to split it in chunks below and above 4G.
>>>> @@ -106,8 +113,10 @@ static void pc_init1(MachineState *machine,
>>>>        * For old machine types, use whatever split we used historically to avoid
>>>>        * breaking migration.
>>>>        */
>>>> -    if (machine->ram_size >= 0xe0000000) {
>>>> -        ram_addr_t lowmem = gigabyte_align ? 0xc0000000 : 0xe0000000;
>>>> +    if (machine->ram_size >= lowmem) {
>>>> +        if (!(pcms && pcms->max_ram_below_4g) && gigabyte_align) {
>>>> +            lowmem = 0xc0000000;
>>>> +        }
>>>>           above_4g_mem_size = machine->ram_size - lowmem;
>>>>           below_4g_mem_size = lowmem;
>>>>       } else {
>>> So why do we need gigabyte_align anymore?
>> Because of qemu 2.0 and the user is not required to specify this option.
>>
>>> Can't we set property to 0xc0000000 by default, and
>>> override for old machine types?
>> There is a strange compatibility part here.  Since this code includes ram_size (see:
>>
>> http://lists.gnu.org/archive/html/qemu-devel/2014-02/msg05146.html
>>
>> ) and xen has a different default.
>>
> So instead of default 0, it would be preferable to set the default to the
> actual value, and let user override it.
>
> Or if that's too hard, set max_ram_below_4g instead of setting
> gigabyte_align. gigabyte_align switches everywhere is messy
> enough, adding max_ram_below_4g into mix is just too messy.
>

I do not see a way to encode the default since for QEMU 2.0 it depends on the specified ram size:

> This is intentional.

> If we can fit all ram into low memory, because it is less than 3.5G,
> we'll do that (pc machine type, q35 numbers are different but logic is
> the same).  This way 32bit (+non-PAE) guests can continue to have up to
> 3.5G memory.

> If we can't fit all ram into low memory (thus the guest should be able
> to access ram above 4G anyway), then we'll cut off at a gigabyte
> boundary (3G for pc machine type).  This way our ram is nicely
> gigabyte-aligned and we can get best performance benefits from huge
> pages.

> The size of the pci hole changing in the second case is only a side
> effect, it's not the main reason for the change.

> cheers,
>   Gerd

So migration of a QEMU 2.0 pc to QEMU 2.1 without gigabyte_align would require the user
to specify the correct value of max-ram-below-4g.

When you add xen into the mix I do not see a way to get rid of gigabyte_align.

You have 3 cases:

1) old xen (without max-ram-below-4g), QEMU 2.1 or later.
     Will expect that QEMU acts as if max-ram-below-4g=3.75G was specified.
     I.E. gigabyte_align is ignored. Note: xen 4.4 asks for "pc,accel=xen" in some cases.


2) new xen (with max-ram-below-4g), QEMU 2.0 or earlier.
     Will expect that QEMU acts as if max-ram-below-4g=3.75G was specified.
     If any other value requested, the error from QEMU is ok.

3) new xen (with max-ram-below-4g), QEMU 2.1 or later.
      Expects that max-ram-below-4g works.


In summary, the migration of QEMU pc-i440fx-2.0 and pc-q35-2.0 guests
for accel=tcg, accel=kvm, accel=xen needs the gigabyte_align mess.


    -Don Slutz


>
>>> Also, a value that isn't a multiple of 1G will lead to bad
>>> performance for large machines which do have above_4g_mem_size.
>>> Let's detect and print a warning.
>> Will Do.
>>
>>     -Don Slutz
>>
>>>
>>>> @@ -122,7 +131,7 @@ static void pc_init1(MachineState *machine,
>>>>       }
>>>>       icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
>>>> -    object_property_add_child(qdev_get_machine(), "icc-bridge",
>>>> +    object_property_add_child(mo, "icc-bridge",
>>>>                                 OBJECT(icc_bridge), NULL);
>>>>       pc_cpus_init(machine->cpu_model, icc_bridge);
>>>> diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
>>>> index e28ce40..155cdf1 100644
>>>> --- a/hw/i386/pc_q35.c
>>>> +++ b/hw/i386/pc_q35.c
>>>> @@ -85,6 +85,13 @@ static void pc_q35_init(MachineState *machine)
>>>>       PCIDevice *ahci;
>>>>       DeviceState *icc_bridge;
>>>>       PcGuestInfo *guest_info;
>>>> +    Object *mo = qdev_get_machine();
>>>> +    PCMachineState *pcms = PC_MACHINE(mo);
>>>> +    ram_addr_t lowmem = 0xb0000000;
>>>> +
>>>> +    if (pcms && pcms->max_ram_below_4g) {
>>>> +        lowmem = pcms->max_ram_below_4g;
>>>> +    }
>>>>       /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory
>>>>        * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping
>>>> @@ -95,8 +102,10 @@ static void pc_q35_init(MachineState *machine)
>>>>        * For old machine types, use whatever split we used historically to avoid
>>>>        * breaking migration.
>>>>        */
>>>> -    if (machine->ram_size >= 0xb0000000) {
>>>> -        ram_addr_t lowmem = gigabyte_align ? 0x80000000 : 0xb0000000;
>>>> +    if (machine->ram_size >= lowmem) {
>>>> +        if (!(pcms && pcms->max_ram_below_4g) && gigabyte_align) {
>>>> +            lowmem = 0x800000000;
>>>> +        }
>>>>           above_4g_mem_size = machine->ram_size - lowmem;
>>>>           below_4g_mem_size = lowmem;
>>>>       } else {
>>>> @@ -111,7 +120,7 @@ static void pc_q35_init(MachineState *machine)
>>>>       }
>>>>       icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
>>>> -    object_property_add_child(qdev_get_machine(), "icc-bridge",
>>>> +    object_property_add_child(mo, "icc-bridge",
>>>>                                 OBJECT(icc_bridge), NULL);
>>>>       pc_cpus_init(machine->cpu_model, icc_bridge);
>>>> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
>>>> index 19530bd..2d8b562 100644
>>>> --- a/include/hw/i386/pc.h
>>>> +++ b/include/hw/i386/pc.h
>>>> @@ -32,10 +32,13 @@ struct PCMachineState {
>>>>       MemoryRegion hotplug_memory;
>>>>       HotplugHandler *acpi_dev;
>>>> +
>>>> +    uint64_t max_ram_below_4g;
>>>>   };
>>>>   #define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device"
>>>>   #define PC_MACHINE_MEMHP_REGION_SIZE "hotplug-memory-region-size"
>>>> +#define PC_MACHINE_MAX_RAM_BELOW_4G "max-ram-below-4g"
>>>>   /**
>>>>    * PCMachineClass:
>>>> diff --git a/vl.c b/vl.c
>>>> index 5e77a27..cffb9c5 100644
>>>> --- a/vl.c
>>>> +++ b/vl.c
>>>> @@ -382,6 +382,10 @@ static QemuOptsList qemu_machine_opts = {
>>>>               .name = "kvm-type",
>>>>               .type = QEMU_OPT_STRING,
>>>>               .help = "Specifies the KVM virtualization mode (HV, PR)",
>>>> +        },{
>>>> +            .name = PC_MACHINE_MAX_RAM_BELOW_4G,
>>>> +            .type = QEMU_OPT_SIZE,
>>>> +            .help = "maximum ram below the 4G boundary (32bit boundary)",
>>>>           },
>>>>           { /* End of list */ }
>>>>       },
>>>> -- 
>>>> 1.8.4
Don Slutz June 9, 2014, 8:03 p.m. UTC | #8
On 06/09/14 13:33, Marcel Apfelbaum wrote:
> On Mon, 2014-06-09 at 17:37 +0200, Igor Mammedov wrote:
>> On Mon, 09 Jun 2014 18:10:27 +0300
>> Marcel Apfelbaum <marcel.a@redhat.com> wrote:
>>
>>> Hi,
>>>
>>> On Mon, 2014-06-09 at 17:38 +0300, Michael S. Tsirkin wrote:
>>>> On Mon, Jun 09, 2014 at 10:20:57AM -0400, Don Slutz wrote:
>>>>> On 06/08/14 11:40, Michael S. Tsirkin wrote:
>>>>>> On Fri, Jun 06, 2014 at 01:52:05PM -0400, Don Slutz wrote:
>>>>>>> This is a pc & q35 only machine opt.  One use is to allow for more
>>>>>>> ram in a 32bit guest for example:
>>>>>>>
>>>>>>> -machine pc,max-ram-below-4g=3.75G
>>>>>>>
>>>>>>> If you add enough PCI devices then all mmio for them will not fit
>>>>>>> below 4G which may not be the layout the user wanted. This allows
>>>>>>> you to increase the below 4G address space that PCI devices can use
>>>>>>> (aka decrease ram below 4G) and therefore in more cases not have any
>>>>>>> mmio that is above 4G.
>>>>>>>
>>>>>>> For example using "-machine pc,max-ram-below-4g=2G" on the command
>>>>>>> line will limit the amount of ram that is below 4G to 2G.
>>>>>>>
>>>>>>> Signed-off-by: Don Slutz <dslutz@verizon.com>
>>>>>>> ---
>>>>>>> v5:
>>>>>>>    Re-work based on:
>>>>>>>
>>>>>>>    https://github.com/imammedo/qemu/commits/memory-hotplug-v11
>>>>>>>
>>>>>>>
>>>>>>>   hw/i386/pc.c         | 38 ++++++++++++++++++++++++++++++++++++++
>>>>>>>   hw/i386/pc_piix.c    | 15 ++++++++++++---
>>>>>>>   hw/i386/pc_q35.c     | 15 ++++++++++++---
>>>>>>>   include/hw/i386/pc.h |  3 +++
>>>>>>>   vl.c                 |  4 ++++
>>>>>>>   5 files changed, 69 insertions(+), 6 deletions(-)
>>>>>>>
>>>>>>> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
>>>>>>> index 7cdba10..bccb746 100644
>>>>>>> --- a/hw/i386/pc.c
>>>>>>> +++ b/hw/i386/pc.c
>>>>>>> @@ -1644,11 +1644,49 @@ pc_machine_get_hotplug_memory_region_size(Object *obj, Visitor *v, void *opaque,
>>>>>>>       visit_type_int(v, &value, name, errp);
>>>>>>>   }
>>>>>>> +static void pc_machine_get_max_ram_below_4g(Object *obj, Visitor *v,
>>>>>>> +                                         void *opaque, const char *name,
>>>>>>> +                                         Error **errp)
>>>>>>> +{
>>>>>>> +    PCMachineState *pcms = PC_MACHINE(obj);
>>>>>>> +    uint64_t value = pcms->max_ram_below_4g;
>>>>>>> +
>>>>>>> +    visit_type_size(v, &value, name, errp);
>>>>>>> +}
>>>>>>> +
>>>>>>> +static void pc_machine_set_max_ram_below_4g(Object *obj, Visitor *v,
>>>>>>> +                                         void *opaque, const char *name,
>>>>>>> +                                         Error **errp)
>>>>>>> +{
>>>>>>> +    PCMachineState *pcms = PC_MACHINE(obj);
>>>>>>> +    Error *error = NULL;
>>>>>>> +    uint64_t value;
>>>>>>> +
>>>>>>> +    visit_type_size(v, &value, name, &error);
>>>>>>> +    if (error) {
>>>>>>> +        error_propagate(errp, error);
>>>>>>> +        return;
>>>>>>> +    }
>>>>>>> +    if (value > (1ULL << 32)) {
>>>>>>> +        error_set(&error, ERROR_CLASS_GENERIC_ERROR,
>>>>>>> +                  "Machine option 'max-ram-below-4g=%"PRIu64
>>>>>>> +                  "' expects size less then or equal to 4G", value);
>>>>>> less than
>>>>> But the test is greater then.  So "not greater then" is "less then or equal".
>>>>> Or did you want the test changed?
>>>> No, just correcting English: less than, not less then :)
>>>>
>>>>>>> +        error_propagate(errp, error);
>>>>>>> +        return;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    pcms->max_ram_below_4g = value;
>>>>>>> +}
>>>>>>> +
>>>>>>>   static void pc_machine_initfn(Object *obj)
>>>>>>>   {
>>>>>>>       object_property_add(obj, PC_MACHINE_MEMHP_REGION_SIZE, "int",
>>>>>>>                           pc_machine_get_hotplug_memory_region_size,
>>>>>>>                           NULL, NULL, NULL, NULL);
>>>>>>> +    object_property_add(obj, PC_MACHINE_MAX_RAM_BELOW_4G,  "size",
>>>>>>> +                        pc_machine_get_max_ram_below_4g,
>>>>>>> +                        pc_machine_set_max_ram_below_4g,
>>>>>>> +                        NULL, NULL, NULL);
>>> Maybe you can add here a sane default and avoid comparison with 0
>>> any time you use it.
>> +1
>>

The basic problem is that there is no simple default.  For pc-i440fx-1.7, pc-q35-1.7 and all
older versions there are 2 values:


xen_enabled() ==> 3.75G
!xen_enabled() ==> 3.5G

pc-i440fx-2.0 and pc-q35-2.0(q35 has different numbers but the same logic) have 3 values:

xen_enabled() ==> 3.75G

!xen_enabled()  && ram_size <= 3.5G ==> 3.5G
!xen_enabled()  && ram_size > 3.5G ==> 3.0G


Gerd has more on this in:

http://lists.gnu.org/archive/html/qemu-devel/2014-02/msg05146.html


>>> If you think you need value per machine type, you can add it to
>>> compat props. I don't see how is related, so you may not want to do so.

I need more than 1 value per machine type.  So I do not see how one default would work.

    -Don Slutz

>> Using compat_props would be great however does compat_props work for machine
>> type itself already?
> Now that you are mentioning it, I was hoping that compat_props are converted
> into QemuOpts or something and then mapped into machine properties.
> I'll look into it.
>
>>>>>>>   }
>>>>>>>   static void pc_machine_class_init(ObjectClass *oc, void *data)
>>>>>>> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
>>>>>>> index 40f6eaf..25f4727 100644
>>>>>>> --- a/hw/i386/pc_piix.c
>>>>>>> +++ b/hw/i386/pc_piix.c
>>>>>>> @@ -98,6 +98,13 @@ static void pc_init1(MachineState *machine,
>>>>>>>       DeviceState *icc_bridge;
>>>>>>>       FWCfgState *fw_cfg = NULL;
>>>>>>>       PcGuestInfo *guest_info;
>>>>>>> +    Object *mo = qdev_get_machine();
>>>>>>> +    PCMachineState *pcms = PC_MACHINE(mo);
>>>>>>> +    ram_addr_t lowmem = 0xe0000000;
>>>>>>> +
>>>>>>> +    if (pcms && pcms->max_ram_below_4g) {
>>>  From my QOM understanding, max_ram_below_4g is a private field,
>>> so it not should be used directly.
>>> You can use QOMs object_property_get or add a pc_machine wrapper
>>> for getting/setting the field.
>> pc_init1() is sort of private function of PCMachine, so there is no much
>> point to use verbose getters/setters internally unless there is checks behind
>> setter.
> I was just being QOM's advocate :). That being said, I'll not argue here,
> as it is not a major issue.
>
>
> Thanks,
> Marcel
>
>>>>>> Is pcms ever NULL? If yes why?
>>>>> Not that I know of.  I would be happy to convert this to an assert(pcms).
>>>> In fact, PC_MACHINE already includes an assert doesn't it?
>>>> So no need to check it everywhere.
>>> +1. No need for assert here. Is already done by OBJECT_CHECK.
>>>
>>> Hope I helped,
>>> Marcel
>>>
>>>>>>> +        lowmem = pcms->max_ram_below_4g;
>>>>>>> +    }
>>>>>>>       /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory).
>>>>>>>        * If it doesn't, we need to split it in chunks below and above 4G.
>>>>>>> @@ -106,8 +113,10 @@ static void pc_init1(MachineState *machine,
>>>>>>>        * For old machine types, use whatever split we used historically to avoid
>>>>>>>        * breaking migration.
>>>>>>>        */
>>>>>>> -    if (machine->ram_size >= 0xe0000000) {
>>>>>>> -        ram_addr_t lowmem = gigabyte_align ? 0xc0000000 : 0xe0000000;
>>>>>>> +    if (machine->ram_size >= lowmem) {
>>>>>>> +        if (!(pcms && pcms->max_ram_below_4g) && gigabyte_align) {
>>>>>>> +            lowmem = 0xc0000000;
>>>>>>> +        }
>>>>>>>           above_4g_mem_size = machine->ram_size - lowmem;
>>>>>>>           below_4g_mem_size = lowmem;
>>>>>>>       } else {
>>>>>> So why do we need gigabyte_align anymore?
>>>>> Because of qemu 2.0 and the user is not required to specify this option.
>>>>>
>>>>>> Can't we set property to 0xc0000000 by default, and
>>>>>> override for old machine types?
>>>>> There is a strange compatibility part here.  Since this code includes ram_size (see:
>>>>>
>>>>> http://lists.gnu.org/archive/html/qemu-devel/2014-02/msg05146.html
>>>>>
>>>>> ) and xen has a different default.
>>>>>
>>>> So instead of default 0, it would be preferable to set the default to the
>>>> actual value, and let user override it.
>>>>
>>>> Or if that's too hard, set max_ram_below_4g instead of setting
>>>> gigabyte_align. gigabyte_align switches everywhere is messy
>>>> enough, adding max_ram_below_4g into mix is just too messy.
>>>>
>>>>
>>>>
>>>>>> Also, a value that isn't a multiple of 1G will lead to bad
>>>>>> performance for large machines which do have above_4g_mem_size.
>>>>>> Let's detect and print a warning.
>>>>> Will Do.
>>>>>
>>>>>     -Don Slutz
>>>>>
>>>>>>
>>>>>>> @@ -122,7 +131,7 @@ static void pc_init1(MachineState *machine,
>>>>>>>       }
>>>>>>>       icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
>>>>>>> -    object_property_add_child(qdev_get_machine(), "icc-bridge",
>>>>>>> +    object_property_add_child(mo, "icc-bridge",
>>>>>>>                                 OBJECT(icc_bridge), NULL);
>>>>>>>       pc_cpus_init(machine->cpu_model, icc_bridge);
>>>>>>> diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
>>>>>>> index e28ce40..155cdf1 100644
>>>>>>> --- a/hw/i386/pc_q35.c
>>>>>>> +++ b/hw/i386/pc_q35.c
>>>>>>> @@ -85,6 +85,13 @@ static void pc_q35_init(MachineState *machine)
>>>>>>>       PCIDevice *ahci;
>>>>>>>       DeviceState *icc_bridge;
>>>>>>>       PcGuestInfo *guest_info;
>>>>>>> +    Object *mo = qdev_get_machine();
>>>>>>> +    PCMachineState *pcms = PC_MACHINE(mo);
>>>>>>> +    ram_addr_t lowmem = 0xb0000000;
>>>>>>> +
>>>>>>> +    if (pcms && pcms->max_ram_below_4g) {
>>>>>>> +        lowmem = pcms->max_ram_below_4g;
>>>>>>> +    }
>>>>>>>       /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory
>>>>>>>        * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping
>>>>>>> @@ -95,8 +102,10 @@ static void pc_q35_init(MachineState *machine)
>>>>>>>        * For old machine types, use whatever split we used historically to avoid
>>>>>>>        * breaking migration.
>>>>>>>        */
>>>>>>> -    if (machine->ram_size >= 0xb0000000) {
>>>>>>> -        ram_addr_t lowmem = gigabyte_align ? 0x80000000 : 0xb0000000;
>>>>>>> +    if (machine->ram_size >= lowmem) {
>>>>>>> +        if (!(pcms && pcms->max_ram_below_4g) && gigabyte_align) {
>>>>>>> +            lowmem = 0x800000000;
>>>>>>> +        }
>>>>>>>           above_4g_mem_size = machine->ram_size - lowmem;
>>>>>>>           below_4g_mem_size = lowmem;
>>>>>>>       } else {
>>>>>>> @@ -111,7 +120,7 @@ static void pc_q35_init(MachineState *machine)
>>>>>>>       }
>>>>>>>       icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
>>>>>>> -    object_property_add_child(qdev_get_machine(), "icc-bridge",
>>>>>>> +    object_property_add_child(mo, "icc-bridge",
>>>>>>>                                 OBJECT(icc_bridge), NULL);
>>>>>>>       pc_cpus_init(machine->cpu_model, icc_bridge);
>>>>>>> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
>>>>>>> index 19530bd..2d8b562 100644
>>>>>>> --- a/include/hw/i386/pc.h
>>>>>>> +++ b/include/hw/i386/pc.h
>>>>>>> @@ -32,10 +32,13 @@ struct PCMachineState {
>>>>>>>       MemoryRegion hotplug_memory;
>>>>>>>       HotplugHandler *acpi_dev;
>>>>>>> +
>>>>>>> +    uint64_t max_ram_below_4g;
>>>>>>>   };
>>>>>>>   #define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device"
>>>>>>>   #define PC_MACHINE_MEMHP_REGION_SIZE "hotplug-memory-region-size"
>>>>>>> +#define PC_MACHINE_MAX_RAM_BELOW_4G "max-ram-below-4g"
>>>>>>>   /**
>>>>>>>    * PCMachineClass:
>>>>>>> diff --git a/vl.c b/vl.c
>>>>>>> index 5e77a27..cffb9c5 100644
>>>>>>> --- a/vl.c
>>>>>>> +++ b/vl.c
>>>>>>> @@ -382,6 +382,10 @@ static QemuOptsList qemu_machine_opts = {
>>>>>>>               .name = "kvm-type",
>>>>>>>               .type = QEMU_OPT_STRING,
>>>>>>>               .help = "Specifies the KVM virtualization mode (HV, PR)",
>>>>>>> +        },{
>>>>>>> +            .name = PC_MACHINE_MAX_RAM_BELOW_4G,
>>>>>>> +            .type = QEMU_OPT_SIZE,
>>>>>>> +            .help = "maximum ram below the 4G boundary (32bit boundary)",
>>>>>>>           },
>>>>>>>           { /* End of list */ }
>>>>>>>       },
>>>>>>> -- 
>>>>>>> 1.8.4
>>>
>>>
>>
>
>
Gerd Hoffmann June 10, 2014, 7:36 a.m. UTC | #9
Hi,

> > So instead of default 0, it would be preferable to set the default to the
> > actual value, and let user override it.
> >
> > Or if that's too hard, set max_ram_below_4g instead of setting
> > gigabyte_align. gigabyte_align switches everywhere is messy
> > enough, adding max_ram_below_4g into mix is just too messy.
> >
> 
> I do not see a way to encode the default since for QEMU 2.0 it depends on the specified ram size:
> 
> > This is intentional.
> 
> > If we can fit all ram into low memory, because it is less than 3.5G,
> > we'll do that (pc machine type, q35 numbers are different but logic is
> > the same).  This way 32bit (+non-PAE) guests can continue to have up to
> > 3.5G memory.
> 
> > If we can't fit all ram into low memory (thus the guest should be able
> > to access ram above 4G anyway), then we'll cut off at a gigabyte
> > boundary (3G for pc machine type).  This way our ram is nicely
> > gigabyte-aligned and we can get best performance benefits from huge
> > pages.
> 
> > The size of the pci hole changing in the second case is only a side
> > effect, it's not the main reason for the change.
> 
> > cheers,
> >   Gerd
> 
> So migration of a QEMU 2.0 pc to QEMU 2.1 without gigabyte_align would require the user
> to specify the correct value of max-ram-below-4g.

You are expected to use the same machine type on both ends for live
migration.  That is the whole point why the gigabyte alignment logic is
activated for new machine types only:  Just -M pc-<version> should be
enough to make the machine config and the vmstate wire format compatible
even with different qemu versions on both ends, without requiring the
user manually specifying obscure parameters.

> When you add xen into the mix I do not see a way to get right of gigabyte_align.
> 
> You have 3 cases:
> 
> 1) old xen (without max-ram-below-4g), QEMU 2.1 or later.
>      Will expect that QEMU acts as if max-ram-below-4g=3.75G was specified.
>      I.E. gigabyte_align is ignored. Note: xen 4.4 asks for "pc,accel=xen" in some cases.

IMO xen should use a versioned machine type to make live migration more
reliable.  IIRC this was discussed anyway for other reasons (see
xen-platform-pci discussions, picking pc-i440fx-1.6 IIRC).  That should
also make qemu use the memory layout expected by old xen.

> 3) new xen (with max-ram-below-4g), QEMU 2.1 or later.
>       Expects that max-ram-below-4g works.

Newer xen versions can switch to a newer machine type, once it knows how
to deal with the changes: other memory layout, new config switches,
whatever else might have changed ...

cheers,
  Gerd
Don Slutz June 17, 2014, 5:51 p.m. UTC | #10
On 06/10/14 03:36, Gerd Hoffmann wrote:
>    Hi,
>
>>> So instead of default 0, it would be preferable to set the default to the
>>> actual value, and let user override it.
>>>
>>> Or if that's too hard, set max_ram_below_4g instead of setting
>>> gigabyte_align. gigabyte_align switches everywhere is messy
>>> enough, adding max_ram_below_4g into mix is just too messy.
>>>
>> I do not see a way to encode the default since for QEMU 2.0 it depends on the specified ram size:
>>
>>> This is intentional.
>>> If we can fit all ram into low memory, because it is less than 3.5G,
>>> we'll do that (pc machine type, q35 numbers are different but logic is
>>> the same).  This way 32bit (+non-PAE) guests can continue to have up to
>>> 3.5G memory.
>>> If we can't fit all ram into low memory (thus the guest should be able
>>> to access ram above 4G anyway), then we'll cut off at a gigabyte
>>> boundary (3G for pc machine type).  This way our ram is nicely
>>> gigabyte-aligned and we can get best performance benefits from huge
>>> pages.
>>> The size of the pci hole changing in the second case is only a side
>>> effect, it's not the main reason for the change.
>>> cheers,
>>>    Gerd
>> So migration of a QEMU 2.0 pc to QEMU 2.1 without gigabyte_align would require the user
>> to specify the correct value of max-ram-below-4g.
> You are expected to use the same machine type on both ends for live
> migration.  That is the whole point why the gigabyte alignment logic is
> activated for new machine types only:  Just -M pc-<version> should be
> enough to make the machine config and the vmstate wire format compatible
> even with different qemu versions on both ends, without requiring the
> user manually specifying obscure parameters.
>

I agree.

>> When you add xen into the mix I do not see a way to get right of gigabyte_align.
>>
>> You have 3 cases:
>>
>> 1) old xen (without max-ram-below-4g), QEMU 2.1 or later.
>>       Will expect that QEMU acts as if max-ram-below-4g=3.75G was specified.
>>       I.E. gigabyte_align is ignored. Note: xen 4.4 asks for "pc,accel=xen" in some cases.
> IMO xen should use a versioned machine type to make live migration more
> reliable.  IIRC this was discussed anyway for other reasons (see
> xen-platform-pci discussions, picking pc-i440fx-1.6 IIRC).

That is happening as far as I know and does not directly impact this
patch set.

>    That should
> also make qemu use the memory layout expected by old xen.

Currently xen just ignores the memory layout that QEMU sets up
and does it its own way.  So no, this does not make QEMU use the
memory layout expected by old xen.

Patch #1 (xen-hvm: Fix xen_hvm_init)
is all about letting the rest of QEMU know about the changed
memory layout.  This patch and patch #3 (xen-hvm: Pass is_default
to xen_hvm_init) not only allow QEMU to have more memory layouts
but also allow xen to have more memory layouts, such as a
gigabyte-aligned one.

>> 3) new xen (with max-ram-below-4g), QEMU 2.1 or later.
>>        Expects that max-ram-below-4g works.
> Newer xen versions can switch to a newer machine type, once it knows how
> to deal with the changes: other memory layout, new config switches,
> whatever else might have changed ...

Yes

    -Don Slutz


> cheers,
>    Gerd
>
>
Michael S. Tsirkin June 17, 2014, 6:22 p.m. UTC | #11
On Fri, Jun 06, 2014 at 01:52:05PM -0400, Don Slutz wrote:
> This is a pc & q35 only machine opt.  One use is to allow for more
> ram in a 32bit guest for example:
> 
> -machine pc,max-ram-below-4g=3.75G
>
> If you add enough PCI devices then all mmio for them will not fit
> below 4G which may not be the layout the user wanted. This allows
> you to increase the below 4G address space that PCI devices can use
> (aka decrease ram below 4G) and therefore in more cases not have any
> mmio that is above 4G.
> 
> For example using "-machine pc,max-ram-below-4g=2G" on the command
> line will limit the amount of ram that is below 4G to 2G.

I'm not sure I get it.

All this only has effect if you have >4G RAM, right?
Presumably you then have a 64 bit guest so why does it
care about memory/MMIO being below 4G?


> 
> Signed-off-by: Don Slutz <dslutz@verizon.com>
> ---
> v5:
>   Re-work based on:
> 
>   https://github.com/imammedo/qemu/commits/memory-hotplug-v11
> 
> 
>  hw/i386/pc.c         | 38 ++++++++++++++++++++++++++++++++++++++
>  hw/i386/pc_piix.c    | 15 ++++++++++++---
>  hw/i386/pc_q35.c     | 15 ++++++++++++---
>  include/hw/i386/pc.h |  3 +++
>  vl.c                 |  4 ++++
>  5 files changed, 69 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 7cdba10..bccb746 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -1644,11 +1644,49 @@ pc_machine_get_hotplug_memory_region_size(Object *obj, Visitor *v, void *opaque,
>      visit_type_int(v, &value, name, errp);
>  }
>  
> +static void pc_machine_get_max_ram_below_4g(Object *obj, Visitor *v,
> +                                         void *opaque, const char *name,
> +                                         Error **errp)
> +{
> +    PCMachineState *pcms = PC_MACHINE(obj);
> +    uint64_t value = pcms->max_ram_below_4g;
> +
> +    visit_type_size(v, &value, name, errp);
> +}
> +
> +static void pc_machine_set_max_ram_below_4g(Object *obj, Visitor *v,
> +                                         void *opaque, const char *name,
> +                                         Error **errp)
> +{
> +    PCMachineState *pcms = PC_MACHINE(obj);
> +    Error *error = NULL;
> +    uint64_t value;
> +
> +    visit_type_size(v, &value, name, &error);
> +    if (error) {
> +        error_propagate(errp, error);
> +        return;
> +    }
> +    if (value > (1ULL << 32)) {
> +        error_set(&error, ERROR_CLASS_GENERIC_ERROR,
> +                  "Machine option 'max-ram-below-4g=%"PRIu64
> +                  "' expects size less then or equal to 4G", value);
> +        error_propagate(errp, error);
> +        return;
> +    }
> +
> +    pcms->max_ram_below_4g = value;
> +}
> +
>  static void pc_machine_initfn(Object *obj)
>  {
>      object_property_add(obj, PC_MACHINE_MEMHP_REGION_SIZE, "int",
>                          pc_machine_get_hotplug_memory_region_size,
>                          NULL, NULL, NULL, NULL);
> +    object_property_add(obj, PC_MACHINE_MAX_RAM_BELOW_4G,  "size",
> +                        pc_machine_get_max_ram_below_4g,
> +                        pc_machine_set_max_ram_below_4g,
> +                        NULL, NULL, NULL);
>  }
>  
>  static void pc_machine_class_init(ObjectClass *oc, void *data)
> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> index 40f6eaf..25f4727 100644
> --- a/hw/i386/pc_piix.c
> +++ b/hw/i386/pc_piix.c
> @@ -98,6 +98,13 @@ static void pc_init1(MachineState *machine,
>      DeviceState *icc_bridge;
>      FWCfgState *fw_cfg = NULL;
>      PcGuestInfo *guest_info;
> +    Object *mo = qdev_get_machine();
> +    PCMachineState *pcms = PC_MACHINE(mo);
> +    ram_addr_t lowmem = 0xe0000000;
> +
> +    if (pcms && pcms->max_ram_below_4g) {
> +        lowmem = pcms->max_ram_below_4g;
> +    }
>  
>      /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory).
>       * If it doesn't, we need to split it in chunks below and above 4G.
> @@ -106,8 +113,10 @@ static void pc_init1(MachineState *machine,
>       * For old machine types, use whatever split we used historically to avoid
>       * breaking migration.
>       */
> -    if (machine->ram_size >= 0xe0000000) {
> -        ram_addr_t lowmem = gigabyte_align ? 0xc0000000 : 0xe0000000;
> +    if (machine->ram_size >= lowmem) {
> +        if (!(pcms && pcms->max_ram_below_4g) && gigabyte_align) {
> +            lowmem = 0xc0000000;
> +        }
>          above_4g_mem_size = machine->ram_size - lowmem;
>          below_4g_mem_size = lowmem;
>      } else {
> @@ -122,7 +131,7 @@ static void pc_init1(MachineState *machine,
>      }
>  
>      icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
> -    object_property_add_child(qdev_get_machine(), "icc-bridge",
> +    object_property_add_child(mo, "icc-bridge",
>                                OBJECT(icc_bridge), NULL);
>  
>      pc_cpus_init(machine->cpu_model, icc_bridge);
> diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
> index e28ce40..155cdf1 100644
> --- a/hw/i386/pc_q35.c
> +++ b/hw/i386/pc_q35.c
> @@ -85,6 +85,13 @@ static void pc_q35_init(MachineState *machine)
>      PCIDevice *ahci;
>      DeviceState *icc_bridge;
>      PcGuestInfo *guest_info;
> +    Object *mo = qdev_get_machine();
> +    PCMachineState *pcms = PC_MACHINE(mo);
> +    ram_addr_t lowmem = 0xb0000000;
> +
> +    if (pcms && pcms->max_ram_below_4g) {
> +        lowmem = pcms->max_ram_below_4g;
> +    }
>  
>      /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory
>       * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping
> @@ -95,8 +102,10 @@ static void pc_q35_init(MachineState *machine)
>       * For old machine types, use whatever split we used historically to avoid
>       * breaking migration.
>       */
> -    if (machine->ram_size >= 0xb0000000) {
> -        ram_addr_t lowmem = gigabyte_align ? 0x80000000 : 0xb0000000;
> +    if (machine->ram_size >= lowmem) {
> +        if (!(pcms && pcms->max_ram_below_4g) && gigabyte_align) {
> +            lowmem = 0x800000000;
> +        }
>          above_4g_mem_size = machine->ram_size - lowmem;
>          below_4g_mem_size = lowmem;
>      } else {
> @@ -111,7 +120,7 @@ static void pc_q35_init(MachineState *machine)
>      }
>  
>      icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
> -    object_property_add_child(qdev_get_machine(), "icc-bridge",
> +    object_property_add_child(mo, "icc-bridge",
>                                OBJECT(icc_bridge), NULL);
>  
>      pc_cpus_init(machine->cpu_model, icc_bridge);
> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> index 19530bd..2d8b562 100644
> --- a/include/hw/i386/pc.h
> +++ b/include/hw/i386/pc.h
> @@ -32,10 +32,13 @@ struct PCMachineState {
>      MemoryRegion hotplug_memory;
>  
>      HotplugHandler *acpi_dev;
> +
> +    uint64_t max_ram_below_4g;
>  };
>  
>  #define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device"
>  #define PC_MACHINE_MEMHP_REGION_SIZE "hotplug-memory-region-size"
> +#define PC_MACHINE_MAX_RAM_BELOW_4G "max-ram-below-4g"
>  
>  /**
>   * PCMachineClass:
> diff --git a/vl.c b/vl.c
> index 5e77a27..cffb9c5 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -382,6 +382,10 @@ static QemuOptsList qemu_machine_opts = {
>              .name = "kvm-type",
>              .type = QEMU_OPT_STRING,
>              .help = "Specifies the KVM virtualization mode (HV, PR)",
> +        },{
> +            .name = PC_MACHINE_MAX_RAM_BELOW_4G,
> +            .type = QEMU_OPT_SIZE,
> +            .help = "maximum ram below the 4G boundary (32bit boundary)",
>          },
>          { /* End of list */ }
>      },
> -- 
> 1.8.4
Don Slutz June 17, 2014, 6:44 p.m. UTC | #12
On 06/17/14 14:22, Michael S. Tsirkin wrote:
> On Fri, Jun 06, 2014 at 01:52:05PM -0400, Don Slutz wrote:
>> This is a pc & q35 only machine opt.  One use is to allow for more
>> ram in a 32bit guest for example:
>>
>> -machine pc,max-ram-below-4g=3.75G
>>
>> If you add enough PCI devices then all mmio for them will not fit
>> below 4G which may not be the layout the user wanted. This allows
>> you to increase the below 4G address space that PCI devices can use
>> (aka decrease ram below 4G) and therefore in more cases not have any
>> mmio that is above 4G.
>>
>> For example using "-machine pc,max-ram-below-4g=2G" on the command
>> line will limit the amount of ram that is below 4G to 2G.
> I'm not sure I get it.
>
> All this only has effect if you have >4G RAM, right?

Nope.  It takes effect whenever RAM > max-ram-below-4g.  E.g.

    -machine pc,max-ram-below-4g=2G -m 3G

Will have 2G in 32bit space (below 4G) and 1G in 64bit space (above 4G).

> Presumably you then have a 64 bit guest so why does it
> care about memory/MMIO being below 4G?

It is not the guest that matters, it is all the PCI devices in use. There are
ones (all old hardware) that only support 32bit addresses.  When using
these you may need more room.

Also pci-passthru of real hardware that is 32bit only may require this.
    -Don Slutz


>
>
>> Signed-off-by: Don Slutz <dslutz@verizon.com>
>> ---
>> v5:
>>    Re-work based on:
>>
>>    https://github.com/imammedo/qemu/commits/memory-hotplug-v11
>>
>>
>>   hw/i386/pc.c         | 38 ++++++++++++++++++++++++++++++++++++++
>>   hw/i386/pc_piix.c    | 15 ++++++++++++---
>>   hw/i386/pc_q35.c     | 15 ++++++++++++---
>>   include/hw/i386/pc.h |  3 +++
>>   vl.c                 |  4 ++++
>>   5 files changed, 69 insertions(+), 6 deletions(-)
>>
>> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
>> index 7cdba10..bccb746 100644
>> --- a/hw/i386/pc.c
>> +++ b/hw/i386/pc.c
>> @@ -1644,11 +1644,49 @@ pc_machine_get_hotplug_memory_region_size(Object *obj, Visitor *v, void *opaque,
>>       visit_type_int(v, &value, name, errp);
>>   }
>>   
>> +static void pc_machine_get_max_ram_below_4g(Object *obj, Visitor *v,
>> +                                         void *opaque, const char *name,
>> +                                         Error **errp)
>> +{
>> +    PCMachineState *pcms = PC_MACHINE(obj);
>> +    uint64_t value = pcms->max_ram_below_4g;
>> +
>> +    visit_type_size(v, &value, name, errp);
>> +}
>> +
>> +static void pc_machine_set_max_ram_below_4g(Object *obj, Visitor *v,
>> +                                         void *opaque, const char *name,
>> +                                         Error **errp)
>> +{
>> +    PCMachineState *pcms = PC_MACHINE(obj);
>> +    Error *error = NULL;
>> +    uint64_t value;
>> +
>> +    visit_type_size(v, &value, name, &error);
>> +    if (error) {
>> +        error_propagate(errp, error);
>> +        return;
>> +    }
>> +    if (value > (1ULL << 32)) {
>> +        error_set(&error, ERROR_CLASS_GENERIC_ERROR,
>> +                  "Machine option 'max-ram-below-4g=%"PRIu64
>> +                  "' expects size less then or equal to 4G", value);
>> +        error_propagate(errp, error);
>> +        return;
>> +    }
>> +
>> +    pcms->max_ram_below_4g = value;
>> +}
>> +
>>   static void pc_machine_initfn(Object *obj)
>>   {
>>       object_property_add(obj, PC_MACHINE_MEMHP_REGION_SIZE, "int",
>>                           pc_machine_get_hotplug_memory_region_size,
>>                           NULL, NULL, NULL, NULL);
>> +    object_property_add(obj, PC_MACHINE_MAX_RAM_BELOW_4G,  "size",
>> +                        pc_machine_get_max_ram_below_4g,
>> +                        pc_machine_set_max_ram_below_4g,
>> +                        NULL, NULL, NULL);
>>   }
>>   
>>   static void pc_machine_class_init(ObjectClass *oc, void *data)
>> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
>> index 40f6eaf..25f4727 100644
>> --- a/hw/i386/pc_piix.c
>> +++ b/hw/i386/pc_piix.c
>> @@ -98,6 +98,13 @@ static void pc_init1(MachineState *machine,
>>       DeviceState *icc_bridge;
>>       FWCfgState *fw_cfg = NULL;
>>       PcGuestInfo *guest_info;
>> +    Object *mo = qdev_get_machine();
>> +    PCMachineState *pcms = PC_MACHINE(mo);
>> +    ram_addr_t lowmem = 0xe0000000;
>> +
>> +    if (pcms && pcms->max_ram_below_4g) {
>> +        lowmem = pcms->max_ram_below_4g;
>> +    }
>>   
>>       /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory).
>>        * If it doesn't, we need to split it in chunks below and above 4G.
>> @@ -106,8 +113,10 @@ static void pc_init1(MachineState *machine,
>>        * For old machine types, use whatever split we used historically to avoid
>>        * breaking migration.
>>        */
>> -    if (machine->ram_size >= 0xe0000000) {
>> -        ram_addr_t lowmem = gigabyte_align ? 0xc0000000 : 0xe0000000;
>> +    if (machine->ram_size >= lowmem) {
>> +        if (!(pcms && pcms->max_ram_below_4g) && gigabyte_align) {
>> +            lowmem = 0xc0000000;
>> +        }
>>           above_4g_mem_size = machine->ram_size - lowmem;
>>           below_4g_mem_size = lowmem;
>>       } else {
>> @@ -122,7 +131,7 @@ static void pc_init1(MachineState *machine,
>>       }
>>   
>>       icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
>> -    object_property_add_child(qdev_get_machine(), "icc-bridge",
>> +    object_property_add_child(mo, "icc-bridge",
>>                                 OBJECT(icc_bridge), NULL);
>>   
>>       pc_cpus_init(machine->cpu_model, icc_bridge);
>> diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
>> index e28ce40..155cdf1 100644
>> --- a/hw/i386/pc_q35.c
>> +++ b/hw/i386/pc_q35.c
>> @@ -85,6 +85,13 @@ static void pc_q35_init(MachineState *machine)
>>       PCIDevice *ahci;
>>       DeviceState *icc_bridge;
>>       PcGuestInfo *guest_info;
>> +    Object *mo = qdev_get_machine();
>> +    PCMachineState *pcms = PC_MACHINE(mo);
>> +    ram_addr_t lowmem = 0xb0000000;
>> +
>> +    if (pcms && pcms->max_ram_below_4g) {
>> +        lowmem = pcms->max_ram_below_4g;
>> +    }
>>   
>>       /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory
>>        * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping
>> @@ -95,8 +102,10 @@ static void pc_q35_init(MachineState *machine)
>>        * For old machine types, use whatever split we used historically to avoid
>>        * breaking migration.
>>        */
>> -    if (machine->ram_size >= 0xb0000000) {
>> -        ram_addr_t lowmem = gigabyte_align ? 0x80000000 : 0xb0000000;
>> +    if (machine->ram_size >= lowmem) {
>> +        if (!(pcms && pcms->max_ram_below_4g) && gigabyte_align) {
>> +            lowmem = 0x800000000;
>> +        }
>>           above_4g_mem_size = machine->ram_size - lowmem;
>>           below_4g_mem_size = lowmem;
>>       } else {
>> @@ -111,7 +120,7 @@ static void pc_q35_init(MachineState *machine)
>>       }
>>   
>>       icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
>> -    object_property_add_child(qdev_get_machine(), "icc-bridge",
>> +    object_property_add_child(mo, "icc-bridge",
>>                                 OBJECT(icc_bridge), NULL);
>>   
>>       pc_cpus_init(machine->cpu_model, icc_bridge);
>> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
>> index 19530bd..2d8b562 100644
>> --- a/include/hw/i386/pc.h
>> +++ b/include/hw/i386/pc.h
>> @@ -32,10 +32,13 @@ struct PCMachineState {
>>       MemoryRegion hotplug_memory;
>>   
>>       HotplugHandler *acpi_dev;
>> +
>> +    uint64_t max_ram_below_4g;
>>   };
>>   
>>   #define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device"
>>   #define PC_MACHINE_MEMHP_REGION_SIZE "hotplug-memory-region-size"
>> +#define PC_MACHINE_MAX_RAM_BELOW_4G "max-ram-below-4g"
>>   
>>   /**
>>    * PCMachineClass:
>> diff --git a/vl.c b/vl.c
>> index 5e77a27..cffb9c5 100644
>> --- a/vl.c
>> +++ b/vl.c
>> @@ -382,6 +382,10 @@ static QemuOptsList qemu_machine_opts = {
>>               .name = "kvm-type",
>>               .type = QEMU_OPT_STRING,
>>               .help = "Specifies the KVM virtualization mode (HV, PR)",
>> +        },{
>> +            .name = PC_MACHINE_MAX_RAM_BELOW_4G,
>> +            .type = QEMU_OPT_SIZE,
>> +            .help = "maximum ram below the 4G boundary (32bit boundary)",
>>           },
>>           { /* End of list */ }
>>       },
>> -- 
>> 1.8.4
Michael S. Tsirkin June 17, 2014, 7:43 p.m. UTC | #13
On Tue, Jun 17, 2014 at 02:44:41PM -0400, Don Slutz wrote:
> On 06/17/14 14:22, Michael S. Tsirkin wrote:
> >On Fri, Jun 06, 2014 at 01:52:05PM -0400, Don Slutz wrote:
> >>This is a pc & q35 only machine opt.  One use is to allow for more
> >>ram in a 32bit guest for example:
> >>
> >>-machine pc,max-ram-below-4g=3.75G
> >>
> >>If you add enough PCI devices then all mmio for them will not fit
> >>below 4G which may not be the layout the user wanted. This allows
> >>you to increase the below 4G address space that PCI devices can use
> >>(aka decrease ram below 4G) and therefore in more cases not have any
> >>mmio that is above 4G.
> >>
> >>For example using "-machine pc,max-ram-below-4g=2G" on the command
> >>line will limit the amount of ram that is below 4G to 2G.
> >I'm not sure I get it.
> >
> >All this only has effect if you have >4G RAM, right?
> 
> Nope.  When it takes effect includes RAM > max-ram-below-4g. I.E.
> 
>    -machine pc,max-ram-below-4g=2G -m 3G
> 
> Will have 2G in 32bit space (below 4G) and 1G in 64bit space (above 4G).
> 
> >Presumably you then have a 64 bit guest so why does it
> >care about memory/MMIO being below 4G?
> 
> It is not the guest that matters, it is all the PCI devices in use. There are
> ones (all old hardware) that only support 32bit addresses.

Emulated devices? Let's just teach them to support 64 bit BARs.
Looks like a nicer solution than asking user to make
this decision.

>  When using
> these you may need more room.
> 
> Also pci-passthru of real hardware that is 32bit only may require this.
>    -Don Slutz

Guest and host BARs are unrelated so it seems we could allow 64 bit BARs
even when they are 32 bit on host.

Though PCI Express spec requires 64 bit BARs since version 1.0,
are there really many devices like this out there?
Care to give examples?
They won't work well on physical systems either.

> 
> >
> >
> >>Signed-off-by: Don Slutz <dslutz@verizon.com>
> >>---
> >>v5:
> >>   Re-work based on:
> >>
> >>   https://github.com/imammedo/qemu/commits/memory-hotplug-v11
> >>
> >>
> >>  hw/i386/pc.c         | 38 ++++++++++++++++++++++++++++++++++++++
> >>  hw/i386/pc_piix.c    | 15 ++++++++++++---
> >>  hw/i386/pc_q35.c     | 15 ++++++++++++---
> >>  include/hw/i386/pc.h |  3 +++
> >>  vl.c                 |  4 ++++
> >>  5 files changed, 69 insertions(+), 6 deletions(-)
> >>
> >>diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> >>index 7cdba10..bccb746 100644
> >>--- a/hw/i386/pc.c
> >>+++ b/hw/i386/pc.c
> >>@@ -1644,11 +1644,49 @@ pc_machine_get_hotplug_memory_region_size(Object *obj, Visitor *v, void *opaque,
> >>      visit_type_int(v, &value, name, errp);
> >>  }
> >>+static void pc_machine_get_max_ram_below_4g(Object *obj, Visitor *v,
> >>+                                         void *opaque, const char *name,
> >>+                                         Error **errp)
> >>+{
> >>+    PCMachineState *pcms = PC_MACHINE(obj);
> >>+    uint64_t value = pcms->max_ram_below_4g;
> >>+
> >>+    visit_type_size(v, &value, name, errp);
> >>+}
> >>+
> >>+static void pc_machine_set_max_ram_below_4g(Object *obj, Visitor *v,
> >>+                                         void *opaque, const char *name,
> >>+                                         Error **errp)
> >>+{
> >>+    PCMachineState *pcms = PC_MACHINE(obj);
> >>+    Error *error = NULL;
> >>+    uint64_t value;
> >>+
> >>+    visit_type_size(v, &value, name, &error);
> >>+    if (error) {
> >>+        error_propagate(errp, error);
> >>+        return;
> >>+    }
> >>+    if (value > (1ULL << 32)) {
> >>+        error_set(&error, ERROR_CLASS_GENERIC_ERROR,
> >>+                  "Machine option 'max-ram-below-4g=%"PRIu64
> >>+                  "' expects size less then or equal to 4G", value);
> >>+        error_propagate(errp, error);
> >>+        return;
> >>+    }
> >>+
> >>+    pcms->max_ram_below_4g = value;
> >>+}
> >>+
> >>  static void pc_machine_initfn(Object *obj)
> >>  {
> >>      object_property_add(obj, PC_MACHINE_MEMHP_REGION_SIZE, "int",
> >>                          pc_machine_get_hotplug_memory_region_size,
> >>                          NULL, NULL, NULL, NULL);
> >>+    object_property_add(obj, PC_MACHINE_MAX_RAM_BELOW_4G,  "size",
> >>+                        pc_machine_get_max_ram_below_4g,
> >>+                        pc_machine_set_max_ram_below_4g,
> >>+                        NULL, NULL, NULL);
> >>  }
> >>  static void pc_machine_class_init(ObjectClass *oc, void *data)
> >>diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> >>index 40f6eaf..25f4727 100644
> >>--- a/hw/i386/pc_piix.c
> >>+++ b/hw/i386/pc_piix.c
> >>@@ -98,6 +98,13 @@ static void pc_init1(MachineState *machine,
> >>      DeviceState *icc_bridge;
> >>      FWCfgState *fw_cfg = NULL;
> >>      PcGuestInfo *guest_info;
> >>+    Object *mo = qdev_get_machine();
> >>+    PCMachineState *pcms = PC_MACHINE(mo);
> >>+    ram_addr_t lowmem = 0xe0000000;
> >>+
> >>+    if (pcms && pcms->max_ram_below_4g) {
> >>+        lowmem = pcms->max_ram_below_4g;
> >>+    }
> >>      /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory).
> >>       * If it doesn't, we need to split it in chunks below and above 4G.
> >>@@ -106,8 +113,10 @@ static void pc_init1(MachineState *machine,
> >>       * For old machine types, use whatever split we used historically to avoid
> >>       * breaking migration.
> >>       */
> >>-    if (machine->ram_size >= 0xe0000000) {
> >>-        ram_addr_t lowmem = gigabyte_align ? 0xc0000000 : 0xe0000000;
> >>+    if (machine->ram_size >= lowmem) {
> >>+        if (!(pcms && pcms->max_ram_below_4g) && gigabyte_align) {
> >>+            lowmem = 0xc0000000;
> >>+        }
> >>          above_4g_mem_size = machine->ram_size - lowmem;
> >>          below_4g_mem_size = lowmem;
> >>      } else {
> >>@@ -122,7 +131,7 @@ static void pc_init1(MachineState *machine,
> >>      }
> >>      icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
> >>-    object_property_add_child(qdev_get_machine(), "icc-bridge",
> >>+    object_property_add_child(mo, "icc-bridge",
> >>                                OBJECT(icc_bridge), NULL);
> >>      pc_cpus_init(machine->cpu_model, icc_bridge);
> >>diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
> >>index e28ce40..155cdf1 100644
> >>--- a/hw/i386/pc_q35.c
> >>+++ b/hw/i386/pc_q35.c
> >>@@ -85,6 +85,13 @@ static void pc_q35_init(MachineState *machine)
> >>      PCIDevice *ahci;
> >>      DeviceState *icc_bridge;
> >>      PcGuestInfo *guest_info;
> >>+    Object *mo = qdev_get_machine();
> >>+    PCMachineState *pcms = PC_MACHINE(mo);
> >>+    ram_addr_t lowmem = 0xb0000000;
> >>+
> >>+    if (pcms && pcms->max_ram_below_4g) {
> >>+        lowmem = pcms->max_ram_below_4g;
> >>+    }
> >>      /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory
> >>       * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping
> >>@@ -95,8 +102,10 @@ static void pc_q35_init(MachineState *machine)
> >>       * For old machine types, use whatever split we used historically to avoid
> >>       * breaking migration.
> >>       */
> >>-    if (machine->ram_size >= 0xb0000000) {
> >>-        ram_addr_t lowmem = gigabyte_align ? 0x80000000 : 0xb0000000;
> >>+    if (machine->ram_size >= lowmem) {
> >>+        if (!(pcms && pcms->max_ram_below_4g) && gigabyte_align) {
> >>+            lowmem = 0x800000000;
> >>+        }
> >>          above_4g_mem_size = machine->ram_size - lowmem;
> >>          below_4g_mem_size = lowmem;
> >>      } else {
> >>@@ -111,7 +120,7 @@ static void pc_q35_init(MachineState *machine)
> >>      }
> >>      icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
> >>-    object_property_add_child(qdev_get_machine(), "icc-bridge",
> >>+    object_property_add_child(mo, "icc-bridge",
> >>                                OBJECT(icc_bridge), NULL);
> >>      pc_cpus_init(machine->cpu_model, icc_bridge);
> >>diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> >>index 19530bd..2d8b562 100644
> >>--- a/include/hw/i386/pc.h
> >>+++ b/include/hw/i386/pc.h
> >>@@ -32,10 +32,13 @@ struct PCMachineState {
> >>      MemoryRegion hotplug_memory;
> >>      HotplugHandler *acpi_dev;
> >>+
> >>+    uint64_t max_ram_below_4g;
> >>  };
> >>  #define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device"
> >>  #define PC_MACHINE_MEMHP_REGION_SIZE "hotplug-memory-region-size"
> >>+#define PC_MACHINE_MAX_RAM_BELOW_4G "max-ram-below-4g"
> >>  /**
> >>   * PCMachineClass:
> >>diff --git a/vl.c b/vl.c
> >>index 5e77a27..cffb9c5 100644
> >>--- a/vl.c
> >>+++ b/vl.c
> >>@@ -382,6 +382,10 @@ static QemuOptsList qemu_machine_opts = {
> >>              .name = "kvm-type",
> >>              .type = QEMU_OPT_STRING,
> >>              .help = "Specifies the KVM virtualization mode (HV, PR)",
> >>+        },{
> >>+            .name = PC_MACHINE_MAX_RAM_BELOW_4G,
> >>+            .type = QEMU_OPT_SIZE,
> >>+            .help = "maximum ram below the 4G boundary (32bit boundary)",
> >>          },
> >>          { /* End of list */ }
> >>      },
> >>-- 
> >>1.8.4
Michael S. Tsirkin June 17, 2014, 8:05 p.m. UTC | #14
On Tue, Jun 17, 2014 at 10:43:29PM +0300, Michael S. Tsirkin wrote:
> On Tue, Jun 17, 2014 at 02:44:41PM -0400, Don Slutz wrote:
> > On 06/17/14 14:22, Michael S. Tsirkin wrote:
> > >On Fri, Jun 06, 2014 at 01:52:05PM -0400, Don Slutz wrote:
> > >>This is a pc & q35 only machine opt.  One use is to allow for more
> > >>ram in a 32bit guest for example:
> > >>
> > >>-machine pc,max-ram-below-4g=3.75G
> > >>
> > >>If you add enough PCI devices then all mmio for them will not fit
> > >>below 4G which may not be the layout the user wanted. This allows
> > >>you to increase the below 4G address space that PCI devices can use
> > >>(aka decrease ram below 4G) and therefore in more cases not have any
> > >>mmio that is above 4G.
> > >>
> > >>For example using "-machine pc,max-ram-below-4g=2G" on the command
> > >>line will limit the amount of ram that is below 4G to 2G.
> > >I'm not sure I get it.
> > >
> > >All this only has effect if you have >4G RAM, right?
> > 
> > Nope.  When it takes effect includes RAM > max-ram-below-4g. I.E.
> > 
> >    -machine pc,max-ram-below-4g=2G -m 3G
> > 
> > Will have 2G in 32bit space (below 4G) and 1G in 64bit space (above 4G).
> > 
> > >Presumably you then have a 64 bit guest so why does it
> > >care about memory/MMIO being below 4G?
> > 
> > It is not the guest that matters, it is all the PCI devices in use. There are
> > ones (all old hardware) that only support 32bit addresses.
> 
> Emulated devices? Let's just teach them to support 64 bit BARs.
> Looks like a nicer solution than asking user to make
> this decision.
> 
> >  When using
> > these you may need more room.
> > 
> > Also pci-passthru of real hardware that is 32bit only may require this.
> >    -Don Slutz
> 
> Guest and host BARs are unrelated so it seems we could allow 64 bit BARs
> even when they are 32 bit on host.
> 
> Though PCI Express spec requires 64 bit BARs since version 1.0,
> are there really many devices like this out there?
> Care giving examples?
> They won't well on physical systems either.


Hmm, if we are using a PCI-to-PCI bridge and the BAR is
non-prefetchable then that limits us to 32 bit.

So I'd like to hear a bit more about the usecase,
but it's sounding kind of reasonable.

Let's go with what I suggested in 20140617185426.GC15610@redhat.com
then:

	- user sets an upper limit
	- we do min(qemu limit, user limit)

probably also warn if user-supplied limit causes
1g alignment issues.


> > 
> > >
> > >
> > >>Signed-off-by: Don Slutz <dslutz@verizon.com>
> > >>---
> > >>v5:
> > >>   Re-work based on:
> > >>
> > >>   https://github.com/imammedo/qemu/commits/memory-hotplug-v11
> > >>
> > >>
> > >>  hw/i386/pc.c         | 38 ++++++++++++++++++++++++++++++++++++++
> > >>  hw/i386/pc_piix.c    | 15 ++++++++++++---
> > >>  hw/i386/pc_q35.c     | 15 ++++++++++++---
> > >>  include/hw/i386/pc.h |  3 +++
> > >>  vl.c                 |  4 ++++
> > >>  5 files changed, 69 insertions(+), 6 deletions(-)
> > >>
> > >>diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> > >>index 7cdba10..bccb746 100644
> > >>--- a/hw/i386/pc.c
> > >>+++ b/hw/i386/pc.c
> > >>@@ -1644,11 +1644,49 @@ pc_machine_get_hotplug_memory_region_size(Object *obj, Visitor *v, void *opaque,
> > >>      visit_type_int(v, &value, name, errp);
> > >>  }
> > >>+static void pc_machine_get_max_ram_below_4g(Object *obj, Visitor *v,
> > >>+                                         void *opaque, const char *name,
> > >>+                                         Error **errp)
> > >>+{
> > >>+    PCMachineState *pcms = PC_MACHINE(obj);
> > >>+    uint64_t value = pcms->max_ram_below_4g;
> > >>+
> > >>+    visit_type_size(v, &value, name, errp);
> > >>+}
> > >>+
> > >>+static void pc_machine_set_max_ram_below_4g(Object *obj, Visitor *v,
> > >>+                                         void *opaque, const char *name,
> > >>+                                         Error **errp)
> > >>+{
> > >>+    PCMachineState *pcms = PC_MACHINE(obj);
> > >>+    Error *error = NULL;
> > >>+    uint64_t value;
> > >>+
> > >>+    visit_type_size(v, &value, name, &error);
> > >>+    if (error) {
> > >>+        error_propagate(errp, error);
> > >>+        return;
> > >>+    }
> > >>+    if (value > (1ULL << 32)) {
> > >>+        error_set(&error, ERROR_CLASS_GENERIC_ERROR,
> > >>+                  "Machine option 'max-ram-below-4g=%"PRIu64
> > >>+                  "' expects size less then or equal to 4G", value);
> > >>+        error_propagate(errp, error);
> > >>+        return;
> > >>+    }
> > >>+
> > >>+    pcms->max_ram_below_4g = value;
> > >>+}
> > >>+
> > >>  static void pc_machine_initfn(Object *obj)
> > >>  {
> > >>      object_property_add(obj, PC_MACHINE_MEMHP_REGION_SIZE, "int",
> > >>                          pc_machine_get_hotplug_memory_region_size,
> > >>                          NULL, NULL, NULL, NULL);
> > >>+    object_property_add(obj, PC_MACHINE_MAX_RAM_BELOW_4G,  "size",
> > >>+                        pc_machine_get_max_ram_below_4g,
> > >>+                        pc_machine_set_max_ram_below_4g,
> > >>+                        NULL, NULL, NULL);
> > >>  }
> > >>  static void pc_machine_class_init(ObjectClass *oc, void *data)
> > >>diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> > >>index 40f6eaf..25f4727 100644
> > >>--- a/hw/i386/pc_piix.c
> > >>+++ b/hw/i386/pc_piix.c
> > >>@@ -98,6 +98,13 @@ static void pc_init1(MachineState *machine,
> > >>      DeviceState *icc_bridge;
> > >>      FWCfgState *fw_cfg = NULL;
> > >>      PcGuestInfo *guest_info;
> > >>+    Object *mo = qdev_get_machine();
> > >>+    PCMachineState *pcms = PC_MACHINE(mo);
> > >>+    ram_addr_t lowmem = 0xe0000000;
> > >>+
> > >>+    if (pcms && pcms->max_ram_below_4g) {
> > >>+        lowmem = pcms->max_ram_below_4g;
> > >>+    }
> > >>      /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory).
> > >>       * If it doesn't, we need to split it in chunks below and above 4G.
> > >>@@ -106,8 +113,10 @@ static void pc_init1(MachineState *machine,
> > >>       * For old machine types, use whatever split we used historically to avoid
> > >>       * breaking migration.
> > >>       */
> > >>-    if (machine->ram_size >= 0xe0000000) {
> > >>-        ram_addr_t lowmem = gigabyte_align ? 0xc0000000 : 0xe0000000;
> > >>+    if (machine->ram_size >= lowmem) {
> > >>+        if (!(pcms && pcms->max_ram_below_4g) && gigabyte_align) {
> > >>+            lowmem = 0xc0000000;
> > >>+        }
> > >>          above_4g_mem_size = machine->ram_size - lowmem;
> > >>          below_4g_mem_size = lowmem;
> > >>      } else {
> > >>@@ -122,7 +131,7 @@ static void pc_init1(MachineState *machine,
> > >>      }
> > >>      icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
> > >>-    object_property_add_child(qdev_get_machine(), "icc-bridge",
> > >>+    object_property_add_child(mo, "icc-bridge",
> > >>                                OBJECT(icc_bridge), NULL);
> > >>      pc_cpus_init(machine->cpu_model, icc_bridge);
> > >>diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
> > >>index e28ce40..155cdf1 100644
> > >>--- a/hw/i386/pc_q35.c
> > >>+++ b/hw/i386/pc_q35.c
> > >>@@ -85,6 +85,13 @@ static void pc_q35_init(MachineState *machine)
> > >>      PCIDevice *ahci;
> > >>      DeviceState *icc_bridge;
> > >>      PcGuestInfo *guest_info;
> > >>+    Object *mo = qdev_get_machine();
> > >>+    PCMachineState *pcms = PC_MACHINE(mo);
> > >>+    ram_addr_t lowmem = 0xb0000000;
> > >>+
> > >>+    if (pcms && pcms->max_ram_below_4g) {
> > >>+        lowmem = pcms->max_ram_below_4g;
> > >>+    }
> > >>      /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory
> > >>       * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping
> > >>@@ -95,8 +102,10 @@ static void pc_q35_init(MachineState *machine)
> > >>       * For old machine types, use whatever split we used historically to avoid
> > >>       * breaking migration.
> > >>       */
> > >>-    if (machine->ram_size >= 0xb0000000) {
> > >>-        ram_addr_t lowmem = gigabyte_align ? 0x80000000 : 0xb0000000;
> > >>+    if (machine->ram_size >= lowmem) {
> > >>+        if (!(pcms && pcms->max_ram_below_4g) && gigabyte_align) {
> > >>+            lowmem = 0x800000000;
> > >>+        }
> > >>          above_4g_mem_size = machine->ram_size - lowmem;
> > >>          below_4g_mem_size = lowmem;
> > >>      } else {
> > >>@@ -111,7 +120,7 @@ static void pc_q35_init(MachineState *machine)
> > >>      }
> > >>      icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
> > >>-    object_property_add_child(qdev_get_machine(), "icc-bridge",
> > >>+    object_property_add_child(mo, "icc-bridge",
> > >>                                OBJECT(icc_bridge), NULL);
> > >>      pc_cpus_init(machine->cpu_model, icc_bridge);
> > >>diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> > >>index 19530bd..2d8b562 100644
> > >>--- a/include/hw/i386/pc.h
> > >>+++ b/include/hw/i386/pc.h
> > >>@@ -32,10 +32,13 @@ struct PCMachineState {
> > >>      MemoryRegion hotplug_memory;
> > >>      HotplugHandler *acpi_dev;
> > >>+
> > >>+    uint64_t max_ram_below_4g;
> > >>  };
> > >>  #define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device"
> > >>  #define PC_MACHINE_MEMHP_REGION_SIZE "hotplug-memory-region-size"
> > >>+#define PC_MACHINE_MAX_RAM_BELOW_4G "max-ram-below-4g"
> > >>  /**
> > >>   * PCMachineClass:
> > >>diff --git a/vl.c b/vl.c
> > >>index 5e77a27..cffb9c5 100644
> > >>--- a/vl.c
> > >>+++ b/vl.c
> > >>@@ -382,6 +382,10 @@ static QemuOptsList qemu_machine_opts = {
> > >>              .name = "kvm-type",
> > >>              .type = QEMU_OPT_STRING,
> > >>              .help = "Specifies the KVM virtualization mode (HV, PR)",
> > >>+        },{
> > >>+            .name = PC_MACHINE_MAX_RAM_BELOW_4G,
> > >>+            .type = QEMU_OPT_SIZE,
> > >>+            .help = "maximum ram below the 4G boundary (32bit boundary)",
> > >>          },
> > >>          { /* End of list */ }
> > >>      },
> > >>-- 
> > >>1.8.4
Konrad Rzeszutek Wilk June 17, 2014, 8:08 p.m. UTC | #15
On Tue, Jun 17, 2014 at 10:43:29PM +0300, Michael S. Tsirkin wrote:
> On Tue, Jun 17, 2014 at 02:44:41PM -0400, Don Slutz wrote:
> > On 06/17/14 14:22, Michael S. Tsirkin wrote:
> > >On Fri, Jun 06, 2014 at 01:52:05PM -0400, Don Slutz wrote:
> > >>This is a pc & q35 only machine opt.  One use is to allow for more
> > >>ram in a 32bit guest for example:
> > >>
> > >>-machine pc,max-ram-below-4g=3.75G
> > >>
> > >>If you add enough PCI devices then all mmio for them will not fit
> > >>below 4G which may not be the layout the user wanted. This allows
> > >>you to increase the below 4G address space that PCI devices can use
> > >>(aka decrease ram below 4G) and therefore in more cases not have any
> > >>mmio that is above 4G.
> > >>
> > >>For example using "-machine pc,max-ram-below-4g=2G" on the command
> > >>line will limit the amount of ram that is below 4G to 2G.
> > >I'm not sure I get it.
> > >
> > >All this only has effect if you have >4G RAM, right?
> > 
> > Nope.  When it takes effect includes RAM > max-ram-below-4g. I.E.
> > 
> >    -machine pc,max-ram-below-4g=2G -m 3G
> > 
> > Will have 2G in 32bit space (below 4G) and 1G in 64bit space (above 4G).
> > 
> > >Presumably you then have a 64 bit guest so why does it
> > >care about memory/MMIO being below 4G?
> > 
> > It is not the guest that matters, it is all the PCI devices in use. There are
> > ones (all old hardware) that only support 32bit addresses.
> 
> Emulated devices? Let's just teach them to support 64 bit BARs.
> Looks like a nicer solution than asking user to make
> this decision.

I presume real normal PCI devices.
> 
> >  When using
> > these you may need more room.
> > 
> > Also pci-passthru of real hardware that is 32bit only may require this.
> >    -Don Slutz
> 
> Guest and host BARs are unrelated so it seems we could allow 64 bit BARs
> even when they are 32 bit on host.

That assumes that the devices are OK with having their BARs remapped.
That is not always the case and some of them might not like that.

Also OSes might want to only use 32-bit BARs as that is what they
had been written for. I presume since Don is working for a telecom
that this might be some ancient networking hardware with super-fast
BSD OS. 
> 
> Though PCI Express spec requires 64 bit BARs since version 1.0,
> are there really many devices like this out there?
> Care giving examples?

I think he mentioned PCI devices, not PCIe.
Michael S. Tsirkin June 17, 2014, 8:17 p.m. UTC | #16
On Tue, Jun 17, 2014 at 04:08:54PM -0400, Konrad Rzeszutek Wilk wrote:
> On Tue, Jun 17, 2014 at 10:43:29PM +0300, Michael S. Tsirkin wrote:
> > On Tue, Jun 17, 2014 at 02:44:41PM -0400, Don Slutz wrote:
> > > On 06/17/14 14:22, Michael S. Tsirkin wrote:
> > > >On Fri, Jun 06, 2014 at 01:52:05PM -0400, Don Slutz wrote:
> > > >>This is a pc & q35 only machine opt.  One use is to allow for more
> > > >>ram in a 32bit guest for example:
> > > >>
> > > >>-machine pc,max-ram-below-4g=3.75G
> > > >>
> > > >>If you add enough PCI devices then all mmio for them will not fit
> > > >>below 4G which may not be the layout the user wanted. This allows
> > > >>you to increase the below 4G address space that PCI devices can use
> > > >>(aka decrease ram below 4G) and therefore in more cases not have any
> > > >>mmio that is above 4G.
> > > >>
> > > >>For example using "-machine pc,max-ram-below-4g=2G" on the command
> > > >>line will limit the amount of ram that is below 4G to 2G.
> > > >I'm not sure I get it.
> > > >
> > > >All this only has effect if you have >4G RAM, right?
> > > 
> > > Nope.  When it takes effect includes RAM > max-ram-below-4g. I.E.
> > > 
> > >    -machine pc,max-ram-below-4g=2G -m 3G
> > > 
> > > Will have 2G in 32bit space (below 4G) and 1G in 64bit space (above 4G).
> > > 
> > > >Presumably you then have a 64 bit guest so why does it
> > > >care about memory/MMIO being below 4G?
> > > 
> > > It is not the guest that matters, it is all the PCI devices in use. There are
> > > ones (all old hardware) that only support 32bit addresses.
> > 
> > Emulated devices? Let's just teach them to support 64 bit BARs.
> > Looks like a nicer solution than asking user to make
> > this decision.
> 
> I presume real normal PCI devices.
> > 
> > >  When using
> > > these you may need more room.
> > > 
> > > Also pci-passthru of real hardware that is 32bit only may require this.
> > >    -Don Slutz
> > 
> > Guest and host BARs are unrelated so it seems we could allow 64 bit BARs
> > even when they are 32 bit on host.
> 
> That assumes that the devices are OK with having their BARs remapped.
> That is not always the case and some of them might not like that.
> 
> Also OSes might want to only use 32-bit BARs as that is what they
> had been written for. I presume since Don is working for a telecom
> that this might be some ancient networking hardware with super-fast
> BSD OS. 
> > 
> > Though PCI Express spec requires 64 bit BARs since version 1.0,
> > are there really many devices like this out there?
> > Care giving examples?
> 
> I think he mentioned PCI devices, not PCIe.

OK that's a small niche but it makes some sense.
So let's focus on this case, it can be addressed
cleanly:

	default: max-ram-below-4g=4g
	pc:
		low = MIN(low, max-ram-below-4g)

no special handling for any corner cases.

For bonus points detect and warn if value chosen causes
alignment issues.
Pasi Kärkkäinen June 17, 2014, 8:35 p.m. UTC | #17
On Tue, Jun 17, 2014 at 04:08:54PM -0400, Konrad Rzeszutek Wilk wrote:
> On Tue, Jun 17, 2014 at 10:43:29PM +0300, Michael S. Tsirkin wrote:
> > On Tue, Jun 17, 2014 at 02:44:41PM -0400, Don Slutz wrote:
> > > 
> > > >Presumably you then have a 64 bit guest so why does it
> > > >care about memory/MMIO being below 4G?
> > > 
> > > It is not the guest that matters, it is all the PCI devices in use. There are
> > > ones (all old hardware) that only support 32bit addresses.
> > 
> > Emulated devices? Let's just teach them to support 64 bit BARs.
> > Looks like a nicer solution than asking user to make
> > this decision.
> 
> I presume real normal PCI devices.
>

Yeah, I believe this thread is about enabling/fixing PCI passthrough of (existing, old, legacy) physical PCI devices,
so there's no way to "teach" them to support 64-bit BARs.


> > 
> > >  When using
> > > these you may need more room.
> > > 
> > > Also pci-passthru of real hardware that is 32bit only may require this.
> > >    -Don Slutz
> > 


-- Pasi
Gerd Hoffmann June 18, 2014, 9:52 a.m. UTC | #18
Hi,

> > IMO xen should use a versioned machine type to make live migration more
> > reliable.  IIRC this was discussed anyway for other reasons (see
> > xen-platform-pci discussions, picking pc-i440fx-1.6 IIRC).
> 
> That is happening as far as I know and does not directly impact this
> patch set.
> 
> >    That should
> > also make qemu use the memory layout expected by old xen.
> 
> Currently xen just ignores the memory layout that QEMU sets up
> and does things its own way.  So no, this does not make QEMU use the
> memory layout expected by old xen.

Why?  "-M pc-i440fx-1.6" will make qemu use the same memory layout it
used to have before gbalign support was added.  And that should match
what old xen versions are setting up ...

cheers,
  Gerd
Don Slutz June 18, 2014, 1:28 p.m. UTC | #19
On 06/17/14 16:17, Michael S. Tsirkin wrote:
> On Tue, Jun 17, 2014 at 04:08:54PM -0400, Konrad Rzeszutek Wilk wrote:
>> On Tue, Jun 17, 2014 at 10:43:29PM +0300, Michael S. Tsirkin wrote:
[snip]
>> I think he mentioned PCI devices, not PCIe.
> OK that's a small niche but it makes some sense.
> So let's focus on this case, it can be addressed
> cleanly:
>
> 	default: max-ram-below-4g=4g
> 	pc:
> 		low = MIN(low, max-ram-below-4g)
>
> no special handling for any corner cases.
>
> For bonus points detect and warn if value chosen causes
> alignment issues.
>

v7 of this is being tested.  Expect to post today.

    -Don Slutz
diff mbox

Patch

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 7cdba10..bccb746 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1644,11 +1644,49 @@  pc_machine_get_hotplug_memory_region_size(Object *obj, Visitor *v, void *opaque,
     visit_type_int(v, &value, name, errp);
 }
 
+/* Property getter: hand the stored max-ram-below-4g value to the visitor. */
+static void pc_machine_get_max_ram_below_4g(Object *obj, Visitor *v,
+                                         void *opaque, const char *name,
+                                         Error **errp)
+{
+    uint64_t limit = PC_MACHINE(obj)->max_ram_below_4g;
+
+    visit_type_size(v, &limit, name, errp);
+}
+
+/* Property setter for max-ram-below-4g: parse a size, reject values > 4G. */
+static void pc_machine_set_max_ram_below_4g(Object *obj, Visitor *v,
+                                         void *opaque, const char *name,
+                                         Error **errp)
+{
+    PCMachineState *pcms = PC_MACHINE(obj);
+    Error *error = NULL;
+    uint64_t value;
+
+    visit_type_size(v, &value, name, &error);
+    if (error) {
+        error_propagate(errp, error);
+        return;
+    }
+    if (value > (1ULL << 32)) {
+        error_set(&error, ERROR_CLASS_GENERIC_ERROR,
+                  "Machine option 'max-ram-below-4g=%"PRIu64
+                  "' expects size less than or equal to 4G", value);
+        error_propagate(errp, error);
+        return;
+    }
+    pcms->max_ram_below_4g = value;
+}
+
 static void pc_machine_initfn(Object *obj)
 {
     object_property_add(obj, PC_MACHINE_MEMHP_REGION_SIZE, "int",
                         pc_machine_get_hotplug_memory_region_size,
                         NULL, NULL, NULL, NULL);
+    object_property_add(obj, PC_MACHINE_MAX_RAM_BELOW_4G,  "size",
+                        pc_machine_get_max_ram_below_4g,
+                        pc_machine_set_max_ram_below_4g,
+                        NULL, NULL, NULL);
 }
 
 static void pc_machine_class_init(ObjectClass *oc, void *data)
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 40f6eaf..25f4727 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -98,6 +98,13 @@  static void pc_init1(MachineState *machine,
     DeviceState *icc_bridge;
     FWCfgState *fw_cfg = NULL;
     PcGuestInfo *guest_info;
+    Object *mo = qdev_get_machine();
+    PCMachineState *pcms = PC_MACHINE(mo);
+    ram_addr_t lowmem = 0xe0000000;
+
+    if (pcms && pcms->max_ram_below_4g) {
+        lowmem = pcms->max_ram_below_4g;
+    }
 
     /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory).
      * If it doesn't, we need to split it in chunks below and above 4G.
@@ -106,8 +113,10 @@  static void pc_init1(MachineState *machine,
      * For old machine types, use whatever split we used historically to avoid
      * breaking migration.
      */
-    if (machine->ram_size >= 0xe0000000) {
-        ram_addr_t lowmem = gigabyte_align ? 0xc0000000 : 0xe0000000;
+    if (machine->ram_size >= lowmem) {
+        if (!(pcms && pcms->max_ram_below_4g) && gigabyte_align) {
+            lowmem = 0xc0000000;
+        }
         above_4g_mem_size = machine->ram_size - lowmem;
         below_4g_mem_size = lowmem;
     } else {
@@ -122,7 +131,7 @@  static void pc_init1(MachineState *machine,
     }
 
     icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
-    object_property_add_child(qdev_get_machine(), "icc-bridge",
+    object_property_add_child(mo, "icc-bridge",
                               OBJECT(icc_bridge), NULL);
 
     pc_cpus_init(machine->cpu_model, icc_bridge);
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index e28ce40..155cdf1 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -85,6 +85,13 @@  static void pc_q35_init(MachineState *machine)
     PCIDevice *ahci;
     DeviceState *icc_bridge;
     PcGuestInfo *guest_info;
+    Object *mo = qdev_get_machine();
+    PCMachineState *pcms = PC_MACHINE(mo);
+    ram_addr_t lowmem = 0xb0000000;
+
+    if (pcms && pcms->max_ram_below_4g) {
+        lowmem = pcms->max_ram_below_4g;
+    }
 
     /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory
      * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping
@@ -95,8 +102,10 @@  static void pc_q35_init(MachineState *machine)
      * For old machine types, use whatever split we used historically to avoid
      * breaking migration.
      */
-    if (machine->ram_size >= 0xb0000000) {
-        ram_addr_t lowmem = gigabyte_align ? 0x80000000 : 0xb0000000;
+    if (machine->ram_size >= lowmem) {
+        if (!(pcms && pcms->max_ram_below_4g) && gigabyte_align) {
+            lowmem = 0x80000000;
+        }
         above_4g_mem_size = machine->ram_size - lowmem;
         below_4g_mem_size = lowmem;
     } else {
@@ -111,7 +120,7 @@  static void pc_q35_init(MachineState *machine)
     }
 
     icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
-    object_property_add_child(qdev_get_machine(), "icc-bridge",
+    object_property_add_child(mo, "icc-bridge",
                               OBJECT(icc_bridge), NULL);
 
     pc_cpus_init(machine->cpu_model, icc_bridge);
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 19530bd..2d8b562 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -32,10 +32,13 @@  struct PCMachineState {
     MemoryRegion hotplug_memory;
 
     HotplugHandler *acpi_dev;
+
+    uint64_t max_ram_below_4g;
 };
 
 #define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device"
 #define PC_MACHINE_MEMHP_REGION_SIZE "hotplug-memory-region-size"
+#define PC_MACHINE_MAX_RAM_BELOW_4G "max-ram-below-4g"
 
 /**
  * PCMachineClass:
diff --git a/vl.c b/vl.c
index 5e77a27..cffb9c5 100644
--- a/vl.c
+++ b/vl.c
@@ -382,6 +382,10 @@  static QemuOptsList qemu_machine_opts = {
             .name = "kvm-type",
             .type = QEMU_OPT_STRING,
             .help = "Specifies the KVM virtualization mode (HV, PR)",
+        },{
+            .name = PC_MACHINE_MAX_RAM_BELOW_4G,
+            .type = QEMU_OPT_SIZE,
+            .help = "maximum ram below the 4G boundary (32bit boundary)",
         },
         { /* End of list */ }
     },