diff mbox

i386: pc: align gpa<->hpa on 1GB boundary (v3)

Message ID 20131106015543.GA20766@amt.cnet
State New
Headers show

Commit Message

Marcelo Tosatti Nov. 6, 2013, 1:55 a.m. UTC
v2: condition enablement of new mapping to new machine types (Paolo)
v3: fix changelog

-----


Align guest physical address and host physical address
beyond guest 4GB on a 1GB boundary.

Otherwise 1GB TLBs cannot be cached for the range.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Comments

Igor Mammedov Nov. 6, 2013, 11:59 a.m. UTC | #1
On Tue, 5 Nov 2013 23:55:43 -0200
Marcelo Tosatti <mtosatti@redhat.com> wrote:

> 
> 
> v2: condition enablement of new mapping to new machine types (Paolo)
> v3: fix changelog
> 
> -----
> 
> 
> Align guest physical address and host physical address
> beyond guest 4GB on a 1GB boundary.
> 
> Otherwise 1GB TLBs cannot be cached for the range.
> 
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> 
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 0c313fe..534e067 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -1116,7 +1116,7 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory,
[...]
> +        /*
> +         *
> +         * If 1GB hugepages are used to back guest RAM, map guest address
> +         * space in the range [ramsize,ramsize+holesize] to the ram block
> +         * range [holestart, 4GB]
> +         *
> +         *                      0      h     4G     [ramsize,ramsize+holesize]
> +         *
> +         * guest-addr-space     [      ]     [      ][xxx]
> +         *                                /----------/
> +         * contiguous-ram-block [      ][xxx][     ]
> +         *
> +         * So that memory beyond 4GB is aligned on a 1GB boundary,
> +         * at the host physical address space.
> +         *
> +         */
> +        if (guest_info->gb_align) {
'gb_align' is one shot usage, it would be better to just add it as an argument
to pc_memory_init(). That would allow to avoid extending PcGuestInfo needlessly,
since gb_align isn't reused.

> +            unsigned long holesize = 0x100000000ULL - below_4g_mem_size;
> +
> +            memory_region_init_alias(ram_above_4g, NULL, "ram-above-4g", ram,
> +                                    0x100000000ULL,
> +                                    above_4g_mem_size - holesize);
> +            memory_region_add_subregion(system_memory, 0x100000000ULL,
>                                      ram_above_4g);
[...]
> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> index 6083839..00afe4a 100644
> --- a/include/hw/i386/pc.h
> +++ b/include/hw/i386/pc.h
> @@ -20,6 +20,7 @@ typedef struct PcPciInfo {
>  struct PcGuestInfo {
>      bool has_pci_info;
>      bool isapc_ram_fw;
> +    bool gb_align;
>      FWCfgState *fw_cfg;
>  };
it doesn't apply anymore.
Paolo Bonzini Nov. 6, 2013, 12:07 p.m. UTC | #2
Il 06/11/2013 12:59, Igor Mammedov ha scritto:
> On Tue, 5 Nov 2013 23:55:43 -0200
> Marcelo Tosatti <mtosatti@redhat.com> wrote:
> 
>>
>>
>> v2: condition enablement of new mapping to new machine types (Paolo)
>> v3: fix changelog
>>
>> -----
>>
>>
>> Align guest physical address and host physical address
>> beyond guest 4GB on a 1GB boundary.
>>
>> Otherwise 1GB TLBs cannot be cached for the range.
>>
>> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
>>
>> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
>> index 0c313fe..534e067 100644
>> --- a/hw/i386/pc.c
>> +++ b/hw/i386/pc.c
>> @@ -1116,7 +1116,7 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory,
> [...]
>> +        /*
>> +         *
>> +         * If 1GB hugepages are used to back guest RAM, map guest address
>> +         * space in the range [ramsize,ramsize+holesize] to the ram block
>> +         * range [holestart, 4GB]
>> +         *
>> +         *                      0      h     4G     [ramsize,ramsize+holesize]
>> +         *
>> +         * guest-addr-space     [      ]     [      ][xxx]
>> +         *                                /----------/
>> +         * contiguous-ram-block [      ][xxx][     ]
>> +         *
>> +         * So that memory beyond 4GB is aligned on a 1GB boundary,
>> +         * at the host physical address space.
>> +         *
>> +         */
>> +        if (guest_info->gb_align) {
> 'gb_align' is one shot usage, it would be better to just add it as an argument
> to pc_memory_init(). That would allow to avoid extending PcGuestInfo needlessly,
> since gb_align isn't reused.

No, Marcelo's way is better.  pc_memory_init already has too many
arguments, moving them to PcGuestInfo (which ultimately might become
properties of the /machine QOM object) is the right thing to do.

Paolo

>> +            unsigned long holesize = 0x100000000ULL - below_4g_mem_size;
>> +
>> +            memory_region_init_alias(ram_above_4g, NULL, "ram-above-4g", ram,
>> +                                    0x100000000ULL,
>> +                                    above_4g_mem_size - holesize);
>> +            memory_region_add_subregion(system_memory, 0x100000000ULL,
>>                                      ram_above_4g);
> [...]
>> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
>> index 6083839..00afe4a 100644
>> --- a/include/hw/i386/pc.h
>> +++ b/include/hw/i386/pc.h
>> @@ -20,6 +20,7 @@ typedef struct PcPciInfo {
>>  struct PcGuestInfo {
>>      bool has_pci_info;
>>      bool isapc_ram_fw;
>> +    bool gb_align;
>>      FWCfgState *fw_cfg;
>>  };
> it doesn't apply anymore.
>
Igor Mammedov Nov. 6, 2013, 12:22 p.m. UTC | #3
On Wed, 06 Nov 2013 13:07:26 +0100
Paolo Bonzini <pbonzini@redhat.com> wrote:

> Il 06/11/2013 12:59, Igor Mammedov ha scritto:
> > On Tue, 5 Nov 2013 23:55:43 -0200
> > Marcelo Tosatti <mtosatti@redhat.com> wrote:
> > 
> >>
> >>
> >> v2: condition enablement of new mapping to new machine types (Paolo)
> >> v3: fix changelog
> >>
> >> -----
> >>
> >>
> >> Align guest physical address and host physical address
> >> beyond guest 4GB on a 1GB boundary.
> >>
> >> Otherwise 1GB TLBs cannot be cached for the range.
> >>
> >> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> >>
> >> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> >> index 0c313fe..534e067 100644
> >> --- a/hw/i386/pc.c
> >> +++ b/hw/i386/pc.c
> >> @@ -1116,7 +1116,7 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory,
> > [...]
> >> +        /*
> >> +         *
> >> +         * If 1GB hugepages are used to back guest RAM, map guest address
> >> +         * space in the range [ramsize,ramsize+holesize] to the ram block
> >> +         * range [holestart, 4GB]
> >> +         *
> >> +         *                      0      h     4G     [ramsize,ramsize+holesize]
> >> +         *
> >> +         * guest-addr-space     [      ]     [      ][xxx]
> >> +         *                                /----------/
> >> +         * contiguous-ram-block [      ][xxx][     ]
> >> +         *
> >> +         * So that memory beyond 4GB is aligned on a 1GB boundary,
> >> +         * at the host physical address space.
> >> +         *
> >> +         */
> >> +        if (guest_info->gb_align) {
> > 'gb_align' is one shot usage, it would be better to just add it as an argument
> > to pc_memory_init(). That would allow to avoid extending PcGuestInfo needlessly,
> > since gb_align isn't reused.
> 
> No, Marcelo's way is better.  pc_memory_init already has too many
> arguments, moving them to PcGuestInfo (which ultimately might become
> properties of the /machine QOM object) is the right thing to do.
In general I agree. But unless there is plans to reuse gb_align in future,
it doesn't really belong to PcGuestInfo (this change however looks like
cannibalizing structure for argument passing only).

> 
> Paolo
> 
> >> +            unsigned long holesize = 0x100000000ULL - below_4g_mem_size;
> >> +
> >> +            memory_region_init_alias(ram_above_4g, NULL, "ram-above-4g", ram,
> >> +                                    0x100000000ULL,
> >> +                                    above_4g_mem_size - holesize);
> >> +            memory_region_add_subregion(system_memory, 0x100000000ULL,
> >>                                      ram_above_4g);
> > [...]
> >> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> >> index 6083839..00afe4a 100644
> >> --- a/include/hw/i386/pc.h
> >> +++ b/include/hw/i386/pc.h
> >> @@ -20,6 +20,7 @@ typedef struct PcPciInfo {
> >>  struct PcGuestInfo {
> >>      bool has_pci_info;
> >>      bool isapc_ram_fw;
> >> +    bool gb_align;
> >>      FWCfgState *fw_cfg;
> >>  };
> > it doesn't apply anymore.
> > 
> 
>
Paolo Bonzini Nov. 6, 2013, 12:24 p.m. UTC | #4
Il 06/11/2013 13:22, Igor Mammedov ha scritto:
>>> > > 'gb_align' is one shot usage, it would be better to just add it as an argument
>>> > > to pc_memory_init(). That would allow to avoid extending PcGuestInfo needlessly,
>>> > > since gb_align isn't reused.
>> > 
>> > No, Marcelo's way is better.  pc_memory_init already has too many
>> > arguments, moving them to PcGuestInfo (which ultimately might become
>> > properties of the /machine QOM object) is the right thing to do.
> In general I agree. But unless there is plans to reuse gb_align in future,

The plan is to turn it (and also has_acpi_build etc.) into a global
property that can be set using the usual compat-property machinery.

> it doesn't really belong to PcGuestInfo (this change however looks like
> cannibalizing structure for argument passing only).

For now, it doesn't just look like that, it is like that. :)  But it is
still a step in the right direction.

Paolo
diff mbox

Patch

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 0c313fe..534e067 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1116,7 +1116,7 @@  FWCfgState *pc_memory_init(MemoryRegion *system_memory,
 {
     int linux_boot, i;
     MemoryRegion *ram, *option_rom_mr;
-    MemoryRegion *ram_below_4g, *ram_above_4g;
+    MemoryRegion *ram_below_4g, *ram_above_4g, *ram_above_4g_piecetwo;
     FWCfgState *fw_cfg;
 
     linux_boot = (kernel_filename != NULL);
@@ -1136,10 +1136,46 @@  FWCfgState *pc_memory_init(MemoryRegion *system_memory,
     memory_region_add_subregion(system_memory, 0, ram_below_4g);
     if (above_4g_mem_size > 0) {
         ram_above_4g = g_malloc(sizeof(*ram_above_4g));
-        memory_region_init_alias(ram_above_4g, NULL, "ram-above-4g", ram,
-                                 below_4g_mem_size, above_4g_mem_size);
-        memory_region_add_subregion(system_memory, 0x100000000ULL,
+
+        /*
+         *
+         * If 1GB hugepages are used to back guest RAM, map guest address
+         * space in the range [ramsize,ramsize+holesize] to the ram block
+         * range [holestart, 4GB]
+         *
+         *                      0      h     4G     [ramsize,ramsize+holesize]
+         *
+         * guest-addr-space     [      ]     [      ][xxx]
+         *                                /----------/
+         * contiguous-ram-block [      ][xxx][     ]
+         *
+         * So that memory beyond 4GB is aligned on a 1GB boundary,
+         * at the host physical address space.
+         *
+         */
+        if (guest_info->gb_align) {
+            unsigned long holesize = 0x100000000ULL - below_4g_mem_size;
+
+            memory_region_init_alias(ram_above_4g, NULL, "ram-above-4g", ram,
+                                    0x100000000ULL,
+                                    above_4g_mem_size - holesize);
+            memory_region_add_subregion(system_memory, 0x100000000ULL,
                                     ram_above_4g);
+
+            ram_above_4g_piecetwo = g_malloc(sizeof(*ram_above_4g_piecetwo));
+            memory_region_init_alias(ram_above_4g_piecetwo, NULL,
+                                     "ram-above-4g-piecetwo", ram,
+                                     0x100000000ULL - holesize, holesize);
+            memory_region_add_subregion(system_memory,
+                                        0x100000000ULL +
+                                        above_4g_mem_size - holesize,
+                                        ram_above_4g_piecetwo);
+        } else {
+            memory_region_init_alias(ram_above_4g, NULL, "ram-above-4g", ram,
+                                    below_4g_mem_size, above_4g_mem_size);
+            memory_region_add_subregion(system_memory, 0x100000000ULL,
+                                    ram_above_4g);
+        }
     }
 
 
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index c6042c7..305a4cd 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -59,6 +59,7 @@  static const int ide_irq[MAX_IDE_BUS] = { 14, 15 };
 
 static bool has_pvpanic;
 static bool has_pci_info = true;
+static bool gb_align = true;
 
 /* PC hardware initialisation */
 static void pc_init1(QEMUMachineInitArgs *args,
@@ -124,6 +125,7 @@  static void pc_init1(QEMUMachineInitArgs *args,
     guest_info = pc_guest_info_init(below_4g_mem_size, above_4g_mem_size);
     guest_info->has_pci_info = has_pci_info;
     guest_info->isapc_ram_fw = !pci_enabled;
+    guest_info->gb_align = gb_align;
 
     /* allocate ram and load rom/bios */
     if (!xen_enabled()) {
@@ -236,8 +238,14 @@  static void pc_init_pci(QEMUMachineInitArgs *args)
     pc_init1(args, 1, 1);
 }
 
+static void pc_compat_1_7(QEMUMachineInitArgs *args)
+{
+    gb_align = false;
+}
+
 static void pc_compat_1_6(QEMUMachineInitArgs *args)
 {
+    pc_compat_1_7(args);
     has_pci_info = false;
     rom_file_in_ram = false;
 }
@@ -269,6 +277,12 @@  static void pc_compat_1_2(QEMUMachineInitArgs *args)
     disable_kvm_pv_eoi();
 }
 
+static void pc_init_pci_1_7(QEMUMachineInitArgs *args)
+{
+    pc_compat_1_7(args);
+    pc_init_pci(args);
+}
+
 static void pc_init_pci_1_6(QEMUMachineInitArgs *args)
 {
     pc_compat_1_6(args);
@@ -339,13 +353,21 @@  static void pc_xen_hvm_init(QEMUMachineInitArgs *args)
     .desc = "Standard PC (i440FX + PIIX, 1996)", \
     .hot_add_cpu = pc_hot_add_cpu
 
+#define PC_I440FX_1_8_MACHINE_OPTIONS PC_I440FX_MACHINE_OPTIONS
+static QEMUMachine pc_i440fx_machine_v1_8 = {
+    PC_I440FX_1_8_MACHINE_OPTIONS,
+    .name = "pc-i440fx-1.8",
+    .alias = "pc",
+    .init = pc_init_pci,
+    .is_default = 1,
+};
+
 #define PC_I440FX_1_7_MACHINE_OPTIONS PC_I440FX_MACHINE_OPTIONS
 static QEMUMachine pc_i440fx_machine_v1_7 = {
     PC_I440FX_1_7_MACHINE_OPTIONS,
     .name = "pc-i440fx-1.7",
     .alias = "pc",
-    .init = pc_init_pci,
-    .is_default = 1,
+    .init = pc_init_pci_1_7,
 };
 
 #define PC_I440FX_1_6_MACHINE_OPTIONS PC_I440FX_MACHINE_OPTIONS
@@ -747,6 +769,7 @@  static QEMUMachine xenfv_machine = {
 
 static void pc_machine_init(void)
 {
+    qemu_register_machine(&pc_i440fx_machine_v1_8);
     qemu_register_machine(&pc_i440fx_machine_v1_7);
     qemu_register_machine(&pc_i440fx_machine_v1_6);
     qemu_register_machine(&pc_i440fx_machine_v1_5);
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index ca84e1c..45bec19 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -49,6 +49,7 @@ 
 
 static bool has_pvpanic;
 static bool has_pci_info = true;
+static bool gb_align = true;
 
 /* PC hardware initialisation */
 static void pc_q35_init(QEMUMachineInitArgs *args)
@@ -111,6 +112,7 @@  static void pc_q35_init(QEMUMachineInitArgs *args)
     guest_info = pc_guest_info_init(below_4g_mem_size, above_4g_mem_size);
     guest_info->has_pci_info = has_pci_info;
     guest_info->isapc_ram_fw = false;
+    guest_info->gb_align = gb_align;
 
     /* allocate ram and load rom/bios */
     if (!xen_enabled()) {
@@ -220,8 +222,14 @@  static void pc_q35_init(QEMUMachineInitArgs *args)
     }
 }
 
+static void pc_compat_1_7(QEMUMachineInitArgs *args)
+{
+   gb_align = false;
+}
+
 static void pc_compat_1_6(QEMUMachineInitArgs *args)
 {
+    pc_compat_1_7(args);
     has_pci_info = false;
     rom_file_in_ram = false;
 }
@@ -240,6 +248,12 @@  static void pc_compat_1_4(QEMUMachineInitArgs *args)
     x86_cpu_compat_set_features("Westmere", FEAT_1_ECX, 0, CPUID_EXT_PCLMULQDQ);
 }
 
+static void pc_q35_init_1_7(QEMUMachineInitArgs *args)
+{
+    pc_compat_1_7(args);
+    pc_q35_init(args);
+}
+
 static void pc_q35_init_1_6(QEMUMachineInitArgs *args)
 {
     pc_compat_1_6(args);
@@ -263,13 +277,22 @@  static void pc_q35_init_1_4(QEMUMachineInitArgs *args)
     .desc = "Standard PC (Q35 + ICH9, 2009)", \
     .hot_add_cpu = pc_hot_add_cpu
 
+#define PC_Q35_1_8_MACHINE_OPTIONS PC_Q35_MACHINE_OPTIONS
+
+static QEMUMachine pc_q35_machine_v1_8 = {
+    PC_Q35_1_8_MACHINE_OPTIONS,
+    .name = "pc-q35-1.8",
+    .alias = "q35",
+    .init = pc_q35_init,
+};
+
 #define PC_Q35_1_7_MACHINE_OPTIONS PC_Q35_MACHINE_OPTIONS
 
 static QEMUMachine pc_q35_machine_v1_7 = {
     PC_Q35_1_7_MACHINE_OPTIONS,
     .name = "pc-q35-1.7",
     .alias = "q35",
-    .init = pc_q35_init,
+    .init = pc_q35_init_1_7,
 };
 
 #define PC_Q35_1_6_MACHINE_OPTIONS PC_Q35_MACHINE_OPTIONS
@@ -310,6 +333,7 @@  static QEMUMachine pc_q35_machine_v1_4 = {
 
 static void pc_q35_machine_init(void)
 {
+    qemu_register_machine(&pc_q35_machine_v1_8);
     qemu_register_machine(&pc_q35_machine_v1_7);
     qemu_register_machine(&pc_q35_machine_v1_6);
     qemu_register_machine(&pc_q35_machine_v1_5);
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 6083839..00afe4a 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -20,6 +20,7 @@  typedef struct PcPciInfo {
 struct PcGuestInfo {
     bool has_pci_info;
     bool isapc_ram_fw;
+    bool gb_align;
     FWCfgState *fw_cfg;
 };