Patchwork [RFC,v3,19/19,SeaBIOS] Calculate pcimem_start and pcimem64_start from SRAT entries

login
register
mail settings
Submitter Vasilis Liaskovitis
Date Sept. 21, 2012, 11:17 a.m.
Message ID <1348226255-4226-20-git-send-email-vasilis.liaskovitis@profitbricks.com>
Download mbox | patch
Permalink /patch/185716/
State New
Headers show

Comments

Vasilis Liaskovitis - Sept. 21, 2012, 11:17 a.m.
pcimem_start and pcimem64_start are now adjusted based on the SRAT entries. For this
reason, the paravirt info (NUMA SRAT entries and number of CPUs) needs to be read before
pci_setup. IMHO, this is an ugly code change, since the SRAT BIOS tables and the number
of CPUs have to be read earlier. The advantage is that no new paravirt interface
is introduced. Suggestions to make the code change cleaner are welcome.

An alternative patch (to be sent as a reply to this one) implements a
paravirt interface to read the starting values of pcimem_start and
pcimem64_start from QEMU.

Signed-off-by: Vasilis Liaskovitis <vasilis.liaskovitis@profitbricks.com>
---
 src/acpi.c    |   82 ++++++++++++++++++++++++++++++++++++++++----------------
 src/acpi.h    |    3 ++
 src/pciinit.c |    6 +++-
 src/post.c    |    3 ++
 src/smp.c     |    4 +++
 5 files changed, 72 insertions(+), 26 deletions(-)
Wen Congyang - Sept. 24, 2012, 6:51 a.m.
At 09/21/2012 07:17 PM, Vasilis Liaskovitis Wrote:
> pcimem_start and pcimem64_start are adjusted from srat entries. For this reason,
> paravirt info (NUMA SRAT entries and number of cpus) need to be read before pci_setup.
> Imho, this is an ugly code change since SRAT bios tables and number of
> cpus have to be read earlier. But the advantage is that no new paravirt interface
> is introduced. Suggestions to make the code change cleaner are welcome.
> 
> The alternative patch (will be sent as a reply to this patch) implements a
> paravirt interface to read the starting values of pcimem_start and
> pcimem64_start from QEMU.
> 
> Signed-off-by: Vasilis Liaskovitis <vasilis.liaskovitis@profitbricks.com>
> ---
>  src/acpi.c    |   82 ++++++++++++++++++++++++++++++++++++++++----------------
>  src/acpi.h    |    3 ++
>  src/pciinit.c |    6 +++-
>  src/post.c    |    3 ++
>  src/smp.c     |    4 +++
>  5 files changed, 72 insertions(+), 26 deletions(-)
> 
> diff --git a/src/acpi.c b/src/acpi.c
> index 1223b52..9e99aa7 100644
> --- a/src/acpi.c
> +++ b/src/acpi.c
> @@ -428,7 +428,10 @@ encodeLen(u8 *ssdt_ptr, int length, int bytes)
>  #define MEM_OFFSET_END   63
>  #define MEM_OFFSET_SIZE  79
>  
> -u64 nb_hp_memslots = 0;
> +u64 nb_hp_memslots = 0, nb_numanodes;
> +u64 *numa_data, *hp_memdata;
> +u64 below_4g_hp_mem_size = 0;
> +u64 above_4g_hp_mem_size = 0;
>  struct srat_memory_affinity *mem;
>  
>  #define SSDT_SIGNATURE 0x54445353 // SSDT
> @@ -763,17 +766,7 @@ acpi_build_srat_memory(struct srat_memory_affinity *numamem,
>  static void *
>  build_srat(void)
>  {
> -    int nb_numa_nodes = qemu_cfg_get_numa_nodes();
> -
> -    u64 *numadata = malloc_tmphigh(sizeof(u64) * (MaxCountCPUs + nb_numa_nodes));
> -    if (!numadata) {
> -        warn_noalloc();
> -        return NULL;
> -    }
> -
> -    qemu_cfg_get_numa_data(numadata, MaxCountCPUs + nb_numa_nodes);
> -
> -    qemu_cfg_get_numa_data(&nb_hp_memslots, 1);
> +    int nb_numa_nodes = nb_numanodes;
>      struct system_resource_affinity_table *srat;
>      int srat_size = sizeof(*srat) +
>          sizeof(struct srat_processor_affinity) * MaxCountCPUs +
> @@ -782,7 +775,7 @@ build_srat(void)
>      srat = malloc_high(srat_size);
>      if (!srat) {
>          warn_noalloc();
> -        free(numadata);
> +        free(numa_data);
>          return NULL;
>      }
>  
> @@ -791,6 +784,7 @@ build_srat(void)
>      struct srat_processor_affinity *core = (void*)(srat + 1);
>      int i;
>      u64 curnode;
> +    u64 *numadata = numa_data;
>  
>      for (i = 0; i < MaxCountCPUs; ++i) {
>          core->type = SRAT_PROCESSOR;
> @@ -847,15 +841,7 @@ build_srat(void)
>      mem = (void*)numamem;
>  
>      if (nb_hp_memslots) {
> -        u64 *hpmemdata = malloc_tmphigh(sizeof(u64) * (3 * nb_hp_memslots));
> -        if (!hpmemdata) {
> -            warn_noalloc();
> -            free(hpmemdata);
> -            free(numadata);
> -            return NULL;
> -        }
> -
> -        qemu_cfg_get_numa_data(hpmemdata, 3 * nb_hp_memslots);
> +        u64 *hpmemdata = hp_memdata;
>  
>          for (i = 1; i < nb_hp_memslots + 1; ++i) {
>              mem_base = *hpmemdata++;
> @@ -865,7 +851,7 @@ build_srat(void)
>              numamem++;
>              slots++;
>          }
> -        free(hpmemdata);
> +        free(hp_memdata);
>      }
>  
>      for (; slots < nb_numa_nodes + nb_hp_memslots + 2; slots++) {
> @@ -875,10 +861,58 @@ build_srat(void)
>  
>      build_header((void*)srat, SRAT_SIGNATURE, srat_size, 1);
>  
> -    free(numadata);
> +    free(numa_data);
>      return srat;
>  }
>  
> +/* QEMU paravirt SRAT entries need to be read in before pci initilization */
> +void read_srat_early(void)
> +{
> +    int i;
> +
> +    nb_numanodes = qemu_cfg_get_numa_nodes();
> +    u64 *hpmemdata;
> +    u64 mem_len, mem_base;
> +
> +    numa_data = malloc_tmphigh(sizeof(u64) * (MaxCountCPUs + nb_numanodes));
> +    if (!numa_data) {
> +        warn_noalloc();
> +    }
> +
> +    qemu_cfg_get_numa_data(numa_data, MaxCountCPUs + nb_numanodes);
> +    qemu_cfg_get_numa_data(&nb_hp_memslots, 1);
> +
> +    if (nb_hp_memslots) {
> +        hp_memdata = malloc_tmphigh(sizeof(u64) * (3 * nb_hp_memslots));
> +        if (!hp_memdata) {
> +            warn_noalloc();
> +            free(hp_memdata);
> +            free(numa_data);
> +        }
> +
> +        qemu_cfg_get_numa_data(hp_memdata, 3 * nb_hp_memslots);
> +        hpmemdata = hp_memdata;
> +
> +        for (i = 1; i < nb_hp_memslots + 1; ++i) {
> +            mem_base = *hpmemdata++;
> +            mem_len = *hpmemdata++;
> +            hpmemdata++;
> +            if (mem_base >= 0x100000000LL) {
> +                above_4g_hp_mem_size += mem_len;
> +            }
> +            /* if dimm fits before pci hole, append it normally */
> +            else if (mem_base + mem_len <= BUILD_PCIMEM_START) {
> +                below_4g_hp_mem_size += mem_len;
> +            }
> +            /* otherwise place it above 4GB */
> +            else {
> +                above_4g_hp_mem_size += mem_len;
> +            }
> +        }
> +
> +    }
> +}
> +
>  static const struct pci_device_id acpi_find_tbl[] = {
>      /* PIIX4 Power Management device. */
>      PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_3, NULL),
> diff --git a/src/acpi.h b/src/acpi.h
> index cb21561..d29837f 100644
> --- a/src/acpi.h
> +++ b/src/acpi.h
> @@ -5,6 +5,9 @@
>  
>  void acpi_bios_init(void);
>  u32 find_resume_vector(void);
> +void read_srat_early(void);
> +extern u64 below_4g_hp_mem_size;
> +extern u64 above_4g_hp_mem_size;
>  
>  #define RSDP_SIGNATURE 0x2052545020445352LL // "RSD PTR "
>  
> diff --git a/src/pciinit.c b/src/pciinit.c
> index 31115ee..c5a4b24 100644
> --- a/src/pciinit.c
> +++ b/src/pciinit.c
> @@ -12,6 +12,7 @@
>  #include "ioport.h" // PORT_ATA1_CMD_BASE
>  #include "config.h" // CONFIG_*
>  #include "xen.h" // usingXen
> +#include "acpi.h"
>  
>  #define PCI_DEVICE_MEM_MIN     0x1000
>  #define PCI_BRIDGE_IO_MIN      0x1000
> @@ -597,7 +598,7 @@ static void pci_region_map_entries(struct pci_bus *busses, struct pci_region *r)
>  
>  static void pci_bios_map_devices(struct pci_bus *busses)
>  {
> -    pcimem_start = RamSize;
> +    pcimem_start = RamSize + below_4g_hp_mem_size;
>  
>      if (pci_bios_init_root_regions(busses)) {
>          struct pci_region r64_mem, r64_pref;
> @@ -616,7 +617,8 @@ static void pci_bios_map_devices(struct pci_bus *busses)
>          u64 align_mem = pci_region_align(&r64_mem);
>          u64 align_pref = pci_region_align(&r64_pref);
>  
> -        r64_mem.base = ALIGN(0x100000000LL + RamSizeOver4G, align_mem);
> +        r64_mem.base = ALIGN(0x100000000LL + RamSizeOver4G +
> +                above_4g_hp_mem_size, align_mem);
>          r64_pref.base = ALIGN(r64_mem.base + sum_mem, align_pref);
>          pcimem64_start = r64_mem.base;
>          pcimem64_end = r64_pref.base + sum_pref;
> diff --git a/src/post.c b/src/post.c
> index 924b311..c37730b 100644
> --- a/src/post.c
> +++ b/src/post.c
> @@ -234,6 +234,9 @@ maininit(void)
>      // Initialize mtrr
>      mtrr_setup();
>  
> +    smp_get_ncpus();
> +    read_srat_early();
> +
>      // Initialize pci
>      pci_setup();
>      smm_init();
> diff --git a/src/smp.c b/src/smp.c
> index 4975412..3922776 100644
> --- a/src/smp.c
> +++ b/src/smp.c
> @@ -138,7 +138,11 @@ smp_probe(void)
>  
>      // Restore memory.
>      *(u64*)BUILD_AP_BOOT_ADDR = old;
> +}
>  
> +void
> +smp_get_ncpus(void)

You don't declare this function in a header, but you call it from another file.
This breaks the build:
src/post.c: In function ‘maininit’:
src/post.c:237: warning: implicit declaration of function ‘smp_get_ncpus’
src/smp.c:144: note: previous definition of ‘smp_get_ncpus’ was here
src/post.c:237: error: incompatible implicit declaration of function ‘smp_get_ncpus’
src/smp.c:144: note: previous definition of ‘smp_get_ncpus’ was here

Thanks
Wen Congyang

> +{
>      MaxCountCPUs = qemu_cfg_get_max_cpus();
>      if (!MaxCountCPUs || MaxCountCPUs < CountCPUs)
>          MaxCountCPUs = CountCPUs;

Patch

diff --git a/src/acpi.c b/src/acpi.c
index 1223b52..9e99aa7 100644
--- a/src/acpi.c
+++ b/src/acpi.c
@@ -428,7 +428,10 @@  encodeLen(u8 *ssdt_ptr, int length, int bytes)
 #define MEM_OFFSET_END   63
 #define MEM_OFFSET_SIZE  79
 
-u64 nb_hp_memslots = 0;
+u64 nb_hp_memslots = 0, nb_numanodes;
+u64 *numa_data, *hp_memdata;
+u64 below_4g_hp_mem_size = 0;
+u64 above_4g_hp_mem_size = 0;
 struct srat_memory_affinity *mem;
 
 #define SSDT_SIGNATURE 0x54445353 // SSDT
@@ -763,17 +766,7 @@  acpi_build_srat_memory(struct srat_memory_affinity *numamem,
 static void *
 build_srat(void)
 {
-    int nb_numa_nodes = qemu_cfg_get_numa_nodes();
-
-    u64 *numadata = malloc_tmphigh(sizeof(u64) * (MaxCountCPUs + nb_numa_nodes));
-    if (!numadata) {
-        warn_noalloc();
-        return NULL;
-    }
-
-    qemu_cfg_get_numa_data(numadata, MaxCountCPUs + nb_numa_nodes);
-
-    qemu_cfg_get_numa_data(&nb_hp_memslots, 1);
+    int nb_numa_nodes = nb_numanodes;
     struct system_resource_affinity_table *srat;
     int srat_size = sizeof(*srat) +
         sizeof(struct srat_processor_affinity) * MaxCountCPUs +
@@ -782,7 +775,7 @@  build_srat(void)
     srat = malloc_high(srat_size);
     if (!srat) {
         warn_noalloc();
-        free(numadata);
+        free(numa_data);
         return NULL;
     }
 
@@ -791,6 +784,7 @@  build_srat(void)
     struct srat_processor_affinity *core = (void*)(srat + 1);
     int i;
     u64 curnode;
+    u64 *numadata = numa_data;
 
     for (i = 0; i < MaxCountCPUs; ++i) {
         core->type = SRAT_PROCESSOR;
@@ -847,15 +841,7 @@  build_srat(void)
     mem = (void*)numamem;
 
     if (nb_hp_memslots) {
-        u64 *hpmemdata = malloc_tmphigh(sizeof(u64) * (3 * nb_hp_memslots));
-        if (!hpmemdata) {
-            warn_noalloc();
-            free(hpmemdata);
-            free(numadata);
-            return NULL;
-        }
-
-        qemu_cfg_get_numa_data(hpmemdata, 3 * nb_hp_memslots);
+        u64 *hpmemdata = hp_memdata;
 
         for (i = 1; i < nb_hp_memslots + 1; ++i) {
             mem_base = *hpmemdata++;
@@ -865,7 +851,7 @@  build_srat(void)
             numamem++;
             slots++;
         }
-        free(hpmemdata);
+        free(hp_memdata);
     }
 
     for (; slots < nb_numa_nodes + nb_hp_memslots + 2; slots++) {
@@ -875,10 +861,58 @@  build_srat(void)
 
     build_header((void*)srat, SRAT_SIGNATURE, srat_size, 1);
 
-    free(numadata);
+    free(numa_data);
     return srat;
 }
 
+/* QEMU paravirt SRAT entries need to be read in before pci initialization */
+void read_srat_early(void)
+{
+    int i;
+
+    nb_numanodes = qemu_cfg_get_numa_nodes();
+    u64 *hpmemdata;
+    u64 mem_len, mem_base;
+
+    numa_data = malloc_tmphigh(sizeof(u64) * (MaxCountCPUs + nb_numanodes));
+    if (!numa_data) {
+        warn_noalloc();
+    }
+
+    qemu_cfg_get_numa_data(numa_data, MaxCountCPUs + nb_numanodes);
+    qemu_cfg_get_numa_data(&nb_hp_memslots, 1);
+
+    if (nb_hp_memslots) {
+        hp_memdata = malloc_tmphigh(sizeof(u64) * (3 * nb_hp_memslots));
+        if (!hp_memdata) {
+            warn_noalloc();
+            free(hp_memdata);
+            free(numa_data);
+        }
+
+        qemu_cfg_get_numa_data(hp_memdata, 3 * nb_hp_memslots);
+        hpmemdata = hp_memdata;
+
+        for (i = 1; i < nb_hp_memslots + 1; ++i) {
+            mem_base = *hpmemdata++;
+            mem_len = *hpmemdata++;
+            hpmemdata++;
+            if (mem_base >= 0x100000000LL) {
+                above_4g_hp_mem_size += mem_len;
+            }
+            /* if dimm fits before pci hole, append it normally */
+            else if (mem_base + mem_len <= BUILD_PCIMEM_START) {
+                below_4g_hp_mem_size += mem_len;
+            }
+            /* otherwise place it above 4GB */
+            else {
+                above_4g_hp_mem_size += mem_len;
+            }
+        }
+
+    }
+}
+
 static const struct pci_device_id acpi_find_tbl[] = {
     /* PIIX4 Power Management device. */
     PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_3, NULL),
diff --git a/src/acpi.h b/src/acpi.h
index cb21561..d29837f 100644
--- a/src/acpi.h
+++ b/src/acpi.h
@@ -5,6 +5,9 @@ 
 
 void acpi_bios_init(void);
 u32 find_resume_vector(void);
+void read_srat_early(void);
+extern u64 below_4g_hp_mem_size;
+extern u64 above_4g_hp_mem_size;
 
 #define RSDP_SIGNATURE 0x2052545020445352LL // "RSD PTR "
 
diff --git a/src/pciinit.c b/src/pciinit.c
index 31115ee..c5a4b24 100644
--- a/src/pciinit.c
+++ b/src/pciinit.c
@@ -12,6 +12,7 @@ 
 #include "ioport.h" // PORT_ATA1_CMD_BASE
 #include "config.h" // CONFIG_*
 #include "xen.h" // usingXen
+#include "acpi.h"
 
 #define PCI_DEVICE_MEM_MIN     0x1000
 #define PCI_BRIDGE_IO_MIN      0x1000
@@ -597,7 +598,7 @@  static void pci_region_map_entries(struct pci_bus *busses, struct pci_region *r)
 
 static void pci_bios_map_devices(struct pci_bus *busses)
 {
-    pcimem_start = RamSize;
+    pcimem_start = RamSize + below_4g_hp_mem_size;
 
     if (pci_bios_init_root_regions(busses)) {
         struct pci_region r64_mem, r64_pref;
@@ -616,7 +617,8 @@  static void pci_bios_map_devices(struct pci_bus *busses)
         u64 align_mem = pci_region_align(&r64_mem);
         u64 align_pref = pci_region_align(&r64_pref);
 
-        r64_mem.base = ALIGN(0x100000000LL + RamSizeOver4G, align_mem);
+        r64_mem.base = ALIGN(0x100000000LL + RamSizeOver4G +
+                above_4g_hp_mem_size, align_mem);
         r64_pref.base = ALIGN(r64_mem.base + sum_mem, align_pref);
         pcimem64_start = r64_mem.base;
         pcimem64_end = r64_pref.base + sum_pref;
diff --git a/src/post.c b/src/post.c
index 924b311..c37730b 100644
--- a/src/post.c
+++ b/src/post.c
@@ -234,6 +234,9 @@  maininit(void)
     // Initialize mtrr
     mtrr_setup();
 
+    smp_get_ncpus();
+    read_srat_early();
+
     // Initialize pci
     pci_setup();
     smm_init();
diff --git a/src/smp.c b/src/smp.c
index 4975412..3922776 100644
--- a/src/smp.c
+++ b/src/smp.c
@@ -138,7 +138,11 @@  smp_probe(void)
 
     // Restore memory.
     *(u64*)BUILD_AP_BOOT_ADDR = old;
+}
 
+void
+smp_get_ncpus(void)
+{
     MaxCountCPUs = qemu_cfg_get_max_cpus();
     if (!MaxCountCPUs || MaxCountCPUs < CountCPUs)
         MaxCountCPUs = CountCPUs;