diff mbox series

[v5,5/8] acpi: introduce AcpiDeviceIfClass.build_mem_ranges hook

Message ID 20190614155626.27932-6-tao3.xu@intel.com
State New
Headers show
Series Build ACPI Heterogeneous Memory Attribute Table (HMAT) | expand

Commit Message

Tao Xu June 14, 2019, 3:56 p.m. UTC
Add build_mem_ranges callback to AcpiDeviceIfClass and use
it for generating SRAT and HMAT numa memory ranges.

Suggested-by: Igor Mammedov <imammedo@redhat.com>
Co-developed-by: Liu Jingqi <jingqi.liu@intel.com>
Signed-off-by: Liu Jingqi <jingqi.liu@intel.com>
Signed-off-by: Tao Xu <tao3.xu@intel.com>
---

Changes in v5 -> v4:
    - Add the missing if 'mem_len > 0' in pc_build_mem_ranges() (Igor)
    - Correct the descriptions of build_mem_ranges
    in AcpiDeviceIfClass (Igor)
    - Use GArray for NUMA memory ranges data (Igor)
    - Add the reason of using stub (Igor)
---
 hw/acpi/piix4.c                      |   1 +
 hw/i386/acpi-build.c                 | 133 +++++++++++++++++----------
 hw/isa/lpc_ich9.c                    |   1 +
 include/hw/acpi/acpi_dev_interface.h |   4 +
 include/hw/i386/pc.h                 |   1 +
 include/sysemu/numa.h                |  12 +++
 stubs/Makefile.objs                  |   1 +
 stubs/pc_build_mem_ranges.c          |  14 +++
 8 files changed, 120 insertions(+), 47 deletions(-)
 create mode 100644 stubs/pc_build_mem_ranges.c

Comments

Igor Mammedov July 1, 2019, 10:59 a.m. UTC | #1
On Fri, 14 Jun 2019 23:56:23 +0800
Tao Xu <tao3.xu@intel.com> wrote:

> Add build_mem_ranges callback to AcpiDeviceIfClass and use
> it for generating SRAT and HMAT numa memory ranges.
> 
> Suggested-by: Igor Mammedov <imammedo@redhat.com>
> Co-developed-by: Liu Jingqi <jingqi.liu@intel.com>
> Signed-off-by: Liu Jingqi <jingqi.liu@intel.com>
> Signed-off-by: Tao Xu <tao3.xu@intel.com>
> ---
> 
> Changes in v5 -> v4:
>     - Add the missing if 'mem_len > 0' in pc_build_mem_ranges() (Igor)
>     - Correct the descriptions of build_mem_ranges
>     in AcpiDeviceIfClass (Igor)
>     - Use GArray for NUMA memory ranges data (Igor)
>     - Add the reason of using stub (Igor)
> ---
>  hw/acpi/piix4.c                      |   1 +
>  hw/i386/acpi-build.c                 | 133 +++++++++++++++++----------
>  hw/isa/lpc_ich9.c                    |   1 +
>  include/hw/acpi/acpi_dev_interface.h |   4 +
>  include/hw/i386/pc.h                 |   1 +
>  include/sysemu/numa.h                |  12 +++
>  stubs/Makefile.objs                  |   1 +
>  stubs/pc_build_mem_ranges.c          |  14 +++
>  8 files changed, 120 insertions(+), 47 deletions(-)
>  create mode 100644 stubs/pc_build_mem_ranges.c
> 
> diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
> index ec4e186cec..bc078c1ad7 100644
> --- a/hw/acpi/piix4.c
> +++ b/hw/acpi/piix4.c
> @@ -702,6 +702,7 @@ static void piix4_pm_class_init(ObjectClass *klass, void *data)
>      adevc->ospm_status = piix4_ospm_status;
>      adevc->send_event = piix4_send_gpe;
>      adevc->madt_cpu = pc_madt_cpu_entry;
> +    adevc->build_mem_ranges = pc_build_mem_ranges;
>  }
>  
>  static const TypeInfo piix4_pm_info = {
> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> index 055e677c30..44dd447fa5 100644
> --- a/hw/i386/acpi-build.c
> +++ b/hw/i386/acpi-build.c
> @@ -2279,18 +2279,89 @@ build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog)
>  #define HOLE_640K_START  (640 * KiB)
>  #define HOLE_640K_END   (1 * MiB)
>  
> +void pc_build_mem_ranges(AcpiDeviceIf *adev, MachineState *ms)
> +{
> +    uint64_t mem_len, mem_base, next_base;
> +    int i;
> +    PCMachineState *pcms = PC_MACHINE(ms);
> +    NumaState *nstat = ms->numa_state;
> +    NumaMemRange *mem_range;
> +    nstat->mem_ranges_num = 0;
> +    next_base = 0;
> +
> +    /*
> +     * the memory map is a bit tricky, it contains at least one hole
> +     * from 640k-1M and possibly another one from 3.5G-4G.
> +     */
> +
> +    for (i = 0; i < pcms->numa_nodes; ++i) {
> +        mem_base = next_base;
> +        mem_len = pcms->node_mem[i];
> +        next_base = mem_base + mem_len;
> +
> +        /* Cut out the 640K hole */
> +        if (mem_base <= HOLE_640K_START &&
> +            next_base > HOLE_640K_START) {
> +            mem_len -= next_base - HOLE_640K_START;
> +            if (mem_len > 0) {
> +                mem_range = acpi_data_push(nstat->mem_ranges,
> +                                           sizeof *mem_range);
> +                mem_range->base = mem_base;
> +                mem_range->length = mem_len;
> +                mem_range->node = i;
> +                nstat->mem_ranges_num++;
> +            }
> +
> +            /* Check for the rare case: 640K < RAM < 1M */
> +            if (next_base <= HOLE_640K_END) {
> +                next_base = HOLE_640K_END;
> +                continue;
> +            }
> +            mem_base = HOLE_640K_END;
> +            mem_len = next_base - HOLE_640K_END;
> +        }
> +
> +        /* Cut out the ACPI_PCI hole */
> +        if (mem_base <= pcms->below_4g_mem_size &&
> +            next_base > pcms->below_4g_mem_size) {
> +            mem_len -= next_base - pcms->below_4g_mem_size;
> +            if (mem_len > 0) {
> +                mem_range = acpi_data_push(nstat->mem_ranges,
> +                                           sizeof *mem_range);
> +                mem_range->base = mem_base;
> +                mem_range->length = mem_len;
> +                mem_range->node = i;
> +                nstat->mem_ranges_num++;
> +            }
> +            mem_base = 1ULL << 32;
> +            mem_len = next_base - pcms->below_4g_mem_size;
> +            next_base = mem_base + mem_len;
> +        }
> +        if (mem_len > 0) {
> +            mem_range = acpi_data_push(nstat->mem_ranges,
> +                                       sizeof *mem_range);
> +            mem_range->base = mem_base;
> +            mem_range->length = mem_len;
> +            mem_range->node = i;
> +            nstat->mem_ranges_num++;
> +        }
> +    }
> +}
> +
>  static void
>  build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
>  {
>      AcpiSystemResourceAffinityTable *srat;
>      AcpiSratMemoryAffinity *numamem;
>  
> -    int i;
> -    int srat_start, numa_start, slots;
> -    uint64_t mem_len, mem_base, next_base;
> +    int i, srat_start, numa_start, slots;
>      MachineClass *mc = MACHINE_GET_CLASS(machine);
>      const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(machine);
>      PCMachineState *pcms = PC_MACHINE(machine);
> +    AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(pcms->acpi_dev);
> +    AcpiDeviceIf *adev = ACPI_DEVICE_IF(pcms->acpi_dev);
> +    NumaState *nstat = machine->numa_state;
> +    NumaMemRange *mem_range;
>      ram_addr_t hotplugabble_address_space_size =
>          object_property_get_int(OBJECT(pcms), PC_MACHINE_DEVMEM_REGION_SIZE,
>                                  NULL);
> @@ -2327,57 +2398,25 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
>          }
>      }
>  
> +    if (pcms->numa_nodes && !nstat->mem_ranges_num) {
suggest to drop nstat->mem_ranges_num field and use nstat->mem_ranges->len instead,
also its probably better to initialize nstat->mem_ranges
at the same place where ms->numa_state is initialized, so that
a specific platform code won't need to duplicate it.

> +        nstat->mem_ranges = g_array_new(false, true /* clear */,
> +                                        sizeof *mem_range);
> +        adevc->build_mem_ranges(adev, machine);
> +    }
>  
> -    /* the memory map is a bit tricky, it contains at least one hole
> -     * from 640k-1M and possibly another one from 3.5G-4G.
> -     */
> -    next_base = 0;
>      numa_start = table_data->len;
>  
> -    for (i = 1; i < pcms->numa_nodes + 1; ++i) {
> -        mem_base = next_base;
> -        mem_len = pcms->node_mem[i - 1];
> -        next_base = mem_base + mem_len;
> -
> -        /* Cut out the 640K hole */
> -        if (mem_base <= HOLE_640K_START &&
> -            next_base > HOLE_640K_START) {
> -            mem_len -= next_base - HOLE_640K_START;
> -            if (mem_len > 0) {
> -                numamem = acpi_data_push(table_data, sizeof *numamem);
> -                build_srat_memory(numamem, mem_base, mem_len, i - 1,
> -                                  MEM_AFFINITY_ENABLED);
> -            }
> -
> -            /* Check for the rare case: 640K < RAM < 1M */
> -            if (next_base <= HOLE_640K_END) {
> -                next_base = HOLE_640K_END;
> -                continue;
> -            }
> -            mem_base = HOLE_640K_END;
> -            mem_len = next_base - HOLE_640K_END;
> -        }
> -
> -        /* Cut out the ACPI_PCI hole */
> -        if (mem_base <= pcms->below_4g_mem_size &&
> -            next_base > pcms->below_4g_mem_size) {
> -            mem_len -= next_base - pcms->below_4g_mem_size;
> -            if (mem_len > 0) {
> -                numamem = acpi_data_push(table_data, sizeof *numamem);
> -                build_srat_memory(numamem, mem_base, mem_len, i - 1,
> -                                  MEM_AFFINITY_ENABLED);
> -            }
> -            mem_base = 1ULL << 32;
> -            mem_len = next_base - pcms->below_4g_mem_size;
> -            next_base = mem_base + mem_len;
> -        }
> -
> -        if (mem_len > 0) {
> +    for (i = 0; i < nstat->mem_ranges_num; i++) {
> +        mem_range = &g_array_index(nstat->mem_ranges, NumaMemRange, i);
> +        if (mem_range->length > 0) {
why do we have this condition,
I'd assume adevc->build_mem_ranges() shouldn't return empty ranges.

>              numamem = acpi_data_push(table_data, sizeof *numamem);
> -            build_srat_memory(numamem, mem_base, mem_len, i - 1,
> +            build_srat_memory(numamem, mem_range->base,
> +                              mem_range->length,
> +                              mem_range->node,
>                                MEM_AFFINITY_ENABLED);
>          }
>      }
> +
>      slots = (table_data->len - numa_start) / sizeof *numamem;
>      for (; slots < pcms->numa_nodes + 2; slots++) {
>          numamem = acpi_data_push(table_data, sizeof *numamem);
> diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c
> index 35d17246e9..20d919c63d 100644
> --- a/hw/isa/lpc_ich9.c
> +++ b/hw/isa/lpc_ich9.c
> @@ -801,6 +801,7 @@ static void ich9_lpc_class_init(ObjectClass *klass, void *data)
>      adevc->ospm_status = ich9_pm_ospm_status;
>      adevc->send_event = ich9_send_gpe;
>      adevc->madt_cpu = pc_madt_cpu_entry;
> +    adevc->build_mem_ranges = pc_build_mem_ranges;
>  }
>  
>  static const TypeInfo ich9_lpc_info = {
> diff --git a/include/hw/acpi/acpi_dev_interface.h b/include/hw/acpi/acpi_dev_interface.h
> index 43ff119179..5956b5ea33 100644
> --- a/include/hw/acpi/acpi_dev_interface.h
> +++ b/include/hw/acpi/acpi_dev_interface.h
> @@ -39,6 +39,8 @@ void acpi_send_event(DeviceState *dev, AcpiEventStatusBits event);
>   *           for CPU indexed by @uid in @apic_ids array,
>   *           returned structure types are:
>   *           0 - Local APIC, 9 - Local x2APIC, 0xB - GICC
> + * build_mem_ranges: build memory ranges of ACPI SRAT (except misc
> + * and hotplug SRAT ranges) and HMAT
>   *
>   * Interface is designed for providing unified interface
>   * to generic ACPI functionality that could be used without
> @@ -54,5 +56,7 @@ typedef struct AcpiDeviceIfClass {
>      void (*send_event)(AcpiDeviceIf *adev, AcpiEventStatusBits ev);
>      void (*madt_cpu)(AcpiDeviceIf *adev, int uid,
>                       const CPUArchIdList *apic_ids, GArray *entry);
> +    void (*build_mem_ranges)(AcpiDeviceIf *adev, MachineState *ms);
> +
>  } AcpiDeviceIfClass;
>  #endif
> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> index 5d5636241e..21b9ac3d11 100644
> --- a/include/hw/i386/pc.h
> +++ b/include/hw/i386/pc.h
> @@ -281,6 +281,7 @@ void pc_system_firmware_init(PCMachineState *pcms, MemoryRegion *rom_memory);
>  /* acpi-build.c */
>  void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid,
>                         const CPUArchIdList *apic_ids, GArray *entry);
> +void pc_build_mem_ranges(AcpiDeviceIf *adev, MachineState *ms);
>  
>  /* e820 types */
>  #define E820_RAM        1
> diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h
> index 437eb21fef..e3c85b77bc 100644
> --- a/include/sysemu/numa.h
> +++ b/include/sysemu/numa.h
> @@ -20,6 +20,12 @@ struct NumaNodeMem {
>      uint64_t node_plugged_mem;
>  };
>  
> +typedef struct NumaMemRange {
> +    uint64_t base;
> +    uint64_t length;
> +    uint32_t node;
> +} NumaMemRange;
> +
>  struct NumaState {
>      /* Number of NUMA nodes */
>      int num_nodes;
> @@ -29,6 +35,12 @@ struct NumaState {
>  
>      /* NUMA nodes information */
>      NodeInfo nodes[MAX_NODES];
> +
> +    /* Number of NUMA memory ranges */
> +    uint32_t mem_ranges_num;
> +
> +    /* NUMA memory ranges */
> +    GArray *mem_ranges;
>  };
>  typedef struct NumaState NumaState;
>  
> diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs
> index 9c7393b08c..4f0cdc1a45 100644
> --- a/stubs/Makefile.objs
> +++ b/stubs/Makefile.objs
> @@ -33,6 +33,7 @@ stub-obj-y += qmp_memory_device.o
>  stub-obj-y += target-monitor-defs.o
>  stub-obj-y += target-get-monitor-def.o
>  stub-obj-y += pc_madt_cpu_entry.o
> +stub-obj-y += pc_build_mem_ranges.o
>  stub-obj-y += vmgenid.o
>  stub-obj-y += xen-common.o
>  stub-obj-y += xen-hvm.o
> diff --git a/stubs/pc_build_mem_ranges.c b/stubs/pc_build_mem_ranges.c
> new file mode 100644
> index 0000000000..997cdfe00b
> --- /dev/null
> +++ b/stubs/pc_build_mem_ranges.c
> @@ -0,0 +1,14 @@
> +/*
> + * Stub for pc_build_mem_ranges().
> + * piix4 is used not only pc, but also mips and etc. In order to add
> + * build_mem_ranges callback to AcpiDeviceIfClass and use pc_build_mem_ranges
> + * in hw/acpi/piix4.c, pc_build_mem_ranges() stub is added to make other arch
> + * can compile successfully.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "hw/i386/pc.h"
> +
> +void pc_build_mem_ranges(AcpiDeviceIf *adev, MachineState *ms)
> +{
> +}
Tao Xu July 2, 2019, 1:12 a.m. UTC | #2
On 7/1/2019 6:59 PM, Igor Mammedov wrote:
> On Fri, 14 Jun 2019 23:56:23 +0800
> Tao Xu <tao3.xu@intel.com> wrote:
> 
>> Add build_mem_ranges callback to AcpiDeviceIfClass and use
>> it for generating SRAT and HMAT numa memory ranges.
>>
>> Suggested-by: Igor Mammedov <imammedo@redhat.com>
>> Co-developed-by: Liu Jingqi <jingqi.liu@intel.com>
>> Signed-off-by: Liu Jingqi <jingqi.liu@intel.com>
>> Signed-off-by: Tao Xu <tao3.xu@intel.com>
>> ---
>>
>> Changes in v5 -> v4:
>>      - Add the missing if 'mem_len > 0' in pc_build_mem_ranges() (Igor)
>>      - Correct the descriptions of build_mem_ranges
>>      in AcpiDeviceIfClass (Igor)
>>      - Use GArray for NUMA memory ranges data (Igor)
>>      - Add the reason of using stub (Igor)
>> ---
>>   hw/acpi/piix4.c                      |   1 +
>>   hw/i386/acpi-build.c                 | 133 +++++++++++++++++----------
>>   hw/isa/lpc_ich9.c                    |   1 +
>>   include/hw/acpi/acpi_dev_interface.h |   4 +
>>   include/hw/i386/pc.h                 |   1 +
>>   include/sysemu/numa.h                |  12 +++
>>   stubs/Makefile.objs                  |   1 +
>>   stubs/pc_build_mem_ranges.c          |  14 +++
>>   8 files changed, 120 insertions(+), 47 deletions(-)
>>   create mode 100644 stubs/pc_build_mem_ranges.c
>>
>> diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
>> index ec4e186cec..bc078c1ad7 100644
>> --- a/hw/acpi/piix4.c
>> +++ b/hw/acpi/piix4.c
>> @@ -702,6 +702,7 @@ static void piix4_pm_class_init(ObjectClass *klass, void *data)
>>       adevc->ospm_status = piix4_ospm_status;
>>       adevc->send_event = piix4_send_gpe;
>>       adevc->madt_cpu = pc_madt_cpu_entry;
>> +    adevc->build_mem_ranges = pc_build_mem_ranges;
>>   }
>>   
>>   static const TypeInfo piix4_pm_info = {
>> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
>> index 055e677c30..44dd447fa5 100644
>> --- a/hw/i386/acpi-build.c
>> +++ b/hw/i386/acpi-build.c
>> @@ -2279,18 +2279,89 @@ build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog)
>>   #define HOLE_640K_START  (640 * KiB)
>>   #define HOLE_640K_END   (1 * MiB)
>>   
>> +void pc_build_mem_ranges(AcpiDeviceIf *adev, MachineState *ms)
>> +{
>> +    uint64_t mem_len, mem_base, next_base;
>> +    int i;
>> +    PCMachineState *pcms = PC_MACHINE(ms);
>> +    NumaState *nstat = ms->numa_state;
>> +    NumaMemRange *mem_range;
>> +    nstat->mem_ranges_num = 0;
>> +    next_base = 0;
>> +
>> +    /*
>> +     * the memory map is a bit tricky, it contains at least one hole
>> +     * from 640k-1M and possibly another one from 3.5G-4G.
>> +     */
>> +
>> +    for (i = 0; i < pcms->numa_nodes; ++i) {
>> +        mem_base = next_base;
>> +        mem_len = pcms->node_mem[i];
>> +        next_base = mem_base + mem_len;
>> +
>> +        /* Cut out the 640K hole */
>> +        if (mem_base <= HOLE_640K_START &&
>> +            next_base > HOLE_640K_START) {
>> +            mem_len -= next_base - HOLE_640K_START;
>> +            if (mem_len > 0) {
>> +                mem_range = acpi_data_push(nstat->mem_ranges,
>> +                                           sizeof *mem_range);
>> +                mem_range->base = mem_base;
>> +                mem_range->length = mem_len;
>> +                mem_range->node = i;
>> +                nstat->mem_ranges_num++;
>> +            }
>> +
>> +            /* Check for the rare case: 640K < RAM < 1M */
>> +            if (next_base <= HOLE_640K_END) {
>> +                next_base = HOLE_640K_END;
>> +                continue;
>> +            }
>> +            mem_base = HOLE_640K_END;
>> +            mem_len = next_base - HOLE_640K_END;
>> +        }
>> +
>> +        /* Cut out the ACPI_PCI hole */
>> +        if (mem_base <= pcms->below_4g_mem_size &&
>> +            next_base > pcms->below_4g_mem_size) {
>> +            mem_len -= next_base - pcms->below_4g_mem_size;
>> +            if (mem_len > 0) {
>> +                mem_range = acpi_data_push(nstat->mem_ranges,
>> +                                           sizeof *mem_range);
>> +                mem_range->base = mem_base;
>> +                mem_range->length = mem_len;
>> +                mem_range->node = i;
>> +                nstat->mem_ranges_num++;
>> +            }
>> +            mem_base = 1ULL << 32;
>> +            mem_len = next_base - pcms->below_4g_mem_size;
>> +            next_base = mem_base + mem_len;
>> +        }
>> +        if (mem_len > 0) {
>> +            mem_range = acpi_data_push(nstat->mem_ranges,
>> +                                       sizeof *mem_range);
>> +            mem_range->base = mem_base;
>> +            mem_range->length = mem_len;
>> +            mem_range->node = i;
>> +            nstat->mem_ranges_num++;
>> +        }
>> +    }
>> +}
>> +
>>   static void
>>   build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
>>   {
>>       AcpiSystemResourceAffinityTable *srat;
>>       AcpiSratMemoryAffinity *numamem;
>>   
>> -    int i;
>> -    int srat_start, numa_start, slots;
>> -    uint64_t mem_len, mem_base, next_base;
>> +    int i, srat_start, numa_start, slots;
>>       MachineClass *mc = MACHINE_GET_CLASS(machine);
>>       const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(machine);
>>       PCMachineState *pcms = PC_MACHINE(machine);
>> +    AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(pcms->acpi_dev);
>> +    AcpiDeviceIf *adev = ACPI_DEVICE_IF(pcms->acpi_dev);
>> +    NumaState *nstat = machine->numa_state;
>> +    NumaMemRange *mem_range;
>>       ram_addr_t hotplugabble_address_space_size =
>>           object_property_get_int(OBJECT(pcms), PC_MACHINE_DEVMEM_REGION_SIZE,
>>                                   NULL);
>> @@ -2327,57 +2398,25 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
>>           }
>>       }
>>   
>> +    if (pcms->numa_nodes && !nstat->mem_ranges_num) {
> suggest to drop nstat->mem_ranges_num field and use nstat->mem_ranges->len instead,
> also its probably better to initialize nstat->mem_ranges
> at the same place where ms->numa_state is initialized, so that
> a specific platform code won't need to duplicate it.

OK, I will improve it.
> 
>> +        nstat->mem_ranges = g_array_new(false, true /* clear */,
>> +                                        sizeof *mem_range);
>> +        adevc->build_mem_ranges(adev, machine);
>> +    }
>>   
>> -    /* the memory map is a bit tricky, it contains at least one hole
>> -     * from 640k-1M and possibly another one from 3.5G-4G.
>> -     */
>> -    next_base = 0;
>>       numa_start = table_data->len;
>>   
>> -    for (i = 1; i < pcms->numa_nodes + 1; ++i) {
>> -        mem_base = next_base;
>> -        mem_len = pcms->node_mem[i - 1];
>> -        next_base = mem_base + mem_len;
>> -
>> -        /* Cut out the 640K hole */
>> -        if (mem_base <= HOLE_640K_START &&
>> -            next_base > HOLE_640K_START) {
>> -            mem_len -= next_base - HOLE_640K_START;
>> -            if (mem_len > 0) {
>> -                numamem = acpi_data_push(table_data, sizeof *numamem);
>> -                build_srat_memory(numamem, mem_base, mem_len, i - 1,
>> -                                  MEM_AFFINITY_ENABLED);
>> -            }
>> -
>> -            /* Check for the rare case: 640K < RAM < 1M */
>> -            if (next_base <= HOLE_640K_END) {
>> -                next_base = HOLE_640K_END;
>> -                continue;
>> -            }
>> -            mem_base = HOLE_640K_END;
>> -            mem_len = next_base - HOLE_640K_END;
>> -        }
>> -
>> -        /* Cut out the ACPI_PCI hole */
>> -        if (mem_base <= pcms->below_4g_mem_size &&
>> -            next_base > pcms->below_4g_mem_size) {
>> -            mem_len -= next_base - pcms->below_4g_mem_size;
>> -            if (mem_len > 0) {
>> -                numamem = acpi_data_push(table_data, sizeof *numamem);
>> -                build_srat_memory(numamem, mem_base, mem_len, i - 1,
>> -                                  MEM_AFFINITY_ENABLED);
>> -            }
>> -            mem_base = 1ULL << 32;
>> -            mem_len = next_base - pcms->below_4g_mem_size;
>> -            next_base = mem_base + mem_len;
>> -        }
>> -
>> -        if (mem_len > 0) {
>> +    for (i = 0; i < nstat->mem_ranges_num; i++) {
>> +        mem_range = &g_array_index(nstat->mem_ranges, NumaMemRange, i);
>> +        if (mem_range->length > 0) {
> why do we have this condition,
> I'd assume adevc->build_mem_ranges() shouldn't return empty ranges.
> 
OK I will drop this condition.

>>               numamem = acpi_data_push(table_data, sizeof *numamem);
>> -            build_srat_memory(numamem, mem_base, mem_len, i - 1,
>> +            build_srat_memory(numamem, mem_range->base,
>> +                              mem_range->length,
>> +                              mem_range->node,
>>                                 MEM_AFFINITY_ENABLED);
>>           }
>>       }
>> +
>>       slots = (table_data->len - numa_start) / sizeof *numamem;
>>       for (; slots < pcms->numa_nodes + 2; slots++) {
>>           numamem = acpi_data_push(table_data, sizeof *numamem);
>> diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c
>> index 35d17246e9..20d919c63d 100644
>> --- a/hw/isa/lpc_ich9.c
>> +++ b/hw/isa/lpc_ich9.c
>> @@ -801,6 +801,7 @@ static void ich9_lpc_class_init(ObjectClass *klass, void *data)
>>       adevc->ospm_status = ich9_pm_ospm_status;
>>       adevc->send_event = ich9_send_gpe;
>>       adevc->madt_cpu = pc_madt_cpu_entry;
>> +    adevc->build_mem_ranges = pc_build_mem_ranges;
>>   }
>>   
>>   static const TypeInfo ich9_lpc_info = {
>> diff --git a/include/hw/acpi/acpi_dev_interface.h b/include/hw/acpi/acpi_dev_interface.h
>> index 43ff119179..5956b5ea33 100644
>> --- a/include/hw/acpi/acpi_dev_interface.h
>> +++ b/include/hw/acpi/acpi_dev_interface.h
>> @@ -39,6 +39,8 @@ void acpi_send_event(DeviceState *dev, AcpiEventStatusBits event);
>>    *           for CPU indexed by @uid in @apic_ids array,
>>    *           returned structure types are:
>>    *           0 - Local APIC, 9 - Local x2APIC, 0xB - GICC
>> + * build_mem_ranges: build memory ranges of ACPI SRAT (except misc
>> + * and hotplug SRAT ranges) and HMAT
>>    *
>>    * Interface is designed for providing unified interface
>>    * to generic ACPI functionality that could be used without
>> @@ -54,5 +56,7 @@ typedef struct AcpiDeviceIfClass {
>>       void (*send_event)(AcpiDeviceIf *adev, AcpiEventStatusBits ev);
>>       void (*madt_cpu)(AcpiDeviceIf *adev, int uid,
>>                        const CPUArchIdList *apic_ids, GArray *entry);
>> +    void (*build_mem_ranges)(AcpiDeviceIf *adev, MachineState *ms);
>> +
>>   } AcpiDeviceIfClass;
>>   #endif
>> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
>> index 5d5636241e..21b9ac3d11 100644
>> --- a/include/hw/i386/pc.h
>> +++ b/include/hw/i386/pc.h
>> @@ -281,6 +281,7 @@ void pc_system_firmware_init(PCMachineState *pcms, MemoryRegion *rom_memory);
>>   /* acpi-build.c */
>>   void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid,
>>                          const CPUArchIdList *apic_ids, GArray *entry);
>> +void pc_build_mem_ranges(AcpiDeviceIf *adev, MachineState *ms);
>>   
>>   /* e820 types */
>>   #define E820_RAM        1
>> diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h
>> index 437eb21fef..e3c85b77bc 100644
>> --- a/include/sysemu/numa.h
>> +++ b/include/sysemu/numa.h
>> @@ -20,6 +20,12 @@ struct NumaNodeMem {
>>       uint64_t node_plugged_mem;
>>   };
>>   
>> +typedef struct NumaMemRange {
>> +    uint64_t base;
>> +    uint64_t length;
>> +    uint32_t node;
>> +} NumaMemRange;
>> +
>>   struct NumaState {
>>       /* Number of NUMA nodes */
>>       int num_nodes;
>> @@ -29,6 +35,12 @@ struct NumaState {
>>   
>>       /* NUMA nodes information */
>>       NodeInfo nodes[MAX_NODES];
>> +
>> +    /* Number of NUMA memory ranges */
>> +    uint32_t mem_ranges_num;
>> +
>> +    /* NUMA memory ranges */
>> +    GArray *mem_ranges;
>>   };
>>   typedef struct NumaState NumaState;
>>   
>> diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs
>> index 9c7393b08c..4f0cdc1a45 100644
>> --- a/stubs/Makefile.objs
>> +++ b/stubs/Makefile.objs
>> @@ -33,6 +33,7 @@ stub-obj-y += qmp_memory_device.o
>>   stub-obj-y += target-monitor-defs.o
>>   stub-obj-y += target-get-monitor-def.o
>>   stub-obj-y += pc_madt_cpu_entry.o
>> +stub-obj-y += pc_build_mem_ranges.o
>>   stub-obj-y += vmgenid.o
>>   stub-obj-y += xen-common.o
>>   stub-obj-y += xen-hvm.o
>> diff --git a/stubs/pc_build_mem_ranges.c b/stubs/pc_build_mem_ranges.c
>> new file mode 100644
>> index 0000000000..997cdfe00b
>> --- /dev/null
>> +++ b/stubs/pc_build_mem_ranges.c
>> @@ -0,0 +1,14 @@
>> +/*
>> + * Stub for pc_build_mem_ranges().
>> + * piix4 is used not only pc, but also mips and etc. In order to add
>> + * build_mem_ranges callback to AcpiDeviceIfClass and use pc_build_mem_ranges
>> + * in hw/acpi/piix4.c, pc_build_mem_ranges() stub is added to make other arch
>> + * can compile successfully.
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include "hw/i386/pc.h"
>> +
>> +void pc_build_mem_ranges(AcpiDeviceIf *adev, MachineState *ms)
>> +{
>> +}
>
diff mbox series

Patch

diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index ec4e186cec..bc078c1ad7 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -702,6 +702,7 @@  static void piix4_pm_class_init(ObjectClass *klass, void *data)
     adevc->ospm_status = piix4_ospm_status;
     adevc->send_event = piix4_send_gpe;
     adevc->madt_cpu = pc_madt_cpu_entry;
+    adevc->build_mem_ranges = pc_build_mem_ranges;
 }
 
 static const TypeInfo piix4_pm_info = {
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 055e677c30..44dd447fa5 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2279,18 +2279,89 @@  build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog)
 #define HOLE_640K_START  (640 * KiB)
 #define HOLE_640K_END   (1 * MiB)
 
+void pc_build_mem_ranges(AcpiDeviceIf *adev, MachineState *ms)
+{
+    uint64_t mem_len, mem_base, next_base;
+    int i;
+    PCMachineState *pcms = PC_MACHINE(ms);
+    NumaState *nstat = ms->numa_state;
+    NumaMemRange *mem_range;
+    nstat->mem_ranges_num = 0;
+    next_base = 0;
+
+    /*
+     * the memory map is a bit tricky, it contains at least one hole
+     * from 640k-1M and possibly another one from 3.5G-4G.
+     */
+
+    for (i = 0; i < pcms->numa_nodes; ++i) {
+        mem_base = next_base;
+        mem_len = pcms->node_mem[i];
+        next_base = mem_base + mem_len;
+
+        /* Cut out the 640K hole */
+        if (mem_base <= HOLE_640K_START &&
+            next_base > HOLE_640K_START) {
+            mem_len -= next_base - HOLE_640K_START;
+            if (mem_len > 0) {
+                mem_range = acpi_data_push(nstat->mem_ranges,
+                                           sizeof *mem_range);
+                mem_range->base = mem_base;
+                mem_range->length = mem_len;
+                mem_range->node = i;
+                nstat->mem_ranges_num++;
+            }
+
+            /* Check for the rare case: 640K < RAM < 1M */
+            if (next_base <= HOLE_640K_END) {
+                next_base = HOLE_640K_END;
+                continue;
+            }
+            mem_base = HOLE_640K_END;
+            mem_len = next_base - HOLE_640K_END;
+        }
+
+        /* Cut out the ACPI_PCI hole */
+        if (mem_base <= pcms->below_4g_mem_size &&
+            next_base > pcms->below_4g_mem_size) {
+            mem_len -= next_base - pcms->below_4g_mem_size;
+            if (mem_len > 0) {
+                mem_range = acpi_data_push(nstat->mem_ranges,
+                                           sizeof *mem_range);
+                mem_range->base = mem_base;
+                mem_range->length = mem_len;
+                mem_range->node = i;
+                nstat->mem_ranges_num++;
+            }
+            mem_base = 1ULL << 32;
+            mem_len = next_base - pcms->below_4g_mem_size;
+            next_base = mem_base + mem_len;
+        }
+        if (mem_len > 0) {
+            mem_range = acpi_data_push(nstat->mem_ranges,
+                                       sizeof *mem_range);
+            mem_range->base = mem_base;
+            mem_range->length = mem_len;
+            mem_range->node = i;
+            nstat->mem_ranges_num++;
+        }
+    }
+}
+
 static void
 build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
 {
     AcpiSystemResourceAffinityTable *srat;
     AcpiSratMemoryAffinity *numamem;
 
-    int i;
-    int srat_start, numa_start, slots;
-    uint64_t mem_len, mem_base, next_base;
+    int i, srat_start, numa_start, slots;
     MachineClass *mc = MACHINE_GET_CLASS(machine);
     const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(machine);
     PCMachineState *pcms = PC_MACHINE(machine);
+    AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(pcms->acpi_dev);
+    AcpiDeviceIf *adev = ACPI_DEVICE_IF(pcms->acpi_dev);
+    NumaState *nstat = machine->numa_state;
+    NumaMemRange *mem_range;
     ram_addr_t hotplugabble_address_space_size =
         object_property_get_int(OBJECT(pcms), PC_MACHINE_DEVMEM_REGION_SIZE,
                                 NULL);
@@ -2327,57 +2398,25 @@  build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
         }
     }
 
+    if (pcms->numa_nodes && !nstat->mem_ranges_num) {
+        nstat->mem_ranges = g_array_new(false, true /* clear */,
+                                        sizeof *mem_range);
+        adevc->build_mem_ranges(adev, machine);
+    }
 
-    /* the memory map is a bit tricky, it contains at least one hole
-     * from 640k-1M and possibly another one from 3.5G-4G.
-     */
-    next_base = 0;
     numa_start = table_data->len;
 
-    for (i = 1; i < pcms->numa_nodes + 1; ++i) {
-        mem_base = next_base;
-        mem_len = pcms->node_mem[i - 1];
-        next_base = mem_base + mem_len;
-
-        /* Cut out the 640K hole */
-        if (mem_base <= HOLE_640K_START &&
-            next_base > HOLE_640K_START) {
-            mem_len -= next_base - HOLE_640K_START;
-            if (mem_len > 0) {
-                numamem = acpi_data_push(table_data, sizeof *numamem);
-                build_srat_memory(numamem, mem_base, mem_len, i - 1,
-                                  MEM_AFFINITY_ENABLED);
-            }
-
-            /* Check for the rare case: 640K < RAM < 1M */
-            if (next_base <= HOLE_640K_END) {
-                next_base = HOLE_640K_END;
-                continue;
-            }
-            mem_base = HOLE_640K_END;
-            mem_len = next_base - HOLE_640K_END;
-        }
-
-        /* Cut out the ACPI_PCI hole */
-        if (mem_base <= pcms->below_4g_mem_size &&
-            next_base > pcms->below_4g_mem_size) {
-            mem_len -= next_base - pcms->below_4g_mem_size;
-            if (mem_len > 0) {
-                numamem = acpi_data_push(table_data, sizeof *numamem);
-                build_srat_memory(numamem, mem_base, mem_len, i - 1,
-                                  MEM_AFFINITY_ENABLED);
-            }
-            mem_base = 1ULL << 32;
-            mem_len = next_base - pcms->below_4g_mem_size;
-            next_base = mem_base + mem_len;
-        }
-
-        if (mem_len > 0) {
+    for (i = 0; i < nstat->mem_ranges_num; i++) {
+        mem_range = &g_array_index(nstat->mem_ranges, NumaMemRange, i);
+        if (mem_range->length > 0) {
             numamem = acpi_data_push(table_data, sizeof *numamem);
-            build_srat_memory(numamem, mem_base, mem_len, i - 1,
+            build_srat_memory(numamem, mem_range->base,
+                              mem_range->length,
+                              mem_range->node,
                               MEM_AFFINITY_ENABLED);
         }
     }
+
     slots = (table_data->len - numa_start) / sizeof *numamem;
     for (; slots < pcms->numa_nodes + 2; slots++) {
         numamem = acpi_data_push(table_data, sizeof *numamem);
diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c
index 35d17246e9..20d919c63d 100644
--- a/hw/isa/lpc_ich9.c
+++ b/hw/isa/lpc_ich9.c
@@ -801,6 +801,7 @@  static void ich9_lpc_class_init(ObjectClass *klass, void *data)
     adevc->ospm_status = ich9_pm_ospm_status;
     adevc->send_event = ich9_send_gpe;
     adevc->madt_cpu = pc_madt_cpu_entry;
+    adevc->build_mem_ranges = pc_build_mem_ranges;
 }
 
 static const TypeInfo ich9_lpc_info = {
diff --git a/include/hw/acpi/acpi_dev_interface.h b/include/hw/acpi/acpi_dev_interface.h
index 43ff119179..5956b5ea33 100644
--- a/include/hw/acpi/acpi_dev_interface.h
+++ b/include/hw/acpi/acpi_dev_interface.h
@@ -39,6 +39,8 @@  void acpi_send_event(DeviceState *dev, AcpiEventStatusBits event);
  *           for CPU indexed by @uid in @apic_ids array,
  *           returned structure types are:
  *           0 - Local APIC, 9 - Local x2APIC, 0xB - GICC
+ * build_mem_ranges: build memory ranges of ACPI SRAT (except misc
+ * and hotplug SRAT ranges) and HMAT
  *
  * Interface is designed for providing unified interface
  * to generic ACPI functionality that could be used without
@@ -54,5 +56,7 @@  typedef struct AcpiDeviceIfClass {
     void (*send_event)(AcpiDeviceIf *adev, AcpiEventStatusBits ev);
     void (*madt_cpu)(AcpiDeviceIf *adev, int uid,
                      const CPUArchIdList *apic_ids, GArray *entry);
+    void (*build_mem_ranges)(AcpiDeviceIf *adev, MachineState *ms);
+
 } AcpiDeviceIfClass;
 #endif
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 5d5636241e..21b9ac3d11 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -281,6 +281,7 @@  void pc_system_firmware_init(PCMachineState *pcms, MemoryRegion *rom_memory);
 /* acpi-build.c */
 void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid,
                        const CPUArchIdList *apic_ids, GArray *entry);
+void pc_build_mem_ranges(AcpiDeviceIf *adev, MachineState *ms);
 
 /* e820 types */
 #define E820_RAM        1
diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h
index 437eb21fef..e3c85b77bc 100644
--- a/include/sysemu/numa.h
+++ b/include/sysemu/numa.h
@@ -20,6 +20,12 @@  struct NumaNodeMem {
     uint64_t node_plugged_mem;
 };
 
+typedef struct NumaMemRange {
+    uint64_t base;
+    uint64_t length;
+    uint32_t node;
+} NumaMemRange;
+
 struct NumaState {
     /* Number of NUMA nodes */
     int num_nodes;
@@ -29,6 +35,12 @@  struct NumaState {
 
     /* NUMA nodes information */
     NodeInfo nodes[MAX_NODES];
+
+    /* Number of NUMA memory ranges */
+    uint32_t mem_ranges_num;
+
+    /* NUMA memory ranges */
+    GArray *mem_ranges;
 };
 typedef struct NumaState NumaState;
 
diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs
index 9c7393b08c..4f0cdc1a45 100644
--- a/stubs/Makefile.objs
+++ b/stubs/Makefile.objs
@@ -33,6 +33,7 @@  stub-obj-y += qmp_memory_device.o
 stub-obj-y += target-monitor-defs.o
 stub-obj-y += target-get-monitor-def.o
 stub-obj-y += pc_madt_cpu_entry.o
+stub-obj-y += pc_build_mem_ranges.o
 stub-obj-y += vmgenid.o
 stub-obj-y += xen-common.o
 stub-obj-y += xen-hvm.o
diff --git a/stubs/pc_build_mem_ranges.c b/stubs/pc_build_mem_ranges.c
new file mode 100644
index 0000000000..997cdfe00b
--- /dev/null
+++ b/stubs/pc_build_mem_ranges.c
@@ -0,0 +1,14 @@ 
+/*
+ * Stub for pc_build_mem_ranges().
+ * piix4 is used not only pc, but also mips and etc. In order to add
+ * build_mem_ranges callback to AcpiDeviceIfClass and use pc_build_mem_ranges
+ * in hw/acpi/piix4.c, pc_build_mem_ranges() stub is added to make other arch
+ * can compile successfully.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/i386/pc.h"
+
+void pc_build_mem_ranges(AcpiDeviceIf *adev, MachineState *ms)
+{
+}