diff mbox series

[v12,09/11] hmat acpi: Build System Locality Latency and Bandwidth Information Structure(s)

Message ID 20190920074349.2616-10-tao3.xu@intel.com
State New
Headers show
Series Build ACPI Heterogeneous Memory Attribute Table (HMAT) | expand

Commit Message

Tao Xu Sept. 20, 2019, 7:43 a.m. UTC
From: Liu Jingqi <jingqi.liu@intel.com>

This structure describes the memory access latency and bandwidth
information from various memory access initiator proximity domains.
The latency and bandwidth numbers represented in this structure
correspond to rated latency and bandwidth for the platform.
The software could use this information as hint for optimization.

Signed-off-by: Liu Jingqi <jingqi.liu@intel.com>
Signed-off-by: Tao Xu <tao3.xu@intel.com>
---

Changes in v12:
    - Fix a bug that if HMAT is enabled and without hmat-lb setting,
      QEMU will crash. (reported by Danmei Wei)

Changes in v11:
    - Calculate base in build_hmat_lb().
---
 hw/acpi/hmat.c | 126 ++++++++++++++++++++++++++++++++++++++++++++++++-
 hw/acpi/hmat.h |   2 +
 2 files changed, 127 insertions(+), 1 deletion(-)

Comments

Igor Mammedov Oct. 3, 2019, 2:41 p.m. UTC | #1
On Fri, 20 Sep 2019 15:43:47 +0800
Tao Xu <tao3.xu@intel.com> wrote:

> From: Liu Jingqi <jingqi.liu@intel.com>
> 
> This structure describes the memory access latency and bandwidth
> information from various memory access initiator proximity domains.
> The latency and bandwidth numbers represented in this structure
> correspond to rated latency and bandwidth for the platform.
> The software could use this information as hint for optimization.
> 
> Signed-off-by: Liu Jingqi <jingqi.liu@intel.com>
> Signed-off-by: Tao Xu <tao3.xu@intel.com>
> ---
> 
> Changes in v12:
>     - Fix a bug that if HMAT is enabled and without hmat-lb setting,
>       QEMU will crash. (reported by Danmei Wei)
> 
> Changes in v11:
>     - Calculate base in build_hmat_lb().
> ---
>  hw/acpi/hmat.c | 126 ++++++++++++++++++++++++++++++++++++++++++++++++-
>  hw/acpi/hmat.h |   2 +
>  2 files changed, 127 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c
> index 1368fce7ee..e7be849581 100644
> --- a/hw/acpi/hmat.c
> +++ b/hw/acpi/hmat.c
> @@ -27,6 +27,7 @@
>  #include "qemu/osdep.h"
>  #include "sysemu/numa.h"
>  #include "hw/acpi/hmat.h"
> +#include "qemu/error-report.h"
>  
>  /*
>   * ACPI 6.3:
> @@ -67,11 +68,105 @@ static void build_hmat_mpda(GArray *table_data, uint16_t flags, int initiator,
>      build_append_int_noprefix(table_data, 0, 8);
>  }
>  
> +static bool entry_overflow(uint64_t *lb_data, uint64_t base, int len)
> +{
> +    int i;
> +
> +    for (i = 0; i < len; i++) {
> +        if (lb_data[i] / base >= UINT16_MAX) {
> +            return true;
> +        }
> +    }
> +
> +    return false;
> +}
I suggest to do this check at CLI parsing time

> +/*
> + * ACPI 6.3: 5.2.27.4 System Locality Latency and Bandwidth Information
> + * Structure: Table 5-146
> + */
> +static void build_hmat_lb(GArray *table_data, HMAT_LB_Info *hmat_lb,
> +                          uint32_t num_initiator, uint32_t num_target,
> +                          uint32_t *initiator_list, int type)
> +{
> +    uint8_t mask = 0x0f; 
> +    uint32_t s = num_initiator;
> +    uint32_t t = num_target;
drop this locals and use arguments directly

> +    uint64_t base = 1;
> +    uint64_t *lb_data;
> +    int i, unit;
> +
> +    /* Type */
> +    build_append_int_noprefix(table_data, 1, 2);
> +    /* Reserved */
> +    build_append_int_noprefix(table_data, 0, 2);
> +    /* Length */
> +    build_append_int_noprefix(table_data, 32 + 4 * s + 4 * t + 2 * s * t, 4);
                                             ^^^^
to me above looks like /dev/random output, absolutely unreadable.
Suggest to use local var (like: lb_length) for expression with comments
beside magic numbers.

> +    /* Flags: Bits [3:0] Memory Hierarchy, Bits[7:4] Reserved */
> +    build_append_int_noprefix(table_data, hmat_lb->hierarchy & mask, 1);

why do you need to use mask here?

> +    /* Data Type */
> +    build_append_int_noprefix(table_data, hmat_lb->data_type, 1);

Isn't hmat_lb->data_type and passed argument 'type' the same?


> +    /* Reserved */
> +    build_append_int_noprefix(table_data, 0, 2);
> +    /* Number of Initiator Proximity Domains (s) */
> +    build_append_int_noprefix(table_data, s, 4);
> +    /* Number of Target Proximity Domains (t) */
> +    build_append_int_noprefix(table_data, t, 4);
> +    /* Reserved */
> +    build_append_int_noprefix(table_data, 0, 4);
> +
> +    if (HMAT_IS_LATENCY(type)) {
> +        unit = 1000;
> +        lb_data = hmat_lb->latency;
> +    } else {
> +        unit = 1024;
> +        lb_data = hmat_lb->bandwidth;
> +    }
> +
> +    while (entry_overflow(lb_data, base, s * t)) {
> +        for (i = 0; i < s * t; i++) {
> +            if (!QEMU_IS_ALIGNED(lb_data[i], unit * base)) {
> +                error_report("Invalid latency/bandwidth input, all "
> +                "latencies/bandwidths should be specified in the same units.");
> +                exit(1);
> +            }
> +        }
> +        base *= unit;
> +    }
Can you clarify what you are trying to check here?

> +
> +    /* Entry Base Unit */
> +    build_append_int_noprefix(table_data, base, 8);
> +
> +    /* Initiator Proximity Domain List */
> +    for (i = 0; i < s; i++) {
> +        build_append_int_noprefix(table_data, initiator_list[i], 4);
> +    }
> +
> +    /* Target Proximity Domain List */
> +    for (i = 0; i < t; i++) {
> +        build_append_int_noprefix(table_data, i, 4);
> +    }
> +
> +    /* Latency or Bandwidth Entries */
> +    for (i = 0; i < s * t; i++) {
> +        uint16_t entry;
> +
> +        if (HMAT_IS_LATENCY(type)) {
drop if condition and reuse lb_data, that you've just initialized above


> +            entry = hmat_lb->latency[i] / base;
...
> +            entry = hmat_lb->bandwidth[i] / base;
I'm not sure that above is correct.
Pls clarify math behind above 2 expressions

> +        }
> +
> +        build_append_int_noprefix(table_data, entry, 2);
> +    }
> +}
> +
>  /* Build HMAT sub table structures */
>  static void hmat_build_table_structs(GArray *table_data, NumaState *nstat)
>  {
>      uint16_t flags;
> -    int i;
> +    uint32_t *initiator_list = NULL;
> +    int i, j, hrchy, type;
s/hrchy/hierarchy/

> +    HMAT_LB_Info *numa_hmat_lb;
>  
>      for (i = 0; i < nstat->num_nodes; i++) {
>          flags = 0;
> @@ -82,6 +177,35 @@ static void hmat_build_table_structs(GArray *table_data, NumaState *nstat)
>  
>          build_hmat_mpda(table_data, flags, nstat->nodes[i].initiator, i);
>      }
> +
> +    if (nstat->num_initiator) {
> +        initiator_list = g_malloc0(nstat->num_initiator * sizeof(uint32_t));
> +        for (i = 0, j = 0; i < nstat->num_nodes; i++) {
> +            if (nstat->nodes[i].has_cpu) {
> +                initiator_list[j] = i;
> +                j++;
> +            }
> +        }
> +    }
> +
> +    /*
> +     * ACPI 6.3: 5.2.27.4 System Locality Latency and Bandwidth Information
> +     * Structure: Table 5-146
> +     */
> +    for (hrchy = HMAT_LB_MEM_MEMORY;
> +         hrchy <= HMAT_LB_MEM_CACHE_3RD_LEVEL; hrchy++) {
> +        for (type = HMAT_LB_DATA_ACCESS_LATENCY;
> +             type <= HMAT_LB_DATA_WRITE_BANDWIDTH; type++) {
> +            numa_hmat_lb = nstat->hmat_lb[hrchy][type];
> +
> +            if (numa_hmat_lb) {
> +                build_hmat_lb(table_data, numa_hmat_lb, nstat->num_initiator,
> +                              nstat->num_nodes, initiator_list, type);
> +            }
> +        }
> +    }
> +
> +    g_free(initiator_list);
>  }
>  
>  void build_hmat(GArray *table_data, BIOSLinker *linker, NumaState *nstat)
> diff --git a/hw/acpi/hmat.h b/hw/acpi/hmat.h
> index 0c1839cf6f..1154dfb48e 100644
> --- a/hw/acpi/hmat.h
> +++ b/hw/acpi/hmat.h
> @@ -40,6 +40,8 @@
>   */
>  #define HMAT_PROX_INIT_VALID 0x1
>  
> +#define HMAT_IS_LATENCY(type) (type <= HMAT_LB_DATA_WRITE_LATENCY)

it's not worth to create macro for 1-off calculation, just drop it
and s/if (HMAT_IS_LATENCY(type))/if(type <= HMAT_LB_DATA_WRITE_LATENCY)/

> +
>  void build_hmat(GArray *table_data, BIOSLinker *linker, NumaState *nstat);
>  
>  #endif
Tao Xu Oct. 10, 2019, 6:53 a.m. UTC | #2
On 10/3/2019 10:41 PM, Igor Mammedov wrote:
> On Fri, 20 Sep 2019 15:43:47 +0800
> Tao Xu <tao3.xu@intel.com> wrote:
> 
>> From: Liu Jingqi <jingqi.liu@intel.com>
>>
>> This structure describes the memory access latency and bandwidth
>> information from various memory access initiator proximity domains.
>> The latency and bandwidth numbers represented in this structure
>> correspond to rated latency and bandwidth for the platform.
>> The software could use this information as hint for optimization.
>>
>> Signed-off-by: Liu Jingqi <jingqi.liu@intel.com>
>> Signed-off-by: Tao Xu <tao3.xu@intel.com>
>> ---
>>
>> Changes in v12:
>>      - Fix a bug that if HMAT is enabled and without hmat-lb setting,
>>        QEMU will crash. (reported by Danmei Wei)
>>
>> Changes in v11:
>>      - Calculate base in build_hmat_lb().
>> ---
>>   hw/acpi/hmat.c | 126 ++++++++++++++++++++++++++++++++++++++++++++++++-
>>   hw/acpi/hmat.h |   2 +
>>   2 files changed, 127 insertions(+), 1 deletion(-)
>>
>> diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c
>> index 1368fce7ee..e7be849581 100644
>> --- a/hw/acpi/hmat.c
>> +++ b/hw/acpi/hmat.c
>> @@ -27,6 +27,7 @@
>>   #include "qemu/osdep.h"
>>   #include "sysemu/numa.h"
>>   #include "hw/acpi/hmat.h"
>> +#include "qemu/error-report.h"
>>   
>>   /*
>>    * ACPI 6.3:
>> @@ -67,11 +68,105 @@ static void build_hmat_mpda(GArray *table_data, uint16_t flags, int initiator,
>>       build_append_int_noprefix(table_data, 0, 8);
>>   }
>>   
>> +static bool entry_overflow(uint64_t *lb_data, uint64_t base, int len)
>> +{
>> +    int i;
>> +
>> +    for (i = 0; i < len; i++) {
>> +        if (lb_data[i] / base >= UINT16_MAX) {
>> +            return true;
>> +        }
>> +    }
>> +
>> +    return false;
>> +}
> I suggest to do this check at CLI parsing time
> 
>> +/*
>> + * ACPI 6.3: 5.2.27.4 System Locality Latency and Bandwidth Information
>> + * Structure: Table 5-146
>> + */
>> +static void build_hmat_lb(GArray *table_data, HMAT_LB_Info *hmat_lb,
>> +                          uint32_t num_initiator, uint32_t num_target,
>> +                          uint32_t *initiator_list, int type)
>> +{
>> +    uint8_t mask = 0x0f;
>> +    uint32_t s = num_initiator;
>> +    uint32_t t = num_target;
> drop this locals and use arguments directly
> 
>> +    uint64_t base = 1;
>> +    uint64_t *lb_data;
>> +    int i, unit;
>> +
>> +    /* Type */
>> +    build_append_int_noprefix(table_data, 1, 2);
>> +    /* Reserved */
>> +    build_append_int_noprefix(table_data, 0, 2);
>> +    /* Length */
>> +    build_append_int_noprefix(table_data, 32 + 4 * s + 4 * t + 2 * s * t, 4);
>                                               ^^^^
> to me above looks like /dev/random output, absolutely unreadable.
> Suggest to use local var (like: lb_length) for expression with comments
> beside magic numbers.
> 
>> +    /* Flags: Bits [3:0] Memory Hierarchy, Bits[7:4] Reserved */
>> +    build_append_int_noprefix(table_data, hmat_lb->hierarchy & mask, 1);
> 
> why do you need to use mask here?
> 
Because Bits[7:4] Reserved, so I use mask to keep it reserved.

>> +    /* Data Type */
>> +    build_append_int_noprefix(table_data, hmat_lb->data_type, 1);
> 
> Isn't hmat_lb->data_type and passed argument 'type' the same?
> 
Yes, I will drop 'type'.
> 
>> +    /* Reserved */
>> +    build_append_int_noprefix(table_data, 0, 2);
>> +    /* Number of Initiator Proximity Domains (s) */
>> +    build_append_int_noprefix(table_data, s, 4);
>> +    /* Number of Target Proximity Domains (t) */
>> +    build_append_int_noprefix(table_data, t, 4);
>> +    /* Reserved */
>> +    build_append_int_noprefix(table_data, 0, 4);
>> +
>> +    if (HMAT_IS_LATENCY(type)) {
>> +        unit = 1000;
>> +        lb_data = hmat_lb->latency;
>> +    } else {
>> +        unit = 1024;
>> +        lb_data = hmat_lb->bandwidth;
>> +    }
>> +
>> +    while (entry_overflow(lb_data, base, s * t)) {
>> +        for (i = 0; i < s * t; i++) {
>> +            if (!QEMU_IS_ALIGNED(lb_data[i], unit * base)) {
>> +                error_report("Invalid latency/bandwidth input, all "
>> +                "latencies/bandwidths should be specified in the same units.");
>> +                exit(1);
>> +            }
>> +        }
>> +        base *= unit;
>> +    }
> Can you clarify what you are trying to check here?
> 
This part I use entry_overflow() to check if uint16 can store entry. If 
can't store and the entries matrix can be divisible by unit * base, then 
base will be unit * base.

For example, if lb_data[i] are 1048576(1TB/s) and 1024(1GB/s), unit is 
1024, so 1048576 is bigger than UINT16_MAX, and can be divisible by 1024 
* 1, so base is 1024 and entries are 1024 and 1 (see entry = 
hmat_lb->latency[i] / base;). The benefit is even user input different 
unit(TB/s vs GB/s), we can still store the data as far as possible.

>> +
>> +    /* Entry Base Unit */
>> +    build_append_int_noprefix(table_data, base, 8);
>> +
>> +    /* Initiator Proximity Domain List */
>> +    for (i = 0; i < s; i++) {
>> +        build_append_int_noprefix(table_data, initiator_list[i], 4);
>> +    }
>> +
>> +    /* Target Proximity Domain List */
>> +    for (i = 0; i < t; i++) {
>> +        build_append_int_noprefix(table_data, i, 4);
>> +    }
>> +
>> +    /* Latency or Bandwidth Entries */
>> +    for (i = 0; i < s * t; i++) {
>> +        uint16_t entry;
>> +
>> +        if (HMAT_IS_LATENCY(type)) {
> drop if condition and reuse lb_data, that you've just initialized above
> 
> 
>> +            entry = hmat_lb->latency[i] / base;
> ...
>> +            entry = hmat_lb->bandwidth[i] / base;
> I'm not sure that above is correct.
> Pls clarify math behind above 2 expressions
> 
>> +        }
>> +
>> +        build_append_int_noprefix(table_data, entry, 2);
>> +    }
>> +}
>> +
>>   /* Build HMAT sub table structures */
>>   static void hmat_build_table_structs(GArray *table_data, NumaState *nstat)
>>   {
>>       uint16_t flags;
>> -    int i;
>> +    uint32_t *initiator_list = NULL;
>> +    int i, j, hrchy, type;
> s/hrchy/hierarchy/
> 
>> +    HMAT_LB_Info *numa_hmat_lb;
>>   
>>       for (i = 0; i < nstat->num_nodes; i++) {
>>           flags = 0;
>> @@ -82,6 +177,35 @@ static void hmat_build_table_structs(GArray *table_data, NumaState *nstat)
>>   
>>           build_hmat_mpda(table_data, flags, nstat->nodes[i].initiator, i);
>>       }
>> +
>> +    if (nstat->num_initiator) {
>> +        initiator_list = g_malloc0(nstat->num_initiator * sizeof(uint32_t));
>> +        for (i = 0, j = 0; i < nstat->num_nodes; i++) {
>> +            if (nstat->nodes[i].has_cpu) {
>> +                initiator_list[j] = i;
>> +                j++;
>> +            }
>> +        }
>> +    }
>> +
>> +    /*
>> +     * ACPI 6.3: 5.2.27.4 System Locality Latency and Bandwidth Information
>> +     * Structure: Table 5-146
>> +     */
>> +    for (hrchy = HMAT_LB_MEM_MEMORY;
>> +         hrchy <= HMAT_LB_MEM_CACHE_3RD_LEVEL; hrchy++) {
>> +        for (type = HMAT_LB_DATA_ACCESS_LATENCY;
>> +             type <= HMAT_LB_DATA_WRITE_BANDWIDTH; type++) {
>> +            numa_hmat_lb = nstat->hmat_lb[hrchy][type];
>> +
>> +            if (numa_hmat_lb) {
>> +                build_hmat_lb(table_data, numa_hmat_lb, nstat->num_initiator,
>> +                              nstat->num_nodes, initiator_list, type);
>> +            }
>> +        }
>> +    }
>> +
>> +    g_free(initiator_list);
>>   }
>>   
>>   void build_hmat(GArray *table_data, BIOSLinker *linker, NumaState *nstat)
>> diff --git a/hw/acpi/hmat.h b/hw/acpi/hmat.h
>> index 0c1839cf6f..1154dfb48e 100644
>> --- a/hw/acpi/hmat.h
>> +++ b/hw/acpi/hmat.h
>> @@ -40,6 +40,8 @@
>>    */
>>   #define HMAT_PROX_INIT_VALID 0x1
>>   
>> +#define HMAT_IS_LATENCY(type) (type <= HMAT_LB_DATA_WRITE_LATENCY)
> 
> it's not worth to create macro for 1-off calculation, just drop it
> and s/if (HMAT_IS_LATENCY(type))/if(type <= HMAT_LB_DATA_WRITE_LATENCY)/
> 
>> +
>>   void build_hmat(GArray *table_data, BIOSLinker *linker, NumaState *nstat);
>>   
>>   #endif
>
Igor Mammedov Oct. 11, 2019, 2:08 p.m. UTC | #3
On Thu, 10 Oct 2019 14:53:56 +0800
Tao Xu <tao3.xu@intel.com> wrote:

> On 10/3/2019 10:41 PM, Igor Mammedov wrote:
> > On Fri, 20 Sep 2019 15:43:47 +0800
> > Tao Xu <tao3.xu@intel.com> wrote:
> >   
> >> From: Liu Jingqi <jingqi.liu@intel.com>
> >>
> >> This structure describes the memory access latency and bandwidth
> >> information from various memory access initiator proximity domains.
> >> The latency and bandwidth numbers represented in this structure
> >> correspond to rated latency and bandwidth for the platform.
> >> The software could use this information as hint for optimization.
> >>
> >> Signed-off-by: Liu Jingqi <jingqi.liu@intel.com>
> >> Signed-off-by: Tao Xu <tao3.xu@intel.com>
> >> ---
> >>
> >> Changes in v12:
> >>      - Fix a bug that if HMAT is enabled and without hmat-lb setting,
> >>        QEMU will crash. (reported by Danmei Wei)
> >>
> >> Changes in v11:
> >>      - Calculate base in build_hmat_lb().
> >> ---
> >>   hw/acpi/hmat.c | 126 ++++++++++++++++++++++++++++++++++++++++++++++++-
> >>   hw/acpi/hmat.h |   2 +
> >>   2 files changed, 127 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c
> >> index 1368fce7ee..e7be849581 100644
> >> --- a/hw/acpi/hmat.c
> >> +++ b/hw/acpi/hmat.c
> >> @@ -27,6 +27,7 @@
> >>   #include "qemu/osdep.h"
> >>   #include "sysemu/numa.h"
> >>   #include "hw/acpi/hmat.h"
> >> +#include "qemu/error-report.h"
> >>   
> >>   /*
> >>    * ACPI 6.3:
> >> @@ -67,11 +68,105 @@ static void build_hmat_mpda(GArray *table_data, uint16_t flags, int initiator,
> >>       build_append_int_noprefix(table_data, 0, 8);
> >>   }
> >>   
> >> +static bool entry_overflow(uint64_t *lb_data, uint64_t base, int len)
> >> +{
> >> +    int i;
> >> +
> >> +    for (i = 0; i < len; i++) {
> >> +        if (lb_data[i] / base >= UINT16_MAX) {
> >> +            return true;
> >> +        }
> >> +    }
> >> +
> >> +    return false;
> >> +}  
> > I suggest to do this check at CLI parsing time
> >   
> >> +/*
> >> + * ACPI 6.3: 5.2.27.4 System Locality Latency and Bandwidth Information
> >> + * Structure: Table 5-146
> >> + */
> >> +static void build_hmat_lb(GArray *table_data, HMAT_LB_Info *hmat_lb,
> >> +                          uint32_t num_initiator, uint32_t num_target,
> >> +                          uint32_t *initiator_list, int type)
> >> +{
> >> +    uint8_t mask = 0x0f;
> >> +    uint32_t s = num_initiator;
> >> +    uint32_t t = num_target;  
> > drop this locals and use arguments directly
> >   
> >> +    uint64_t base = 1;
> >> +    uint64_t *lb_data;
> >> +    int i, unit;
> >> +
> >> +    /* Type */
> >> +    build_append_int_noprefix(table_data, 1, 2);
> >> +    /* Reserved */
> >> +    build_append_int_noprefix(table_data, 0, 2);
> >> +    /* Length */
> >> +    build_append_int_noprefix(table_data, 32 + 4 * s + 4 * t + 2 * s * t, 4);  
> >                                               ^^^^
> > to me above looks like /dev/random output, absolutely unreadable.
> > Suggest to use local var (like: lb_length) for expression with comments
> > beside magic numbers.
> >   
> >> +    /* Flags: Bits [3:0] Memory Hierarchy, Bits[7:4] Reserved */
> >> +    build_append_int_noprefix(table_data, hmat_lb->hierarchy & mask, 1);  
> > 
> > why do you need to use mask here?
> >   
> Because Bits[7:4] Reserved, so I use mask to keep it reserved.

these bits are not user provided and set to 0, if they get set it's
programming error and instead of masking problem out QEMU should abort,
I suggest replace masking with assert(!foo>>x).

> 
> >> +    /* Data Type */
> >> +    build_append_int_noprefix(table_data, hmat_lb->data_type, 1);  
> > 
> > Isn't hmat_lb->data_type and passed argument 'type' the same?
> >   
> Yes, I will drop 'type'.
> >   
> >> +    /* Reserved */
> >> +    build_append_int_noprefix(table_data, 0, 2);
> >> +    /* Number of Initiator Proximity Domains (s) */
> >> +    build_append_int_noprefix(table_data, s, 4);
> >> +    /* Number of Target Proximity Domains (t) */
> >> +    build_append_int_noprefix(table_data, t, 4);
> >> +    /* Reserved */
> >> +    build_append_int_noprefix(table_data, 0, 4);
> >> +
> >> +    if (HMAT_IS_LATENCY(type)) {
> >> +        unit = 1000;
> >> +        lb_data = hmat_lb->latency;
> >> +    } else {
> >> +        unit = 1024;
> >> +        lb_data = hmat_lb->bandwidth;
> >> +    }
> >> +
> >> +    while (entry_overflow(lb_data, base, s * t)) {
> >> +        for (i = 0; i < s * t; i++) {
> >> +            if (!QEMU_IS_ALIGNED(lb_data[i], unit * base)) {
> >> +                error_report("Invalid latency/bandwidth input, all "
> >> +                "latencies/bandwidths should be specified in the same units.");
> >> +                exit(1);
> >> +            }
> >> +        }
> >> +        base *= unit;
> >> +    }  
> > Can you clarify what you are trying to check here?
> >   
> This part I use entry_overflow() to check if uint16 can store entry. If 
> can't store and the entries matrix can be divisible by unit * base, then 
> base will be unit * base.
> 
> For example, if lb_data[i] are 1048576(1TB/s) and 1024(1GB/s), unit is 
> 1024, so 1048576 is bigger than UINT16_MAX, and can be divisible by 1024 
> * 1, so base is 1024 and entries are 1024 and 1 (see entry = 
> hmat_lb->latency[i] / base;). The benefit is even user input different 
> unit(TB/s vs GB/s), we can still store the data as far as possible.

Is it possible instead of doing multiple iterations over lb_data
until it finds valid base, just go over lb_data once to find MIN/MAX
and then calculate base using it. Error out with max/min offending
values if it's not possible to compress the range into uint16_t?


> >> +
> >> +    /* Entry Base Unit */
> >> +    build_append_int_noprefix(table_data, base, 8);
> >> +
> >> +    /* Initiator Proximity Domain List */
> >> +    for (i = 0; i < s; i++) {
> >> +        build_append_int_noprefix(table_data, initiator_list[i], 4);
> >> +    }
> >> +
> >> +    /* Target Proximity Domain List */
> >> +    for (i = 0; i < t; i++) {
> >> +        build_append_int_noprefix(table_data, i, 4);
> >> +    }
> >> +
> >> +    /* Latency or Bandwidth Entries */
> >> +    for (i = 0; i < s * t; i++) {
> >> +        uint16_t entry;
> >> +
> >> +        if (HMAT_IS_LATENCY(type)) {  
> > drop if condition and reuse lb_data, that you've just initialized above
> > 
> >   
> >> +            entry = hmat_lb->latency[i] / base;  
> > ...  
> >> +            entry = hmat_lb->bandwidth[i] / base;  
> > I'm not sure that above is correct.
> > Pls clarify math behind above 2 expressions
> >   
> >> +        }
> >> +
> >> +        build_append_int_noprefix(table_data, entry, 2);
> >> +    }
> >> +}
> >> +
> >>   /* Build HMAT sub table structures */
> >>   static void hmat_build_table_structs(GArray *table_data, NumaState *nstat)
> >>   {
> >>       uint16_t flags;
> >> -    int i;
> >> +    uint32_t *initiator_list = NULL;
> >> +    int i, j, hrchy, type;  
> > s/hrchy/hierarchy/
> >   
> >> +    HMAT_LB_Info *numa_hmat_lb;
> >>   
> >>       for (i = 0; i < nstat->num_nodes; i++) {
> >>           flags = 0;
> >> @@ -82,6 +177,35 @@ static void hmat_build_table_structs(GArray *table_data, NumaState *nstat)
> >>   
> >>           build_hmat_mpda(table_data, flags, nstat->nodes[i].initiator, i);
> >>       }
> >> +
> >> +    if (nstat->num_initiator) {
> >> +        initiator_list = g_malloc0(nstat->num_initiator * sizeof(uint32_t));
> >> +        for (i = 0, j = 0; i < nstat->num_nodes; i++) {
> >> +            if (nstat->nodes[i].has_cpu) {
> >> +                initiator_list[j] = i;
> >> +                j++;
> >> +            }
> >> +        }
> >> +    }
> >> +
> >> +    /*
> >> +     * ACPI 6.3: 5.2.27.4 System Locality Latency and Bandwidth Information
> >> +     * Structure: Table 5-146
> >> +     */
> >> +    for (hrchy = HMAT_LB_MEM_MEMORY;
> >> +         hrchy <= HMAT_LB_MEM_CACHE_3RD_LEVEL; hrchy++) {
> >> +        for (type = HMAT_LB_DATA_ACCESS_LATENCY;
> >> +             type <= HMAT_LB_DATA_WRITE_BANDWIDTH; type++) {
> >> +            numa_hmat_lb = nstat->hmat_lb[hrchy][type];
> >> +
> >> +            if (numa_hmat_lb) {
> >> +                build_hmat_lb(table_data, numa_hmat_lb, nstat->num_initiator,
> >> +                              nstat->num_nodes, initiator_list, type);
> >> +            }
> >> +        }
> >> +    }
> >> +
> >> +    g_free(initiator_list);
> >>   }
> >>   
> >>   void build_hmat(GArray *table_data, BIOSLinker *linker, NumaState *nstat)
> >> diff --git a/hw/acpi/hmat.h b/hw/acpi/hmat.h
> >> index 0c1839cf6f..1154dfb48e 100644
> >> --- a/hw/acpi/hmat.h
> >> +++ b/hw/acpi/hmat.h
> >> @@ -40,6 +40,8 @@
> >>    */
> >>   #define HMAT_PROX_INIT_VALID 0x1
> >>   
> >> +#define HMAT_IS_LATENCY(type) (type <= HMAT_LB_DATA_WRITE_LATENCY)  
> > 
> > it's not worth to create macro for 1-off calculation, just drop it
> > and s/if (HMAT_IS_LATENCY(type))/if(type <= HMAT_LB_DATA_WRITE_LATENCY)/
> >   
> >> +
> >>   void build_hmat(GArray *table_data, BIOSLinker *linker, NumaState *nstat);
> >>   
> >>   #endif  
> >   
> 
>
Tao Xu Oct. 12, 2019, 3:04 a.m. UTC | #4
On 10/11/2019 10:08 PM, Igor Mammedov wrote:
> On Thu, 10 Oct 2019 14:53:56 +0800
> Tao Xu <tao3.xu@intel.com> wrote:
> 
>> On 10/3/2019 10:41 PM, Igor Mammedov wrote:
>>> On Fri, 20 Sep 2019 15:43:47 +0800
>>> Tao Xu <tao3.xu@intel.com> wrote:
>>>    
>>>> From: Liu Jingqi <jingqi.liu@intel.com>
>>>>
>>>> This structure describes the memory access latency and bandwidth
>>>> information from various memory access initiator proximity domains.
>>>> The latency and bandwidth numbers represented in this structure
>>>> correspond to rated latency and bandwidth for the platform.
>>>> The software could use this information as hint for optimization.
>>>>
>>>> Signed-off-by: Liu Jingqi <jingqi.liu@intel.com>
>>>> Signed-off-by: Tao Xu <tao3.xu@intel.com>
>>>> ---
>>>>
>>>> Changes in v12:
>>>>       - Fix a bug that if HMAT is enabled and without hmat-lb setting,
>>>>         QEMU will crash. (reported by Danmei Wei)
>>>>
>>>> Changes in v11:
>>>>       - Calculate base in build_hmat_lb().
>>>> ---
>>>>    hw/acpi/hmat.c | 126 ++++++++++++++++++++++++++++++++++++++++++++++++-
>>>>    hw/acpi/hmat.h |   2 +
>>>>    2 files changed, 127 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c
>>>> index 1368fce7ee..e7be849581 100644
>>>> --- a/hw/acpi/hmat.c
>>>> +++ b/hw/acpi/hmat.c
>>>> @@ -27,6 +27,7 @@
>>>>    #include "qemu/osdep.h"
>>>>    #include "sysemu/numa.h"
>>>>    #include "hw/acpi/hmat.h"
>>>> +#include "qemu/error-report.h"
>>>>    
>>>>    /*
>>>>     * ACPI 6.3:
>>>> @@ -67,11 +68,105 @@ static void build_hmat_mpda(GArray *table_data, uint16_t flags, int initiator,
>>>>        build_append_int_noprefix(table_data, 0, 8);
>>>>    }
>>>>    
>>>> +static bool entry_overflow(uint64_t *lb_data, uint64_t base, int len)
>>>> +{
>>>> +    int i;
>>>> +
>>>> +    for (i = 0; i < len; i++) {
>>>> +        if (lb_data[i] / base >= UINT16_MAX) {
>>>> +            return true;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    return false;
>>>> +}
>>> I suggest to do this check at CLI parsing time
>>>    
>>>> +/*
>>>> + * ACPI 6.3: 5.2.27.4 System Locality Latency and Bandwidth Information
>>>> + * Structure: Table 5-146
>>>> + */
>>>> +static void build_hmat_lb(GArray *table_data, HMAT_LB_Info *hmat_lb,
>>>> +                          uint32_t num_initiator, uint32_t num_target,
>>>> +                          uint32_t *initiator_list, int type)
>>>> +{
>>>> +    uint8_t mask = 0x0f;
>>>> +    uint32_t s = num_initiator;
>>>> +    uint32_t t = num_target;
>>> drop this locals and use arguments directly
>>>    
>>>> +    uint64_t base = 1;
>>>> +    uint64_t *lb_data;
>>>> +    int i, unit;
>>>> +
>>>> +    /* Type */
>>>> +    build_append_int_noprefix(table_data, 1, 2);
>>>> +    /* Reserved */
>>>> +    build_append_int_noprefix(table_data, 0, 2);
>>>> +    /* Length */
>>>> +    build_append_int_noprefix(table_data, 32 + 4 * s + 4 * t + 2 * s * t, 4);
>>>                                                ^^^^
>>> to me above looks like /dev/random output, absolutely unreadable.
>>> Suggest to use local var (like: lb_length) for expression with comments
>>> beside magic numbers.
>>>    
>>>> +    /* Flags: Bits [3:0] Memory Hierarchy, Bits[7:4] Reserved */
>>>> +    build_append_int_noprefix(table_data, hmat_lb->hierarchy & mask, 1);
>>>
>>> why do you need to use mask here?
>>>    
>> Because Bits[7:4] Reserved, so I use mask to keep it reserved.
> 
> these bits are not user provided and set to 0, if they get set it's
> programming error and instead of masking problem out QEMU should abort,
> I suggest replace masking with assert(!foo>>x).
> 
>>
>>>> +    /* Data Type */
>>>> +    build_append_int_noprefix(table_data, hmat_lb->data_type, 1);
>>>
>>> Isn't hmat_lb->data_type and passed argument 'type' the same?
>>>    
>> Yes, I will drop 'type'.
>>>    
>>>> +    /* Reserved */
>>>> +    build_append_int_noprefix(table_data, 0, 2);
>>>> +    /* Number of Initiator Proximity Domains (s) */
>>>> +    build_append_int_noprefix(table_data, s, 4);
>>>> +    /* Number of Target Proximity Domains (t) */
>>>> +    build_append_int_noprefix(table_data, t, 4);
>>>> +    /* Reserved */
>>>> +    build_append_int_noprefix(table_data, 0, 4);
>>>> +
>>>> +    if (HMAT_IS_LATENCY(type)) {
>>>> +        unit = 1000;
>>>> +        lb_data = hmat_lb->latency;
>>>> +    } else {
>>>> +        unit = 1024;
>>>> +        lb_data = hmat_lb->bandwidth;
>>>> +    }
>>>> +
>>>> +    while (entry_overflow(lb_data, base, s * t)) {
>>>> +        for (i = 0; i < s * t; i++) {
>>>> +            if (!QEMU_IS_ALIGNED(lb_data[i], unit * base)) {
>>>> +                error_report("Invalid latency/bandwidth input, all "
>>>> +                "latencies/bandwidths should be specified in the same units.");
>>>> +                exit(1);
>>>> +            }
>>>> +        }
>>>> +        base *= unit;
>>>> +    }
>>> Can you clarify what you are trying to check here?
>>>    
>> This part I use entry_overflow() to check if uint16 can store entry. If
>> can't store and the entries matrix can be divisible by unit * base, then
>> base will be unit * base.
>>
>> For example, if lb_data[i] are 1048576(1TB/s) and 1024(1GB/s), unit is
>> 1024, so 1048576 is bigger than UINT16_MAX, and can be divisible by 1024
>> * 1, so base is 1024 and entries are 1024 and 1 (see entry =
>> hmat_lb->latency[i] / base;). The benefit is even user input different
>> unit(TB/s vs GB/s), we can still store the data as far as possible.
> 
> Is it possible instead of doing multiple iterations over lb_data
> until it finds valid base, just go over lb_data once to find MIN/MAX
> and then calculate base using it. Error out with max/min offending
> values if it's not possible to compress the range into uint16_t?
> 

Although we tell user input same unit data, such as use 1GB/s 3GB/s. If 
user input data such as 1048575, 1048576(1TB/s) and 1024(1GB/s), then we 
will get 1024 * (1023 1024 1). I am wondering if it is appropriate 
because we lose a float number(0.999020). But in our codes, it will 
raise error.
Igor Mammedov Oct. 14, 2019, 9 a.m. UTC | #5
On Sat, 12 Oct 2019 11:04:03 +0800
Tao Xu <tao3.xu@intel.com> wrote:

> On 10/11/2019 10:08 PM, Igor Mammedov wrote:
> > On Thu, 10 Oct 2019 14:53:56 +0800
> > Tao Xu <tao3.xu@intel.com> wrote:
> >   
> >> On 10/3/2019 10:41 PM, Igor Mammedov wrote:  
> >>> On Fri, 20 Sep 2019 15:43:47 +0800
> >>> Tao Xu <tao3.xu@intel.com> wrote:
> >>>      
> >>>> From: Liu Jingqi <jingqi.liu@intel.com>
> >>>>
> >>>> This structure describes the memory access latency and bandwidth
> >>>> information from various memory access initiator proximity domains.
> >>>> The latency and bandwidth numbers represented in this structure
> >>>> correspond to rated latency and bandwidth for the platform.
> >>>> The software could use this information as hint for optimization.
> >>>>
> >>>> Signed-off-by: Liu Jingqi <jingqi.liu@intel.com>
> >>>> Signed-off-by: Tao Xu <tao3.xu@intel.com>
> >>>> ---
> >>>>
> >>>> Changes in v12:
> >>>>       - Fix a bug that if HMAT is enabled and without hmat-lb setting,
> >>>>         QEMU will crash. (reported by Danmei Wei)
> >>>>
> >>>> Changes in v11:
> >>>>       - Calculate base in build_hmat_lb().
> >>>> ---
> >>>>    hw/acpi/hmat.c | 126 ++++++++++++++++++++++++++++++++++++++++++++++++-
> >>>>    hw/acpi/hmat.h |   2 +
> >>>>    2 files changed, 127 insertions(+), 1 deletion(-)
> >>>>
> >>>> diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c
> >>>> index 1368fce7ee..e7be849581 100644
> >>>> --- a/hw/acpi/hmat.c
> >>>> +++ b/hw/acpi/hmat.c
> >>>> @@ -27,6 +27,7 @@
> >>>>    #include "qemu/osdep.h"
> >>>>    #include "sysemu/numa.h"
> >>>>    #include "hw/acpi/hmat.h"
> >>>> +#include "qemu/error-report.h"
> >>>>    
> >>>>    /*
> >>>>     * ACPI 6.3:
> >>>> @@ -67,11 +68,105 @@ static void build_hmat_mpda(GArray *table_data, uint16_t flags, int initiator,
> >>>>        build_append_int_noprefix(table_data, 0, 8);
> >>>>    }
> >>>>    
> >>>> +static bool entry_overflow(uint64_t *lb_data, uint64_t base, int len)
> >>>> +{
> >>>> +    int i;
> >>>> +
> >>>> +    for (i = 0; i < len; i++) {
> >>>> +        if (lb_data[i] / base >= UINT16_MAX) {
> >>>> +            return true;
> >>>> +        }
> >>>> +    }
> >>>> +
> >>>> +    return false;
> >>>> +}  
> >>> I suggest to do this check at CLI parsing time
> >>>      
> >>>> +/*
> >>>> + * ACPI 6.3: 5.2.27.4 System Locality Latency and Bandwidth Information
> >>>> + * Structure: Table 5-146
> >>>> + */
> >>>> +static void build_hmat_lb(GArray *table_data, HMAT_LB_Info *hmat_lb,
> >>>> +                          uint32_t num_initiator, uint32_t num_target,
> >>>> +                          uint32_t *initiator_list, int type)
> >>>> +{
> >>>> +    uint8_t mask = 0x0f;
> >>>> +    uint32_t s = num_initiator;
> >>>> +    uint32_t t = num_target;  
> >>> drop this locals and use arguments directly
> >>>      
> >>>> +    uint64_t base = 1;
> >>>> +    uint64_t *lb_data;
> >>>> +    int i, unit;
> >>>> +
> >>>> +    /* Type */
> >>>> +    build_append_int_noprefix(table_data, 1, 2);
> >>>> +    /* Reserved */
> >>>> +    build_append_int_noprefix(table_data, 0, 2);
> >>>> +    /* Length */
> >>>> +    build_append_int_noprefix(table_data, 32 + 4 * s + 4 * t + 2 * s * t, 4);  
> >>>                                                ^^^^
> >>> to me above looks like /dev/random output, absolutely unreadable.
> >>> Suggest to use local var (like: lb_length) for expression with comments
> >>> beside magic numbers.
> >>>      
> >>>> +    /* Flags: Bits [3:0] Memory Hierarchy, Bits[7:4] Reserved */
> >>>> +    build_append_int_noprefix(table_data, hmat_lb->hierarchy & mask, 1);  
> >>>
> >>> why do you need to use mask here?
> >>>      
> >> Because Bits[7:4] Reserved, so I use mask to keep it reserved.  
> > 
> > these bits are not user provided and set to 0, if they get set it's
> > programming error and instead of masking problem out QEMU should abort,
> > I suggest replace masking with assert(!foo>>x).
> >   
> >>  
> >>>> +    /* Data Type */
> >>>> +    build_append_int_noprefix(table_data, hmat_lb->data_type, 1);  
> >>>
> >>> Isn't hmat_lb->data_type and passed argument 'type' the same?
> >>>      
> >> Yes, I will drop 'type'.  
> >>>      
> >>>> +    /* Reserved */
> >>>> +    build_append_int_noprefix(table_data, 0, 2);
> >>>> +    /* Number of Initiator Proximity Domains (s) */
> >>>> +    build_append_int_noprefix(table_data, s, 4);
> >>>> +    /* Number of Target Proximity Domains (t) */
> >>>> +    build_append_int_noprefix(table_data, t, 4);
> >>>> +    /* Reserved */
> >>>> +    build_append_int_noprefix(table_data, 0, 4);
> >>>> +
> >>>> +    if (HMAT_IS_LATENCY(type)) {
> >>>> +        unit = 1000;
> >>>> +        lb_data = hmat_lb->latency;
> >>>> +    } else {
> >>>> +        unit = 1024;
> >>>> +        lb_data = hmat_lb->bandwidth;
> >>>> +    }
> >>>> +
> >>>> +    while (entry_overflow(lb_data, base, s * t)) {
> >>>> +        for (i = 0; i < s * t; i++) {
> >>>> +            if (!QEMU_IS_ALIGNED(lb_data[i], unit * base)) {
> >>>> +                error_report("Invalid latency/bandwidth input, all "
> >>>> +                "latencies/bandwidths should be specified in the same units.");
> >>>> +                exit(1);
> >>>> +            }
> >>>> +        }
> >>>> +        base *= unit;
> >>>> +    }  
> >>> Can you clarify what you are trying to check here?
> >>>      
> >> This part I use entry_overflow() to check if uint16 can store entry. If
> >> can't store and the entries matrix can be divisible by unit * base, then
> >> base will be unit * base.
> >>
> >> For example, if lb_data[i] are 1048576(1TB/s) and 1024(1GB/s), unit is
> >> 1024, so 1048576 is bigger than UINT16_MAX, and can be divisible by 1024
> >> * 1, so base is 1024 and entries are 1024 and 1 (see entry =
> >> hmat_lb->latency[i] / base;). The benefit is even user input different
> >> unit(TB/s vs GB/s), we can still store the data as far as possible.  
> > 
> > Is it possible instead of doing multiple iterations over lb_data
> > until it finds valid base, just go over lb_data once to find MIN/MAX
> > and then calculate base using it. Error out with max/min offending
> > values if it's not possible to compress the range into uint16_t?
> >   
> 
> Although we tell user input same unit data, such as use 1GB/s 3GB/s. If 
> user input data such as 1048575, 1048576(1TB/s) and 1024(1GB/s), then we 
> will get 1024 * (1023 1024 1). I am wondering if it is appropriate 
> because we lose a float number(0.999020). But in our codes, it will 
> raise error. 
I do not understand what you are trying to say here, could you rephrase
it, so the problem would be more clear, please?
Tao Xu Oct. 15, 2019, 12:59 a.m. UTC | #6
On 10/14/2019 5:00 PM, Igor Mammedov wrote:
> On Sat, 12 Oct 2019 11:04:03 +0800
> Tao Xu <tao3.xu@intel.com> wrote:
> 
>> On 10/11/2019 10:08 PM, Igor Mammedov wrote:
>>> On Thu, 10 Oct 2019 14:53:56 +0800
>>> Tao Xu <tao3.xu@intel.com> wrote:
>>>    
>>>> On 10/3/2019 10:41 PM, Igor Mammedov wrote:
>>>>> On Fri, 20 Sep 2019 15:43:47 +0800
>>>>> Tao Xu <tao3.xu@intel.com> wrote:
>>>>>       
>>>>>> From: Liu Jingqi <jingqi.liu@intel.com>
>>>>>>
>>>>>> This structure describes the memory access latency and bandwidth
>>>>>> information from various memory access initiator proximity domains.
>>>>>> The latency and bandwidth numbers represented in this structure
>>>>>> correspond to rated latency and bandwidth for the platform.
>>>>>> The software could use this information as hint for optimization.
>>>>>>
>>>>>> Signed-off-by: Liu Jingqi <jingqi.liu@intel.com>
>>>>>> Signed-off-by: Tao Xu <tao3.xu@intel.com>
>>>>>> ---
>>>>>>
>>>>>> Changes in v12:
>>>>>>        - Fix a bug that if HMAT is enabled and without hmat-lb setting,
>>>>>>          QEMU will crash. (reported by Danmei Wei)
>>>>>>
>>>>>> Changes in v11:
>>>>>>        - Calculate base in build_hmat_lb().
>>>>>> ---
>>>>>>     hw/acpi/hmat.c | 126 ++++++++++++++++++++++++++++++++++++++++++++++++-
>>>>>>     hw/acpi/hmat.h |   2 +
>>>>>>     2 files changed, 127 insertions(+), 1 deletion(-)
>>>>>>
>>>>>> diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c
>>>>>> index 1368fce7ee..e7be849581 100644
>>>>>> --- a/hw/acpi/hmat.c
>>>>>> +++ b/hw/acpi/hmat.c
>>>>>> @@ -27,6 +27,7 @@
>>>>>>     #include "qemu/osdep.h"
>>>>>>     #include "sysemu/numa.h"
>>>>>>     #include "hw/acpi/hmat.h"
>>>>>> +#include "qemu/error-report.h"
>>>>>>     
>>>>>>     /*
>>>>>>      * ACPI 6.3:
>>>>>> @@ -67,11 +68,105 @@ static void build_hmat_mpda(GArray *table_data, uint16_t flags, int initiator,
>>>>>>         build_append_int_noprefix(table_data, 0, 8);
>>>>>>     }
>>>>>>     
>>>>>> +static bool entry_overflow(uint64_t *lb_data, uint64_t base, int len)
>>>>>> +{
>>>>>> +    int i;
>>>>>> +
>>>>>> +    for (i = 0; i < len; i++) {
>>>>>> +        if (lb_data[i] / base >= UINT16_MAX) {
>>>>>> +            return true;
>>>>>> +        }
>>>>>> +    }
>>>>>> +
>>>>>> +    return false;
>>>>>> +}
>>>>> I suggest to do this check at CLI parsing time
>>>>>       
>>>>>> +/*
>>>>>> + * ACPI 6.3: 5.2.27.4 System Locality Latency and Bandwidth Information
>>>>>> + * Structure: Table 5-146
>>>>>> + */
>>>>>> +static void build_hmat_lb(GArray *table_data, HMAT_LB_Info *hmat_lb,
>>>>>> +                          uint32_t num_initiator, uint32_t num_target,
>>>>>> +                          uint32_t *initiator_list, int type)
>>>>>> +{
>>>>>> +    uint8_t mask = 0x0f;
>>>>>> +    uint32_t s = num_initiator;
>>>>>> +    uint32_t t = num_target;
>>>>> drop this locals and use arguments directly
>>>>>       
>>>>>> +    uint64_t base = 1;
>>>>>> +    uint64_t *lb_data;
>>>>>> +    int i, unit;
>>>>>> +
>>>>>> +    /* Type */
>>>>>> +    build_append_int_noprefix(table_data, 1, 2);
>>>>>> +    /* Reserved */
>>>>>> +    build_append_int_noprefix(table_data, 0, 2);
>>>>>> +    /* Length */
>>>>>> +    build_append_int_noprefix(table_data, 32 + 4 * s + 4 * t + 2 * s * t, 4);
>>>>>                                                 ^^^^
>>>>> to me above looks like /dev/random output, absolutely unreadable.
>>>>> Suggest to use local var (like: lb_length) for expression with comments
>>>>> beside magic numbers.
>>>>>       
>>>>>> +    /* Flags: Bits [3:0] Memory Hierarchy, Bits[7:4] Reserved */
>>>>>> +    build_append_int_noprefix(table_data, hmat_lb->hierarchy & mask, 1);
>>>>>
>>>>> why do you need to use mask here?
>>>>>       
>>>> Because Bits[7:4] Reserved, so I use mask to keep it reserved.
>>>
>>> these bits are not user provided and set to 0, if they get set it's
>>> programming error and instead of masking problem out QEMU should abort,
>>> I suggest replace masking with assert(!foo>>x).
>>>    
>>>>   
>>>>>> +    /* Data Type */
>>>>>> +    build_append_int_noprefix(table_data, hmat_lb->data_type, 1);
>>>>>
>>>>> Isn't hmat_lb->data_type and passed argument 'type' the same?
>>>>>       
>>>> Yes, I will drop 'type'.
>>>>>       
>>>>>> +    /* Reserved */
>>>>>> +    build_append_int_noprefix(table_data, 0, 2);
>>>>>> +    /* Number of Initiator Proximity Domains (s) */
>>>>>> +    build_append_int_noprefix(table_data, s, 4);
>>>>>> +    /* Number of Target Proximity Domains (t) */
>>>>>> +    build_append_int_noprefix(table_data, t, 4);
>>>>>> +    /* Reserved */
>>>>>> +    build_append_int_noprefix(table_data, 0, 4);
>>>>>> +
>>>>>> +    if (HMAT_IS_LATENCY(type)) {
>>>>>> +        unit = 1000;
>>>>>> +        lb_data = hmat_lb->latency;
>>>>>> +    } else {
>>>>>> +        unit = 1024;
>>>>>> +        lb_data = hmat_lb->bandwidth;
>>>>>> +    }
>>>>>> +
>>>>>> +    while (entry_overflow(lb_data, base, s * t)) {
>>>>>> +        for (i = 0; i < s * t; i++) {
>>>>>> +            if (!QEMU_IS_ALIGNED(lb_data[i], unit * base)) {
>>>>>> +                error_report("Invalid latency/bandwidth input, all "
>>>>>> +                "latencies/bandwidths should be specified in the same units.");
>>>>>> +                exit(1);
>>>>>> +            }
>>>>>> +        }
>>>>>> +        base *= unit;
>>>>>> +    }
>>>>> Can you clarify what you are trying to check here?
>>>>>       
>>>> This part I use entry_overflow() to check if uint16 can store entry. If
>>>> can't store and the entries matrix can be divisible by unit * base, then
>>>> base will be unit * base.
>>>>
>>>> For example, if lb_data[i] are 1048576(1TB/s) and 1024(1GB/s), unit is
>>>> 1024, so 1048576 is bigger than UINT16_MAX, and can be divisible by 1024
>>>> * 1, so base is 1024 and entries are 1024 and 1 (see entry =
>>>> hmat_lb->latency[i] / base;). The benefit is even user input different
>>>> unit(TB/s vs GB/s), we can still store the data as far as possible.
>>>
>>> Is it possible instead of doing multiple iterations over lb_data
>>> until it finds valid base, just go over lb_data once to find MIN/MAX
>>> and then calculate base using it. Error out with max/min offending
>>> values if it's not possible to compress the range into uint16_t?
>>>    
>>
>> Although we tell user input same unit data, such as use 1GB/s 3GB/s. If
>> user input data such as 1048575, 1048576(1TB/s) and 1024(1GB/s), then we
>> will get 1024 * (1023 1024 1). I am wondering if it is appropriate
>> because we lose a float number(0.999020). But in our codes, it will
>> raise error.
> I do not understand what you are trying to say here, could you rephrase
> it, so the problem would be more clear, please?
> 
Sorry, I mean how we treat the data cannot be divisible if we use 
max/min as base. For another example, If user input the data(including 3 
bandwidths) : 9GB/s 5GB/s 3GB/s. Then max/min result is 3. But entries 
should be uint16, (5GB/s)/3 we can only get 1GB/s, then we should raise 
error(overflow).
But if this patch, we will get the base is 1GB/s.
Tao Xu Oct. 15, 2019, 5:40 a.m. UTC | #7
On 10/15/2019 8:59 AM, Tao Xu wrote:
> On 10/14/2019 5:00 PM, Igor Mammedov wrote:
>> On Sat, 12 Oct 2019 11:04:03 +0800
>> Tao Xu <tao3.xu@intel.com> wrote:
>>
>>> On 10/11/2019 10:08 PM, Igor Mammedov wrote:
>>>> On Thu, 10 Oct 2019 14:53:56 +0800
>>>> Tao Xu <tao3.xu@intel.com> wrote:
>>>>> On 10/3/2019 10:41 PM, Igor Mammedov wrote:
>>>>>> On Fri, 20 Sep 2019 15:43:47 +0800
>>>>>> Tao Xu <tao3.xu@intel.com> wrote:
>>>>>>> From: Liu Jingqi <jingqi.liu@intel.com>
>>>>>>>
>>>>>>> This structure describes the memory access latency and bandwidth
>>>>>>> information from various memory access initiator proximity domains.
>>>>>>> The latency and bandwidth numbers represented in this structure
>>>>>>> correspond to rated latency and bandwidth for the platform.
>>>>>>> The software could use this information as hint for optimization.
>>>>>>>
>>>>>>> Signed-off-by: Liu Jingqi <jingqi.liu@intel.com>
>>>>>>> Signed-off-by: Tao Xu <tao3.xu@intel.com>
>>>>>>> ---
>>>>>>>
>>>>>>> Changes in v12:
>>>>>>>        - Fix a bug that if HMAT is enabled and without hmat-lb 
>>>>>>> setting,
>>>>>>>          QEMU will crash. (reported by Danmei Wei)
>>>>>>>
>>>>>>> Changes in v11:
>>>>>>>        - Calculate base in build_hmat_lb().
>>>>>>> ---
>>>>>>>     hw/acpi/hmat.c | 126 
>>>>>>> ++++++++++++++++++++++++++++++++++++++++++++++++-
>>>>>>>     hw/acpi/hmat.h |   2 +
>>>>>>>     2 files changed, 127 insertions(+), 1 deletion(-)
>>>>>>>
>>>>>>> diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c
>>>>>>> index 1368fce7ee..e7be849581 100644
>>>>>>> --- a/hw/acpi/hmat.c
>>>>>>> +++ b/hw/acpi/hmat.c
>>>>>>> @@ -27,6 +27,7 @@
>>>>>>>     #include "qemu/osdep.h"
>>>>>>>     #include "sysemu/numa.h"
>>>>>>>     #include "hw/acpi/hmat.h"
>>>>>>> +#include "qemu/error-report.h"
>>>>>>>     /*
>>>>>>>      * ACPI 6.3:
>>>>>>> @@ -67,11 +68,105 @@ static void build_hmat_mpda(GArray 
>>>>>>> *table_data, uint16_t flags, int initiator,
>>>>>>>         build_append_int_noprefix(table_data, 0, 8);
>>>>>>>     }
>>>>>>> +static bool entry_overflow(uint64_t *lb_data, uint64_t base, int 
>>>>>>> len)
>>>>>>> +{
>>>>>>> +    int i;
>>>>>>> +
>>>>>>> +    for (i = 0; i < len; i++) {
>>>>>>> +        if (lb_data[i] / base >= UINT16_MAX) {
>>>>>>> +            return true;
>>>>>>> +        }
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    return false;
>>>>>>> +}
>>>>>> I suggest to do this check at CLI parsing time
>>>>>>> +/*
>>>>>>> + * ACPI 6.3: 5.2.27.4 System Locality Latency and Bandwidth 
>>>>>>> Information
>>>>>>> + * Structure: Table 5-146
>>>>>>> + */
>>>>>>> +static void build_hmat_lb(GArray *table_data, HMAT_LB_Info 
>>>>>>> *hmat_lb,
>>>>>>> +                          uint32_t num_initiator, uint32_t 
>>>>>>> num_target,
>>>>>>> +                          uint32_t *initiator_list, int type)
>>>>>>> +{
>>>>>>> +    uint8_t mask = 0x0f;
>>>>>>> +    uint32_t s = num_initiator;
>>>>>>> +    uint32_t t = num_target;
>>>>>> drop this locals and use arguments directly
>>>>>>> +    uint64_t base = 1;
>>>>>>> +    uint64_t *lb_data;
>>>>>>> +    int i, unit;
>>>>>>> +
>>>>>>> +    /* Type */
>>>>>>> +    build_append_int_noprefix(table_data, 1, 2);
>>>>>>> +    /* Reserved */
>>>>>>> +    build_append_int_noprefix(table_data, 0, 2);
>>>>>>> +    /* Length */
>>>>>>> +    build_append_int_noprefix(table_data, 32 + 4 * s + 4 * t + 2 
>>>>>>> * s * t, 4);
>>>>>>                                                 ^^^^
>>>>>> to me above looks like /dev/random output, absolutely unreadable.
>>>>>> Suggest to use local var (like: lb_length) for expression with 
>>>>>> comments
>>>>>> beside magic numbers.
>>>>>>> +    /* Flags: Bits [3:0] Memory Hierarchy, Bits[7:4] Reserved */
>>>>>>> +    build_append_int_noprefix(table_data, hmat_lb->hierarchy & 
>>>>>>> mask, 1);
>>>>>>
>>>>>> why do you need to use mask here?
>>>>> Because Bits[7:4] Reserved, so I use mask to keep it reserved.
>>>>
>>>> these bits are not user provided and set to 0, if they get set it's
>>>> programming error and instead of masking problem out QEMU should abort,
>>>> I suggest replace masking with assert(!foo>>x).
>>>>>>> +    /* Data Type */
>>>>>>> +    build_append_int_noprefix(table_data, hmat_lb->data_type, 1);
>>>>>>
>>>>>> Isn't hmat_lb->data_type and passed argument 'type' the same?
>>>>> Yes, I will drop 'type'.
>>>>>>> +    /* Reserved */
>>>>>>> +    build_append_int_noprefix(table_data, 0, 2);
>>>>>>> +    /* Number of Initiator Proximity Domains (s) */
>>>>>>> +    build_append_int_noprefix(table_data, s, 4);
>>>>>>> +    /* Number of Target Proximity Domains (t) */
>>>>>>> +    build_append_int_noprefix(table_data, t, 4);
>>>>>>> +    /* Reserved */
>>>>>>> +    build_append_int_noprefix(table_data, 0, 4);
>>>>>>> +
>>>>>>> +    if (HMAT_IS_LATENCY(type)) {
>>>>>>> +        unit = 1000;
>>>>>>> +        lb_data = hmat_lb->latency;
>>>>>>> +    } else {
>>>>>>> +        unit = 1024;
>>>>>>> +        lb_data = hmat_lb->bandwidth;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    while (entry_overflow(lb_data, base, s * t)) {
>>>>>>> +        for (i = 0; i < s * t; i++) {
>>>>>>> +            if (!QEMU_IS_ALIGNED(lb_data[i], unit * base)) {
>>>>>>> +                error_report("Invalid latency/bandwidth input, 
>>>>>>> all "
>>>>>>> +                "latencies/bandwidths should be specified in the 
>>>>>>> same units.");
>>>>>>> +                exit(1);
>>>>>>> +            }
>>>>>>> +        }
>>>>>>> +        base *= unit;
>>>>>>> +    }
>>>>>> Can you clarify what you are trying to check here?
>>>>> This part I use entry_overflow() to check if uint16 can store 
>>>>> entry. If
>>>>> can't store and the entries matrix can be divisible by unit * base, 
>>>>> then
>>>>> base will be unit * base.
>>>>>
>>>>> For example, if lb_data[i] are 1048576(1TB/s) and 1024(1GB/s), unit is
>>>>> 1024, so 1048576 is bigger than UINT16_MAX, and can be divisible by 
>>>>> 1024
>>>>> * 1, so base is 1024 and entries are 1024 and 1 (see entry =
>>>>> hmat_lb->latency[i] / base;). The benefit is even user input different
>>>>> unit(TB/s vs GB/s), we can still store the data as far as possible.
>>>>
>>>> Is it possible instead of doing multiple iterations over lb_data
>>>> until it finds valid base, just go over lb_data once to find MIN/MAX
>>>> and then calculate base using it. Error out with max/min offending
>>>> values if it's not possible to compress the range into uint16_t?
>>>
>>> Although we tell user input same unit data, such as use 1GB/s 3GB/s. If
>>> user input data such as 1048575, 1048576(1TB/s) and 1024(1GB/s), then we
>>> will get 1024 * (1023 1024 1). I am wondering if it is appropriate
>>> because we lose a float number(0.999020). But in our codes, it will
>>> raise error.
>> I do not understand what you are trying to say here, could you rephrase
>> it, so the problem would be more clear, please?
>>
> Sorry, I mean how we treat the data cannot be divisible if we use 
> max/min as base. For another example, If user input the data(including 3 
> bandwidths) : 9GB/s 5GB/s 3GB/s. Then max/min result is 3. But entries 
> should be uint16, (5GB/s)/3 we can only get 1GB/s, then we should raise 
> error(overflow).
> But if this patch, we will get the base is 1GB/s.
I understand the MIN/MAX means, in the case above, we get MAX is 9GB/s, 
MIN is 3GB/s, then I use code below to calculate :

     while (max_data >= UINT16_MAX) {
         if (!QEMU_IS_ALIGNED(max_data, unit * base) ||
             !QEMU_IS_ALIGNED(min_data, unit * base) {
                 error_report("Invalid latency/bandwidth input.");
                 exit(1);
         }
         base *= unit;
     }
Igor Mammedov Oct. 17, 2019, 2:17 p.m. UTC | #8
On Tue, 15 Oct 2019 13:40:54 +0800
Tao Xu <tao3.xu@intel.com> wrote:

> On 10/15/2019 8:59 AM, Tao Xu wrote:
> > On 10/14/2019 5:00 PM, Igor Mammedov wrote:  
> >> On Sat, 12 Oct 2019 11:04:03 +0800
> >> Tao Xu <tao3.xu@intel.com> wrote:
> >>  
> >>> On 10/11/2019 10:08 PM, Igor Mammedov wrote:  
> >>>> On Thu, 10 Oct 2019 14:53:56 +0800
> >>>> Tao Xu <tao3.xu@intel.com> wrote:  
> >>>>> On 10/3/2019 10:41 PM, Igor Mammedov wrote:  
> >>>>>> On Fri, 20 Sep 2019 15:43:47 +0800
> >>>>>> Tao Xu <tao3.xu@intel.com> wrote:  
> >>>>>>> From: Liu Jingqi <jingqi.liu@intel.com>
> >>>>>>>
> >>>>>>> This structure describes the memory access latency and bandwidth
> >>>>>>> information from various memory access initiator proximity domains.
> >>>>>>> The latency and bandwidth numbers represented in this structure
> >>>>>>> correspond to rated latency and bandwidth for the platform.
> >>>>>>> The software could use this information as hint for optimization.
> >>>>>>>
> >>>>>>> Signed-off-by: Liu Jingqi <jingqi.liu@intel.com>
> >>>>>>> Signed-off-by: Tao Xu <tao3.xu@intel.com>
> >>>>>>> ---
> >>>>>>>
> >>>>>>> Changes in v12:
> >>>>>>>        - Fix a bug that if HMAT is enabled and without hmat-lb 
> >>>>>>> setting,
> >>>>>>>          QEMU will crash. (reported by Danmei Wei)
> >>>>>>>
> >>>>>>> Changes in v11:
> >>>>>>>        - Calculate base in build_hmat_lb().
> >>>>>>> ---
> >>>>>>>     hw/acpi/hmat.c | 126 
> >>>>>>> ++++++++++++++++++++++++++++++++++++++++++++++++-
> >>>>>>>     hw/acpi/hmat.h |   2 +
> >>>>>>>     2 files changed, 127 insertions(+), 1 deletion(-)
> >>>>>>>
> >>>>>>> diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c
> >>>>>>> index 1368fce7ee..e7be849581 100644
> >>>>>>> --- a/hw/acpi/hmat.c
> >>>>>>> +++ b/hw/acpi/hmat.c
> >>>>>>> @@ -27,6 +27,7 @@
> >>>>>>>     #include "qemu/osdep.h"
> >>>>>>>     #include "sysemu/numa.h"
> >>>>>>>     #include "hw/acpi/hmat.h"
> >>>>>>> +#include "qemu/error-report.h"
> >>>>>>>     /*
> >>>>>>>      * ACPI 6.3:
> >>>>>>> @@ -67,11 +68,105 @@ static void build_hmat_mpda(GArray 
> >>>>>>> *table_data, uint16_t flags, int initiator,
> >>>>>>>         build_append_int_noprefix(table_data, 0, 8);
> >>>>>>>     }
> >>>>>>> +static bool entry_overflow(uint64_t *lb_data, uint64_t base, int 
> >>>>>>> len)
> >>>>>>> +{
> >>>>>>> +    int i;
> >>>>>>> +
> >>>>>>> +    for (i = 0; i < len; i++) {
> >>>>>>> +        if (lb_data[i] / base >= UINT16_MAX) {
> >>>>>>> +            return true;
> >>>>>>> +        }
> >>>>>>> +    }
> >>>>>>> +
> >>>>>>> +    return false;
> >>>>>>> +}  
> >>>>>> I suggest to do this check at CLI parsing time  
> >>>>>>> +/*
> >>>>>>> + * ACPI 6.3: 5.2.27.4 System Locality Latency and Bandwidth 
> >>>>>>> Information
> >>>>>>> + * Structure: Table 5-146
> >>>>>>> + */
> >>>>>>> +static void build_hmat_lb(GArray *table_data, HMAT_LB_Info 
> >>>>>>> *hmat_lb,
> >>>>>>> +                          uint32_t num_initiator, uint32_t 
> >>>>>>> num_target,
> >>>>>>> +                          uint32_t *initiator_list, int type)
> >>>>>>> +{
> >>>>>>> +    uint8_t mask = 0x0f;
> >>>>>>> +    uint32_t s = num_initiator;
> >>>>>>> +    uint32_t t = num_target;  
> >>>>>> drop this locals and use arguments directly  
> >>>>>>> +    uint64_t base = 1;
> >>>>>>> +    uint64_t *lb_data;
> >>>>>>> +    int i, unit;
> >>>>>>> +
> >>>>>>> +    /* Type */
> >>>>>>> +    build_append_int_noprefix(table_data, 1, 2);
> >>>>>>> +    /* Reserved */
> >>>>>>> +    build_append_int_noprefix(table_data, 0, 2);
> >>>>>>> +    /* Length */
> >>>>>>> +    build_append_int_noprefix(table_data, 32 + 4 * s + 4 * t + 2 
> >>>>>>> * s * t, 4);  
> >>>>>>                                                 ^^^^
> >>>>>> to me above looks like /dev/random output, absolutely unreadable.
> >>>>>> Suggest to use local var (like: lb_length) for expression with 
> >>>>>> comments
> >>>>>> beside magic numbers.  
> >>>>>>> +    /* Flags: Bits [3:0] Memory Hierarchy, Bits[7:4] Reserved */
> >>>>>>> +    build_append_int_noprefix(table_data, hmat_lb->hierarchy & 
> >>>>>>> mask, 1);  
> >>>>>>
> >>>>>> why do you need to use mask here?  
> >>>>> Because Bits[7:4] Reserved, so I use mask to keep it reserved.  
> >>>>
> >>>> these bits are not user provided and set to 0, if they get set it's
> >>>> programming error and instead of masking problem out QEMU should abort,
> >>>> I suggest replace masking with assert(!foo>>x).  
> >>>>>>> +    /* Data Type */
> >>>>>>> +    build_append_int_noprefix(table_data, hmat_lb->data_type, 1);  
> >>>>>>
> >>>>>> Isn't hmat_lb->data_type and passed argument 'type' the same?  
> >>>>> Yes, I will drop 'type'.  
> >>>>>>> +    /* Reserved */
> >>>>>>> +    build_append_int_noprefix(table_data, 0, 2);
> >>>>>>> +    /* Number of Initiator Proximity Domains (s) */
> >>>>>>> +    build_append_int_noprefix(table_data, s, 4);
> >>>>>>> +    /* Number of Target Proximity Domains (t) */
> >>>>>>> +    build_append_int_noprefix(table_data, t, 4);
> >>>>>>> +    /* Reserved */
> >>>>>>> +    build_append_int_noprefix(table_data, 0, 4);
> >>>>>>> +
> >>>>>>> +    if (HMAT_IS_LATENCY(type)) {
> >>>>>>> +        unit = 1000;
> >>>>>>> +        lb_data = hmat_lb->latency;
> >>>>>>> +    } else {
> >>>>>>> +        unit = 1024;
> >>>>>>> +        lb_data = hmat_lb->bandwidth;
> >>>>>>> +    }
> >>>>>>> +
> >>>>>>> +    while (entry_overflow(lb_data, base, s * t)) {
> >>>>>>> +        for (i = 0; i < s * t; i++) {
> >>>>>>> +            if (!QEMU_IS_ALIGNED(lb_data[i], unit * base)) {
> >>>>>>> +                error_report("Invalid latency/bandwidth input, 
> >>>>>>> all "
> >>>>>>> +                "latencies/bandwidths should be specified in the 
> >>>>>>> same units.");
> >>>>>>> +                exit(1);
> >>>>>>> +            }
> >>>>>>> +        }
> >>>>>>> +        base *= unit;
> >>>>>>> +    }  
> >>>>>> Can you clarify what you are trying to check here?  
> >>>>> This part I use entry_overflow() to check if uint16 can store 
> >>>>> entry. If
> >>>>> can't store and the entries matrix can be divisible by unit * base, 
> >>>>> then
> >>>>> base will be unit * base.
> >>>>>
> >>>>> For example, if lb_data[i] are 1048576(1TB/s) and 1024(1GB/s), unit is
> >>>>> 1024, so 1048576 is bigger than UINT16_MAX, and can be divisible by 
> >>>>> 1024
> >>>>> * 1, so base is 1024 and entries are 1024 and 1 (see entry =
> >>>>> hmat_lb->latency[i] / base;). The benefit is even user input different
> >>>>> unit(TB/s vs GB/s), we can still store the data as far as possible.  
> >>>>
> >>>> Is it possible instead of doing multiple iterations over lb_data
> >>>> until it finds valid base, just go over lb_data once to find MIN/MAX
> >>>> and then calculate base using it. Error out with max/min offending
> >>>> values if it's not possible to compress the range into uint16_t?  
> >>>
> >>> Although we tell user input same unit data, such as use 1GB/s 3GB/s. If
> >>> user input data such as 1048575, 1048576(1TB/s) and 1024(1GB/s), then we
> >>> will get 1024 * (1023 1024 1). I am wondering if it is appropriate
> >>> because we lose a float number(0.999020). But in our codes, it will
> >>> raise error.  
> >> I do not understand what you are trying to say here, could you rephrase
> >> it, so the problem would be more clear, please?
> >>  
> > Sorry, I mean how we treat the data cannot be divisible if we use 
> > max/min as base. For another example, If user input the data(including 3 
> > bandwidths) : 9GB/s 5GB/s 3GB/s. Then max/min result is 3. But entries 
> > should be uint16, (5GB/s)/3 we can only get 1GB/s, then we should raise 
> > error(overflow).
> > But if this patch, we will get the base is 1GB/s.  
> I understand the MIN/MAX means, in the case above, we get MAX is 9GB/s, 
> MIN is 3GB/s, then I use code below to calculate :
> 
>      while (max_data >= UINT16_MAX) {
>          if (!QEMU_IS_ALIGNED(max_data, unit * base) ||
>              !QEMU_IS_ALIGNED(min_data, unit * base) {
>                  error_report("Invalid latency/bandwidth input.");
>                  exit(1);
>          }
>          base *= unit;
>      }
this check won't cover, entries in between min and max.
Maybe using range bitmap the time of parsing bandwidth/latency CLI option
would work:

   parse_numa_hmat_lb(...) {
      ...
      if (bw && !ALIGNED(value, 1MB))
          error fatal("should be 1MB aligned")

      sub_table->range_bitmap |= value;

      last_bit = find_last_bit(sub_table->range_bitmap)
      first_bit = find_first_bit(sub_table->range_bitmap)
      if ((last_bit - first_bit) > UINT16_BITS)
          error_fatal("value (%d) should not differ from
                      previously entered values on more that UNINT16_MAX")

      sub_table->base = bit_2_base(first_bit)
      sub_table[x] = value
      ...
   }

it should
  1: error out at the first option which value deviates too
     much from previously parsed options for sub-table
  2: recalculate 'base' value for sub-table
diff mbox series

Patch

diff --git a/hw/acpi/hmat.c b/hw/acpi/hmat.c
index 1368fce7ee..e7be849581 100644
--- a/hw/acpi/hmat.c
+++ b/hw/acpi/hmat.c
@@ -27,6 +27,7 @@ 
 #include "qemu/osdep.h"
 #include "sysemu/numa.h"
 #include "hw/acpi/hmat.h"
+#include "qemu/error-report.h"
 
 /*
  * ACPI 6.3:
@@ -67,11 +68,105 @@  static void build_hmat_mpda(GArray *table_data, uint16_t flags, int initiator,
     build_append_int_noprefix(table_data, 0, 8);
 }
 
+static bool entry_overflow(uint64_t *lb_data, uint64_t base, int len)
+{
+    int i;
+
+    for (i = 0; i < len; i++) {
+        if (lb_data[i] / base >= UINT16_MAX) {
+            return true;
+        }
+    }
+
+    return false;
+}
+/*
+ * ACPI 6.3: 5.2.27.4 System Locality Latency and Bandwidth Information
+ * Structure: Table 5-146
+ */
+static void build_hmat_lb(GArray *table_data, HMAT_LB_Info *hmat_lb,
+                          uint32_t num_initiator, uint32_t num_target,
+                          uint32_t *initiator_list, int type)
+{
+    uint8_t mask = 0x0f;
+    uint32_t s = num_initiator;
+    uint32_t t = num_target;
+    uint64_t base = 1;
+    uint64_t *lb_data;
+    int i, unit;
+
+    /* Type */
+    build_append_int_noprefix(table_data, 1, 2);
+    /* Reserved */
+    build_append_int_noprefix(table_data, 0, 2);
+    /* Length */
+    build_append_int_noprefix(table_data, 32 + 4 * s + 4 * t + 2 * s * t, 4);
+    /* Flags: Bits [3:0] Memory Hierarchy, Bits[7:4] Reserved */
+    build_append_int_noprefix(table_data, hmat_lb->hierarchy & mask, 1);
+    /* Data Type */
+    build_append_int_noprefix(table_data, hmat_lb->data_type, 1);
+    /* Reserved */
+    build_append_int_noprefix(table_data, 0, 2);
+    /* Number of Initiator Proximity Domains (s) */
+    build_append_int_noprefix(table_data, s, 4);
+    /* Number of Target Proximity Domains (t) */
+    build_append_int_noprefix(table_data, t, 4);
+    /* Reserved */
+    build_append_int_noprefix(table_data, 0, 4);
+
+    if (HMAT_IS_LATENCY(type)) {
+        unit = 1000;
+        lb_data = hmat_lb->latency;
+    } else {
+        unit = 1024;
+        lb_data = hmat_lb->bandwidth;
+    }
+
+    while (entry_overflow(lb_data, base, s * t)) {
+        for (i = 0; i < s * t; i++) {
+            if (!QEMU_IS_ALIGNED(lb_data[i], unit * base)) {
+                error_report("Invalid latency/bandwidth input, all "
+                "latencies/bandwidths should be specified in the same units.");
+                exit(1);
+            }
+        }
+        base *= unit;
+    }
+
+    /* Entry Base Unit */
+    build_append_int_noprefix(table_data, base, 8);
+
+    /* Initiator Proximity Domain List */
+    for (i = 0; i < s; i++) {
+        build_append_int_noprefix(table_data, initiator_list[i], 4);
+    }
+
+    /* Target Proximity Domain List */
+    for (i = 0; i < t; i++) {
+        build_append_int_noprefix(table_data, i, 4);
+    }
+
+    /* Latency or Bandwidth Entries */
+    for (i = 0; i < s * t; i++) {
+        uint16_t entry;
+
+        if (HMAT_IS_LATENCY(type)) {
+            entry = hmat_lb->latency[i] / base;
+        } else {
+            entry = hmat_lb->bandwidth[i] / base;
+        }
+
+        build_append_int_noprefix(table_data, entry, 2);
+    }
+}
+
 /* Build HMAT sub table structures */
 static void hmat_build_table_structs(GArray *table_data, NumaState *nstat)
 {
     uint16_t flags;
-    int i;
+    uint32_t *initiator_list = NULL;
+    int i, j, hrchy, type;
+    HMAT_LB_Info *numa_hmat_lb;
 
     for (i = 0; i < nstat->num_nodes; i++) {
         flags = 0;
@@ -82,6 +177,35 @@  static void hmat_build_table_structs(GArray *table_data, NumaState *nstat)
 
         build_hmat_mpda(table_data, flags, nstat->nodes[i].initiator, i);
     }
+
+    if (nstat->num_initiator) {
+        initiator_list = g_malloc0(nstat->num_initiator * sizeof(uint32_t));
+        for (i = 0, j = 0; i < nstat->num_nodes; i++) {
+            if (nstat->nodes[i].has_cpu) {
+                initiator_list[j] = i;
+                j++;
+            }
+        }
+    }
+
+    /*
+     * ACPI 6.3: 5.2.27.4 System Locality Latency and Bandwidth Information
+     * Structure: Table 5-146
+     */
+    for (hrchy = HMAT_LB_MEM_MEMORY;
+         hrchy <= HMAT_LB_MEM_CACHE_3RD_LEVEL; hrchy++) {
+        for (type = HMAT_LB_DATA_ACCESS_LATENCY;
+             type <= HMAT_LB_DATA_WRITE_BANDWIDTH; type++) {
+            numa_hmat_lb = nstat->hmat_lb[hrchy][type];
+
+            if (numa_hmat_lb) {
+                build_hmat_lb(table_data, numa_hmat_lb, nstat->num_initiator,
+                              nstat->num_nodes, initiator_list, type);
+            }
+        }
+    }
+
+    g_free(initiator_list);
 }
 
 void build_hmat(GArray *table_data, BIOSLinker *linker, NumaState *nstat)
diff --git a/hw/acpi/hmat.h b/hw/acpi/hmat.h
index 0c1839cf6f..1154dfb48e 100644
--- a/hw/acpi/hmat.h
+++ b/hw/acpi/hmat.h
@@ -40,6 +40,8 @@ 
  */
 #define HMAT_PROX_INIT_VALID 0x1
 
+#define HMAT_IS_LATENCY(type) (type <= HMAT_LB_DATA_WRITE_LATENCY)
+
 void build_hmat(GArray *table_data, BIOSLinker *linker, NumaState *nstat);
 
 #endif