diff mbox series

[v9,3/7] i386: Populate AMD Processor Cache Information for cpuid 0x8000001D

Message ID 20180514164156.27034-4-babu.moger@amd.com
State New
Headers show
Series i386: Enable TOPOEXT to support hyperthreading on AMD CPU | expand

Commit Message

Moger, Babu May 14, 2018, 4:41 p.m. UTC
Add information for the cpuid 0x8000001D leaf. Populate cache topology information
for the different cache types (Data Cache, Instruction Cache, L2 and L3) supported
by the 0x8000001D leaf. Please refer to the Processor Programming Reference (PPR) for
AMD Family 17h Model for more details.

Signed-off-by: Babu Moger <babu.moger@amd.com>
Tested-by: Geoffrey McRae <geoff@hostfission.com>
---
 target/i386/cpu.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++
 target/i386/kvm.c | 29 +++++++++++++++--
 2 files changed, 105 insertions(+), 3 deletions(-)

Comments

Eduardo Habkost May 14, 2018, 7:47 p.m. UTC | #1
On Mon, May 14, 2018 at 11:41:52AM -0500, Babu Moger wrote:
> Add information for cpuid 0x8000001D leaf. Populate cache topology information
> for different cache types(Data Cache, Instruction Cache, L2 and L3) supported
> by 0x8000001D leaf. Please refer Processor Programming Reference (PPR) for AMD
> Family 17h Model for more details.
> 
> Signed-off-by: Babu Moger <babu.moger@amd.com>
> Tested-by: Geoffrey McRae <geoff@hostfission.com>
> ---
>  target/i386/cpu.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++
>  target/i386/kvm.c | 29 +++++++++++++++--
>  2 files changed, 105 insertions(+), 3 deletions(-)
> 
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index e1daff37ab..7f40241786 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -307,6 +307,14 @@ static uint32_t encode_cache_cpuid80000005(CPUCacheInfo *cache)
>                            a == ASSOC_FULL ? 0xF : \
>                            0 /* invalid value */)
>  
> +/* Definitions used on CPUID Leaf 0x8000001D */
> +/* Number of logical cores in a complex */
> +#define CORES_IN_CMPLX  4

Number of cores is configurable in QEMU, so we can't hardcode
this.

I understand you want to make it match the hardware as close as
possible (as you noted in your reply on v7), but this should be
done by simply configuring QEMU as closely to the hardware as
possible.


> +/* Number of logical processors sharing cache */
> +#define NUM_SHARING_CACHE(threads)   ((threads > 1) ? \
> +                         (((CORES_IN_CMPLX - 1) * threads) + 1)  : \
> +                         (CORES_IN_CMPLX - 1))

I don't see why the check for threads > 1, here.  Why not simply
write this as:

  ((nr_cores * nr_threads) - 1)

which will work for any cores/threads value?

(Or the function could just get nr_logical_cpus argument like I
suggested on v7, to make the code here simpler.)


> +
>  /*
>   * Encode cache info for CPUID[0x80000006].ECX and CPUID[0x80000006].EDX
>   * @l3 can be NULL.
> @@ -336,6 +344,41 @@ static void encode_cache_cpuid80000006(CPUCacheInfo *l2,
>      }
>  }
>  
> +/* Encode cache info for CPUID[8000001D] */
> +static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, int nr_threads,
> +                                uint32_t *eax, uint32_t *ebx,
> +                                uint32_t *ecx, uint32_t *edx)
> +{
> +    assert(cache->size == cache->line_size * cache->associativity *
> +                          cache->partitions * cache->sets);
> +
> +    *eax = CACHE_TYPE(cache->type) | CACHE_LEVEL(cache->level) |
> +               (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0);
> +
> +    /* L3 is shared among multiple cores */
> +    if (cache->level == 3) {
> +        *eax |= (NUM_SHARING_CACHE(nr_threads) << 14);
> +    } else {
> +        *eax |= ((nr_threads - 1) << 14);
> +    }
> +
> +    assert(cache->line_size > 0);
> +    assert(cache->partitions > 0);
> +    assert(cache->associativity > 0);
> +    /* We don't implement fully-associative caches */
> +    assert(cache->associativity < cache->sets);
> +    *ebx = (cache->line_size - 1) |
> +           ((cache->partitions - 1) << 12) |
> +           ((cache->associativity - 1) << 22);
> +
> +    assert(cache->sets > 0);
> +    *ecx = cache->sets - 1;
> +
> +    *edx = (cache->no_invd_sharing ? CACHE_NO_INVD_SHARING : 0) |
> +           (cache->inclusive ? CACHE_INCLUSIVE : 0) |
> +           (cache->complex_indexing ? CACHE_COMPLEX_IDX : 0);
> +}
> +
>  /*
>   * Definitions of the hardcoded cache entries we expose:
>   * These are legacy cache values. If there is a need to change any
> @@ -4035,6 +4078,42 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
>              *edx = 0;
>          }
>          break;
> +    case 0x8000001D:
> +        *eax = 0;
> +        CPUCacheInfo *l1d, *l1i, *l2, *l3;
> +        if (env->cache_info && !cpu->legacy_cache) {
> +            l1d = &env->cache_info->l1d_cache;
> +            l1i = &env->cache_info->l1i_cache;
> +            l2 = &env->cache_info->l2_cache;
> +            l3 = &env->cache_info->l3_cache;
> +        } else {
> +            l1d = &legacy_l1d_cache_amd;
> +            l1i = &legacy_l1i_cache_amd;
> +            l2 = &legacy_l2_cache_amd;
> +            l3 = &legacy_l3_cache;
> +        }
> +        switch (count) {
> +        case 0: /* L1 dcache info */
> +            encode_cache_cpuid8000001d(l1d, cs->nr_threads,
> +                                       eax, ebx, ecx, edx);
> +            break;
> +        case 1: /* L1 icache info */
> +            encode_cache_cpuid8000001d(l1i, cs->nr_threads,
> +                                       eax, ebx, ecx, edx);
> +            break;
> +        case 2: /* L2 cache info */
> +            encode_cache_cpuid8000001d(l2, cs->nr_threads,
> +                                       eax, ebx, ecx, edx);
> +            break;
> +        case 3: /* L3 cache info */
> +            encode_cache_cpuid8000001d(l3, cs->nr_threads,
> +                                       eax, ebx, ecx, edx);
> +            break;
> +        default: /* end of info */
> +            *eax = *ebx = *ecx = *edx = 0;
> +            break;
> +        }
> +        break;
>      case 0xC0000000:
>          *eax = env->cpuid_xlevel2;
>          *ebx = 0;
> diff --git a/target/i386/kvm.c b/target/i386/kvm.c
> index 6c49954e68..6e66f9c51d 100644
> --- a/target/i386/kvm.c
> +++ b/target/i386/kvm.c
> @@ -967,9 +967,32 @@ int kvm_arch_init_vcpu(CPUState *cs)
>          }
>          c = &cpuid_data.entries[cpuid_i++];
>  
> -        c->function = i;
> -        c->flags = 0;
> -        cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
> +        switch (i) {
> +        case 0x8000001d:
> +            /* Query for all AMD cache information leaves */
> +            for (j = 0; ; j++) {
> +                c->function = i;
> +                c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
> +                c->index = j;
> +                cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
> +
> +                if (c->eax == 0) {
> +                    break;
> +                }
> +                if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
> +                    fprintf(stderr, "cpuid_data is full, no space for "
> +                            "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
> +                    abort();
> +                }
> +                c = &cpuid_data.entries[cpuid_i++];
> +            }
> +            break;
> +        default:
> +            c->function = i;
> +            c->flags = 0;
> +            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
> +            break;
> +        }
>      }
>  
>      /* Call Centaur's CPUID instructions they are supported. */
> -- 
> 2.17.0
>
Moger, Babu May 14, 2018, 11:49 p.m. UTC | #2
> -----Original Message-----
> From: Eduardo Habkost [mailto:ehabkost@redhat.com]
> Sent: Monday, May 14, 2018 2:47 PM
> To: Moger, Babu <Babu.Moger@amd.com>
> Cc: mst@redhat.com; marcel.apfelbaum@gmail.com; pbonzini@redhat.com;
> rth@twiddle.net; mtosatti@redhat.com; qemu-devel@nongnu.org;
> kvm@vger.kernel.org; geoff@hostfission.com; kash@tripleback.net
> Subject: Re: [PATCH v9 3/7] i386: Populate AMD Processor Cache Information
> for cpuid 0x8000001D
> 
> On Mon, May 14, 2018 at 11:41:52AM -0500, Babu Moger wrote:
> > Add information for cpuid 0x8000001D leaf. Populate cache topology
> information
> > for different cache types(Data Cache, Instruction Cache, L2 and L3)
> supported
> > by 0x8000001D leaf. Please refer Processor Programming Reference (PPR)
> for AMD
> > Family 17h Model for more details.
> >
> > Signed-off-by: Babu Moger <babu.moger@amd.com>
> > Tested-by: Geoffrey McRae <geoff@hostfission.com>
> > ---
> >  target/i386/cpu.c | 79
> +++++++++++++++++++++++++++++++++++++++++++++++
> >  target/i386/kvm.c | 29 +++++++++++++++--
> >  2 files changed, 105 insertions(+), 3 deletions(-)
> >
> > diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> > index e1daff37ab..7f40241786 100644
> > --- a/target/i386/cpu.c
> > +++ b/target/i386/cpu.c
> > @@ -307,6 +307,14 @@ static uint32_t
> encode_cache_cpuid80000005(CPUCacheInfo *cache)
> >                            a == ASSOC_FULL ? 0xF : \
> >                            0 /* invalid value */)
> >
> > +/* Definitions used on CPUID Leaf 0x8000001D */
> > +/* Number of logical cores in a complex */
> > +#define CORES_IN_CMPLX  4
> 
> Number of cores is configurable in QEMU, so we can't hardcode
> this.

In the EPYC architecture, a single die has 2 core complexes.
Each core complex has at most 4 cores (CORES_IN_CMPLX).
Without SMT (threads=1), L3 is shared between 4 (4x1) logical processors.
   NUM_SHARING_CACHE should be 3.
With SMT (threads=2), L3 is shared between 8 (4x2) logical processors.
  NUM_SHARING_CACHE should be 7.
This is what we are trying to achieve here. This is a fixed h/w configuration.

> 
> I understand you want to make it match the hardware as close as
> possible (as you noted in your reply on v7), but this should be
> done by simply configuring QEMU as closely to the hardware as
> possible.
> 
> 
> > +/* Number of logical processors sharing cache */
> > +#define NUM_SHARING_CACHE(threads)   ((threads > 1) ? \
> > +                         (((CORES_IN_CMPLX - 1) * threads) + 1)  : \
> > +                         (CORES_IN_CMPLX - 1))
> 
> I don't see why the check for threads > 1, here.  Why not simply
> write this as:
> 
>   ((nr_cores * nr_threads) - 1))
> 
> which will work for any cores/threads value?

We cannot achieve the above numbers if we use this logic.
For example, with nr_cores = 8 and nr_threads = 2,
  this will report (8x2)-1=15, which is not what we want.

> 
> (Or the function could just get nr_logical_cpus argument like I
> suggested on v7, to make the code here simpler.)
> 
> 
> > +
> >  /*
> >   * Encode cache info for CPUID[0x80000006].ECX and
> CPUID[0x80000006].EDX
> >   * @l3 can be NULL.
> > @@ -336,6 +344,41 @@ static void
> encode_cache_cpuid80000006(CPUCacheInfo *l2,
> >      }
> >  }
> >
> > +/* Encode cache info for CPUID[8000001D] */
> > +static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, int
> nr_threads,
> > +                                uint32_t *eax, uint32_t *ebx,
> > +                                uint32_t *ecx, uint32_t *edx)
> > +{
> > +    assert(cache->size == cache->line_size * cache->associativity *
> > +                          cache->partitions * cache->sets);
> > +
> > +    *eax = CACHE_TYPE(cache->type) | CACHE_LEVEL(cache->level) |
> > +               (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0);
> > +
> > +    /* L3 is shared among multiple cores */
> > +    if (cache->level == 3) {
> > +        *eax |= (NUM_SHARING_CACHE(nr_threads) << 14);
> > +    } else {
> > +        *eax |= ((nr_threads - 1) << 14);
> > +    }
> > +
> > +    assert(cache->line_size > 0);
> > +    assert(cache->partitions > 0);
> > +    assert(cache->associativity > 0);
> > +    /* We don't implement fully-associative caches */
> > +    assert(cache->associativity < cache->sets);
> > +    *ebx = (cache->line_size - 1) |
> > +           ((cache->partitions - 1) << 12) |
> > +           ((cache->associativity - 1) << 22);
> > +
> > +    assert(cache->sets > 0);
> > +    *ecx = cache->sets - 1;
> > +
> > +    *edx = (cache->no_invd_sharing ? CACHE_NO_INVD_SHARING : 0) |
> > +           (cache->inclusive ? CACHE_INCLUSIVE : 0) |
> > +           (cache->complex_indexing ? CACHE_COMPLEX_IDX : 0);
> > +}
> > +
> >  /*
> >   * Definitions of the hardcoded cache entries we expose:
> >   * These are legacy cache values. If there is a need to change any
> > @@ -4035,6 +4078,42 @@ void cpu_x86_cpuid(CPUX86State *env,
> uint32_t index, uint32_t count,
> >              *edx = 0;
> >          }
> >          break;
> > +    case 0x8000001D:
> > +        *eax = 0;
> > +        CPUCacheInfo *l1d, *l1i, *l2, *l3;
> > +        if (env->cache_info && !cpu->legacy_cache) {
> > +            l1d = &env->cache_info->l1d_cache;
> > +            l1i = &env->cache_info->l1i_cache;
> > +            l2 = &env->cache_info->l2_cache;
> > +            l3 = &env->cache_info->l3_cache;
> > +        } else {
> > +            l1d = &legacy_l1d_cache_amd;
> > +            l1i = &legacy_l1i_cache_amd;
> > +            l2 = &legacy_l2_cache_amd;
> > +            l3 = &legacy_l3_cache;
> > +        }
> > +        switch (count) {
> > +        case 0: /* L1 dcache info */
> > +            encode_cache_cpuid8000001d(l1d, cs->nr_threads,
> > +                                       eax, ebx, ecx, edx);
> > +            break;
> > +        case 1: /* L1 icache info */
> > +            encode_cache_cpuid8000001d(l1i, cs->nr_threads,
> > +                                       eax, ebx, ecx, edx);
> > +            break;
> > +        case 2: /* L2 cache info */
> > +            encode_cache_cpuid8000001d(l2, cs->nr_threads,
> > +                                       eax, ebx, ecx, edx);
> > +            break;
> > +        case 3: /* L3 cache info */
> > +            encode_cache_cpuid8000001d(l3, cs->nr_threads,
> > +                                       eax, ebx, ecx, edx);
> > +            break;
> > +        default: /* end of info */
> > +            *eax = *ebx = *ecx = *edx = 0;
> > +            break;
> > +        }
> > +        break;
> >      case 0xC0000000:
> >          *eax = env->cpuid_xlevel2;
> >          *ebx = 0;
> > diff --git a/target/i386/kvm.c b/target/i386/kvm.c
> > index 6c49954e68..6e66f9c51d 100644
> > --- a/target/i386/kvm.c
> > +++ b/target/i386/kvm.c
> > @@ -967,9 +967,32 @@ int kvm_arch_init_vcpu(CPUState *cs)
> >          }
> >          c = &cpuid_data.entries[cpuid_i++];
> >
> > -        c->function = i;
> > -        c->flags = 0;
> > -        cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
> > +        switch (i) {
> > +        case 0x8000001d:
> > +            /* Query for all AMD cache information leaves */
> > +            for (j = 0; ; j++) {
> > +                c->function = i;
> > +                c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
> > +                c->index = j;
> > +                cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
> > +
> > +                if (c->eax == 0) {
> > +                    break;
> > +                }
> > +                if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
> > +                    fprintf(stderr, "cpuid_data is full, no space for "
> > +                            "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
> > +                    abort();
> > +                }
> > +                c = &cpuid_data.entries[cpuid_i++];
> > +            }
> > +            break;
> > +        default:
> > +            c->function = i;
> > +            c->flags = 0;
> > +            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
> > +            break;
> > +        }
> >      }
> >
> >      /* Call Centaur's CPUID instructions they are supported. */
> > --
> > 2.17.0
> >
> 
> --
> Eduardo
Eduardo Habkost May 16, 2018, 12:52 p.m. UTC | #3
On Mon, May 14, 2018 at 11:49:30PM +0000, Moger, Babu wrote:
> 
> > -----Original Message-----
> > From: Eduardo Habkost [mailto:ehabkost@redhat.com]
> > Sent: Monday, May 14, 2018 2:47 PM
> > To: Moger, Babu <Babu.Moger@amd.com>
> > Cc: mst@redhat.com; marcel.apfelbaum@gmail.com; pbonzini@redhat.com;
> > rth@twiddle.net; mtosatti@redhat.com; qemu-devel@nongnu.org;
> > kvm@vger.kernel.org; geoff@hostfission.com; kash@tripleback.net
> > Subject: Re: [PATCH v9 3/7] i386: Populate AMD Processor Cache Information
> > for cpuid 0x8000001D
> > 
> > On Mon, May 14, 2018 at 11:41:52AM -0500, Babu Moger wrote:
> > > Add information for cpuid 0x8000001D leaf. Populate cache topology
> > information
> > > for different cache types(Data Cache, Instruction Cache, L2 and L3)
> > supported
> > > by 0x8000001D leaf. Please refer Processor Programming Reference (PPR)
> > for AMD
> > > Family 17h Model for more details.
> > >
> > > Signed-off-by: Babu Moger <babu.moger@amd.com>
> > > Tested-by: Geoffrey McRae <geoff@hostfission.com>
> > > ---
> > >  target/i386/cpu.c | 79
> > +++++++++++++++++++++++++++++++++++++++++++++++
> > >  target/i386/kvm.c | 29 +++++++++++++++--
> > >  2 files changed, 105 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> > > index e1daff37ab..7f40241786 100644
> > > --- a/target/i386/cpu.c
> > > +++ b/target/i386/cpu.c
> > > @@ -307,6 +307,14 @@ static uint32_t
> > encode_cache_cpuid80000005(CPUCacheInfo *cache)
> > >                            a == ASSOC_FULL ? 0xF : \
> > >                            0 /* invalid value */)
> > >
> > > +/* Definitions used on CPUID Leaf 0x8000001D */
> > > +/* Number of logical cores in a complex */
> > > +#define CORES_IN_CMPLX  4
> > 
> > Number of cores is configurable in QEMU, so we can't hardcode
> > this.
> 
> In EPYC architecture, in a single die we have 2 core complexes.
>  Each core complex has 4 cores at max(CORES_IN_CMPLX).
> Without SMT(thread=1), L3 is shared between 4(4x1) cores.
>    NUM_SHARING_CACHE should be 3.
> With SMT(thread=2), L3 is shared between 8(4x2) cores.
>   NUM_SHARING_CACHE should be 7.
> This is what we are trying to achieve here. This is a fixed h/w configuration.

There's nothing in this part of the code that makes it specific
to the EPYC CPU model, so it has to be more generic.  But
probably my suggestion wasn't correct either.  See my question
below:


> 
> > 
> > I understand you want to make it match the hardware as close as
> > possible (as you noted in your reply on v7), but this should be
> > done by simply configuring QEMU as closely to the hardware as
> > possible.
> > 
> > 
> > > +/* Number of logical processors sharing cache */
> > > +#define NUM_SHARING_CACHE(threads)   ((threads > 1) ? \
> > > +                         (((CORES_IN_CMPLX - 1) * threads) + 1)  : \
> > > +                         (CORES_IN_CMPLX - 1))
> > 
> > I don't see why the check for threads > 1, here.  Why not simply
> > write this as:
> > 
> >   ((nr_cores * nr_threads) - 1))
> > 
> > which will work for any cores/threads value?
> 
> We cannot achieve the above numbers if we use this logic.
> For example.. with nr_cores = 8, nr_threads=2. 
>   This will report (8x2)-1=15 which is not what we want.

I'm confused.  What would be the correct value for
Fn8000_001D_EAX_x[25:14] for a 8-core 2-threads-per-core CPU?

I assumed the L3 cache would be shared by the whole socket, but
it's shared only by a core complex (which has 4 cores in EPYC).
Is that right?

So, what would be a reasonable value for Fn8000_001D_EAX_3[25:14]
for the following configurations?

  -cpu EPYC,cores=2,threads=1
  -cpu EPYC,cores=2,threads=2
  -cpu EPYC,cores=3,threads=1
  -cpu EPYC,cores=3,threads=2
  -cpu EPYC,cores=4,threads=1
  -cpu EPYC,cores=4,threads=2
  -cpu EPYC,cores=5,threads=1
  -cpu EPYC,cores=5,threads=2
  -cpu EPYC,cores=6,threads=1
  -cpu EPYC,cores=6,threads=2
  -cpu EPYC,cores=7,threads=1
  -cpu EPYC,cores=7,threads=2
  -cpu EPYC,cores=8,threads=1
  -cpu EPYC,cores=8,threads=2
  -cpu EPYC,cores=9,threads=1
  -cpu EPYC,cores=9,threads=2

> 
> > 
> > (Or the function could just get nr_logical_cpus argument like I
> > suggested on v7, to make the code here simpler.)
> > 
> > 
> > > +
> > >  /*
> > >   * Encode cache info for CPUID[0x80000006].ECX and
> > CPUID[0x80000006].EDX
> > >   * @l3 can be NULL.
> > > @@ -336,6 +344,41 @@ static void
> > encode_cache_cpuid80000006(CPUCacheInfo *l2,
> > >      }
> > >  }
> > >
> > > +/* Encode cache info for CPUID[8000001D] */
> > > +static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, int
> > nr_threads,
> > > +                                uint32_t *eax, uint32_t *ebx,
> > > +                                uint32_t *ecx, uint32_t *edx)
> > > +{
> > > +    assert(cache->size == cache->line_size * cache->associativity *
> > > +                          cache->partitions * cache->sets);
> > > +
> > > +    *eax = CACHE_TYPE(cache->type) | CACHE_LEVEL(cache->level) |
> > > +               (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0);
> > > +
> > > +    /* L3 is shared among multiple cores */
> > > +    if (cache->level == 3) {
> > > +        *eax |= (NUM_SHARING_CACHE(nr_threads) << 14);
> > > +    } else {
> > > +        *eax |= ((nr_threads - 1) << 14);
> > > +    }
> > > +
> > > +    assert(cache->line_size > 0);
> > > +    assert(cache->partitions > 0);
> > > +    assert(cache->associativity > 0);
> > > +    /* We don't implement fully-associative caches */
> > > +    assert(cache->associativity < cache->sets);
> > > +    *ebx = (cache->line_size - 1) |
> > > +           ((cache->partitions - 1) << 12) |
> > > +           ((cache->associativity - 1) << 22);
> > > +
> > > +    assert(cache->sets > 0);
> > > +    *ecx = cache->sets - 1;
> > > +
> > > +    *edx = (cache->no_invd_sharing ? CACHE_NO_INVD_SHARING : 0) |
> > > +           (cache->inclusive ? CACHE_INCLUSIVE : 0) |
> > > +           (cache->complex_indexing ? CACHE_COMPLEX_IDX : 0);
> > > +}
> > > +
> > >  /*
> > >   * Definitions of the hardcoded cache entries we expose:
> > >   * These are legacy cache values. If there is a need to change any
> > > @@ -4035,6 +4078,42 @@ void cpu_x86_cpuid(CPUX86State *env,
> > uint32_t index, uint32_t count,
> > >              *edx = 0;
> > >          }
> > >          break;
> > > +    case 0x8000001D:
> > > +        *eax = 0;
> > > +        CPUCacheInfo *l1d, *l1i, *l2, *l3;
> > > +        if (env->cache_info && !cpu->legacy_cache) {
> > > +            l1d = &env->cache_info->l1d_cache;
> > > +            l1i = &env->cache_info->l1i_cache;
> > > +            l2 = &env->cache_info->l2_cache;
> > > +            l3 = &env->cache_info->l3_cache;
> > > +        } else {
> > > +            l1d = &legacy_l1d_cache_amd;
> > > +            l1i = &legacy_l1i_cache_amd;
> > > +            l2 = &legacy_l2_cache_amd;
> > > +            l3 = &legacy_l3_cache;
> > > +        }
> > > +        switch (count) {
> > > +        case 0: /* L1 dcache info */
> > > +            encode_cache_cpuid8000001d(l1d, cs->nr_threads,
> > > +                                       eax, ebx, ecx, edx);
> > > +            break;
> > > +        case 1: /* L1 icache info */
> > > +            encode_cache_cpuid8000001d(l1i, cs->nr_threads,
> > > +                                       eax, ebx, ecx, edx);
> > > +            break;
> > > +        case 2: /* L2 cache info */
> > > +            encode_cache_cpuid8000001d(l2, cs->nr_threads,
> > > +                                       eax, ebx, ecx, edx);
> > > +            break;
> > > +        case 3: /* L3 cache info */
> > > +            encode_cache_cpuid8000001d(l3, cs->nr_threads,
> > > +                                       eax, ebx, ecx, edx);
> > > +            break;
> > > +        default: /* end of info */
> > > +            *eax = *ebx = *ecx = *edx = 0;
> > > +            break;
> > > +        }
> > > +        break;
> > >      case 0xC0000000:
> > >          *eax = env->cpuid_xlevel2;
> > >          *ebx = 0;
> > > diff --git a/target/i386/kvm.c b/target/i386/kvm.c
> > > index 6c49954e68..6e66f9c51d 100644
> > > --- a/target/i386/kvm.c
> > > +++ b/target/i386/kvm.c
> > > @@ -967,9 +967,32 @@ int kvm_arch_init_vcpu(CPUState *cs)
> > >          }
> > >          c = &cpuid_data.entries[cpuid_i++];
> > >
> > > -        c->function = i;
> > > -        c->flags = 0;
> > > -        cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
> > > +        switch (i) {
> > > +        case 0x8000001d:
> > > +            /* Query for all AMD cache information leaves */
> > > +            for (j = 0; ; j++) {
> > > +                c->function = i;
> > > +                c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
> > > +                c->index = j;
> > > +                cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
> > > +
> > > +                if (c->eax == 0) {
> > > +                    break;
> > > +                }
> > > +                if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
> > > +                    fprintf(stderr, "cpuid_data is full, no space for "
> > > +                            "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
> > > +                    abort();
> > > +                }
> > > +                c = &cpuid_data.entries[cpuid_i++];
> > > +            }
> > > +            break;
> > > +        default:
> > > +            c->function = i;
> > > +            c->flags = 0;
> > > +            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
> > > +            break;
> > > +        }
> > >      }
> > >
> > >      /* Call Centaur's CPUID instructions they are supported. */
> > > --
> > > 2.17.0
> > >
> > 
> > --
> > Eduardo
Moger, Babu May 16, 2018, 7:25 p.m. UTC | #4
> -----Original Message-----
> From: Eduardo Habkost [mailto:ehabkost@redhat.com]
> Sent: Wednesday, May 16, 2018 7:52 AM
> To: Moger, Babu <Babu.Moger@amd.com>
> Cc: mst@redhat.com; marcel.apfelbaum@gmail.com; pbonzini@redhat.com;
> rth@twiddle.net; mtosatti@redhat.com; qemu-devel@nongnu.org;
> kvm@vger.kernel.org; geoff@hostfission.com; kash@tripleback.net
> Subject: Re: [PATCH v9 3/7] i386: Populate AMD Processor Cache Information
> for cpuid 0x8000001D
> 
> On Mon, May 14, 2018 at 11:49:30PM +0000, Moger, Babu wrote:
> >
> > > -----Original Message-----
> > > From: Eduardo Habkost [mailto:ehabkost@redhat.com]
> > > Sent: Monday, May 14, 2018 2:47 PM
> > > To: Moger, Babu <Babu.Moger@amd.com>
> > > Cc: mst@redhat.com; marcel.apfelbaum@gmail.com;
> pbonzini@redhat.com;
> > > rth@twiddle.net; mtosatti@redhat.com; qemu-devel@nongnu.org;
> > > kvm@vger.kernel.org; geoff@hostfission.com; kash@tripleback.net
> > > Subject: Re: [PATCH v9 3/7] i386: Populate AMD Processor Cache
> Information
> > > for cpuid 0x8000001D
> > >
> > > On Mon, May 14, 2018 at 11:41:52AM -0500, Babu Moger wrote:
> > > > Add information for cpuid 0x8000001D leaf. Populate cache topology
> > > information
> > > > for different cache types(Data Cache, Instruction Cache, L2 and L3)
> > > supported
> > > > by 0x8000001D leaf. Please refer Processor Programming Reference
> (PPR)
> > > for AMD
> > > > Family 17h Model for more details.
> > > >
> > > > Signed-off-by: Babu Moger <babu.moger@amd.com>
> > > > Tested-by: Geoffrey McRae <geoff@hostfission.com>
> > > > ---
> > > >  target/i386/cpu.c | 79
> > > +++++++++++++++++++++++++++++++++++++++++++++++
> > > >  target/i386/kvm.c | 29 +++++++++++++++--
> > > >  2 files changed, 105 insertions(+), 3 deletions(-)
> > > >
> > > > diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> > > > index e1daff37ab..7f40241786 100644
> > > > --- a/target/i386/cpu.c
> > > > +++ b/target/i386/cpu.c
> > > > @@ -307,6 +307,14 @@ static uint32_t
> > > encode_cache_cpuid80000005(CPUCacheInfo *cache)
> > > >                            a == ASSOC_FULL ? 0xF : \
> > > >                            0 /* invalid value */)
> > > >
> > > > +/* Definitions used on CPUID Leaf 0x8000001D */
> > > > +/* Number of logical cores in a complex */
> > > > +#define CORES_IN_CMPLX  4
> > >
> > > Number of cores is configurable in QEMU, so we can't hardcode
> > > this.
> >
> > In EPYC architecture, in a single die we have 2 core complexes.
> >  Each core complex has 4 cores at max(CORES_IN_CMPLX).
> > Without SMT(thread=1), L3 is shared between 4(4x1) cores.
> >    NUM_SHARING_CACHE should be 3.
> > With SMT(thread=2), L3 is shared between 8(4x2) cores.
> >   NUM_SHARING_CACHE should be 7.
> > This is what we are trying to achieve here. This is a fixed h/w configuration.
> 
> There's nothing in this part of the code that makes it specific
> to the EPYC CPU model, so it has to be more generic.  But
> probably my suggestion wasn't correct either.  Se my question
> below:
> 
> 
> >
> > >
> > > I understand you want to make it match the hardware as close as
> > > possible (as you noted in your reply on v7), but this should be
> > > done by simply configuring QEMU as closely to the hardware as
> > > possible.
> > >
> > >
> > > > +/* Number of logical processors sharing cache */
> > > > +#define NUM_SHARING_CACHE(threads)   ((threads > 1) ? \
> > > > +                         (((CORES_IN_CMPLX - 1) * threads) + 1)  : \
> > > > +                         (CORES_IN_CMPLX - 1))
> > >
> > > I don't see why the check for threads > 1, here.  Why not simply
> > > write this as:
> > >
> > >   ((nr_cores * nr_threads) - 1))
> > >
> > > which will work for any cores/threads value?
> >
> > We cannot achieve the above numbers if we use this logic.
> > For example.. with nr_cores = 8, nr_threads=2.
> >   This will report (8x2)-1=15 which is not what we want.
> 
> I'm confused.  What would be the correct value for
> Fn8000_001D_EAX_x[25:14] for a 8-core 2-threads-per-core CPU?
> 
> I assumed the L3 cache would be shared by the whole socket, but
> it's shared only by a core complex (which has 4 cores in EPYC).
> Is that right?

That is correct.

> 
> So, what would be a reasonable value for Fn8000_001D_EAX_3[25:14]
> for the following configurations?
> 
>   -cpu EPYC,cores=2,threads=1
    1
>   -cpu EPYC,cores=2,threads=2
    3
>   -cpu EPYC,cores=3,threads=1
     2
>   -cpu EPYC,cores=3,threads=2
    5
>   -cpu EPYC,cores=4,threads=1
     3
>   -cpu EPYC,cores=4,threads=2
     7
>   -cpu EPYC,cores=5,threads=1
>   -cpu EPYC,cores=5,threads=2
>   -cpu EPYC,cores=6,threads=1
>   -cpu EPYC,cores=6,threads=2
>   -cpu EPYC,cores=7,threads=1
>   -cpu EPYC,cores=7,threads=2
>   -cpu EPYC,cores=8,threads=1
>   -cpu EPYC,cores=8,threads=2
>   -cpu EPYC,cores=9,threads=1
>   -cpu EPYC,cores=9,threads=2

Some of these combinations are not valid.  We are thinking of coming up with a statically
defined data model and picking the model that best fits the above parameters, or something like that.
We may have to report Invalid for some of the combinations. Still thinking. Let me know if you can think
of a better way to handle it, or if there are similar cases already handled elsewhere that we can base this on.

> 
> >
> > >
> > > (Or the function could just get nr_logical_cpus argument like I
> > > suggested on v7, to make the code here simpler.)
> > >
> > >
> > > > +
> > > >  /*
> > > >   * Encode cache info for CPUID[0x80000006].ECX and
> > > CPUID[0x80000006].EDX
> > > >   * @l3 can be NULL.
> > > > @@ -336,6 +344,41 @@ static void
> > > encode_cache_cpuid80000006(CPUCacheInfo *l2,
> > > >      }
> > > >  }
> > > >
> > > > +/* Encode cache info for CPUID[8000001D] */
> > > > +static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, int
> > > nr_threads,
> > > > +                                uint32_t *eax, uint32_t *ebx,
> > > > +                                uint32_t *ecx, uint32_t *edx)
> > > > +{
> > > > +    assert(cache->size == cache->line_size * cache->associativity *
> > > > +                          cache->partitions * cache->sets);
> > > > +
> > > > +    *eax = CACHE_TYPE(cache->type) | CACHE_LEVEL(cache->level) |
> > > > +               (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0);
> > > > +
> > > > +    /* L3 is shared among multiple cores */
> > > > +    if (cache->level == 3) {
> > > > +        *eax |= (NUM_SHARING_CACHE(nr_threads) << 14);
> > > > +    } else {
> > > > +        *eax |= ((nr_threads - 1) << 14);
> > > > +    }
> > > > +
> > > > +    assert(cache->line_size > 0);
> > > > +    assert(cache->partitions > 0);
> > > > +    assert(cache->associativity > 0);
> > > > +    /* We don't implement fully-associative caches */
> > > > +    assert(cache->associativity < cache->sets);
> > > > +    *ebx = (cache->line_size - 1) |
> > > > +           ((cache->partitions - 1) << 12) |
> > > > +           ((cache->associativity - 1) << 22);
> > > > +
> > > > +    assert(cache->sets > 0);
> > > > +    *ecx = cache->sets - 1;
> > > > +
> > > > +    *edx = (cache->no_invd_sharing ? CACHE_NO_INVD_SHARING : 0)
> |
> > > > +           (cache->inclusive ? CACHE_INCLUSIVE : 0) |
> > > > +           (cache->complex_indexing ? CACHE_COMPLEX_IDX : 0);
> > > > +}
> > > > +
> > > >  /*
> > > >   * Definitions of the hardcoded cache entries we expose:
> > > >   * These are legacy cache values. If there is a need to change any
> > > > @@ -4035,6 +4078,42 @@ void cpu_x86_cpuid(CPUX86State *env,
> > > uint32_t index, uint32_t count,
> > > >              *edx = 0;
> > > >          }
> > > >          break;
> > > > +    case 0x8000001D:
> > > > +        *eax = 0;
> > > > +        CPUCacheInfo *l1d, *l1i, *l2, *l3;
> > > > +        if (env->cache_info && !cpu->legacy_cache) {
> > > > +            l1d = &env->cache_info->l1d_cache;
> > > > +            l1i = &env->cache_info->l1i_cache;
> > > > +            l2 = &env->cache_info->l2_cache;
> > > > +            l3 = &env->cache_info->l3_cache;
> > > > +        } else {
> > > > +            l1d = &legacy_l1d_cache_amd;
> > > > +            l1i = &legacy_l1i_cache_amd;
> > > > +            l2 = &legacy_l2_cache_amd;
> > > > +            l3 = &legacy_l3_cache;
> > > > +        }
> > > > +        switch (count) {
> > > > +        case 0: /* L1 dcache info */
> > > > +            encode_cache_cpuid8000001d(l1d, cs->nr_threads,
> > > > +                                       eax, ebx, ecx, edx);
> > > > +            break;
> > > > +        case 1: /* L1 icache info */
> > > > +            encode_cache_cpuid8000001d(l1i, cs->nr_threads,
> > > > +                                       eax, ebx, ecx, edx);
> > > > +            break;
> > > > +        case 2: /* L2 cache info */
> > > > +            encode_cache_cpuid8000001d(l2, cs->nr_threads,
> > > > +                                       eax, ebx, ecx, edx);
> > > > +            break;
> > > > +        case 3: /* L3 cache info */
> > > > +            encode_cache_cpuid8000001d(l3, cs->nr_threads,
> > > > +                                       eax, ebx, ecx, edx);
> > > > +            break;
> > > > +        default: /* end of info */
> > > > +            *eax = *ebx = *ecx = *edx = 0;
> > > > +            break;
> > > > +        }
> > > > +        break;
> > > >      case 0xC0000000:
> > > >          *eax = env->cpuid_xlevel2;
> > > >          *ebx = 0;
> > > > diff --git a/target/i386/kvm.c b/target/i386/kvm.c
> > > > index 6c49954e68..6e66f9c51d 100644
> > > > --- a/target/i386/kvm.c
> > > > +++ b/target/i386/kvm.c
> > > > @@ -967,9 +967,32 @@ int kvm_arch_init_vcpu(CPUState *cs)
> > > >          }
> > > >          c = &cpuid_data.entries[cpuid_i++];
> > > >
> > > > -        c->function = i;
> > > > -        c->flags = 0;
> > > > -        cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
> > > > +        switch (i) {
> > > > +        case 0x8000001d:
> > > > +            /* Query for all AMD cache information leaves */
> > > > +            for (j = 0; ; j++) {
> > > > +                c->function = i;
> > > > +                c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
> > > > +                c->index = j;
> > > > +                cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c-
> >edx);
> > > > +
> > > > +                if (c->eax == 0) {
> > > > +                    break;
> > > > +                }
> > > > +                if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
> > > > +                    fprintf(stderr, "cpuid_data is full, no space for "
> > > > +                            "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
> > > > +                    abort();
> > > > +                }
> > > > +                c = &cpuid_data.entries[cpuid_i++];
> > > > +            }
> > > > +            break;
> > > > +        default:
> > > > +            c->function = i;
> > > > +            c->flags = 0;
> > > > +            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
> > > > +            break;
> > > > +        }
> > > >      }
> > > >
> > > >      /* Call Centaur's CPUID instructions they are supported. */
> > > > --
> > > > 2.17.0
> > > >
> > >
> > > --
> > > Eduardo
> 
> --
> Eduardo
Eduardo Habkost May 16, 2018, 8:54 p.m. UTC | #5
On Wed, May 16, 2018 at 07:25:53PM +0000, Moger, Babu wrote:
> 
> > -----Original Message-----
> > From: Eduardo Habkost [mailto:ehabkost@redhat.com]
> > Sent: Wednesday, May 16, 2018 7:52 AM
> > To: Moger, Babu <Babu.Moger@amd.com>
> > Cc: mst@redhat.com; marcel.apfelbaum@gmail.com; pbonzini@redhat.com;
> > rth@twiddle.net; mtosatti@redhat.com; qemu-devel@nongnu.org;
> > kvm@vger.kernel.org; geoff@hostfission.com; kash@tripleback.net
> > Subject: Re: [PATCH v9 3/7] i386: Populate AMD Processor Cache Information
> > for cpuid 0x8000001D
> > 
> > On Mon, May 14, 2018 at 11:49:30PM +0000, Moger, Babu wrote:
> > >
> > > > -----Original Message-----
> > > > From: Eduardo Habkost [mailto:ehabkost@redhat.com]
> > > > Sent: Monday, May 14, 2018 2:47 PM
> > > > To: Moger, Babu <Babu.Moger@amd.com>
> > > > Cc: mst@redhat.com; marcel.apfelbaum@gmail.com;
> > pbonzini@redhat.com;
> > > > rth@twiddle.net; mtosatti@redhat.com; qemu-devel@nongnu.org;
> > > > kvm@vger.kernel.org; geoff@hostfission.com; kash@tripleback.net
> > > > Subject: Re: [PATCH v9 3/7] i386: Populate AMD Processor Cache
> > Information
> > > > for cpuid 0x8000001D
> > > >
> > > > On Mon, May 14, 2018 at 11:41:52AM -0500, Babu Moger wrote:
> > > > > Add information for cpuid 0x8000001D leaf. Populate cache topology
> > > > information
> > > > > for different cache types(Data Cache, Instruction Cache, L2 and L3)
> > > > supported
> > > > > by 0x8000001D leaf. Please refer Processor Programming Reference
> > (PPR)
> > > > for AMD
> > > > > Family 17h Model for more details.
> > > > >
> > > > > Signed-off-by: Babu Moger <babu.moger@amd.com>
> > > > > Tested-by: Geoffrey McRae <geoff@hostfission.com>
> > > > > ---
> > > > >  target/i386/cpu.c | 79
> > > > +++++++++++++++++++++++++++++++++++++++++++++++
> > > > >  target/i386/kvm.c | 29 +++++++++++++++--
> > > > >  2 files changed, 105 insertions(+), 3 deletions(-)
> > > > >
> > > > > diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> > > > > index e1daff37ab..7f40241786 100644
> > > > > --- a/target/i386/cpu.c
> > > > > +++ b/target/i386/cpu.c
> > > > > @@ -307,6 +307,14 @@ static uint32_t
> > > > encode_cache_cpuid80000005(CPUCacheInfo *cache)
> > > > >                            a == ASSOC_FULL ? 0xF : \
> > > > >                            0 /* invalid value */)
> > > > >
> > > > > +/* Definitions used on CPUID Leaf 0x8000001D */
> > > > > +/* Number of logical cores in a complex */
> > > > > +#define CORES_IN_CMPLX  4
> > > >
> > > > Number of cores is configurable in QEMU, so we can't hardcode
> > > > this.
> > >
> > > In EPYC architecture, in a single die we have 2 core complexes.
> > >  Each core complex has 4 cores at max(CORES_IN_CMPLX).
> > > Without SMT(thread=1), L3 is shared between 4(4x1) cores.
> > >    NUM_SHARING_CACHE should be 3.
> > > With SMT(thread=2), L3 is shared between 8(4x2) cores.
> > >   NUM_SHARING_CACHE should be 7.
> > > This is what we are trying to achieve here. This is a fixed h/w configuration.
> > 
> > There's nothing in this part of the code that makes it specific
> > to the EPYC CPU model, so it has to be more generic.  But
> > probably my suggestion wasn't correct either.  See my question
> > below:
> > 
> > 
> > >
> > > >
> > > > I understand you want to make it match the hardware as close as
> > > > possible (as you noted in your reply on v7), but this should be
> > > > done by simply configuring QEMU as closely to the hardware as
> > > > possible.
> > > >
> > > >
> > > > > +/* Number of logical processors sharing cache */
> > > > > +#define NUM_SHARING_CACHE(threads)   ((threads > 1) ? \
> > > > > +                         (((CORES_IN_CMPLX - 1) * threads) + 1)  : \
> > > > > +                         (CORES_IN_CMPLX - 1))
> > > >
> > > > I don't see why the check for threads > 1, here.  Why not simply
> > > > write this as:
> > > >
> > > >   ((nr_cores * nr_threads) - 1))
> > > >
> > > > which will work for any cores/threads value?
> > >
> > > We cannot achieve the above numbers if we use this logic.
> > > For example.. with nr_cores = 8, nr_threads=2.
> > >   This will report (8x2)-1=15 which is not what we want.
> > 
> > I'm confused.  What would be the correct value for
> > Fn8000_001D_EAX_x[25:14] for a 8-core 2-threads-per-core CPU?
> > 
> > I assumed the L3 cache would be shared by the whole socket, but
> > it's shared only by a core complex (which has 4 cores in EPYC).
> > Is that right?
> 
> That is correct.
> 
> > 
> > So, what would be a reasonable value for Fn8000_001D_EAX_3[25:14]
> > for the following configurations?
> > 
> >   -cpu EPYC,cores=2,threads=1
>     1
> >   -cpu EPYC,cores=2,threads=2
>     3
> >   -cpu EPYC,cores=3,threads=1
>      2
> >   -cpu EPYC,cores=3,threads=2
>     5
> >   -cpu EPYC,cores=4,threads=1
>      3
> >   -cpu EPYC,cores=4,threads=2
>      7
> >   -cpu EPYC,cores=5,threads=1
> >   -cpu EPYC,cores=5,threads=2
> >   -cpu EPYC,cores=6,threads=1
> >   -cpu EPYC,cores=6,threads=2
> >   -cpu EPYC,cores=7,threads=1
> >   -cpu EPYC,cores=7,threads=2
> >   -cpu EPYC,cores=8,threads=1
> >   -cpu EPYC,cores=8,threads=2
> >   -cpu EPYC,cores=9,threads=1
> >   -cpu EPYC,cores=9,threads=2
> 
> Some of these combinations are not valid.   We are thinking of coming up with a statically 
> defined data model and pickup the model that best fits the above parameter or something like that.
> We may have to report Invalid for some of the combinations. Still thinking. Let me know if you think
> of any better way to handle it or if there are similar cases which are already handled which we can base on.

I understand the goal, here, but QEMU already allows all the
combinations above.  In this case, we need to find a reasonable
enough way to handle these configurations.

The main obstacle here is that, unfortunately, we can't make things
like "-cpu EPYC -smp cores=5,threads=2" stop working, because that
would break existing configurations.

But we have multiple options to handle this:

One option is to automatically disable topoext (and refuse to
enable it if explicitly set to "on") if the socket/core/thread
topology is incompatible with what we're trying to do.

Another one is to try to calculate a reasonable enough value for
the given configuration.

> 
> > 
> > >
> > > >
> > > > (Or the function could just get nr_logical_cpus argument like I
> > > > suggested on v7, to make the code here simpler.)
> > > >
> > > >
> > > > > +
> > > > >  /*
> > > > >   * Encode cache info for CPUID[0x80000006].ECX and
> > > > CPUID[0x80000006].EDX
> > > > >   * @l3 can be NULL.
> > > > > @@ -336,6 +344,41 @@ static void
> > > > encode_cache_cpuid80000006(CPUCacheInfo *l2,
> > > > >      }
> > > > >  }
> > > > >
> > > > > +/* Encode cache info for CPUID[8000001D] */
> > > > > +static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, int
> > > > nr_threads,
> > > > > +                                uint32_t *eax, uint32_t *ebx,
> > > > > +                                uint32_t *ecx, uint32_t *edx)
> > > > > +{
> > > > > +    assert(cache->size == cache->line_size * cache->associativity *
> > > > > +                          cache->partitions * cache->sets);
> > > > > +
> > > > > +    *eax = CACHE_TYPE(cache->type) | CACHE_LEVEL(cache->level) |
> > > > > +               (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0);
> > > > > +
> > > > > +    /* L3 is shared among multiple cores */
> > > > > +    if (cache->level == 3) {
> > > > > +        *eax |= (NUM_SHARING_CACHE(nr_threads) << 14);
> > > > > +    } else {
> > > > > +        *eax |= ((nr_threads - 1) << 14);
> > > > > +    }
> > > > > +
> > > > > +    assert(cache->line_size > 0);
> > > > > +    assert(cache->partitions > 0);
> > > > > +    assert(cache->associativity > 0);
> > > > > +    /* We don't implement fully-associative caches */
> > > > > +    assert(cache->associativity < cache->sets);
> > > > > +    *ebx = (cache->line_size - 1) |
> > > > > +           ((cache->partitions - 1) << 12) |
> > > > > +           ((cache->associativity - 1) << 22);
> > > > > +
> > > > > +    assert(cache->sets > 0);
> > > > > +    *ecx = cache->sets - 1;
> > > > > +
> > > > > +    *edx = (cache->no_invd_sharing ? CACHE_NO_INVD_SHARING : 0)
> > |
> > > > > +           (cache->inclusive ? CACHE_INCLUSIVE : 0) |
> > > > > +           (cache->complex_indexing ? CACHE_COMPLEX_IDX : 0);
> > > > > +}
> > > > > +
> > > > >  /*
> > > > >   * Definitions of the hardcoded cache entries we expose:
> > > > >   * These are legacy cache values. If there is a need to change any
> > > > > @@ -4035,6 +4078,42 @@ void cpu_x86_cpuid(CPUX86State *env,
> > > > uint32_t index, uint32_t count,
> > > > >              *edx = 0;
> > > > >          }
> > > > >          break;
> > > > > +    case 0x8000001D:
> > > > > +        *eax = 0;
> > > > > +        CPUCacheInfo *l1d, *l1i, *l2, *l3;
> > > > > +        if (env->cache_info && !cpu->legacy_cache) {
> > > > > +            l1d = &env->cache_info->l1d_cache;
> > > > > +            l1i = &env->cache_info->l1i_cache;
> > > > > +            l2 = &env->cache_info->l2_cache;
> > > > > +            l3 = &env->cache_info->l3_cache;
> > > > > +        } else {
> > > > > +            l1d = &legacy_l1d_cache_amd;
> > > > > +            l1i = &legacy_l1i_cache_amd;
> > > > > +            l2 = &legacy_l2_cache_amd;
> > > > > +            l3 = &legacy_l3_cache;
> > > > > +        }
> > > > > +        switch (count) {
> > > > > +        case 0: /* L1 dcache info */
> > > > > +            encode_cache_cpuid8000001d(l1d, cs->nr_threads,
> > > > > +                                       eax, ebx, ecx, edx);
> > > > > +            break;
> > > > > +        case 1: /* L1 icache info */
> > > > > +            encode_cache_cpuid8000001d(l1i, cs->nr_threads,
> > > > > +                                       eax, ebx, ecx, edx);
> > > > > +            break;
> > > > > +        case 2: /* L2 cache info */
> > > > > +            encode_cache_cpuid8000001d(l2, cs->nr_threads,
> > > > > +                                       eax, ebx, ecx, edx);
> > > > > +            break;
> > > > > +        case 3: /* L3 cache info */
> > > > > +            encode_cache_cpuid8000001d(l3, cs->nr_threads,
> > > > > +                                       eax, ebx, ecx, edx);
> > > > > +            break;
> > > > > +        default: /* end of info */
> > > > > +            *eax = *ebx = *ecx = *edx = 0;
> > > > > +            break;
> > > > > +        }
> > > > > +        break;
> > > > >      case 0xC0000000:
> > > > >          *eax = env->cpuid_xlevel2;
> > > > >          *ebx = 0;
> > > > > diff --git a/target/i386/kvm.c b/target/i386/kvm.c
> > > > > index 6c49954e68..6e66f9c51d 100644
> > > > > --- a/target/i386/kvm.c
> > > > > +++ b/target/i386/kvm.c
> > > > > @@ -967,9 +967,32 @@ int kvm_arch_init_vcpu(CPUState *cs)
> > > > >          }
> > > > >          c = &cpuid_data.entries[cpuid_i++];
> > > > >
> > > > > -        c->function = i;
> > > > > -        c->flags = 0;
> > > > > -        cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
> > > > > +        switch (i) {
> > > > > +        case 0x8000001d:
> > > > > +            /* Query for all AMD cache information leaves */
> > > > > +            for (j = 0; ; j++) {
> > > > > +                c->function = i;
> > > > > +                c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
> > > > > +                c->index = j;
> > > > > +                cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c-
> > >edx);
> > > > > +
> > > > > +                if (c->eax == 0) {
> > > > > +                    break;
> > > > > +                }
> > > > > +                if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
> > > > > +                    fprintf(stderr, "cpuid_data is full, no space for "
> > > > > +                            "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
> > > > > +                    abort();
> > > > > +                }
> > > > > +                c = &cpuid_data.entries[cpuid_i++];
> > > > > +            }
> > > > > +            break;
> > > > > +        default:
> > > > > +            c->function = i;
> > > > > +            c->flags = 0;
> > > > > +            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
> > > > > +            break;
> > > > > +        }
> > > > >      }
> > > > >
> > > > >      /* Call Centaur's CPUID instructions they are supported. */
> > > > > --
> > > > > 2.17.0
> > > > >
> > > >
> > > > --
> > > > Eduardo
> > 
> > --
> > Eduardo
diff mbox series

Patch

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index e1daff37ab..7f40241786 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -307,6 +307,14 @@  static uint32_t encode_cache_cpuid80000005(CPUCacheInfo *cache)
                           a == ASSOC_FULL ? 0xF : \
                           0 /* invalid value */)
 
+/* Definitions used on CPUID Leaf 0x8000001D */
+/* Number of logical cores in a complex */
+#define CORES_IN_CMPLX  4
+/* Number of logical processors sharing cache */
+#define NUM_SHARING_CACHE(threads)   ((threads > 1) ? \
+                         (((CORES_IN_CMPLX - 1) * threads) + 1)  : \
+                         (CORES_IN_CMPLX - 1))
+
 /*
  * Encode cache info for CPUID[0x80000006].ECX and CPUID[0x80000006].EDX
  * @l3 can be NULL.
@@ -336,6 +344,41 @@  static void encode_cache_cpuid80000006(CPUCacheInfo *l2,
     }
 }
 
+/* Encode cache info for CPUID[8000001D] */
+static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, int nr_threads,
+                                uint32_t *eax, uint32_t *ebx,
+                                uint32_t *ecx, uint32_t *edx)
+{
+    assert(cache->size == cache->line_size * cache->associativity *
+                          cache->partitions * cache->sets);
+
+    *eax = CACHE_TYPE(cache->type) | CACHE_LEVEL(cache->level) |
+               (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0);
+
+    /* L3 is shared among multiple cores */
+    if (cache->level == 3) {
+        *eax |= (NUM_SHARING_CACHE(nr_threads) << 14);
+    } else {
+        *eax |= ((nr_threads - 1) << 14);
+    }
+
+    assert(cache->line_size > 0);
+    assert(cache->partitions > 0);
+    assert(cache->associativity > 0);
+    /* We don't implement fully-associative caches */
+    assert(cache->associativity < cache->sets);
+    *ebx = (cache->line_size - 1) |
+           ((cache->partitions - 1) << 12) |
+           ((cache->associativity - 1) << 22);
+
+    assert(cache->sets > 0);
+    *ecx = cache->sets - 1;
+
+    *edx = (cache->no_invd_sharing ? CACHE_NO_INVD_SHARING : 0) |
+           (cache->inclusive ? CACHE_INCLUSIVE : 0) |
+           (cache->complex_indexing ? CACHE_COMPLEX_IDX : 0);
+}
+
 /*
  * Definitions of the hardcoded cache entries we expose:
  * These are legacy cache values. If there is a need to change any
@@ -4035,6 +4078,42 @@  void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
             *edx = 0;
         }
         break;
+    case 0x8000001D:
+        *eax = 0;
+        CPUCacheInfo *l1d, *l1i, *l2, *l3;
+        if (env->cache_info && !cpu->legacy_cache) {
+            l1d = &env->cache_info->l1d_cache;
+            l1i = &env->cache_info->l1i_cache;
+            l2 = &env->cache_info->l2_cache;
+            l3 = &env->cache_info->l3_cache;
+        } else {
+            l1d = &legacy_l1d_cache_amd;
+            l1i = &legacy_l1i_cache_amd;
+            l2 = &legacy_l2_cache_amd;
+            l3 = &legacy_l3_cache;
+        }
+        switch (count) {
+        case 0: /* L1 dcache info */
+            encode_cache_cpuid8000001d(l1d, cs->nr_threads,
+                                       eax, ebx, ecx, edx);
+            break;
+        case 1: /* L1 icache info */
+            encode_cache_cpuid8000001d(l1i, cs->nr_threads,
+                                       eax, ebx, ecx, edx);
+            break;
+        case 2: /* L2 cache info */
+            encode_cache_cpuid8000001d(l2, cs->nr_threads,
+                                       eax, ebx, ecx, edx);
+            break;
+        case 3: /* L3 cache info */
+            encode_cache_cpuid8000001d(l3, cs->nr_threads,
+                                       eax, ebx, ecx, edx);
+            break;
+        default: /* end of info */
+            *eax = *ebx = *ecx = *edx = 0;
+            break;
+        }
+        break;
     case 0xC0000000:
         *eax = env->cpuid_xlevel2;
         *ebx = 0;
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index 6c49954e68..6e66f9c51d 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -967,9 +967,32 @@  int kvm_arch_init_vcpu(CPUState *cs)
         }
         c = &cpuid_data.entries[cpuid_i++];
 
-        c->function = i;
-        c->flags = 0;
-        cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
+        switch (i) {
+        case 0x8000001d:
+            /* Query for all AMD cache information leaves */
+            for (j = 0; ; j++) {
+                c->function = i;
+                c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+                c->index = j;
+                cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);
+
+                if (c->eax == 0) {
+                    break;
+                }
+                if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
+                    fprintf(stderr, "cpuid_data is full, no space for "
+                            "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
+                    abort();
+                }
+                c = &cpuid_data.entries[cpuid_i++];
+            }
+            break;
+        default:
+            c->function = i;
+            c->flags = 0;
+            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
+            break;
+        }
     }
 
     /* Call Centaur's CPUID instructions they are supported. */