[RESEND,18/18] i386: Add new property to control L2 cache topo in CPUID.04H

Message ID 20230213093625.158170-19-zhao1.liu@linux.intel.com
State New
Series Support smp.clusters for x86

Commit Message

Zhao Liu Feb. 13, 2023, 9:36 a.m. UTC
From: Zhao Liu <zhao1.liu@intel.com>

The property x-l2-cache-topo will be used to change the L2 cache
topology in CPUID.04H.

It allows the user to set whether the L2 cache is shared at the core
level or at the cluster level.

If the user passes "-cpu x-l2-cache-topo=[core|cluster]", the default L2
cache topology will be overridden by the new topology setting.

Here we expose "cluster" to the user instead of "module", to be
consistent with the "cluster-id" naming.

Since CPUID.04H is used by Intel CPUs, this property is only available
on Intel CPUs for now.

When necessary, it can be extended to CPUID.8000001DH for AMD CPUs.
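
For example (a minimal sketch; this relies on the smp.clusters support
added earlier in this series, and the exact -smp numbers are only
illustrative):

  qemu-system-x86_64 -accel kvm \
      -smp 8,sockets=1,dies=1,clusters=2,cores=2,threads=2 \
      -cpu Skylake-Server,x-l2-cache-topo=cluster

With x-l2-cache-topo=cluster, CPUID.04H describes the L2 cache as shared
by the 4 logical processors of a cluster rather than by the 2 threads of
a core.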

Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
---
 target/i386/cpu.c | 33 ++++++++++++++++++++++++++++++++-
 target/i386/cpu.h |  2 ++
 2 files changed, 34 insertions(+), 1 deletion(-)

Comments

Zhao Liu Feb. 17, 2023, 3:35 a.m. UTC | #1
On Thu, Feb 16, 2023 at 09:14:54PM +0800, wangyanan (Y) wrote:
> Date: Thu, 16 Feb 2023 21:14:54 +0800
> From: "wangyanan (Y)" <wangyanan55@huawei.com>
> Subject: Re: [PATCH RESEND 18/18] i386: Add new property to control L2
>  cache topo in CPUID.04H
> 
> On 2023/2/13 17:36, Zhao Liu wrote:
> > From: Zhao Liu <zhao1.liu@intel.com>
> > 
> > The property x-l2-cache-topo will be used to change the L2 cache
> > topology in CPUID.04H.
> > 
> > Now it allows user to set the L2 cache is shared in core level or
> > cluster level.
> > 
> > If user passes "-cpu x-l2-cache-topo=[core|cluster]" then older L2 cache
> > topology will be overrided by the new topology setting.
> Currently x-l2-cache-topo only defines the share level *globally*.

Yes, it will be set for all CPUs.

> I'm thinking about how we can make the property more powerful so that it
> can specify which CPUs share l2 at the core level and which CPUs share
> l2 at the cluster level.
> 
> What would Intel's Hybrid CPUs do? Determine whether the l2 share level
> is core or cluster according to the CPU core type (Atom or Core)?
> Meanwhile ARM does not have the core type concept, but does have CPUs
> whose l2 is shared at different levels in the same system.

For example, on Alder Lake each "core" has its own L2 and every 4 "atom"s
share 1 L2. For this case, we can set the topology as:

cluster0 has 1 "core" and cluster1 has 4 "atom"s. Then set L2 to be shared
at the cluster level.

Since cluster0 has only 1 "core"-type core, L2 per "core" still works.

Not sure if this idea can be applied to ARM?
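
A rough worked example of what this means for CPUID.04H, going by my
reading of max_processor_ids_for_cache() in this series, so treat the
numbers as a sketch:

  4 cores per cluster, 1 thread per core:
    core-level L2:    num_ids = 1 << apicid_core_offset()   = 1  ->  EAX[25:14] = 0
    cluster-level L2: num_ids = 1 << apicid_module_offset() = 4  ->  EAX[25:14] = 3

A cluster that only contains 1 core then simply leaves most of that ID
range unused, which is fine because the CPUID field is a maximum count of
addressable IDs.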

> 
> Thanks,
> Yanan
> > Here we expose to user "cluster" instead of "module", to be consistent
> > with "cluster-id" naming.
> > 
> > Since CPUID.04H is used by intel CPUs, this property is available on
> > intel CPUs as for now.
> > 
> > When necessary, it can be extended to CPUID.8000001DH for amd CPUs.
> > 
> > Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
> > ---
> >   target/i386/cpu.c | 33 ++++++++++++++++++++++++++++++++-
> >   target/i386/cpu.h |  2 ++
> >   2 files changed, 34 insertions(+), 1 deletion(-)
> > 
> > diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> > index 5816dc99b1d4..cf84c720a431 100644
> > --- a/target/i386/cpu.c
> > +++ b/target/i386/cpu.c
> > @@ -240,12 +240,15 @@ static uint32_t max_processor_ids_for_cache(CPUCacheInfo *cache,
> >       case CORE:
> >           num_ids = 1 << apicid_core_offset(topo_info);
> >           break;
> > +    case MODULE:
> > +        num_ids = 1 << apicid_module_offset(topo_info);
> > +        break;
> >       case DIE:
> >           num_ids = 1 << apicid_die_offset(topo_info);
> >           break;
> >       default:
> >           /*
> > -         * Currently there is no use case for SMT, MODULE and PACKAGE, so use
> > +         * Currently there is no use case for SMT and PACKAGE, so use
> >            * assert directly to facilitate debugging.
> >            */
> >           g_assert_not_reached();
> > @@ -6633,6 +6636,33 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp)
> >           env->cache_info_amd.l3_cache = &legacy_l3_cache;
> >       }
> > +    if (cpu->l2_cache_topo_level) {
> > +        /*
> > +         * FIXME: Currently only supports changing CPUID[4] (for intel), and
> > +         * will support changing CPUID[0x8000001D] when necessary.
> > +         */
> > +        if (!IS_INTEL_CPU(env)) {
> > +            error_setg(errp, "only intel cpus supports x-l2-cache-topo");
> > +            return;
> > +        }
> > +
> > +        if (!strcmp(cpu->l2_cache_topo_level, "core")) {
> > +            env->cache_info_cpuid4.l2_cache->share_level = CORE;
> > +        } else if (!strcmp(cpu->l2_cache_topo_level, "cluster")) {
> > +            /*
> > +             * We expose to users "cluster" instead of "module", to be
> > +             * consistent with "cluster-id" naming.
> > +             */
> > +            env->cache_info_cpuid4.l2_cache->share_level = MODULE;
> > +        } else {
> > +            error_setg(errp,
> > +                       "x-l2-cache-topo doesn't support '%s', "
> > +                       "and it only supports 'core' or 'cluster'",
> > +                       cpu->l2_cache_topo_level);
> > +            return;
> > +        }
> > +    }
> > +
> >   #ifndef CONFIG_USER_ONLY
> >       MachineState *ms = MACHINE(qdev_get_machine());
> >       qemu_register_reset(x86_cpu_machine_reset_cb, cpu);
> > @@ -7135,6 +7165,7 @@ static Property x86_cpu_properties[] = {
> >                        false),
> >       DEFINE_PROP_BOOL("x-intel-pt-auto-level", X86CPU, intel_pt_auto_level,
> >                        true),
> > +    DEFINE_PROP_STRING("x-l2-cache-topo", X86CPU, l2_cache_topo_level),
> >       DEFINE_PROP_END_OF_LIST()
> >   };
> > diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> > index 5a955431f759..aa7e96c586c7 100644
> > --- a/target/i386/cpu.h
> > +++ b/target/i386/cpu.h
> > @@ -1987,6 +1987,8 @@ struct ArchCPU {
> >       int32_t thread_id;
> >       int32_t hv_max_vps;
> > +
> > +    char *l2_cache_topo_level;
> >   };
>
Zhao Liu Feb. 17, 2023, 7:26 a.m. UTC | #2
On Fri, Feb 17, 2023 at 12:07:01PM +0800, wangyanan (Y) wrote:
> Date: Fri, 17 Feb 2023 12:07:01 +0800
> From: "wangyanan (Y)" <wangyanan55@huawei.com>
> Subject: Re: [PATCH RESEND 18/18] i386: Add new property to control L2
>  cache topo in CPUID.04H
> 
> On 2023/2/17 11:35, Zhao Liu wrote:
> > On Thu, Feb 16, 2023 at 09:14:54PM +0800, wangyanan (Y) wrote:
> > > Date: Thu, 16 Feb 2023 21:14:54 +0800
> > > From: "wangyanan (Y)" <wangyanan55@huawei.com>
> > > Subject: Re: [PATCH RESEND 18/18] i386: Add new property to control L2
> > >   cache topo in CPUID.04H
> > > 
> > > On 2023/2/13 17:36, Zhao Liu wrote:
> > > > From: Zhao Liu <zhao1.liu@intel.com>
> > > > 
> > > > The property x-l2-cache-topo will be used to change the L2 cache
> > > > topology in CPUID.04H.
> > > > 
> > > > Now it allows user to set the L2 cache is shared in core level or
> > > > cluster level.
> > > > 
> > > > If user passes "-cpu x-l2-cache-topo=[core|cluster]" then older L2 cache
> > > > topology will be overrided by the new topology setting.
> > > Currently x-l2-cache-topo only defines the share level *globally*.
> > Yes, will set for all CPUs.
> > 
> > > I'm thinking how we can make the property more powerful so that it
> > > can specify which CPUs share l2 on core level and which CPUs share
> > > l2 on cluster level.
> > > 
> > > What would Intel's Hybrid CPUs do? Determine the l2 share level
> > > is core or cluster according to the CPU core type (Atom or Core)?
> > > While ARM does not have the core type concept but have CPUs
> > > that l2 is shared on different levels in the same system.
> > For example, Alderlake's "core" shares 1 L2 per core and every 4 "atom"s
> > share 1 L2. For this case, we can set the topology as:
> > 
> > cluster0 has 1 "core" and cluster1 has 4 "atom". Then set L2 shared on
> > cluster level.
> > 
> > Since cluster0 has only 1 "core" type core, then L2 per "core" works.
> > 
> > Not sure if this idea can be applied to arm?
> For a CPU topology where we have 2 clusters in total: the 2 cores in
> cluster0 have their own L1/L2 cache with 2 threads in each core, while the
> 4 cores in cluster1 share one L2 cache with 1 thread in each core. The
> global way does not work well.
> 
> What about defining something general, which looks like -numa config:
> -cache-topo cache=l2, share_level="core", cpus='0-3'
> -cache-topo cache=l2, share_level="cluster", cpus='4-7'

Hi Yanan, here it may be necessary to check, against the specific cpu
topology, whether the cpu indexes set in "cpus" are reasonable.

For example, suppose core0 has 2 CPUs (cpu0 and cpu1) and core1 has 2 CPUs
(cpu2 and cpu3), and we set l2 as:

-cache-topo cache=l2, share_level="core", cpus='0-2'
-cache-topo cache=l2, share_level="core", cpus='3'

Whether this command is legal depends on the meaning we give to the
parameter "cpu":
1. If "cpu" means all the cpus that share the cache set in this command,
then this command should fail, since cpu2 and cpu3 are in the same core.

2. If "cpu" means the affected cpus, then this command should find the
cores they belong to according to the cpu topology, and set L2 for those
cores. This command may succeed.

What about removing share_level and asking "cpu" to mean all the sharing
cpus, to avoid checking the cpu topology?

Then the above example should be:

-cache-topo cache=l2, cpus='0-1'
-cache-topo cache=l2, cpus='2-3'

This decouples cpu topology and cache topology completely and is very
simple. In this way, determining the cache by specifying the sharing cpus
is similar to how x86 CPUID.04H describes cache sharing.

But the price of simplicity is that we may build a cache topology that
doesn't match reality.
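
For instance (a made-up pair of entries just to illustrate the risk), with
the core0/core1 example above nothing would stop a user from writing:

-cache-topo cache=l2, cpus='0,2'
-cache-topo cache=l2, cpus='1,3'

i.e. an L2 shared by two cpus that sit in different cores, which no real
APIC-ID-based cache topology could describe.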

But if the cache topology must be set based on the cpu topology, another
way is to consider specifying the cache when setting the topology
structure, which could be based on @Daniel's format [1]:

  -object cpu-socket,id=sock0,cache=l3
  -object cpu-die,id=die0,parent=sock0
  -object cpu-cluster,id=cluster0,parent=die0
  -object cpu-cluster,id=cluster1,parent=die0,cache=l2
  -object x86-cpu-model-core,id=cpu0,parent=cluster0,threads=2,cache=l1i,l1d,l2
  -object x86-cpu-model-atom,id=cpu1,parent=cluster1,cache=l1i,l1d
  -object x86-cpu-model-atom,id=cpu2,parent=cluster1,cache=l1i,l1d

Then from this command, cpu0 has its own l2, and cpu1 and cpu2 share an l2
(the l2 is inserted in cluster1).

This whole process is like designing or building a CPU: the user decides
where to insert the caches. The advantage is that it is intuitive and the
result is easier to sanity-check. But it is more complicated.

(Also CC @Daniel for comments).

[1]: https://lists.gnu.org/archive/html/qemu-devel/2023-02/msg03320.html

Thanks,
Zhao

> If we ever want to support custom share-level for L3/L1, no extra work
> is needed. We can also extend the CLI to support custom cache size, etc.
>
> If you think this is a good idea to explore, I can work on it, since I'm
> planning to add cache topology support for ARM.
> 
> Thanks,
> Yanan
> > > Thanks,
> > > Yanan
> > > > Here we expose to user "cluster" instead of "module", to be consistent
> > > > with "cluster-id" naming.
> > > > 
> > > > Since CPUID.04H is used by intel CPUs, this property is available on
> > > > intel CPUs as for now.
> > > > 
> > > > When necessary, it can be extended to CPUID.8000001DH for amd CPUs.
> > > > 
> > > > Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
> > > > ---
> > > >    target/i386/cpu.c | 33 ++++++++++++++++++++++++++++++++-
> > > >    target/i386/cpu.h |  2 ++
> > > >    2 files changed, 34 insertions(+), 1 deletion(-)
> > > > 
> > > > diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> > > > index 5816dc99b1d4..cf84c720a431 100644
> > > > --- a/target/i386/cpu.c
> > > > +++ b/target/i386/cpu.c
> > > > @@ -240,12 +240,15 @@ static uint32_t max_processor_ids_for_cache(CPUCacheInfo *cache,
> > > >        case CORE:
> > > >            num_ids = 1 << apicid_core_offset(topo_info);
> > > >            break;
> > > > +    case MODULE:
> > > > +        num_ids = 1 << apicid_module_offset(topo_info);
> > > > +        break;
> > > >        case DIE:
> > > >            num_ids = 1 << apicid_die_offset(topo_info);
> > > >            break;
> > > >        default:
> > > >            /*
> > > > -         * Currently there is no use case for SMT, MODULE and PACKAGE, so use
> > > > +         * Currently there is no use case for SMT and PACKAGE, so use
> > > >             * assert directly to facilitate debugging.
> > > >             */
> > > >            g_assert_not_reached();
> > > > @@ -6633,6 +6636,33 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp)
> > > >            env->cache_info_amd.l3_cache = &legacy_l3_cache;
> > > >        }
> > > > +    if (cpu->l2_cache_topo_level) {
> > > > +        /*
> > > > +         * FIXME: Currently only supports changing CPUID[4] (for intel), and
> > > > +         * will support changing CPUID[0x8000001D] when necessary.
> > > > +         */
> > > > +        if (!IS_INTEL_CPU(env)) {
> > > > +            error_setg(errp, "only intel cpus supports x-l2-cache-topo");
> > > > +            return;
> > > > +        }
> > > > +
> > > > +        if (!strcmp(cpu->l2_cache_topo_level, "core")) {
> > > > +            env->cache_info_cpuid4.l2_cache->share_level = CORE;
> > > > +        } else if (!strcmp(cpu->l2_cache_topo_level, "cluster")) {
> > > > +            /*
> > > > +             * We expose to users "cluster" instead of "module", to be
> > > > +             * consistent with "cluster-id" naming.
> > > > +             */
> > > > +            env->cache_info_cpuid4.l2_cache->share_level = MODULE;
> > > > +        } else {
> > > > +            error_setg(errp,
> > > > +                       "x-l2-cache-topo doesn't support '%s', "
> > > > +                       "and it only supports 'core' or 'cluster'",
> > > > +                       cpu->l2_cache_topo_level);
> > > > +            return;
> > > > +        }
> > > > +    }
> > > > +
> > > >    #ifndef CONFIG_USER_ONLY
> > > >        MachineState *ms = MACHINE(qdev_get_machine());
> > > >        qemu_register_reset(x86_cpu_machine_reset_cb, cpu);
> > > > @@ -7135,6 +7165,7 @@ static Property x86_cpu_properties[] = {
> > > >                         false),
> > > >        DEFINE_PROP_BOOL("x-intel-pt-auto-level", X86CPU, intel_pt_auto_level,
> > > >                         true),
> > > > +    DEFINE_PROP_STRING("x-l2-cache-topo", X86CPU, l2_cache_topo_level),
> > > >        DEFINE_PROP_END_OF_LIST()
> > > >    };
> > > > diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> > > > index 5a955431f759..aa7e96c586c7 100644
> > > > --- a/target/i386/cpu.h
> > > > +++ b/target/i386/cpu.h
> > > > @@ -1987,6 +1987,8 @@ struct ArchCPU {
> > > >        int32_t thread_id;
> > > >        int32_t hv_max_vps;
> > > > +
> > > > +    char *l2_cache_topo_level;
> > > >    };
>
Zhao Liu Feb. 20, 2023, 2:49 a.m. UTC | #3
On Fri, Feb 17, 2023 at 05:08:31PM +0800, wangyanan (Y) wrote:
> Date: Fri, 17 Feb 2023 17:08:31 +0800
> From: "wangyanan (Y)" <wangyanan55@huawei.com>
> Subject: Re: [PATCH RESEND 18/18] i386: Add new property to control L2
>  cache topo in CPUID.04H
> 
> On 2023/2/17 15:26, Zhao Liu wrote:
> > On Fri, Feb 17, 2023 at 12:07:01PM +0800, wangyanan (Y) wrote:
> > > Date: Fri, 17 Feb 2023 12:07:01 +0800
> > > From: "wangyanan (Y)" <wangyanan55@huawei.com>
> > > Subject: Re: [PATCH RESEND 18/18] i386: Add new property to control L2
> > >   cache topo in CPUID.04H
> > > 
> > > On 2023/2/17 11:35, Zhao Liu wrote:
> > > > On Thu, Feb 16, 2023 at 09:14:54PM +0800, wangyanan (Y) wrote:
> > > > > Date: Thu, 16 Feb 2023 21:14:54 +0800
> > > > > From: "wangyanan (Y)" <wangyanan55@huawei.com>
> > > > > Subject: Re: [PATCH RESEND 18/18] i386: Add new property to control L2
> > > > >    cache topo in CPUID.04H
> > > > > 
> > > > > On 2023/2/13 17:36, Zhao Liu wrote:
> > > > > > From: Zhao Liu <zhao1.liu@intel.com>
> > > > > > 
> > > > > > The property x-l2-cache-topo will be used to change the L2 cache
> > > > > > topology in CPUID.04H.
> > > > > > 
> > > > > > Now it allows user to set the L2 cache is shared in core level or
> > > > > > cluster level.
> > > > > > 
> > > > > > If user passes "-cpu x-l2-cache-topo=[core|cluster]" then older L2 cache
> > > > > > topology will be overrided by the new topology setting.
> > > > > Currently x-l2-cache-topo only defines the share level *globally*.
> > > > Yes, will set for all CPUs.
> > > > 
> > > > > I'm thinking how we can make the property more powerful so that it
> > > > > can specify which CPUs share l2 on core level and which CPUs share
> > > > > l2 on cluster level.
> > > > > 
> > > > > What would Intel's Hybrid CPUs do? Determine the l2 share level
> > > > > is core or cluster according to the CPU core type (Atom or Core)?
> > > > > While ARM does not have the core type concept but have CPUs
> > > > > that l2 is shared on different levels in the same system.
> > > > For example, Alderlake's "core" shares 1 L2 per core and every 4 "atom"s
> > > > share 1 L2. For this case, we can set the topology as:
> > > > 
> > > > cluster0 has 1 "core" and cluster1 has 4 "atom". Then set L2 shared on
> > > > cluster level.
> > > > 
> > > > Since cluster0 has only 1 "core" type core, then L2 per "core" works.
> > > > 
> > > > Not sure if this idea can be applied to arm?
> > > For a CPU topopoly where we have 2 clusters totally, 2 cores in cluster0
> > > have their own L1/L2 cache and 2 threads in each core, 4 cores in cluster1
> > > share one L2 cache and 1 thread in each core. The global way does not
> > > work well.
> > > 
> > > What about defining something general, which looks like -numa config:
> > > -cache-topo cache=l2, share_level="core", cpus='0-3'
> > > -cache-topo cache=l2, share_level="cluster", cpus='4-7'
> > Hi Yanan, here it may be necessary to check whether the cpu index set
> > in "cpus" is reasonable through the specific cpu topology.
> Yes, the validity of the cache topo configs from the users should be
> checked in machine_parse_cache_topo (if we ever have this func).
> It's not a big deal, we always need the validity checks.

I guess that verification needs to build up the full cpu topology, as
done in another hybrid RFC. So, should the cpu-topology.h related
patches in that RFC be split out and sent first?

> 
> In summary:
> 1. The same cpu cannot appear in different "cpus" lists.
> 2. The combination of all the "cpus" lists should cover *exactly* all the
> CPUs in the machine.
> 3. Most importantly, cpus in the same cluster must be set with the
> same cache "share_level" (core or cluster), and cpus in the same core
> must also be set with the same cache "share_level".

Got it, thx.
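
Just to check that I understand these rules, a very rough sketch of what
such a check might look like (everything below is hypothetical and only
for illustration: machine_parse_cache_topo(), CacheTopoOption and the
option layout don't exist yet):

/* Hypothetical sketch only, not real QEMU code. */
#include "qemu/osdep.h"
#include "qemu/bitmap.h"
#include "qapi/error.h"
#include "hw/boards.h"

typedef struct CacheTopoOption {
    const char *cache;        /* e.g. "l2" */
    const char *share_level;  /* "core" or "cluster" */
    unsigned long *cpus;      /* bitmap of the cpu indexes from cpus= */
} CacheTopoOption;

static bool cache_topo_check(MachineState *ms, CacheTopoOption *opts,
                             int nr_opts, Error **errp)
{
    unsigned int max_cpus = ms->smp.max_cpus;
    g_autofree unsigned long *seen = bitmap_new(max_cpus);
    int i;

    for (i = 0; i < nr_opts; i++) {
        /* Rule 1: a cpu must not appear in two different "cpus" lists. */
        if (bitmap_intersects(seen, opts[i].cpus, max_cpus)) {
            error_setg(errp, "cpu listed in more than one -cache-topo entry");
            return false;
        }
        bitmap_or(seen, seen, opts[i].cpus, max_cpus);
        /*
         * Rule 3 (all cpus of one core/cluster get the same "share_level")
         * is the part that needs the full cpu topology to be built first,
         * so that a cpu index can be mapped back to its core/cluster.
         */
    }

    /* Rule 2: together, the lists must cover exactly all the cpus. */
    if (!bitmap_full(seen, max_cpus)) {
        error_setg(errp, "some cpus are not covered by any -cache-topo entry");
        return false;
    }
    return true;
}

Rules 1 and 2 only need a bitmap, but rule 3 is what makes building the
cpu topology necessary, which is why I asked about splitting out the
cpu-topology.h patches.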

> > For example, core0 has 2 CPUs: cpu0 and cpu1, and core1 has 2 CPUs: cpu2
> > and cpu3, then set l2 as:
> > 
> > -cache-topo cache=l2, share_level="core", cpus='0-2'
> > -cache-topo cache=l2, share_level="core", cpus='3'
> > 
> > Whether this command is legal depends on the meaning we give to the
> > parameter "cpu":
> s/cpus/cpu.
> It means all the affected CPUs, e.g., the second case.
> > 1. If "cpu" means all cpus share the cache set in this command, then
> > this command should fail since cpu2 and cpu3 are in a core.
> > 
> > 2. If "cpu" means the affected cpus, then this command should find the
> > cores they belong to according to the cpu topology, and set L2 for those
> > cores. This command may return success.
> > 
> > What about removing share_level and ask "cpu" to mean all the sharing
> > cpus to avoid checking the cpu topology?
> > 
> > Then the above example should be:
> > 
> > -cache-topo cache=l2, cpus='0-1'
> > -cache-topo cache=l2, cpus='2-3'
> Sorry, I don't understand how we will know the cache share_level of
> cpus '0-1' or '2-3'. What will the CLIs look like if we change the
> CLIs below by removing the "share_level" params?
> 
> -cache-topo cache=l2, share_level="core", cpus='0-3'
> -cache-topo cache=l2, share_level="cluster", cpus='4-7'
> > This decouples cpu topology and cache topology completely and very
> > simple. In this way, determining the cache by specifying the shared cpu
> > is similar to that in x86 CPUID.04H.
> > 
> > But the price of simplicity is we may build a cache topology that doesn't
> > match the reality.
> > 
> > But if the cache topology must be set based on the cpu topology, another
> > way is consider specifying the cache when setting the topology
> > structure, which can be based on @Daniel's format [1]:
> > 
> >    -object cpu-socket,id=sock0,cache=l3
> >    -object cpu-die,id=die0,parent=sock0
> >    -object cpu-cluster,id=cluster0,parent=die0
> >    -object cpu-cluster,id=cluster1,parent=die0,cache=l2
> >    -object x86-cpu-model-core,id=cpu0,parent=cluster0,threads=2,cache=l1i,lid,l2
> >    -object x86-cpu-model-atom,id=cpu1,parent=cluster1,cache=l1i,lid
> >    -object x86-cpu-model-atom,id=cpu2,parent=cluster1,cache=l1i,l1d
> > 
> > Then from this command, cpu0 has a l2, and cpu1 and cpu2 shares a l2
> > (the l2 is inserted in cluster1).
> > 
> > This whole process is like when designing or building a CPU, the user
> > decides where to insert the caches. The advantage is that it is easier
> > to verify the rationality and is intuitive. But complicated.
> Yeah, this is also a way.
> Most of the concern is that it will not be easy/readable to extend the
> cache configs, for example if we ever want to support custom cache size,
> cache type or other cache properties in the future. And yes, it will also
> complicate the -objects by making them huge.
>
> I think keeping cache and cpu configs decoupled will leave simplicity
> to the users, just like we currently keep the numa resources config
> separate from the cpu topology.
>
> On the other hand, -cache-topo is not just for hybrid, it's also for
> current smp, which makes it inappropriate to bind -cache-topo
> to the hybrid case. For example, "-cache-topo name=l2, share_level=cluster"
> will indicate that the l2 cache is shared at the cluster level globally.
> And this is what "x-l2-cache-topo" in this patch does.
> 
> Thanks,
> Yanan
> > (Also CC @Daniel for comments).
> > 
> > [1]: https://lists.gnu.org/archive/html/qemu-devel/2023-02/msg03320.html
> > 
> > Thanks,
> > Zhao
> > 
> > > If we ever want to support custom share-level for L3/L1, no extra work
> > > is needed. We can also extend the CLI to support custom cache size, etc..
> > > 
> > > If you thinks this a good idea to explore, I can work on it, since I'm
> > > planing to add support cache topology for ARM.
> > > 
> > > Thanks,
> > > Yanan
> > > > > Thanks,
> > > > > Yanan
> > > > > > Here we expose to user "cluster" instead of "module", to be consistent
> > > > > > with "cluster-id" naming.
> > > > > > 
> > > > > > Since CPUID.04H is used by intel CPUs, this property is available on
> > > > > > intel CPUs as for now.
> > > > > > 
> > > > > > When necessary, it can be extended to CPUID.8000001DH for amd CPUs.
> > > > > > 
> > > > > > Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
> > > > > > ---
> > > > > >     target/i386/cpu.c | 33 ++++++++++++++++++++++++++++++++-
> > > > > >     target/i386/cpu.h |  2 ++
> > > > > >     2 files changed, 34 insertions(+), 1 deletion(-)
> > > > > > 
> > > > > > diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> > > > > > index 5816dc99b1d4..cf84c720a431 100644
> > > > > > --- a/target/i386/cpu.c
> > > > > > +++ b/target/i386/cpu.c
> > > > > > @@ -240,12 +240,15 @@ static uint32_t max_processor_ids_for_cache(CPUCacheInfo *cache,
> > > > > >         case CORE:
> > > > > >             num_ids = 1 << apicid_core_offset(topo_info);
> > > > > >             break;
> > > > > > +    case MODULE:
> > > > > > +        num_ids = 1 << apicid_module_offset(topo_info);
> > > > > > +        break;
> > > > > >         case DIE:
> > > > > >             num_ids = 1 << apicid_die_offset(topo_info);
> > > > > >             break;
> > > > > >         default:
> > > > > >             /*
> > > > > > -         * Currently there is no use case for SMT, MODULE and PACKAGE, so use
> > > > > > +         * Currently there is no use case for SMT and PACKAGE, so use
> > > > > >              * assert directly to facilitate debugging.
> > > > > >              */
> > > > > >             g_assert_not_reached();
> > > > > > @@ -6633,6 +6636,33 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp)
> > > > > >             env->cache_info_amd.l3_cache = &legacy_l3_cache;
> > > > > >         }
> > > > > > +    if (cpu->l2_cache_topo_level) {
> > > > > > +        /*
> > > > > > +         * FIXME: Currently only supports changing CPUID[4] (for intel), and
> > > > > > +         * will support changing CPUID[0x8000001D] when necessary.
> > > > > > +         */
> > > > > > +        if (!IS_INTEL_CPU(env)) {
> > > > > > +            error_setg(errp, "only intel cpus supports x-l2-cache-topo");
> > > > > > +            return;
> > > > > > +        }
> > > > > > +
> > > > > > +        if (!strcmp(cpu->l2_cache_topo_level, "core")) {
> > > > > > +            env->cache_info_cpuid4.l2_cache->share_level = CORE;
> > > > > > +        } else if (!strcmp(cpu->l2_cache_topo_level, "cluster")) {
> > > > > > +            /*
> > > > > > +             * We expose to users "cluster" instead of "module", to be
> > > > > > +             * consistent with "cluster-id" naming.
> > > > > > +             */
> > > > > > +            env->cache_info_cpuid4.l2_cache->share_level = MODULE;
> > > > > > +        } else {
> > > > > > +            error_setg(errp,
> > > > > > +                       "x-l2-cache-topo doesn't support '%s', "
> > > > > > +                       "and it only supports 'core' or 'cluster'",
> > > > > > +                       cpu->l2_cache_topo_level);
> > > > > > +            return;
> > > > > > +        }
> > > > > > +    }
> > > > > > +
> > > > > >     #ifndef CONFIG_USER_ONLY
> > > > > >         MachineState *ms = MACHINE(qdev_get_machine());
> > > > > >         qemu_register_reset(x86_cpu_machine_reset_cb, cpu);
> > > > > > @@ -7135,6 +7165,7 @@ static Property x86_cpu_properties[] = {
> > > > > >                          false),
> > > > > >         DEFINE_PROP_BOOL("x-intel-pt-auto-level", X86CPU, intel_pt_auto_level,
> > > > > >                          true),
> > > > > > +    DEFINE_PROP_STRING("x-l2-cache-topo", X86CPU, l2_cache_topo_level),
> > > > > >         DEFINE_PROP_END_OF_LIST()
> > > > > >     };
> > > > > > diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> > > > > > index 5a955431f759..aa7e96c586c7 100644
> > > > > > --- a/target/i386/cpu.h
> > > > > > +++ b/target/i386/cpu.h
> > > > > > @@ -1987,6 +1987,8 @@ struct ArchCPU {
> > > > > >         int32_t thread_id;
> > > > > >         int32_t hv_max_vps;
> > > > > > +
> > > > > > +    char *l2_cache_topo_level;
> > > > > >     };
>
Zhao Liu Feb. 20, 2023, 2:54 a.m. UTC | #4
On Fri, Feb 17, 2023 at 11:45:58AM +0800, wangyanan (Y) wrote:
> Date: Fri, 17 Feb 2023 11:45:58 +0800
> From: "wangyanan (Y)" <wangyanan55@huawei.com>
> Subject: Re: [PATCH RESEND 18/18] i386: Add new property to control L2
>  cache topo in CPUID.04H
> 
> On 2023/2/17 11:35, Zhao Liu wrote:
> > On Thu, Feb 16, 2023 at 09:14:54PM +0800, wangyanan (Y) wrote:
> > > Date: Thu, 16 Feb 2023 21:14:54 +0800
> > > From: "wangyanan (Y)" <wangyanan55@huawei.com>
> > > Subject: Re: [PATCH RESEND 18/18] i386: Add new property to control L2
> > >   cache topo in CPUID.04H
> > > 
> > > On 2023/2/13 17:36, Zhao Liu wrote:
> > > > From: Zhao Liu <zhao1.liu@intel.com>
> > > > 
> > > > The property x-l2-cache-topo will be used to change the L2 cache
> > > > topology in CPUID.04H.
> > > > 
> > > > Now it allows user to set the L2 cache is shared in core level or
> > > > cluster level.
> > > > 
> > > > If user passes "-cpu x-l2-cache-topo=[core|cluster]" then older L2 cache
> > > > topology will be overrided by the new topology setting.
> > > Currently x-l2-cache-topo only defines the share level *globally*.
> > Yes, will set for all CPUs.
> > 
> > > I'm thinking how we can make the property more powerful so that it
> > > can specify which CPUs share l2 on core level and which CPUs share
> > > l2 on cluster level.
> > > 
> > > What would Intel's Hybrid CPUs do? Determine the l2 share level
> > > is core or cluster according to the CPU core type (Atom or Core)?
> > > While ARM does not have the core type concept but have CPUs
> > > that l2 is shared on different levels in the same system.
> > For example, Alderlake's "core" shares 1 L2 per core and every 4 "atom"s
> > share 1 L2. For this case, we can set the topology as:
> > 
> > cluster0 has 1 "core" and cluster1 has 4 "atom". Then set L2 shared on
> > cluster level.
> > 
> > Since cluster0 has only 1 "core" type core, then L2 per "core" works.
> This puts a restriction on the users that cluster0 must have exactly *1*
> core-type core.
> What if we set 2 vCores in cluster0 and 4 vCores in cluster1, and bind the
> cores in cluster0 to 2 core-type pCores and the cores in cluster1 to 4
> atom-type pCores? I think this is a necessary use case too.

At present, the cache topology level and the core type are not bound to
each other, so the cache topology level can still be adjusted for any vCores.
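
For example (the pinning details here are only a sketch):

-smp 6,sockets=1,dies=1,clusters=2,cores=3,threads=1
-cpu host,x-l2-cache-topo=cluster

tells the guest that each 3-core cluster shares an L2, no matter whether
the 6 vCPUs are later pinned to P-cores or E-cores on the host (e.g. via
libvirt's vcpupin); the property only changes the share_level that
CPUID.04H reports.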

> > Not sure if this idea can be applied to arm?
> > 
> > > Thanks,
> > > Yanan
> > > > Here we expose to user "cluster" instead of "module", to be consistent
> > > > with "cluster-id" naming.
> > > > 
> > > > Since CPUID.04H is used by intel CPUs, this property is available on
> > > > intel CPUs as for now.
> > > > 
> > > > When necessary, it can be extended to CPUID.8000001DH for amd CPUs.
> > > > 
> > > > Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
> > > > ---
> > > >    target/i386/cpu.c | 33 ++++++++++++++++++++++++++++++++-
> > > >    target/i386/cpu.h |  2 ++
> > > >    2 files changed, 34 insertions(+), 1 deletion(-)
> > > > 
> > > > diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> > > > index 5816dc99b1d4..cf84c720a431 100644
> > > > --- a/target/i386/cpu.c
> > > > +++ b/target/i386/cpu.c
> > > > @@ -240,12 +240,15 @@ static uint32_t max_processor_ids_for_cache(CPUCacheInfo *cache,
> > > >        case CORE:
> > > >            num_ids = 1 << apicid_core_offset(topo_info);
> > > >            break;
> > > > +    case MODULE:
> > > > +        num_ids = 1 << apicid_module_offset(topo_info);
> > > > +        break;
> > > >        case DIE:
> > > >            num_ids = 1 << apicid_die_offset(topo_info);
> > > >            break;
> > > >        default:
> > > >            /*
> > > > -         * Currently there is no use case for SMT, MODULE and PACKAGE, so use
> > > > +         * Currently there is no use case for SMT and PACKAGE, so use
> > > >             * assert directly to facilitate debugging.
> > > >             */
> > > >            g_assert_not_reached();
> > > > @@ -6633,6 +6636,33 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp)
> > > >            env->cache_info_amd.l3_cache = &legacy_l3_cache;
> > > >        }
> > > > +    if (cpu->l2_cache_topo_level) {
> > > > +        /*
> > > > +         * FIXME: Currently only supports changing CPUID[4] (for intel), and
> > > > +         * will support changing CPUID[0x8000001D] when necessary.
> > > > +         */
> > > > +        if (!IS_INTEL_CPU(env)) {
> > > > +            error_setg(errp, "only intel cpus supports x-l2-cache-topo");
> > > > +            return;
> > > > +        }
> > > > +
> > > > +        if (!strcmp(cpu->l2_cache_topo_level, "core")) {
> > > > +            env->cache_info_cpuid4.l2_cache->share_level = CORE;
> > > > +        } else if (!strcmp(cpu->l2_cache_topo_level, "cluster")) {
> > > > +            /*
> > > > +             * We expose to users "cluster" instead of "module", to be
> > > > +             * consistent with "cluster-id" naming.
> > > > +             */
> > > > +            env->cache_info_cpuid4.l2_cache->share_level = MODULE;
> > > > +        } else {
> > > > +            error_setg(errp,
> > > > +                       "x-l2-cache-topo doesn't support '%s', "
> > > > +                       "and it only supports 'core' or 'cluster'",
> > > > +                       cpu->l2_cache_topo_level);
> > > > +            return;
> > > > +        }
> > > > +    }
> > > > +
> > > >    #ifndef CONFIG_USER_ONLY
> > > >        MachineState *ms = MACHINE(qdev_get_machine());
> > > >        qemu_register_reset(x86_cpu_machine_reset_cb, cpu);
> > > > @@ -7135,6 +7165,7 @@ static Property x86_cpu_properties[] = {
> > > >                         false),
> > > >        DEFINE_PROP_BOOL("x-intel-pt-auto-level", X86CPU, intel_pt_auto_level,
> > > >                         true),
> > > > +    DEFINE_PROP_STRING("x-l2-cache-topo", X86CPU, l2_cache_topo_level),
> > > >        DEFINE_PROP_END_OF_LIST()
> > > >    };
> > > > diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> > > > index 5a955431f759..aa7e96c586c7 100644
> > > > --- a/target/i386/cpu.h
> > > > +++ b/target/i386/cpu.h
> > > > @@ -1987,6 +1987,8 @@ struct ArchCPU {
> > > >        int32_t thread_id;
> > > >        int32_t hv_max_vps;
> > > > +
> > > > +    char *l2_cache_topo_level;
> > > >    };
>

Patch

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 5816dc99b1d4..cf84c720a431 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -240,12 +240,15 @@  static uint32_t max_processor_ids_for_cache(CPUCacheInfo *cache,
     case CORE:
         num_ids = 1 << apicid_core_offset(topo_info);
         break;
+    case MODULE:
+        num_ids = 1 << apicid_module_offset(topo_info);
+        break;
     case DIE:
         num_ids = 1 << apicid_die_offset(topo_info);
         break;
     default:
         /*
-         * Currently there is no use case for SMT, MODULE and PACKAGE, so use
+         * Currently there is no use case for SMT and PACKAGE, so use
          * assert directly to facilitate debugging.
          */
         g_assert_not_reached();
@@ -6633,6 +6636,33 @@  static void x86_cpu_realizefn(DeviceState *dev, Error **errp)
         env->cache_info_amd.l3_cache = &legacy_l3_cache;
     }
 
+    if (cpu->l2_cache_topo_level) {
+        /*
+         * FIXME: Currently only supports changing CPUID[4] (for intel), and
+         * will support changing CPUID[0x8000001D] when necessary.
+         */
+        if (!IS_INTEL_CPU(env)) {
+            error_setg(errp, "only intel cpus supports x-l2-cache-topo");
+            return;
+        }
+
+        if (!strcmp(cpu->l2_cache_topo_level, "core")) {
+            env->cache_info_cpuid4.l2_cache->share_level = CORE;
+        } else if (!strcmp(cpu->l2_cache_topo_level, "cluster")) {
+            /*
+             * We expose to users "cluster" instead of "module", to be
+             * consistent with "cluster-id" naming.
+             */
+            env->cache_info_cpuid4.l2_cache->share_level = MODULE;
+        } else {
+            error_setg(errp,
+                       "x-l2-cache-topo doesn't support '%s', "
+                       "and it only supports 'core' or 'cluster'",
+                       cpu->l2_cache_topo_level);
+            return;
+        }
+    }
+
 #ifndef CONFIG_USER_ONLY
     MachineState *ms = MACHINE(qdev_get_machine());
     qemu_register_reset(x86_cpu_machine_reset_cb, cpu);
@@ -7135,6 +7165,7 @@  static Property x86_cpu_properties[] = {
                      false),
     DEFINE_PROP_BOOL("x-intel-pt-auto-level", X86CPU, intel_pt_auto_level,
                      true),
+    DEFINE_PROP_STRING("x-l2-cache-topo", X86CPU, l2_cache_topo_level),
     DEFINE_PROP_END_OF_LIST()
 };
 
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 5a955431f759..aa7e96c586c7 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1987,6 +1987,8 @@  struct ArchCPU {
     int32_t thread_id;
 
     int32_t hv_max_vps;
+
+    char *l2_cache_topo_level;
 };