diff mbox series

[1/2] powerpc: Detect the presence of big-core with interleaved threads

Message ID 1526037444-22876-2-git-send-email-ego@linux.vnet.ibm.com (mailing list archive)
State Changes Requested
Headers show
Series powerpc: Scheduler optimization for POWER9 bigcores | expand

Commit Message

Gautham R Shenoy May 11, 2018, 11:17 a.m. UTC
From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>

A pair of IBM POWER9 SMT4 cores can be fused together to form a
big-core with 8 SMT threads. This can be discovered via the
"ibm,thread-groups" CPU property in the device tree, which indicates
which groups of threads share the L1 cache, translation cache and
instruction data flow. If there are multiple such groups of threads,
then the core is a big-core. The thread-ids of the threads of the
big-core can be obtained by interleaving the thread-ids of the
thread-groups (component small cores).

Eg: Threads in the pair of component SMT4 cores of an interleaved
big-core are numbered {0,2,4,6} and {1,3,5,7} respectively.

This patch introduces a function to check if a given device tree node
corresponding to a CPU node represents an interleaved big-core.

This function is invoked during the boot-up to detect the presence of
interleaved big-cores. The presence of such an interleaved big-core is
recorded in a global variable for later use.

Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/cputhreads.h |  8 +++--
 arch/powerpc/kernel/setup-common.c    | 63 +++++++++++++++++++++++++++++++++--
 2 files changed, 66 insertions(+), 5 deletions(-)

Comments

Michael Neuling May 14, 2018, 3:21 a.m. UTC | #1
Thanks for posting this... A couple of comments below.

On Fri, 2018-05-11 at 16:47 +0530, Gautham R. Shenoy wrote:
> From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
> 
> A pair of IBM POWER9 SMT4 cores can be fused together to form a
> big-core with 8 SMT threads. This can be discovered via the
> "ibm,thread-groups" CPU property in the device tree which will
> indicate which group of threads that share the L1 cache, translation
> cache and instruction data flow.  If there are multiple such group of
> threads, then the core is a big-core. The thread-ids of the threads of
> the big-core can be obtained by interleaving the thread-ids of the
> thread-groups (component small core).
> 
> Eg: Threads in the pair of component SMT4 cores of an interleaved
> big-core are numbered {0,2,4,6} and {1,3,5,7} respectively.
> 
> This patch introduces a function to check if a given device tree node
> corresponding to a CPU node represents an interleaved big-core.
> 
> This function is invoked during the boot-up to detect the presence of
> interleaved big-cores. The presence of such an interleaved big-core is
> recorded in a global variable for later use.
> 
> Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/cputhreads.h |  8 +++--
>  arch/powerpc/kernel/setup-common.c    | 63 +++++++++++++++++++++++++++++++++--
>  2 files changed, 66 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h
> index d71a909..b706f0a 100644
> --- a/arch/powerpc/include/asm/cputhreads.h
> +++ b/arch/powerpc/include/asm/cputhreads.h
> @@ -23,11 +23,13 @@
>  extern int threads_per_core;
>  extern int threads_per_subcore;
>  extern int threads_shift;
> +extern bool has_interleaved_big_core;
>  extern cpumask_t threads_core_mask;
>  #else
> -#define threads_per_core	1
> -#define threads_per_subcore	1
> -#define threads_shift		0
> +#define threads_per_core		1
> +#define threads_per_subcore		1
> +#define threads_shift			0
> +#define has_interleaved_big_core	0
>  #define threads_core_mask	(*get_cpu_mask(0))
>  #endif
>  
> diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
> index 0af5c11..884dff2 100644
> --- a/arch/powerpc/kernel/setup-common.c
> +++ b/arch/powerpc/kernel/setup-common.c
> @@ -408,10 +408,12 @@ void __init check_for_initrd(void)
>  #ifdef CONFIG_SMP
>  
>  int threads_per_core, threads_per_subcore, threads_shift;
> +bool has_interleaved_big_core;
>  cpumask_t threads_core_mask;
>  EXPORT_SYMBOL_GPL(threads_per_core);
>  EXPORT_SYMBOL_GPL(threads_per_subcore);
>  EXPORT_SYMBOL_GPL(threads_shift);
> +EXPORT_SYMBOL_GPL(has_interleaved_big_core);
>  EXPORT_SYMBOL_GPL(threads_core_mask);
>  
>  static void __init cpu_init_thread_core_maps(int tpc)
> @@ -436,8 +438,56 @@ static void __init cpu_init_thread_core_maps(int tpc)
>  	printk(KERN_DEBUG " (thread shift is %d)\n", threads_shift);
>  }
>  
> -
>  u32 *cpu_to_phys_id = NULL;
> +/*
> + * check_for_interleaved_big_core - Checks if the core represented by
> + *	 dn is a big-core whose threads are interleavings of the
> + *	 threads of the component small cores.
> + *
> + * @dn: device node corresponding to the core.
> + *
> + * Returns true if the core is a interleaved big-core.
> + * Returns false otherwise.
> + */
> +static inline bool check_for_interleaved_big_core(struct device_node *dn)
> +{
> +	int len, nr_groups, threads_per_group;
> +	const __be32 *thread_groups;
> +	__be32 *thread_list, *first_cpu_idx;
> +	int cur_cpu, next_cpu, i, j;
> +
> +	thread_groups = of_get_property(dn, "ibm,thread-groups", &len);
> +	if (!thread_groups)
> +		return false;

Can you document what this property looks like? Seems to be nr_groups,
threads_per_group, thread_list. Can you explain what each of these mean?

If we get configured with an SMT2 big-core (ie. two interleaved SMT1 normal
cores), will this code also work there?

> +
> +	nr_groups = be32_to_cpu(*(thread_groups + 1));
> +	if (nr_groups <= 1)
> +		return false;
> +
> +	threads_per_group = be32_to_cpu(*(thread_groups + 2));
> +	thread_list = (__be32 *)thread_groups + 3;
> +
> +	/*
> +	 * In case of an interleaved big-core, the thread-ids of the
> +	 * big-core can be obtained by interleaving the the thread-ids
> +	 * of the component small
> +	 *
> +	 * Eg: On a 8-thread big-core with two SMT4 small cores, the
> +	 * threads of the two component small cores will be
> +	 * {0, 2, 4, 6} and {1, 3, 5, 7}.
> +	 */
> +	for (i = 0; i < nr_groups; i++) {
> +		first_cpu_idx = thread_list + i * threads_per_group;
> +
> +		for (j = 0; j < threads_per_group - 1; j++) {
> +			cur_cpu = be32_to_cpu(*(first_cpu_idx + j));
> +			next_cpu = be32_to_cpu(*(first_cpu_idx + j + 1));
> +			if (next_cpu != cur_cpu + nr_groups)
> +				return false;
> +		}
> +	}
> +	return true;
> +}
>  
>  /**
>   * setup_cpu_maps - initialize the following cpu maps:
> @@ -565,7 +615,16 @@ void __init smp_setup_cpu_maps(void)
>  	vdso_data->processorCount = num_present_cpus();
>  #endif /* CONFIG_PPC64 */
>  
> -        /* Initialize CPU <=> thread mapping/
> +	dn = of_find_node_by_type(NULL, "cpu");
> +	if (dn) {
> +		if (check_for_interleaved_big_core(dn)) {
> +			has_interleaved_big_core = true;
> +			pr_info("Detected interleaved big-cores\n");

Is there a runtime way to check this also?  If the dmesg buffer overflows, we
lose this.

Mikey

> +		}
> +		of_node_put(dn);
> +	}
> +
> +	/* Initialize CPU <=> thread mapping/
>  	 *
>  	 * WARNING: We assume that the number of threads is the same for
>  	 * every CPU in the system. If that is not the case, then some code
Gautham R Shenoy May 16, 2018, 4:35 a.m. UTC | #2
Hi Mikey,
On Mon, May 14, 2018 at 01:21:11PM +1000, Michael Neuling wrote:
> Thanks for posting this... A couple of comments below.

Thanks for the review. Replies below.

> > +/*
> > + * check_for_interleaved_big_core - Checks if the core represented by
> > + *	 dn is a big-core whose threads are interleavings of the
> > + *	 threads of the component small cores.
> > + *
> > + * @dn: device node corresponding to the core.
> > + *
> > + * Returns true if the core is a interleaved big-core.
> > + * Returns false otherwise.
> > + */
> > +static inline bool check_for_interleaved_big_core(struct device_node *dn)
> > +{
> > +	int len, nr_groups, threads_per_group;
> > +	const __be32 *thread_groups;
> > +	__be32 *thread_list, *first_cpu_idx;
> > +	int cur_cpu, next_cpu, i, j;
> > +
> > +	thread_groups = of_get_property(dn, "ibm,thread-groups", &len);
> > +	if (!thread_groups)
> > +		return false;
> 
> Can you document what this property looks like? Seems to be nr_groups,
> threads_per_group, thread_list. Can you explain what each of these
> mean?

Sure. I will document this in the next version of the patch.

ibm,thread-groups[0..N-1] array defines which group of threads in the
CPU-device node can be grouped together based on the property.

ibm,thread-groups[0] tells us the property based on which the threads
are being grouped together. If this value is 1, it implies that
the threads in the same group share L1, translation cache.

ibm,thread-groups[1] tells us how many such thread groups exist.
ibm,thread-groups[2] tells us the number of threads in each such group.
ibm,thread-groups[3..N-1] is the list of threads identified by
"ibm,ppc-interrupt-server#s" arranged as per their membership in the
grouping.

Example: If ibm,thread-groups[ ] = {1,2,4,5,6,7,8,9,10,11,12}, then
property = 1, nr_groups = 2 and threads_per_group = 4, i.e. there are
2 groups of 4 threads each, where each group of threads shares the L1
and translation caches.

The "ibm,ppc-interrupt-server#s" of the first group are {5,6,7,8} and
the "ibm,ppc-interrupt-server#s" of the second group are {9,10,11,12}.

> 
> If we get configured with an SMT2 big-core (ie. two interleaved SMT1 normal
> cores), will this code also work there?

No, this code won't work there. I hadn't considered the case where
each group consists of only one thread. Thanks for pointing this out.

> 
> > +
> > +	nr_groups = be32_to_cpu(*(thread_groups + 1));
> > +	if (nr_groups <= 1)
> > +		return false;
> > +
> > +	threads_per_group = be32_to_cpu(*(thread_groups + 2));
> > +	thread_list = (__be32 *)thread_groups + 3;
> > +
> > +	/*
> > +	 * In case of an interleaved big-core, the thread-ids of the
> > +	 * big-core can be obtained by interleaving the the thread-ids
> > +	 * of the component small
> > +	 *
> > +	 * Eg: On a 8-thread big-core with two SMT4 small cores, the
> > +	 * threads of the two component small cores will be
> > +	 * {0, 2, 4, 6} and {1, 3, 5, 7}.
> > +	 */
> > +	for (i = 0; i < nr_groups; i++) {
> > +		first_cpu_idx = thread_list + i * threads_per_group;
> > +
> > +		for (j = 0; j < threads_per_group - 1; j++) {
> > +			cur_cpu = be32_to_cpu(*(first_cpu_idx + j));
> > +			next_cpu = be32_to_cpu(*(first_cpu_idx + j + 1));
> > +			if (next_cpu != cur_cpu + nr_groups)
> > +				return false;
> > +		}
> > +	}
> > +	return true;
> > +}
> >  
> >  /**
> >   * setup_cpu_maps - initialize the following cpu maps:
> > @@ -565,7 +615,16 @@ void __init smp_setup_cpu_maps(void)
> >  	vdso_data->processorCount = num_present_cpus();
> >  #endif /* CONFIG_PPC64 */
> >  
> > -        /* Initialize CPU <=> thread mapping/
> > +	dn = of_find_node_by_type(NULL, "cpu");
> > +	if (dn) {
> > +		if (check_for_interleaved_big_core(dn)) {
> > +			has_interleaved_big_core = true;
> > +			pr_info("Detected interleaved big-cores\n");
> 
> Is there a runtime way to check this also?  If the dmesg buffer overflows, we
> lose this.

Where do you suggest we put this ? Should it be a part of
/proc/cpuinfo ?

> 
> Mikey

--
Thanks and Regards
gautham.
Michael Ellerman May 18, 2018, 1:14 p.m. UTC | #3
Gautham R Shenoy <ego@linux.vnet.ibm.com> writes:
...
>> > @@ -565,7 +615,16 @@ void __init smp_setup_cpu_maps(void)
>> >  	vdso_data->processorCount = num_present_cpus();
>> >  #endif /* CONFIG_PPC64 */
>> >  
>> > -        /* Initialize CPU <=> thread mapping/
>> > +	dn = of_find_node_by_type(NULL, "cpu");
>> > +	if (dn) {
>> > +		if (check_for_interleaved_big_core(dn)) {
>> > +			has_interleaved_big_core = true;
>> > +			pr_info("Detected interleaved big-cores\n");
>> 
>> Is there a runtime way to check this also?  If the dmesg buffer overflows, we
>> lose this.
>
> Where do you suggest we put this ? Should it be a part of
> /proc/cpuinfo ?

Hmm, it'd be nice not to pollute it with more junk.

Can you just look at the pir files in sysfs?

eg. on a normal system:

  # cd /sys/devices/system/cpu
  # grep . cpu[0-7]/pir
  cpu0/pir:20
  cpu1/pir:21
  cpu2/pir:22
  cpu3/pir:23
  cpu4/pir:24
  cpu5/pir:25
  cpu6/pir:26
  cpu7/pir:27


cheers
Michael Ellerman May 18, 2018, 1:21 p.m. UTC | #4
"Gautham R. Shenoy" <ego@linux.vnet.ibm.com> writes:

> diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
> index 0af5c11..884dff2 100644
> --- a/arch/powerpc/kernel/setup-common.c
> +++ b/arch/powerpc/kernel/setup-common.c
> @@ -436,8 +438,56 @@ static void __init cpu_init_thread_core_maps(int tpc)
>  	printk(KERN_DEBUG " (thread shift is %d)\n", threads_shift);
>  }
>  
> -
>  u32 *cpu_to_phys_id = NULL;
> +/*
> + * check_for_interleaved_big_core - Checks if the core represented by
> + *	 dn is a big-core whose threads are interleavings of the
> + *	 threads of the component small cores.
> + *
> + * @dn: device node corresponding to the core.
> + *
> + * Returns true if the core is a interleaved big-core.
> + * Returns false otherwise.
> + */
> +static inline bool check_for_interleaved_big_core(struct device_node *dn)
> +{
> +	int len, nr_groups, threads_per_group;
> +	const __be32 *thread_groups;
> +	__be32 *thread_list, *first_cpu_idx;
> +	int cur_cpu, next_cpu, i, j;
> +
> +	thread_groups = of_get_property(dn, "ibm,thread-groups", &len);
> +	if (!thread_groups)
> +		return false;

There are better device tree APIs than bare of_get_property() these
days, can you try to use those?

> +	nr_groups = be32_to_cpu(*(thread_groups + 1));
> +	if (nr_groups <= 1)
> +		return false;

eg. this would be of_property_read_u32_index()

> +
> +	threads_per_group = be32_to_cpu(*(thread_groups + 2));
> +	thread_list = (__be32 *)thread_groups + 3;
> +
> +	/*
> +	 * In case of an interleaved big-core, the thread-ids of the
> +	 * big-core can be obtained by interleaving the the thread-ids
> +	 * of the component small
> +	 *
> +	 * Eg: On a 8-thread big-core with two SMT4 small cores, the
> +	 * threads of the two component small cores will be
> +	 * {0, 2, 4, 6} and {1, 3, 5, 7}.
> +	 */
> +	for (i = 0; i < nr_groups; i++) {
> +		first_cpu_idx = thread_list + i * threads_per_group;
> +
> +		for (j = 0; j < threads_per_group - 1; j++) {
> +			cur_cpu = be32_to_cpu(*(first_cpu_idx + j));
> +			next_cpu = be32_to_cpu(*(first_cpu_idx + j + 1));
> +			if (next_cpu != cur_cpu + nr_groups)
> +				return false;
> +		}
> +	}
> +	return true;
> +}
>  
>  /**
>   * setup_cpu_maps - initialize the following cpu maps:
> @@ -565,7 +615,16 @@ void __init smp_setup_cpu_maps(void)
>  	vdso_data->processorCount = num_present_cpus();
>  #endif /* CONFIG_PPC64 */
>  
> -        /* Initialize CPU <=> thread mapping/
> +	dn = of_find_node_by_type(NULL, "cpu");
> +	if (dn) {
> +		if (check_for_interleaved_big_core(dn)) {
> +			has_interleaved_big_core = true;
> +			pr_info("Detected interleaved big-cores\n");
> +		}
> +		of_node_put(dn);
> +	}

This is a bit untidy, given how unlikely it is that you would have no
CPUs :)

You should be able to do the lookup of the property and the setting of
has_interleaved_big_core all inside check_for_interleaved_big_core().

cheers
Gautham R Shenoy May 22, 2018, 4:31 a.m. UTC | #5
Hello Michael,

On Fri, May 18, 2018 at 11:14:04PM +1000, Michael Ellerman wrote:
> Gautham R Shenoy <ego@linux.vnet.ibm.com> writes:
> ...
> >> > @@ -565,7 +615,16 @@ void __init smp_setup_cpu_maps(void)
> >> >  	vdso_data->processorCount = num_present_cpus();
> >> >  #endif /* CONFIG_PPC64 */
> >> >  
> >> > -        /* Initialize CPU <=> thread mapping/
> >> > +	dn = of_find_node_by_type(NULL, "cpu");
> >> > +	if (dn) {
> >> > +		if (check_for_interleaved_big_core(dn)) {
> >> > +			has_interleaved_big_core = true;
> >> > +			pr_info("Detected interleaved big-cores\n");
> >> 
> >> Is there a runtime way to check this also?  If the dmesg buffer overflows, we
> >> lose this.
> >
> > Where do you suggest we put this ? Should it be a part of
> > /proc/cpuinfo ?
> 
> Hmm, it'd be nice not to pollute it with more junk.
> 
> Can you just look at the pir files in sysfs?

Sure Michael. I will explore this option.

If we add a file called l1cache_thread_group, then the siblings of the
big-core that share the L1-cache can be described as follows.

	# cd  /sys/devices/system/cpu
	# grep . cpu[0-7]/l1cache_thread_group
	cpu0/l1cache_thread_group:0,2,4,6
	cpu1/l1cache_thread_group:1,3,5,7
	cpu2/l1cache_thread_group:0,2,4,6
	cpu3/l1cache_thread_group:1,3,5,7
	cpu4/l1cache_thread_group:0,2,4,6
	cpu5/l1cache_thread_group:1,3,5,7
	cpu6/l1cache_thread_group:0,2,4,6
	cpu7/l1cache_thread_group:1,3,5,7

> 
> eg. on a normal system:
> 
>   # cd /sys/devices/system/cpu
>   # grep . cpu[0-7]/pir
>   cpu0/pir:20
>   cpu1/pir:21
>   cpu2/pir:22
>   cpu3/pir:23
>   cpu4/pir:24
>   cpu5/pir:25
>   cpu6/pir:26
>   cpu7/pir:27


> 
> 
> cheers
>
Gautham R Shenoy May 22, 2018, 4:34 a.m. UTC | #6
Hello Michael,

On Fri, May 18, 2018 at 11:21:22PM +1000, Michael Ellerman wrote:
> "Gautham R. Shenoy" <ego@linux.vnet.ibm.com> writes:
> 
> > diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
> > index 0af5c11..884dff2 100644
> > --- a/arch/powerpc/kernel/setup-common.c
> > +++ b/arch/powerpc/kernel/setup-common.c
> > @@ -436,8 +438,56 @@ static void __init cpu_init_thread_core_maps(int tpc)
> >  	printk(KERN_DEBUG " (thread shift is %d)\n", threads_shift);
> >  }
> >  
> > -
> >  u32 *cpu_to_phys_id = NULL;
> > +/*
> > + * check_for_interleaved_big_core - Checks if the core represented by
> > + *	 dn is a big-core whose threads are interleavings of the
> > + *	 threads of the component small cores.
> > + *
> > + * @dn: device node corresponding to the core.
> > + *
> > + * Returns true if the core is a interleaved big-core.
> > + * Returns false otherwise.
> > + */
> > +static inline bool check_for_interleaved_big_core(struct device_node *dn)
> > +{
> > +	int len, nr_groups, threads_per_group;
> > +	const __be32 *thread_groups;
> > +	__be32 *thread_list, *first_cpu_idx;
> > +	int cur_cpu, next_cpu, i, j;
> > +
> > +	thread_groups = of_get_property(dn, "ibm,thread-groups", &len);
> > +	if (!thread_groups)
> > +		return false;
> 
> There are better device tree APIs than bare of_get_property() these
> days, can you try to use those?

Ok, I will use them.

> 
> > +	nr_groups = be32_to_cpu(*(thread_groups + 1));
> > +	if (nr_groups <= 1)
> > +		return false;
> 
> eg. this would be of_property_read_u32_index()
> 

Ok.

> > @@ -565,7 +615,16 @@ void __init smp_setup_cpu_maps(void)
> >  	vdso_data->processorCount = num_present_cpus();
> >  #endif /* CONFIG_PPC64 */
> >  
> > -        /* Initialize CPU <=> thread mapping/
> > +	dn = of_find_node_by_type(NULL, "cpu");
> > +	if (dn) {
> > +		if (check_for_interleaved_big_core(dn)) {
> > +			has_interleaved_big_core = true;
> > +			pr_info("Detected interleaved big-cores\n");
> > +		}
> > +		of_node_put(dn);
> > +	}
> 
> This is a bit untidy, given how unlikely it is that you would have no
> CPUs :)

This can actually go into the earlier loop where we initialize the
smp_processor_ids(). I have fixed it in the next iteration.

> 
> You should be able to do the lookup of the property and the setting of
> has_interleaved_big_core all inside
> check_for_interleaved_big_core().

Yes, that's what I am doing in the next iteration.

> 
> cheers
>
diff mbox series

Patch

diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h
index d71a909..b706f0a 100644
--- a/arch/powerpc/include/asm/cputhreads.h
+++ b/arch/powerpc/include/asm/cputhreads.h
@@ -23,11 +23,13 @@ 
 extern int threads_per_core;
 extern int threads_per_subcore;
 extern int threads_shift;
+extern bool has_interleaved_big_core;
 extern cpumask_t threads_core_mask;
 #else
-#define threads_per_core	1
-#define threads_per_subcore	1
-#define threads_shift		0
+#define threads_per_core		1
+#define threads_per_subcore		1
+#define threads_shift			0
+#define has_interleaved_big_core	0
 #define threads_core_mask	(*get_cpu_mask(0))
 #endif
 
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 0af5c11..884dff2 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -408,10 +408,12 @@  void __init check_for_initrd(void)
 #ifdef CONFIG_SMP
 
 int threads_per_core, threads_per_subcore, threads_shift;
+bool has_interleaved_big_core;
 cpumask_t threads_core_mask;
 EXPORT_SYMBOL_GPL(threads_per_core);
 EXPORT_SYMBOL_GPL(threads_per_subcore);
 EXPORT_SYMBOL_GPL(threads_shift);
+EXPORT_SYMBOL_GPL(has_interleaved_big_core);
 EXPORT_SYMBOL_GPL(threads_core_mask);
 
 static void __init cpu_init_thread_core_maps(int tpc)
@@ -436,8 +438,56 @@  static void __init cpu_init_thread_core_maps(int tpc)
 	printk(KERN_DEBUG " (thread shift is %d)\n", threads_shift);
 }
 
-
 u32 *cpu_to_phys_id = NULL;
+/*
+ * check_for_interleaved_big_core - Checks if the core represented by
+ *	 dn is a big-core whose thread-ids interleave the thread-ids
+ *	 of its component small cores ("ibm,thread-groups" property).
+ *
+ * @dn: device node corresponding to the core.
+ *
+ * Returns true if the core is an interleaved big-core.
+ * Returns false otherwise (including when the property is absent).
+ */
+static inline bool check_for_interleaved_big_core(struct device_node *dn)
+{
+	int len, nr_groups, threads_per_group;
+	const __be32 *thread_groups;
+	__be32 *thread_list, *first_cpu_idx;
+	int cur_cpu, next_cpu, i, j;
+
+	thread_groups = of_get_property(dn, "ibm,thread-groups", &len);
+	if (!thread_groups)
+		return false;
+
+	nr_groups = be32_to_cpu(*(thread_groups + 1));
+	if (nr_groups <= 1)
+		return false;
+
+	threads_per_group = be32_to_cpu(*(thread_groups + 2));
+	thread_list = (__be32 *)thread_groups + 3;
+
+	/*
+	 * In case of an interleaved big-core, the thread-ids of the
+	 * big-core can be obtained by interleaving the thread-ids
+	 * of the component small cores.
+	 *
+	 * Eg: On an 8-thread big-core with two SMT4 small cores, the
+	 * threads of the two component small cores will be
+	 * {0, 2, 4, 6} and {1, 3, 5, 7}.
+	 */
+	for (i = 0; i < nr_groups; i++) {
+		first_cpu_idx = thread_list + i * threads_per_group;
+
+		for (j = 0; j < threads_per_group - 1; j++) {
+			cur_cpu = be32_to_cpu(*(first_cpu_idx + j));
+			next_cpu = be32_to_cpu(*(first_cpu_idx + j + 1));
+			if (next_cpu != cur_cpu + nr_groups)
+				return false;
+		}
+	}
+	return true;
+}
 
 /**
  * setup_cpu_maps - initialize the following cpu maps:
@@ -565,7 +615,16 @@  void __init smp_setup_cpu_maps(void)
 	vdso_data->processorCount = num_present_cpus();
 #endif /* CONFIG_PPC64 */
 
-        /* Initialize CPU <=> thread mapping/
+	dn = of_find_node_by_type(NULL, "cpu");
+	if (dn) {
+		if (check_for_interleaved_big_core(dn)) {
+			has_interleaved_big_core = true;
+			pr_info("Detected interleaved big-cores\n");
+		}
+		of_node_put(dn);
+	}
+
+	/* Initialize CPU <=> thread mapping/
 	 *
 	 * WARNING: We assume that the number of threads is the same for
 	 * every CPU in the system. If that is not the case, then some code