diff mbox

[2/5,AARCH64] Change IMP and PART over to integers from strings.

Message ID 1447798238-29608-3-git-send-email-apinski@cavium.com
State New
Headers show

Commit Message

Andrew Pinski Nov. 17, 2015, 10:10 p.m. UTC
Because the imp and parts are really integer rather than strings, this patch
moves the comparisons to be integer.  Also allows saving around integers are
easier than doing string comparisons.  This allows for the next change.

The way I store BIG.little is (big<<12)|little as each part num is only 12bits
long.  So it would be nice if someone could test -mpu=native on a big.little
system to make sure it works still.

OK?  Bootstrapped and tested on aarch64-linux-gnu with no regressions.

Thanks,
Andrew Pinski



* config/aarch64/aarch64-cores.def: Rewrite so IMP and PART are integer constants.
* config/aarch64/driver-aarch64.c (struct aarch64_core_data): Change implementer_id to unsigned char.
Change part_no to unsigned int.
(AARCH64_BIG_LITTLE): New define.
(INVALID_IMP): New define.
(INVALID_CORE): New define.
(cpu_data): Change the last element's implementer_id and part_no to integers.
(valid_bL_string_p): Rewrite to ..
(valid_bL_core_p): this for integers instead of strings.
(parse_field): New function.
(contains_string_p): Rewrite to ...
(contains_core_p): this for integers and only for the part_no.
(host_detect_local_cpu): Rewrite handling of implementation and part num to be integers;
simplifying the code.
---
 gcc/config/aarch64/aarch64-cores.def | 25 +++++-----
 gcc/config/aarch64/driver-aarch64.c  | 90 ++++++++++++++++++++----------------
 2 files changed, 62 insertions(+), 53 deletions(-)

Comments

Kyrylo Tkachov Nov. 20, 2015, 11:07 a.m. UTC | #1
Hi Andrew,

On 17/11/15 22:10, Andrew Pinski wrote:
> Because the imp and parts are really integer rather than strings, this patch
> moves the comparisons to be integer.  Also allows saving around integers are
> easier than doing string comparisons.  This allows for the next change.
>
> The way I store BIG.little is (big<<12)|little as each part num is only 12bits
> long.  So it would be nice if someone could test -mpu=native on a big.little
> system to make sure it works still.
>
> OK?  Bootstrapped and tested on aarch64-linux-gnu with no regressions.
>
> Thanks,
> Andrew Pinski
>
>
>
> * config/aarch64/aarch64-cores.def: Rewrite so IMP and PART are integer constants.
> * config/aarch64/driver-aarch64.c (struct aarch64_core_data): Change implementer_id to unsigned char.
> Change part_no to unsigned int.
> (AARCH64_BIG_LITTLE): New define.
> (INVALID_IMP): New define.
> (INVALID_CORE): New define.
> (cpu_data): Change the last element's implementer_id and part_no to integers.
> (valid_bL_string_p): Rewrite to ..
> (valid_bL_core_p): this for integers instead of strings.
> (parse_field): New function.
> (contains_string_p): Rewrite to ...
> (contains_core_p): this for integers and only for the part_no.
> (host_detect_local_cpu): Rewrite handling of implementation and part num to be integers;
> simplifying the code.
> ---
>   gcc/config/aarch64/aarch64-cores.def | 25 +++++-----
>   gcc/config/aarch64/driver-aarch64.c  | 90 ++++++++++++++++++++----------------
>   2 files changed, 62 insertions(+), 53 deletions(-)
>
> diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
> index 0b456f7..798f3e3 100644
> --- a/gcc/config/aarch64/aarch64-cores.def
> +++ b/gcc/config/aarch64/aarch64-cores.def
> @@ -33,25 +33,26 @@
>      This need not include flags implied by the architecture.
>      COSTS is the name of the rtx_costs routine to use.
>      IMP is the implementer ID of the CPU vendor.  On a GNU/Linux system it can
> -   be found in /proc/cpuinfo.
> +   be found in /proc/cpuinfo.  There is a list in the ARM ARM.
>      PART is the part number of the CPU.  On a GNU/Linux system it can be found
> -   in /proc/cpuinfo.  For big.LITTLE systems this should have the form at of
> -   "<big core part number>.<LITTLE core part number>".  */
> +   in /proc/cpuinfo.  For big.LITTLE systems this should use the macro AARCH64_BIG_LITTLE
> +   where the big part number comes as the first arugment to the macro and little is the
> +   second.  */
>   
>   /* V8 Architecture Processors.  */
>   
> -AARCH64_CORE("cortex-a53",  cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, "0x41", "0xd03")
> -AARCH64_CORE("cortex-a57",  cortexa57, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07")
> -AARCH64_CORE("cortex-a72",  cortexa72, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, "0x41", "0xd08")
> -AARCH64_CORE("exynos-m1",   exynosm1,  cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa72, "0x53", "0x001")
> -AARCH64_CORE("qdf24xx",     qdf24xx,   cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57, "0x51", "0x800")
> -AARCH64_CORE("thunderx",    thunderx,  thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  "0x43", "0x0a1")
> -AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, "0x50", "0x000")
> +AARCH64_CORE("cortex-a53",  cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03)
> +AARCH64_CORE("cortex-a57",  cortexa57, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07)
> +AARCH64_CORE("cortex-a72",  cortexa72, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08)
> +AARCH64_CORE("exynos-m1",   exynosm1,  cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa72, 0x53, 0x001)
> +AARCH64_CORE("qdf24xx",     qdf24xx,   cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57, 0x51, 0x800)
> +AARCH64_CORE("thunderx",    thunderx,  thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a1)
> +AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000)
>   
>   /* V8 big.LITTLE implementations.  */
>   
> -AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07.0xd03")
> -AARCH64_CORE("cortex-a72.cortex-a53",  cortexa72cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, "0x41", "0xd08.0xd03")
> +AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE(0xd07, 0xd03))
> +AARCH64_CORE("cortex-a72.cortex-a53",  cortexa72cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE(0xd08, 0xd03))
>   
>   
>   #undef AARCH64_CORE
> diff --git a/gcc/config/aarch64/driver-aarch64.c b/gcc/config/aarch64/driver-aarch64.c
> index d03654c..92388a9 100644
> --- a/gcc/config/aarch64/driver-aarch64.c
> +++ b/gcc/config/aarch64/driver-aarch64.c
> @@ -37,18 +37,23 @@ static struct arch_extension ext_to_feat_string[] =
>   struct aarch64_core_data
>   {
>     const char* name;
> -  const char* arch;
> -  const char* implementer_id;
> -  const char* part_no;
> +  const char *arch;
> +  unsigned char implementer_id; /* Exactly 8 bits */
> +  unsigned int part_no; /* 12 bits + 12 bits */
>   };
>   
> +#define AARCH64_BIG_LITTLE(BIG, LITTLE) \
> +  (((BIG)&0xFFFu) << 12 | ((LITTLE) & 0xFFFu))
> +#define INVALID_IMP ((unsigned char) -1)
> +#define INVALID_CORE ((unsigned)-1)
> +
>   #define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
>     { CORE_NAME, #ARCH, IMP, PART },
>   
>   static struct aarch64_core_data cpu_data [] =
>   {
>   #include "aarch64-cores.def"
> -  { NULL, NULL, NULL, NULL }
> +  { NULL, NULL, INVALID_IMP, INVALID_CORE }
>   };
>   
>   
> @@ -91,27 +96,35 @@ get_arch_name_from_id (const char* id)
>      should return true.  */
>   
>   static bool
> -valid_bL_string_p (const char** core, const char* bL_string)
> +valid_bL_core_p (unsigned int *core, unsigned int bL_core)
>   {
> -  return strstr (bL_string, core[0]) != NULL
> -         && strstr (bL_string, core[1]) != NULL;
> +  return AARCH64_BIG_LITTLE (core[0], core[1]) == bL_core;
>   }

This needs the change you suggested in https://gcc.gnu.org/ml/gcc-patches/2015-11/msg02436.html
I've checked that it works now.
Also, please update the comment for this function since the arguments are no longer strings.

Thanks,
Kyrill

>   
> -/*  Return true iff ARR contains STR in one of its two elements.  */
>   
> -static bool
> -contains_string_p (const char** arr, const char* str)
> +/* Returns the integer that is after ':' for the field. */
> +static unsigned parse_field (const char *field)
>   {
> -  bool res = false;
> +  const char *rest = strchr (field, ':');
> +  char *after;
> +  unsigned fint = strtol (rest+1, &after, 16);
> +  if (after == rest+1)
> +    return -1;
> +  return fint;
> +}
> +
> +/*  Return true iff ARR contains CORE, in either of the two elements. */
>   
> -  if (arr[0] != NULL)
> +static bool
> +contains_core_p (unsigned *arr, unsigned core)
> +{
> +  if (arr[0] != INVALID_CORE)
>       {
> -      res = strstr (arr[0], str) != NULL;
> -      if (res)
> -        return res;
> +      if (arr[0] == core)
> +        return true;
>   
> -      if (arr[1] != NULL)
> -        return strstr (arr[1], str) != NULL;
> +      if (arr[1] != INVALID_CORE)
> +        return arr[1] == core;
>       }
>   
>     return false;
> @@ -146,10 +159,9 @@ host_detect_local_cpu (int argc, const char **argv)
>     bool cpu = false;
>     unsigned int i = 0;
>     unsigned int core_idx = 0;
> -  const char* imps[2] = { NULL, NULL };
> -  const char* cores[2] = { NULL, NULL };
> +  unsigned char imp = INVALID_IMP;
> +  unsigned int cores[2] = { INVALID_CORE, INVALID_CORE };
>     unsigned int n_cores = 0;
> -  unsigned int n_imps = 0;
>     bool processed_exts = false;
>     const char *ext_string = "";
>   
> @@ -180,30 +192,28 @@ host_detect_local_cpu (int argc, const char **argv)
>       {
>         if (strstr (buf, "implementer") != NULL)
>   	{
> -	  for (i = 0; cpu_data[i].name != NULL; i++)
> -	    if (strstr (buf, cpu_data[i].implementer_id) != NULL
> -                && !contains_string_p (imps, cpu_data[i].implementer_id))
> -	      {
> -                if (n_imps == 2)
> -                  goto not_found;
> -
> -                imps[n_imps++] = cpu_data[i].implementer_id;
> -
> -                break;
> -	      }
> -          continue;
> +	  unsigned cimp = parse_field (buf);
> +	  if (cimp == INVALID_IMP)
> +	    goto not_found;
> +
> +	  if (imp == INVALID_IMP)
> +	    imp = cimp;
> +	  /* BIG.little implementers are always equal. */
> +	  else if (imp != cimp)
> +	    goto not_found;
>   	}
>   
>         if (strstr (buf, "part") != NULL)
>   	{
> +	  unsigned ccore = parse_field (buf);
>   	  for (i = 0; cpu_data[i].name != NULL; i++)
> -	    if (strstr (buf, cpu_data[i].part_no) != NULL
> -                && !contains_string_p (cores, cpu_data[i].part_no))
> +	    if (ccore == cpu_data[i].part_no
> +                && !contains_core_p (cores, ccore))
>   	      {
>                   if (n_cores == 2)
>                     goto not_found;
>   
> -                cores[n_cores++] = cpu_data[i].part_no;
> +                cores[n_cores++] = ccore;
>   	        core_idx = i;
>   	        arch_id = cpu_data[i].arch;
>   	        break;
> @@ -240,7 +250,7 @@ host_detect_local_cpu (int argc, const char **argv)
>     f = NULL;
>   
>     /* Weird cpuinfo format that we don't know how to handle.  */
> -  if (n_cores == 0 || n_cores > 2 || n_imps != 1)
> +  if (n_cores == 0 || n_cores > 2 || imp == INVALID_IMP)
>       goto not_found;
>   
>     if (arch && !arch_id)
> @@ -261,9 +271,8 @@ host_detect_local_cpu (int argc, const char **argv)
>       {
>         for (i = 0; cpu_data[i].name != NULL; i++)
>           {
> -          if (strchr (cpu_data[i].part_no, '.') != NULL
> -              && strncmp (cpu_data[i].implementer_id, imps[0], strlen (imps[0]) - 1) == 0
> -              && valid_bL_string_p (cores, cpu_data[i].part_no))
> +          if (cpu_data[i].implementer_id == imp
> +              && valid_bL_core_p (cores, cpu_data[i].part_no))
>               {
>                 res = concat ("-m", cpu ? "cpu" : "tune", "=", cpu_data[i].name, NULL);
>                 break;
> @@ -275,8 +284,7 @@ host_detect_local_cpu (int argc, const char **argv)
>     /* The simple, non-big.LITTLE case.  */
>     else
>       {
> -      if (strncmp (cpu_data[core_idx].implementer_id, imps[0],
> -                   strlen (imps[0]) - 1) != 0)
> +      if (cpu_data[core_idx].implementer_id != imp)
>           goto not_found;
>   
>         res = concat ("-m", cpu ? "cpu" : "tune", "=",
James Greenhalgh Nov. 25, 2015, 10:31 a.m. UTC | #2
On Tue, Nov 17, 2015 at 02:10:35PM -0800, Andrew Pinski wrote:
> 
> Because the imp and parts are really integer rather than strings, this patch
> moves the comparisons to be integer.  Also allows saving around integers are
> easier than doing string comparisons.  This allows for the next change.
> 
> The way I store BIG.little is (big<<12)|little as each part num is only 12bits
> long.  So it would be nice if someone could test -mpu=native on a big.little
> system to make sure it works still.
> 
> OK?  Bootstrapped and tested on aarch64-linux-gnu with no regressions.
> 
> Thanks,
> Andrew Pinski
> 
> 
> 
> * config/aarch64/aarch64-cores.def: Rewrite so IMP and PART are integer constants.
> * config/aarch64/driver-aarch64.c (struct aarch64_core_data): Change implementer_id to unsigned char.
> Change part_no to unsigned int.
> (AARCH64_BIG_LITTLE): New define.
> (INVALID_IMP): New define.
> (INVALID_CORE): New define.
> (cpu_data): Change the last element's implementer_id and part_no to integers.
> (valid_bL_string_p): Rewrite to ..
> (valid_bL_core_p): this for integers instead of strings.
> (parse_field): New function.
> (contains_string_p): Rewrite to ...
> (contains_core_p): this for integers and only for the part_no.
> (host_detect_local_cpu): Rewrite handling of implementation and part num to be integers;
> simplifying the code.
> ---
>  gcc/config/aarch64/aarch64-cores.def | 25 +++++-----
>  gcc/config/aarch64/driver-aarch64.c  | 90 ++++++++++++++++++++----------------
>  2 files changed, 62 insertions(+), 53 deletions(-)
> 
> diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
> index 0b456f7..798f3e3 100644
> --- a/gcc/config/aarch64/aarch64-cores.def
> +++ b/gcc/config/aarch64/aarch64-cores.def
> @@ -33,25 +33,26 @@
>     This need not include flags implied by the architecture.
>     COSTS is the name of the rtx_costs routine to use.
>     IMP is the implementer ID of the CPU vendor.  On a GNU/Linux system it can
> -   be found in /proc/cpuinfo.
> +   be found in /proc/cpuinfo.  There is a list in the ARM ARM.

Let's be precise with this comment.

  "A partial list of implementer IDs is given in the ARM Architecture
   Reference Manual ARMv8, for ARMv8-A architecture profile"

>     PART is the part number of the CPU.  On a GNU/Linux system it can be found
> -   in /proc/cpuinfo.  For big.LITTLE systems this should have the form at of
> -   "<big core part number>.<LITTLE core part number>".  */
> +   in /proc/cpuinfo.  For big.LITTLE systems this should use the macro AARCH64_BIG_LITTLE
> +   where the big part number comes as the first arugment to the macro and little is the

s/arugment/argument/.

> +   second.  */
>  
>  /* V8 Architecture Processors.  */
>  
> -AARCH64_CORE("cortex-a53",  cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, "0x41", "0xd03")
> -AARCH64_CORE("cortex-a57",  cortexa57, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07")
> -AARCH64_CORE("cortex-a72",  cortexa72, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, "0x41", "0xd08")
> -AARCH64_CORE("exynos-m1",   exynosm1,  cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa72, "0x53", "0x001")
> -AARCH64_CORE("qdf24xx",     qdf24xx,   cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57, "0x51", "0x800")
> -AARCH64_CORE("thunderx",    thunderx,  thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  "0x43", "0x0a1")
> -AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, "0x50", "0x000")
> +AARCH64_CORE("cortex-a53",  cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03)
> +AARCH64_CORE("cortex-a57",  cortexa57, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07)
> +AARCH64_CORE("cortex-a72",  cortexa72, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08)
> +AARCH64_CORE("exynos-m1",   exynosm1,  cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa72, 0x53, 0x001)
> +AARCH64_CORE("qdf24xx",     qdf24xx,   cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57, 0x51, 0x800)
> +AARCH64_CORE("thunderx",    thunderx,  thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a1)
> +AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000)
>  
>  /* V8 big.LITTLE implementations.  */
>  
> -AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07.0xd03")
> -AARCH64_CORE("cortex-a72.cortex-a53",  cortexa72cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, "0x41", "0xd08.0xd03")
> +AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE(0xd07, 0xd03))
> +AARCH64_CORE("cortex-a72.cortex-a53",  cortexa72cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE(0xd08, 0xd03))
>  
>  
>  #undef AARCH64_CORE
> diff --git a/gcc/config/aarch64/driver-aarch64.c b/gcc/config/aarch64/driver-aarch64.c
> index d03654c..92388a9 100644
> --- a/gcc/config/aarch64/driver-aarch64.c
> +++ b/gcc/config/aarch64/driver-aarch64.c
> @@ -37,18 +37,23 @@ static struct arch_extension ext_to_feat_string[] =
>  struct aarch64_core_data
>  {
>    const char* name;
> -  const char* arch;
> -  const char* implementer_id;
> -  const char* part_no;
> +  const char *arch;
> +  unsigned char implementer_id; /* Exactly 8 bits */

Can we redesign this around the most general case - big.LITTLE cores with
different implementer IDs? There is nothing in the architecture that would
forbid this, and Samsung recently announced that their Exynos 8 Octa 8890
which would support a combination of four of their custom cores with four ARM
Cortex-A53 cores.

 ( http://www.samsung.com/semiconductor/minisite/Exynos/w/mediacenter.html#?v=news_Samsung_Unveils_a_New_Exynos_8_Octa_Processor_based_on_advanced_14-Nanometer_FinFET_Process_Technology )

> +  unsigned int part_no; /* 12 bits + 12 bits */
>  };
>  
> +#define AARCH64_BIG_LITTLE(BIG, LITTLE) \
> +  (((BIG)&0xFFFu) << 12 | ((LITTLE) & 0xFFFu))
> +#define INVALID_IMP ((unsigned char) -1)
> +#define INVALID_CORE ((unsigned)-1)
> +
>  #define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
>    { CORE_NAME, #ARCH, IMP, PART },
>  
>  static struct aarch64_core_data cpu_data [] =
>  {
>  #include "aarch64-cores.def"
> -  { NULL, NULL, NULL, NULL }
> +  { NULL, NULL, INVALID_IMP, INVALID_CORE }
>  };
>  
>  
> @@ -91,27 +96,35 @@ get_arch_name_from_id (const char* id)
>     should return true.  */
>  
>  static bool
> -valid_bL_string_p (const char** core, const char* bL_string)
> +valid_bL_core_p (unsigned int *core, unsigned int bL_core)
>  {
> -  return strstr (bL_string, core[0]) != NULL
> -         && strstr (bL_string, core[1]) != NULL;
> +  return AARCH64_BIG_LITTLE (core[0], core[1]) == bL_core;
>  }
>  
> -/*  Return true iff ARR contains STR in one of its two elements.  */
>  
> -static bool
> -contains_string_p (const char** arr, const char* str)
> +/* Returns the integer that is after ':' for the field. */
> +static unsigned parse_field (const char *field)
>  {
> -  bool res = false;
> +  const char *rest = strchr (field, ':');
> +  char *after;
> +  unsigned fint = strtol (rest+1, &after, 16);
> +  if (after == rest+1)

"rest + 1" in both cases.

> +    return -1;
> +  return fint;
> +}
> +
> +/*  Return true iff ARR contains CORE, in either of the two elements. */
>  
> -  if (arr[0] != NULL)
> +static bool
> +contains_core_p (unsigned *arr, unsigned core)
> +{
> +  if (arr[0] != INVALID_CORE)
>      {
> -      res = strstr (arr[0], str) != NULL;
> -      if (res)
> -        return res;
> +      if (arr[0] == core)
> +        return true;
>  
> -      if (arr[1] != NULL)
> -        return strstr (arr[1], str) != NULL;
> +      if (arr[1] != INVALID_CORE)
> +        return arr[1] == core;
>      }
>  
>    return false;
> @@ -146,10 +159,9 @@ host_detect_local_cpu (int argc, const char **argv)
>    bool cpu = false;
>    unsigned int i = 0;
>    unsigned int core_idx = 0;
> -  const char* imps[2] = { NULL, NULL };
> -  const char* cores[2] = { NULL, NULL };
> +  unsigned char imp = INVALID_IMP;
> +  unsigned int cores[2] = { INVALID_CORE, INVALID_CORE };
>    unsigned int n_cores = 0;
> -  unsigned int n_imps = 0;
>    bool processed_exts = false;
>    const char *ext_string = "";
>  
> @@ -180,30 +192,28 @@ host_detect_local_cpu (int argc, const char **argv)
>      {
>        if (strstr (buf, "implementer") != NULL)
>  	{
> -	  for (i = 0; cpu_data[i].name != NULL; i++)
> -	    if (strstr (buf, cpu_data[i].implementer_id) != NULL
> -                && !contains_string_p (imps, cpu_data[i].implementer_id))
> -	      {
> -                if (n_imps == 2)
> -                  goto not_found;
> -
> -                imps[n_imps++] = cpu_data[i].implementer_id;
> -
> -                break;
> -	      }
> -          continue;
> +	  unsigned cimp = parse_field (buf);
> +	  if (cimp == INVALID_IMP)
> +	    goto not_found;
> +
> +	  if (imp == INVALID_IMP)
> +	    imp = cimp;
> +	  /* BIG.little implementers are always equal. */

See my comment above. This comment does not neccessarily hold.

In addition, this needs updating for Kyrill's comments.

Thanks,
James
Andrew Pinski Nov. 25, 2015, 7:59 p.m. UTC | #3
On Wed, Nov 25, 2015 at 2:31 AM, James Greenhalgh
<james.greenhalgh@arm.com> wrote:
> On Tue, Nov 17, 2015 at 02:10:35PM -0800, Andrew Pinski wrote:
>>
>> Because the imp and parts are really integer rather than strings, this patch
>> moves the comparisons to be integer.  Also allows saving around integers are
>> easier than doing string comparisons.  This allows for the next change.
>>
>> The way I store BIG.little is (big<<12)|little as each part num is only 12bits
>> long.  So it would be nice if someone could test -mpu=native on a big.little
>> system to make sure it works still.
>>
>> OK?  Bootstrapped and tested on aarch64-linux-gnu with no regressions.
>>
>> Thanks,
>> Andrew Pinski
>>
>>
>>
>> * config/aarch64/aarch64-cores.def: Rewrite so IMP and PART are integer constants.
>> * config/aarch64/driver-aarch64.c (struct aarch64_core_data): Change implementer_id to unsigned char.
>> Change part_no to unsigned int.
>> (AARCH64_BIG_LITTLE): New define.
>> (INVALID_IMP): New define.
>> (INVALID_CORE): New define.
>> (cpu_data): Change the last element's implementer_id and part_no to integers.
>> (valid_bL_string_p): Rewrite to ..
>> (valid_bL_core_p): this for integers instead of strings.
>> (parse_field): New function.
>> (contains_string_p): Rewrite to ...
>> (contains_core_p): this for integers and only for the part_no.
>> (host_detect_local_cpu): Rewrite handling of implementation and part num to be integers;
>> simplifying the code.
>> ---
>>  gcc/config/aarch64/aarch64-cores.def | 25 +++++-----
>>  gcc/config/aarch64/driver-aarch64.c  | 90 ++++++++++++++++++++----------------
>>  2 files changed, 62 insertions(+), 53 deletions(-)
>>
>> diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
>> index 0b456f7..798f3e3 100644
>> --- a/gcc/config/aarch64/aarch64-cores.def
>> +++ b/gcc/config/aarch64/aarch64-cores.def
>> @@ -33,25 +33,26 @@
>>     This need not include flags implied by the architecture.
>>     COSTS is the name of the rtx_costs routine to use.
>>     IMP is the implementer ID of the CPU vendor.  On a GNU/Linux system it can
>> -   be found in /proc/cpuinfo.
>> +   be found in /proc/cpuinfo.  There is a list in the ARM ARM.
>
> Let's be precise with this comment.
>
>   "A partial list of implementer IDs is given in the ARM Architecture
>    Reference Manual ARMv8, for ARMv8-A architecture profile"
>
>>     PART is the part number of the CPU.  On a GNU/Linux system it can be found
>> -   in /proc/cpuinfo.  For big.LITTLE systems this should have the form at of
>> -   "<big core part number>.<LITTLE core part number>".  */
>> +   in /proc/cpuinfo.  For big.LITTLE systems this should use the macro AARCH64_BIG_LITTLE
>> +   where the big part number comes as the first arugment to the macro and little is the
>
> s/arugment/argument/.
>
>> +   second.  */
>>
>>  /* V8 Architecture Processors.  */
>>
>> -AARCH64_CORE("cortex-a53",  cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, "0x41", "0xd03")
>> -AARCH64_CORE("cortex-a57",  cortexa57, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07")
>> -AARCH64_CORE("cortex-a72",  cortexa72, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, "0x41", "0xd08")
>> -AARCH64_CORE("exynos-m1",   exynosm1,  cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa72, "0x53", "0x001")
>> -AARCH64_CORE("qdf24xx",     qdf24xx,   cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57, "0x51", "0x800")
>> -AARCH64_CORE("thunderx",    thunderx,  thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  "0x43", "0x0a1")
>> -AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, "0x50", "0x000")
>> +AARCH64_CORE("cortex-a53",  cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03)
>> +AARCH64_CORE("cortex-a57",  cortexa57, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07)
>> +AARCH64_CORE("cortex-a72",  cortexa72, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08)
>> +AARCH64_CORE("exynos-m1",   exynosm1,  cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa72, 0x53, 0x001)
>> +AARCH64_CORE("qdf24xx",     qdf24xx,   cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57, 0x51, 0x800)
>> +AARCH64_CORE("thunderx",    thunderx,  thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a1)
>> +AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000)
>>
>>  /* V8 big.LITTLE implementations.  */
>>
>> -AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07.0xd03")
>> -AARCH64_CORE("cortex-a72.cortex-a53",  cortexa72cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, "0x41", "0xd08.0xd03")
>> +AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE(0xd07, 0xd03))
>> +AARCH64_CORE("cortex-a72.cortex-a53",  cortexa72cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE(0xd08, 0xd03))
>>
>>
>>  #undef AARCH64_CORE
>> diff --git a/gcc/config/aarch64/driver-aarch64.c b/gcc/config/aarch64/driver-aarch64.c
>> index d03654c..92388a9 100644
>> --- a/gcc/config/aarch64/driver-aarch64.c
>> +++ b/gcc/config/aarch64/driver-aarch64.c
>> @@ -37,18 +37,23 @@ static struct arch_extension ext_to_feat_string[] =
>>  struct aarch64_core_data
>>  {
>>    const char* name;
>> -  const char* arch;
>> -  const char* implementer_id;
>> -  const char* part_no;
>> +  const char *arch;
>> +  unsigned char implementer_id; /* Exactly 8 bits */
>
> Can we redesign this around the most general case - big.LITTLE cores with
> different implementer IDs? There is nothing in the architecture that would
> forbid this, and Samsung recently announced that their Exynos 8 Octa 8890
> which would support a combination of four of their custom cores with four ARM
> Cortex-A53 cores.
>
>  ( http://www.samsung.com/semiconductor/minisite/Exynos/w/mediacenter.html#?v=news_Samsung_Unveils_a_New_Exynos_8_Octa_Processor_based_on_advanced_14-Nanometer_FinFET_Process_Technology )

This should be done a separate patch as even the current
infrastructure does not support it at all.
It would be a simple one though.  Change implementer_id to int and
then encode the two implementer_id in cores_data.  Though the parsing
part becomes more complex as the current parsing does not understand
if a new core has been started or not.  This is why it should be a
separate patch instead.

Maybe to support that one it is not about parsing /proc/cpuinfo but
rather /sys/cpu/* instead which is supported for Linux 4.4 (or did not
make it into 4.4 so 4.5).

Thanks,
Andrew

>
>> +  unsigned int part_no; /* 12 bits + 12 bits */
>>  };
>>
>> +#define AARCH64_BIG_LITTLE(BIG, LITTLE) \
>> +  (((BIG)&0xFFFu) << 12 | ((LITTLE) & 0xFFFu))
>> +#define INVALID_IMP ((unsigned char) -1)
>> +#define INVALID_CORE ((unsigned)-1)
>> +
>>  #define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
>>    { CORE_NAME, #ARCH, IMP, PART },
>>
>>  static struct aarch64_core_data cpu_data [] =
>>  {
>>  #include "aarch64-cores.def"
>> -  { NULL, NULL, NULL, NULL }
>> +  { NULL, NULL, INVALID_IMP, INVALID_CORE }
>>  };
>>
>>
>> @@ -91,27 +96,35 @@ get_arch_name_from_id (const char* id)
>>     should return true.  */
>>
>>  static bool
>> -valid_bL_string_p (const char** core, const char* bL_string)
>> +valid_bL_core_p (unsigned int *core, unsigned int bL_core)
>>  {
>> -  return strstr (bL_string, core[0]) != NULL
>> -         && strstr (bL_string, core[1]) != NULL;
>> +  return AARCH64_BIG_LITTLE (core[0], core[1]) == bL_core;
>>  }
>>
>> -/*  Return true iff ARR contains STR in one of its two elements.  */
>>
>> -static bool
>> -contains_string_p (const char** arr, const char* str)
>> +/* Returns the integer that is after ':' for the field. */
>> +static unsigned parse_field (const char *field)
>>  {
>> -  bool res = false;
>> +  const char *rest = strchr (field, ':');
>> +  char *after;
>> +  unsigned fint = strtol (rest+1, &after, 16);
>> +  if (after == rest+1)
>
> "rest + 1" in both cases.
>
>> +    return -1;
>> +  return fint;
>> +}
>> +
>> +/*  Return true iff ARR contains CORE, in either of the two elements. */
>>
>> -  if (arr[0] != NULL)
>> +static bool
>> +contains_core_p (unsigned *arr, unsigned core)
>> +{
>> +  if (arr[0] != INVALID_CORE)
>>      {
>> -      res = strstr (arr[0], str) != NULL;
>> -      if (res)
>> -        return res;
>> +      if (arr[0] == core)
>> +        return true;
>>
>> -      if (arr[1] != NULL)
>> -        return strstr (arr[1], str) != NULL;
>> +      if (arr[1] != INVALID_CORE)
>> +        return arr[1] == core;
>>      }
>>
>>    return false;
>> @@ -146,10 +159,9 @@ host_detect_local_cpu (int argc, const char **argv)
>>    bool cpu = false;
>>    unsigned int i = 0;
>>    unsigned int core_idx = 0;
>> -  const char* imps[2] = { NULL, NULL };
>> -  const char* cores[2] = { NULL, NULL };
>> +  unsigned char imp = INVALID_IMP;
>> +  unsigned int cores[2] = { INVALID_CORE, INVALID_CORE };
>>    unsigned int n_cores = 0;
>> -  unsigned int n_imps = 0;
>>    bool processed_exts = false;
>>    const char *ext_string = "";
>>
>> @@ -180,30 +192,28 @@ host_detect_local_cpu (int argc, const char **argv)
>>      {
>>        if (strstr (buf, "implementer") != NULL)
>>       {
>> -       for (i = 0; cpu_data[i].name != NULL; i++)
>> -         if (strstr (buf, cpu_data[i].implementer_id) != NULL
>> -                && !contains_string_p (imps, cpu_data[i].implementer_id))
>> -           {
>> -                if (n_imps == 2)
>> -                  goto not_found;
>> -
>> -                imps[n_imps++] = cpu_data[i].implementer_id;
>> -
>> -                break;
>> -           }
>> -          continue;
>> +       unsigned cimp = parse_field (buf);
>> +       if (cimp == INVALID_IMP)
>> +         goto not_found;
>> +
>> +       if (imp == INVALID_IMP)
>> +         imp = cimp;
>> +       /* BIG.little implementers are always equal. */
>
> See my comment above. This comment does not neccessarily hold.
>
> In addition, this needs updating for Kyrill's comments.
>
> Thanks,
> James
>
diff mbox

Patch

diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 0b456f7..798f3e3 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -33,25 +33,26 @@ 
    This need not include flags implied by the architecture.
    COSTS is the name of the rtx_costs routine to use.
    IMP is the implementer ID of the CPU vendor.  On a GNU/Linux system it can
-   be found in /proc/cpuinfo.
+   be found in /proc/cpuinfo.  There is a list in the ARM ARM.
    PART is the part number of the CPU.  On a GNU/Linux system it can be found
-   in /proc/cpuinfo.  For big.LITTLE systems this should have the form at of
-   "<big core part number>.<LITTLE core part number>".  */
+   in /proc/cpuinfo.  For big.LITTLE systems this should use the macro AARCH64_BIG_LITTLE
+   where the big part number comes as the first arugment to the macro and little is the
+   second.  */
 
 /* V8 Architecture Processors.  */
 
-AARCH64_CORE("cortex-a53",  cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, "0x41", "0xd03")
-AARCH64_CORE("cortex-a57",  cortexa57, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07")
-AARCH64_CORE("cortex-a72",  cortexa72, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, "0x41", "0xd08")
-AARCH64_CORE("exynos-m1",   exynosm1,  cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa72, "0x53", "0x001")
-AARCH64_CORE("qdf24xx",     qdf24xx,   cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57, "0x51", "0x800")
-AARCH64_CORE("thunderx",    thunderx,  thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  "0x43", "0x0a1")
-AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, "0x50", "0x000")
+AARCH64_CORE("cortex-a53",  cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03)
+AARCH64_CORE("cortex-a57",  cortexa57, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07)
+AARCH64_CORE("cortex-a72",  cortexa72, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08)
+AARCH64_CORE("exynos-m1",   exynosm1,  cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa72, 0x53, 0x001)
+AARCH64_CORE("qdf24xx",     qdf24xx,   cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57, 0x51, 0x800)
+AARCH64_CORE("thunderx",    thunderx,  thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a1)
+AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000)
 
 /* V8 big.LITTLE implementations.  */
 
-AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07.0xd03")
-AARCH64_CORE("cortex-a72.cortex-a53",  cortexa72cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, "0x41", "0xd08.0xd03")
+AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE(0xd07, 0xd03))
+AARCH64_CORE("cortex-a72.cortex-a53",  cortexa72cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE(0xd08, 0xd03))
 
 
 #undef AARCH64_CORE
diff --git a/gcc/config/aarch64/driver-aarch64.c b/gcc/config/aarch64/driver-aarch64.c
index d03654c..92388a9 100644
--- a/gcc/config/aarch64/driver-aarch64.c
+++ b/gcc/config/aarch64/driver-aarch64.c
@@ -37,18 +37,23 @@  static struct arch_extension ext_to_feat_string[] =
 struct aarch64_core_data
 {
   const char* name;
-  const char* arch;
-  const char* implementer_id;
-  const char* part_no;
+  const char *arch;
+  unsigned char implementer_id; /* Exactly 8 bits */
+  unsigned int part_no; /* 12 bits + 12 bits */
 };
 
+#define AARCH64_BIG_LITTLE(BIG, LITTLE) \
+  (((BIG)&0xFFFu) << 12 | ((LITTLE) & 0xFFFu))
+#define INVALID_IMP ((unsigned char) -1)
+#define INVALID_CORE ((unsigned)-1)
+
 #define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
   { CORE_NAME, #ARCH, IMP, PART },
 
 static struct aarch64_core_data cpu_data [] =
 {
 #include "aarch64-cores.def"
-  { NULL, NULL, NULL, NULL }
+  { NULL, NULL, INVALID_IMP, INVALID_CORE }
 };
 
 
@@ -91,27 +96,35 @@  get_arch_name_from_id (const char* id)
    should return true.  */
 
 static bool
-valid_bL_string_p (const char** core, const char* bL_string)
+valid_bL_core_p (unsigned int *core, unsigned int bL_core)
 {
-  return strstr (bL_string, core[0]) != NULL
-         && strstr (bL_string, core[1]) != NULL;
+  return AARCH64_BIG_LITTLE (core[0], core[1]) == bL_core;
 }
 
-/*  Return true iff ARR contains STR in one of its two elements.  */
 
-static bool
-contains_string_p (const char** arr, const char* str)
+/* Returns the integer that is after ':' for the field. */
+static unsigned parse_field (const char *field)
 {
-  bool res = false;
+  const char *rest = strchr (field, ':');
+  char *after;
+  unsigned fint = strtol (rest+1, &after, 16);
+  if (after == rest+1)
+    return -1;
+  return fint;
+}
+
+/*  Return true iff ARR contains CORE, in either of the two elements. */
 
-  if (arr[0] != NULL)
+static bool
+contains_core_p (unsigned *arr, unsigned core)
+{
+  if (arr[0] != INVALID_CORE)
     {
-      res = strstr (arr[0], str) != NULL;
-      if (res)
-        return res;
+      if (arr[0] == core)
+        return true;
 
-      if (arr[1] != NULL)
-        return strstr (arr[1], str) != NULL;
+      if (arr[1] != INVALID_CORE)
+        return arr[1] == core;
     }
 
   return false;
@@ -146,10 +159,9 @@  host_detect_local_cpu (int argc, const char **argv)
   bool cpu = false;
   unsigned int i = 0;
   unsigned int core_idx = 0;
-  const char* imps[2] = { NULL, NULL };
-  const char* cores[2] = { NULL, NULL };
+  unsigned char imp = INVALID_IMP;
+  unsigned int cores[2] = { INVALID_CORE, INVALID_CORE };
   unsigned int n_cores = 0;
-  unsigned int n_imps = 0;
   bool processed_exts = false;
   const char *ext_string = "";
 
@@ -180,30 +192,28 @@  host_detect_local_cpu (int argc, const char **argv)
     {
       if (strstr (buf, "implementer") != NULL)
 	{
-	  for (i = 0; cpu_data[i].name != NULL; i++)
-	    if (strstr (buf, cpu_data[i].implementer_id) != NULL
-                && !contains_string_p (imps, cpu_data[i].implementer_id))
-	      {
-                if (n_imps == 2)
-                  goto not_found;
-
-                imps[n_imps++] = cpu_data[i].implementer_id;
-
-                break;
-	      }
-          continue;
+	  unsigned cimp = parse_field (buf);
+	  if (cimp == INVALID_IMP)
+	    goto not_found;
+
+	  if (imp == INVALID_IMP)
+	    imp = cimp;
+	  /* BIG.little implementers are always equal. */
+	  else if (imp != cimp)
+	    goto not_found;
 	}
 
       if (strstr (buf, "part") != NULL)
 	{
+	  unsigned ccore = parse_field (buf);
 	  for (i = 0; cpu_data[i].name != NULL; i++)
-	    if (strstr (buf, cpu_data[i].part_no) != NULL
-                && !contains_string_p (cores, cpu_data[i].part_no))
+	    if (ccore == cpu_data[i].part_no
+                && !contains_core_p (cores, ccore))
 	      {
                 if (n_cores == 2)
                   goto not_found;
 
-                cores[n_cores++] = cpu_data[i].part_no;
+                cores[n_cores++] = ccore;
 	        core_idx = i;
 	        arch_id = cpu_data[i].arch;
 	        break;
@@ -240,7 +250,7 @@  host_detect_local_cpu (int argc, const char **argv)
   f = NULL;
 
   /* Weird cpuinfo format that we don't know how to handle.  */
-  if (n_cores == 0 || n_cores > 2 || n_imps != 1)
+  if (n_cores == 0 || n_cores > 2 || imp == INVALID_IMP)
     goto not_found;
 
   if (arch && !arch_id)
@@ -261,9 +271,8 @@  host_detect_local_cpu (int argc, const char **argv)
     {
       for (i = 0; cpu_data[i].name != NULL; i++)
         {
-          if (strchr (cpu_data[i].part_no, '.') != NULL
-              && strncmp (cpu_data[i].implementer_id, imps[0], strlen (imps[0]) - 1) == 0
-              && valid_bL_string_p (cores, cpu_data[i].part_no))
+          if (cpu_data[i].implementer_id == imp
+              && valid_bL_core_p (cores, cpu_data[i].part_no))
             {
               res = concat ("-m", cpu ? "cpu" : "tune", "=", cpu_data[i].name, NULL);
               break;
@@ -275,8 +284,7 @@  host_detect_local_cpu (int argc, const char **argv)
   /* The simple, non-big.LITTLE case.  */
   else
     {
-      if (strncmp (cpu_data[core_idx].implementer_id, imps[0],
-                   strlen (imps[0]) - 1) != 0)
+      if (cpu_data[core_idx].implementer_id != imp)
         goto not_found;
 
       res = concat ("-m", cpu ? "cpu" : "tune", "=",