diff mbox

[5/5,AARCH64] Add variant support to -m="native" and add thunderxt88p1.

Message ID CA+=Sn1n52gDzREzkDLDYC-JafXE9jce4Sv95vDMM3Sgc-UY0kA@mail.gmail.com
State New
Headers show

Commit Message

Andrew Pinski Nov. 1, 2016, 6:08 p.m. UTC
On Tue, Nov 17, 2015 at 2:10 PM, Andrew Pinski <apinski@cavium.com> wrote:
> Since ThunderX T88 pass 1 (variant 0) is an ARMv8 part while pass 2 (variant 1)
> is an ARMv8.1 part, I needed to add detection of the variant to handle this
> difference. I also simplified the code a little and combined the single-core and
> arch-detecting cases so that it would be easier to add the variant.

Actually, it is a bit more complex than what I said here; see below for
the full table of options and what is enabled/disabled now.

> OK?  Bootstrapped and tested on aarch64-linux-gnu with no regressions.
> Tested -mcpu=native on both T88 pass 1 and T88 pass 2 to make sure it is
> detecting the two separately.


Here is the final patch in this series updated; I changed the cpu name
slightly and made sure I updated invoke.texi too.

The names are going to match the names in LLVM (worked with our LLVM
engineer here at Cavium about the names).
Here are the names recorded and what each one matches and enables:
-mcpu=thunderx:
*        Matches part num 0xA0 (reserved for ThunderX 8x series)
*        T88 Pass 2 scheduling
*        Hardware prefetching (software prefetching disabled)
*        LSE enabled
*        no v8.1

-mcpu=thunderxt88:
*        Matches part num 0xA1
*        T88 Pass 2 scheduling
*        software prefetching enabled
*        LSE enabled
*        no v8.1

-mcpu=thunderxt88p1 (only for GCC):
*        Matches part num 0xA1, variant 0
*        T88 Pass 1 scheduling
*        software prefetching enabled
*        no LSE enabled
*        no v8.1

-mcpu=thunderxt81 and -mcpu=thunderxt83:
*        Matches part num 0xA2/0xA3
*        T88 Pass 2 scheduling
*        Hardware prefetching (software prefetching disabled)
*        LSE enabled
*        v8.1


I have not hooked up software vs hardware prefetching and the
scheduler parts (the next patch will do part of that); both ARMv8.1-a
and LSE parts are hooked up as those parts are only in
aarch64-cores.def.

OK?  Bootstrapped and tested on ThunderX T88 and ThunderX T81
(aarch64-linux-gnu).

Thanks,
Andrew Pinski

* config/aarch64/aarch64-cores.def: Add -1 as the variant to all of the cores.
(thunderx): Update to include LSE by default.
(thunderxt88p1): New core.
(thunderxt88): New core.
(thunderxt81): New core.
(thunderxt83): New core.
* config/aarch64/driver-aarch64.c (struct aarch64_core_data): Add variant field.
(ALL_VARIANTS): New define.
(AARCH64_CORE): Support VARIANT operand.
(cpu_data): Likewise.
(host_detect_local_cpu): Parse variant field of /proc/cpuinfo.  Combine the arch
and single core case and support variant searching.
* common/config/aarch64/aarch64-common.c (AARCH64_CORE): Add VARIANT operand.
* config/aarch64/aarch64-opts.h (AARCH64_CORE): Likewise.
* config/aarch64/aarch64.c (AARCH64_CORE): Likewise.
* config/aarch64/aarch64.h (AARCH64_CORE): Likewise.
* config/aarch64/aarch64-tune.md: Regenerate.

* doc/invoke.texi (AARCH64/mtune): Document thunderxt88,
thunderxt88p1, thunderxt81, thunderxt83 as available options.


>
> Thanks,
> Andrew Pinski
>
>
> * config/aarch64/aarch64-cores.def: Add -1 as the variant to all of the cores.
> (thunderxt88pass1): New core.
> * config/aarch64/driver-aarch64.c (struct aarch64_core_data): Add variant field.
> (ALL_VARIANTS): New define.
> (AARCH64_CORE): Support VARIANT operand.
> (cpu_data): Likewise.
> (host_detect_local_cpu): Parse variant field of /proc/cpuinfo.  Combine the arch
> and single core case and support variant searching.
> * common/config/aarch64/aarch64-common.c (AARCH64_CORE): Add VARIANT operand.
> * config/aarch64/aarch64-opts.h (AARCH64_CORE): Likewise.
> * config/aarch64/aarch64.c (AARCH64_CORE): Likewise.
> * config/aarch64/aarch64.h (AARCH64_CORE): Likewise.
> * config/aarch64/aarch64-tune.md: Regenerate.
> ---
>  gcc/common/config/aarch64/aarch64-common.c |  2 +-
>  gcc/config/aarch64/aarch64-cores.def       | 27 ++++++-----
>  gcc/config/aarch64/aarch64-opts.h          |  2 +-
>  gcc/config/aarch64/aarch64-tune.md         |  2 +-
>  gcc/config/aarch64/aarch64.c               |  2 +-
>  gcc/config/aarch64/aarch64.h               |  2 +-
>  gcc/config/aarch64/driver-aarch64.c        | 78 ++++++++++++++++--------------
>  7 files changed, 64 insertions(+), 51 deletions(-)
>
> diff --git a/gcc/common/config/aarch64/aarch64-common.c b/gcc/common/config/aarch64/aarch64-common.c
> index e312bbc..f6fd7e7 100644
> --- a/gcc/common/config/aarch64/aarch64-common.c
> +++ b/gcc/common/config/aarch64/aarch64-common.c
> @@ -141,7 +141,7 @@ struct arch_to_arch_name
>     the default set of architectural feature flags they support.  */
>  static const struct processor_name_to_arch all_cores[] =
>  {
> -#define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART) \
> +#define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, VARIANT) \
>    {NAME, AARCH64_ARCH_##ARCH_IDENT, FLAGS},
>  #include "config/aarch64/aarch64-cores.def"
>    {"generic", AARCH64_ARCH_8A, AARCH64_FL_FOR_ARCH8},
> diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
> index 05ee525..52a9906 100644
> --- a/gcc/config/aarch64/aarch64-cores.def
> +++ b/gcc/config/aarch64/aarch64-cores.def
> @@ -21,7 +21,7 @@
>
>     Before using #include to read this file, define a macro:
>
> -      AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHEDULER_IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART)
> +      AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHEDULER_IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, VARIANT)
>
>     The CORE_NAME is the name of the core, represented as a string constant.
>     The CORE_IDENT is the name of the core, represented as an identifier.
> @@ -37,31 +37,36 @@
>     PART is the part number of the CPU.  On a GNU/Linux system it can be found
>     in /proc/cpuinfo.  For big.LITTLE systems this should use the macro AARCH64_BIG_LITTLE
>     where the big part number comes as the first arugment to the macro and little is the
> -   second.  */
> +   second.
> +   VARIANT is the variant of the CPU.  In a GNU/Linux system it can found
> +   in /proc/cpuinfo.  If this is -1, this means it can match any variant.  */
>
>  /* V8 Architecture Processors.  */
>
>  /* ARM cores. */
> -AARCH64_CORE("cortex-a53",  cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03)
> -AARCH64_CORE("cortex-a57",  cortexa57, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07)
> -AARCH64_CORE("cortex-a72",  cortexa72, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08)
> +AARCH64_CORE("cortex-a53",  cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1)
> +AARCH64_CORE("cortex-a57",  cortexa57, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1)
> +AARCH64_CORE("cortex-a72",  cortexa72, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1)
>
>  /* Samsung cores */
> -AARCH64_CORE("exynos-m1",   exynosm1,  cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa72, 0x53, 0x001)
> +AARCH64_CORE("exynos-m1",   exynosm1,  cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa72, 0x53, 0x001, -1)
>
>  /* Qualcomm cores */
> -AARCH64_CORE("qdf24xx",     qdf24xx,   cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57, 0x51, 0x800)
> +AARCH64_CORE("qdf24xx",     qdf24xx,   cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57, 0x51, 0x800, -1)
>
>  /* Cavium cores */
> -AARCH64_CORE("thunderx",    thunderx,  thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a1)
> +/* ThunderX T88 pass 1 is 8.0-a arch. */
> +AARCH64_CORE("thunderxt88pass1", thunderxt88pass1,  thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a1, 0)
> +/* ThunderX T88 pass 2 and on is 8.1-a arch. */
> +AARCH64_CORE("thunderx",    thunderx,  thunderx,  8_1A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_LSE, thunderx,  0x43, 0x0a1, -1)
>
>  /* APM cores */
> -AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000)
> +AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000, -1)
>
>  /* V8 big.LITTLE implementations.  */
>
> -AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE(0xd07, 0xd03))
> -AARCH64_CORE("cortex-a72.cortex-a53",  cortexa72cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE(0xd08, 0xd03))
> +AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE(0xd07, 0xd03), -1)
> +AARCH64_CORE("cortex-a72.cortex-a53",  cortexa72cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE(0xd08, 0xd03), -1)
>
>
>  #undef AARCH64_CORE
> diff --git a/gcc/config/aarch64/aarch64-opts.h b/gcc/config/aarch64/aarch64-opts.h
> index 5534867..c66cd72 100644
> --- a/gcc/config/aarch64/aarch64-opts.h
> +++ b/gcc/config/aarch64/aarch64-opts.h
> @@ -25,7 +25,7 @@
>  /* The various cores that implement AArch64.  */
>  enum aarch64_processor
>  {
> -#define AARCH64_CORE(NAME, INTERNAL_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
> +#define AARCH64_CORE(NAME, INTERNAL_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
>    INTERNAL_IDENT,
>  #include "aarch64-cores.def"
>    /* Used to indicate that no processor has been specified.  */
> diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
> index c65a124..28b573e 100644
> --- a/gcc/config/aarch64/aarch64-tune.md
> +++ b/gcc/config/aarch64/aarch64-tune.md
> @@ -1,5 +1,5 @@
>  ;; -*- buffer-read-only: t -*-
>  ;; Generated automatically by gentune.sh from aarch64-cores.def
>  (define_attr "tune"
> -       "cortexa53,cortexa57,cortexa72,exynosm1,qdf24xx,thunderx,xgene1,cortexa57cortexa53,cortexa72cortexa53"
> +       "cortexa53,cortexa57,cortexa72,exynosm1,qdf24xx,thunderxt88pass1,thunderx,xgene1,cortexa57cortexa53,cortexa72cortexa53"
>         (const (symbol_ref "((enum attr_tune) aarch64_tune)")))
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index a5971a1..d244846 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -526,7 +526,7 @@ static const struct processor all_architectures[] =
>  /* Processor cores implementing AArch64.  */
>  static const struct processor all_cores[] =
>  {
> -#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
> +#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
>    {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH,                            \
>    all_architectures[AARCH64_ARCH_##ARCH].architecture_version, \
>    FLAGS, &COSTS##_tunings},
> diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
> index c6582a4..6f06369 100644
> --- a/gcc/config/aarch64/aarch64.h
> +++ b/gcc/config/aarch64/aarch64.h
> @@ -464,7 +464,7 @@ enum reg_class
>
>  enum target_cpus
>  {
> -#define AARCH64_CORE(NAME, INTERNAL_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
> +#define AARCH64_CORE(NAME, INTERNAL_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
>    TARGET_CPU_##INTERNAL_IDENT,
>  #include "aarch64-cores.def"
>    TARGET_CPU_generic
> diff --git a/gcc/config/aarch64/driver-aarch64.c b/gcc/config/aarch64/driver-aarch64.c
> index ea1e856..b11d914 100644
> --- a/gcc/config/aarch64/driver-aarch64.c
> +++ b/gcc/config/aarch64/driver-aarch64.c
> @@ -40,20 +40,22 @@ struct aarch64_core_data
>    const char *arch;
>    unsigned char implementer_id; /* Exactly 8 bits */
>    unsigned int part_no; /* 12 bits + 12 bits */
> +  unsigned variant;
>  };
>
>  #define AARCH64_BIG_LITTLE(BIG, LITTLE) \
>    (((BIG)&0xFFFu) << 12 | ((LITTLE) & 0xFFFu))
>  #define INVALID_IMP ((unsigned char) -1)
>  #define INVALID_CORE ((unsigned)-1)
> +#define ALL_VARIANTS ((unsigned)-1)
>
> -#define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
> -  { CORE_NAME, #ARCH, IMP, PART },
> +#define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
> +  { CORE_NAME, #ARCH, IMP, PART, VARIANT },
>
>  static struct aarch64_core_data cpu_data [] =
>  {
>  #include "aarch64-cores.def"
> -  { NULL, NULL, INVALID_IMP, INVALID_CORE }
> +  { NULL, NULL, INVALID_IMP, INVALID_CORE, ALL_VARIANTS }
>  };
>
>
> @@ -149,7 +151,6 @@ contains_core_p (unsigned *arr, unsigned core)
>  const char *
>  host_detect_local_cpu (int argc, const char **argv)
>  {
> -  const char *arch_id = NULL;
>    const char *res = NULL;
>    static const int num_exts = ARRAY_SIZE (ext_to_feat_string);
>    char buf[128];
> @@ -158,10 +159,11 @@ host_detect_local_cpu (int argc, const char **argv)
>    bool tune = false;
>    bool cpu = false;
>    unsigned int i = 0;
> -  int core_idx = -1;
>    unsigned char imp = INVALID_IMP;
>    unsigned int cores[2] = { INVALID_CORE, INVALID_CORE };
>    unsigned int n_cores = 0;
> +  unsigned int variants[2] = { ALL_VARIANTS, ALL_VARIANTS };
> +  unsigned int n_variants = 0;
>    bool processed_exts = false;
>    const char *ext_string = "";
>
> @@ -203,6 +205,19 @@ host_detect_local_cpu (int argc, const char **argv)
>             goto not_found;
>         }
>
> +      if (strstr (buf, "variant") != NULL)
> +       {
> +         unsigned cvariant = parse_field (buf);
> +         if (!contains_core_p (variants, cvariant))
> +           {
> +              if (n_variants == 2)
> +                goto not_found;
> +
> +              variants[n_variants++] = cvariant;
> +           }
> +          continue;
> +        }
> +
>        if (strstr (buf, "part") != NULL)
>         {
>           unsigned ccore = parse_field (buf);
> @@ -245,32 +260,41 @@ host_detect_local_cpu (int argc, const char **argv)
>    f = NULL;
>
>    /* Weird cpuinfo format that we don't know how to handle.  */
> -  if (n_cores == 0 || n_cores > 2 || imp == INVALID_IMP)
> +  if (n_cores == 0 || n_cores > 2
> +      || (n_cores == 1 && n_variants != 1)
> +      || imp == INVALID_IMP)
>      goto not_found;
>
> -  if (arch)
> +
> +  /* Simple case, one core type or just looking for the arch. */
> +  if (n_cores == 1 || arch)
>      {
>        /* Search for one of the cores in the list. */
>        for (i = 0; cpu_data[i].name != NULL; i++)
>         if (cpu_data[i].implementer_id == imp
> -           && contains_core_p (cores, cpu_data[i].part_no))
> -         {
> -           arch_id = cpu_data[i].arch;
> -           break;
> -         }
> -      if (!arch_id)
> +           && cores[0] == cpu_data[i].part_no
> +           && (cpu_data[i].variant == ALL_VARIANTS
> +               || variants[0] == cpu_data[i].variant))
> +         break;
> +      if (cpu_data[i].name == NULL)
>         goto not_found;
>
> -      const char* arch_name = get_arch_name_from_id (arch_id);
> +      if (arch)
> +       {
> +         const char* arch_name = get_arch_name_from_id (cpu_data[i].arch);
>
> -      /* We got some arch indentifier that's not in aarch64-arches.def?  */
> -      if (!arch_name)
> -        goto not_found;
> +         /* We got some arch indentifier that's not in aarch64-arches.def?  */
> +         if (!arch_name)
> +           goto not_found;
>
> -      res = concat ("-march=", arch_name, NULL);
> +         res = concat ("-march=", arch_name, NULL);
> +       }
> +      else
> +        res = concat ("-m", cpu ? "cpu" : "tune", "=",
> +                      cpu_data[i].name, NULL);
>      }
>    /* We have big.LITTLE.  */
> -  else if (n_cores == 2)
> +  else
>      {
>        for (i = 0; cpu_data[i].name != NULL; i++)
>          {
> @@ -284,22 +308,6 @@ host_detect_local_cpu (int argc, const char **argv)
>        if (!res)
>          goto not_found;
>      }
> -  /* The simple, non-big.LITTLE case.  */
> -  else
> -    {
> -      for (i = 0; cpu_data[i].name != NULL; i++)
> -       if (cores[0] == cpu_data[i].part_no
> -           && cpu_data[i].implementer_id == imp)
> -         {
> -           core_idx = i;
> -           break;
> -         }
> -      if (core_idx == -1)
> -       goto not_found;
> -
> -      res = concat ("-m", cpu ? "cpu" : "tune", "=",
> -                      cpu_data[core_idx].name, NULL);
> -    }
>
>    if (tune)
>      return res;
> --
> 1.9.1
>

Comments

James Greenhalgh Nov. 2, 2016, 10:54 a.m. UTC | #1
On Tue, Nov 01, 2016 at 11:08:53AM -0700, Andrew Pinski wrote:
> On Tue, Nov 17, 2015 at 2:10 PM, Andrew Pinski <apinski@cavium.com> wrote:
> > Since ThunderX T88 pass 1 (variant 0) is a ARMv8 part while pass 2 (variant 1)
> > is an ARMv8.1 part, I needed to add detecting of the variant also for this
> > difference. Also I simplify a little bit and combined the single core and
> > arch detecting cases so it would be easier to add variant.
> 
> Actually it is a bit more complex than what I said here, see below for
> the full table of options and what are enabled/disabled now.
> 
> > OK?  Bootstrapped and tested on aarch64-linux-gnu with no regressions.
> > Tested -mcpu=native on both T88 pass 1 and T88 pass 2 to make sure it is
> > detecting the two separately.
> 
> 
> Here is the final patch in this series updated; I changed the cpu name
> slightly and made sure I updated invoke.texi too.
> 
> The names are going to match the names in LLVM (worked with our LLVM
> engineer here at Cavium about the names).
> Here are the names recorded and what each one matches and enables:
> -mcpu=thunderx:
> *        Matches part num 0xA0 (reserved for ThunderX 8x series)
> *        T88 Pass 2 scheduling
> *        Hardware prefetching (software prefetching disabled)
> *        LSE enabled
> *        no v8.1

This doesn't match the current LLVM proposal
( https://reviews.llvm.org/D24540 ) which enables full ARMv8.1-A support
for -mcpu=thunderx.

> -mcpu=thunderxt88:
> *        Matches part num 0xA1
> *        T88 Pass 2 scheduling
> *        software prefetching enabled
> *        LSE enabled
> *        no v8.1
> 
> -mcpu=thunderxt88p1 (only for GCC):
> *        Matches part num 0xA1, variant 0
> *        T88 Pass 1 scheduling
> *        software prefetching enabled
> *        no LSE enabled
> *        no v8.1
> 
> -mcpu=thunderxt81 and -mcpu=thunderxt83:
> *        Matches part num 0xA2/0xA3
> *        T88 Pass 2 scheduling
> *        Hardware prefetching (software prefetching disabled)
> *        LSE enabled
> *        v8.1

This looks like what has been added to LLVM as -mcpu=thunderx.

> I have not hooked up software vs hardware prefetching and the
> scheduler parts (the next patch will do part of that); both ARMv8.1-a
> and LSE parts are hooked up as those parts are only in
> aarch64-cores.def.
> 
> OK?  Bootstrapped and tested on ThunderX T88 and ThunderX T81
> (aarch64-linux-gnu).
> 
> Index: common/config/aarch64/aarch64-common.c
> ===================================================================
> --- common/config/aarch64/aarch64-common.c	(revision 241727)
> +++ common/config/aarch64/aarch64-common.c	(working copy)
> @@ -145,7 +145,7 @@ struct arch_to_arch_name
>     the default set of architectural feature flags they support.  */
>  static const struct processor_name_to_arch all_cores[] =
>  {
> -#define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART) \
> +#define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, VARIANT) \
>    {NAME, AARCH64_ARCH_##ARCH_IDENT, FLAGS},
>  #include "config/aarch64/aarch64-cores.def"
>    {"generic", AARCH64_ARCH_8A, AARCH64_FL_FOR_ARCH8},
> Index: config/aarch64/aarch64-cores.def
> ===================================================================
> --- config/aarch64/aarch64-cores.def	(revision 241727)
> +++ config/aarch64/aarch64-cores.def	(working copy)
> @@ -21,7 +21,7 @@
>  
>     Before using #include to read this file, define a macro:
>  
> -      AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHEDULER_IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART)
> +      AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHEDULER_IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, VARIANT)
>  
>     The CORE_NAME is the name of the core, represented as a string constant.
>     The CORE_IDENT is the name of the core, represented as an identifier.
> @@ -39,39 +39,45 @@
>     PART is the part number of the CPU.  On a GNU/Linux system it can be
>     found in /proc/cpuinfo.  For big.LITTLE systems this should use the
>     macro AARCH64_BIG_LITTLE where the big part number comes as the first
> -   argument to the macro and little is the second.  */
> +   argument to the macro and little is the second.
> +   VARIANT is the variant of the CPU.  In a GNU/Linux system it can found
> +   in /proc/cpuinfo.  If this is -1, this means it can match any variant.  */
>  
>  /* V8 Architecture Processors.  */
>  
>  /* ARM ('A') cores. */
> -AARCH64_CORE("cortex-a35",  cortexa35, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04)
> -AARCH64_CORE("cortex-a53",  cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03)
> -AARCH64_CORE("cortex-a57",  cortexa57, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07)
> -AARCH64_CORE("cortex-a72",  cortexa72, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08)
> -AARCH64_CORE("cortex-a73",  cortexa73, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09)
> +AARCH64_CORE("cortex-a35",  cortexa35, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1)
> +AARCH64_CORE("cortex-a53",  cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1)
> +AARCH64_CORE("cortex-a57",  cortexa57, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1)
> +AARCH64_CORE("cortex-a72",  cortexa72, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1)
> +AARCH64_CORE("cortex-a73",  cortexa73, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1)
>  
>  /* Samsung ('S') cores. */
> -AARCH64_CORE("exynos-m1",   exynosm1,  exynosm1,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1,  0x53, 0x001)
> +AARCH64_CORE("exynos-m1",   exynosm1,  exynosm1,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1,  0x53, 0x001, -1)
>  
>  /* Qualcomm ('Q') cores. */
> -AARCH64_CORE("qdf24xx",     qdf24xx,   cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx,   0x51, 0x800)
> +AARCH64_CORE("qdf24xx",     qdf24xx,   cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx,   0x51, 0x800, -1)
>  
>  /* Cavium ('C') cores. */
> -AARCH64_CORE("thunderx",    thunderx,  thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a1)
> +AARCH64_CORE("thunderx",      thunderx,      thunderx,  8A,    AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_LSE, thunderx,  0x43, 0x0a0, -1)
> +AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx,  8A,    AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO,		   thunderx,  0x43, 0x0a1, 0)
> +AARCH64_CORE("thunderxt88",   thunderxt88,   thunderx,  8A,    AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_LSE, thunderx,  0x43, 0x0a1, -1)

You probably want a comment somewhere here making it clear that the ordering
of thunderxt88p1 and thunderxt88 must remain as is, or detection will fail
(-1 will match before 0). Otherwise someone will come along and helpfully
put these in alphabetical order and cause you trouble...

> +AARCH64_CORE("thunderxt81",   thunderxt81,   thunderx,  8_1A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_LSE, thunderx,  0x43, 0x0a2, -1)
> +AARCH64_CORE("thunderxt83",   thunderxt83,   thunderx,  8_1A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_LSE, thunderx,  0x43, 0x0a3, -1)
>  
>  /* APM ('P') cores. */
> -AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000)
> +AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000, -1)
>  
>  /* V8.1 Architecture Processors.  */
>  
>  /* Broadcom ('B') cores. */
> -AARCH64_CORE("vulcan",  vulcan, cortexa57, 8_1A,  AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, vulcan, 0x42, 0x516)
> +AARCH64_CORE("vulcan",  vulcan, cortexa57, 8_1A,  AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, vulcan, 0x42, 0x516, -1)
>  
>  /* V8 big.LITTLE implementations.  */
>  
> -AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03))
> -AARCH64_CORE("cortex-a72.cortex-a53",  cortexa72cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03))
> -AARCH64_CORE("cortex-a73.cortex-a35",  cortexa73cortexa35, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04))
> -AARCH64_CORE("cortex-a73.cortex-a53",  cortexa73cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03))
> +AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1)
> +AARCH64_CORE("cortex-a72.cortex-a53",  cortexa72cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03), -1)
> +AARCH64_CORE("cortex-a73.cortex-a35",  cortexa73cortexa35, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04), -1)
> +AARCH64_CORE("cortex-a73.cortex-a53",  cortexa73cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03), -1)

Why do variants for big.LITTLE get a single variant number, but you track
two variant numbers in the code below?

Thanks,
James
Jones, Joel Nov. 2, 2016, 12:11 p.m. UTC | #2
What is currently submitted for LLVM review was submitted before we determined this naming scheme. I will mark the current submittal as abandoned, as the scheduling model needs to be split out and revised.

Joel Jones 

Sent from my AArch64 powered iPhone

> On Nov 2, 2016, at 3:55 AM, James Greenhalgh <james.greenhalgh@arm.com> wrote:
> 
>> On Tue, Nov 01, 2016 at 11:08:53AM -0700, Andrew Pinski wrote:
>>> On Tue, Nov 17, 2015 at 2:10 PM, Andrew Pinski <apinski@cavium.com> wrote:
>>> Since ThunderX T88 pass 1 (variant 0) is a ARMv8 part while pass 2 (variant 1)
>>> is an ARMv8.1 part, I needed to add detecting of the variant also for this
>>> difference. Also I simplify a little bit and combined the single core and
>>> arch detecting cases so it would be easier to add variant.
>> 
>> Actually it is a bit more complex than what I said here, see below for
>> the full table of options and what are enabled/disabled now.
>> 
>>> OK?  Bootstrapped and tested on aarch64-linux-gnu with no regressions.
>>> Tested -mcpu=native on both T88 pass 1 and T88 pass 2 to make sure it is
>>> detecting the two separately.
>> 
>> 
>> Here is the final patch in this series updated; I changed the cpu name
>> slightly and made sure I updated invoke.texi too.
>> 
>> The names are going to match the names in LLVM (worked with our LLVM
>> engineer here at Cavium about the names).
>> Here are the names recorded and
>> -mcpu=thunderx:
>> *        Matches part num 0xA0 (reserved for ThunderX 8x series)
>> *        T88 Pass 2 scheduling
>> *        Hardware prefetching (software prefetching disabled)
>> *        LSE enabled
>> *        no v8.1
> 
> This doesn't match the current LLVM proposal
> ( https://reviews.llvm.org/D24540 ) which enables full ARMv8.1-A support
> for -mcpu=thunderx.
> 
>> -mcpu=thunderxt88:
>> *        Matches part num 0xA1
>> *        T88 Pass 2 scheduling
>> *        software prefetching enabled
>> *        LSE enabled
>> *        no v8.1
>> 
>> -mcpu=thunderxt88p1 (only for GCC):
>> *        Matches part num 0xA1, variant 0
>> *        T88 Pass 1 scheduling
>> *        software prefetching enabled
>> *        no LSE enabled
>> *        no v8.1
>> 
>> -mcpu=thunderxt81 and -mcpu=thunderxt83:
>> *        Matches part num 0xA2/0xA3
>> *        T88 Pass 2 scheduling
>> *        Hardware prefetching (software prefetching disabled)
>> *        LSE enabled
>> *        v8.1
> 
> This looks like what has been added to LLVM as -mcpu=thunderx.
> 
>> I have not hooked up software vs hardware prefetching and the
>> scheduler parts (the next patch will do part of that); both ARMv8.1-a
>> and LSE parts are hooked up as those parts are only in
>> aarch64-cores.def.
>> 
>> OK?  Bootstrapped and tested on ThunderX T88 and ThunderX T81
>> (aarch64-linux-gnu).
>> 
>> Index: common/config/aarch64/aarch64-common.c
>> ===================================================================
>> --- common/config/aarch64/aarch64-common.c    (revision 241727)
>> +++ common/config/aarch64/aarch64-common.c    (working copy)
>> @@ -145,7 +145,7 @@ struct arch_to_arch_name
>>    the default set of architectural feature flags they support.  */
>> static const struct processor_name_to_arch all_cores[] =
>> {
>> -#define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART) \
>> +#define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, VARIANT) \
>>   {NAME, AARCH64_ARCH_##ARCH_IDENT, FLAGS},
>> #include "config/aarch64/aarch64-cores.def"
>>   {"generic", AARCH64_ARCH_8A, AARCH64_FL_FOR_ARCH8},
>> Index: config/aarch64/aarch64-cores.def
>> ===================================================================
>> --- config/aarch64/aarch64-cores.def    (revision 241727)
>> +++ config/aarch64/aarch64-cores.def    (working copy)
>> @@ -21,7 +21,7 @@
>> 
>>    Before using #include to read this file, define a macro:
>> 
>> -      AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHEDULER_IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART)
>> +      AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHEDULER_IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, VARIANT)
>> 
>>    The CORE_NAME is the name of the core, represented as a string constant.
>>    The CORE_IDENT is the name of the core, represented as an identifier.
>> @@ -39,39 +39,45 @@
>>    PART is the part number of the CPU.  On a GNU/Linux system it can be
>>    found in /proc/cpuinfo.  For big.LITTLE systems this should use the
>>    macro AARCH64_BIG_LITTLE where the big part number comes as the first
>> -   argument to the macro and little is the second.  */
>> +   argument to the macro and little is the second.
>> +   VARIANT is the variant of the CPU.  In a GNU/Linux system it can found
>> +   in /proc/cpuinfo.  If this is -1, this means it can match any variant.  */
>> 
>> /* V8 Architecture Processors.  */
>> 
>> /* ARM ('A') cores. */
>> -AARCH64_CORE("cortex-a35",  cortexa35, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04)
>> -AARCH64_CORE("cortex-a53",  cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03)
>> -AARCH64_CORE("cortex-a57",  cortexa57, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07)
>> -AARCH64_CORE("cortex-a72",  cortexa72, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08)
>> -AARCH64_CORE("cortex-a73",  cortexa73, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09)
>> +AARCH64_CORE("cortex-a35",  cortexa35, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1)
>> +AARCH64_CORE("cortex-a53",  cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1)
>> +AARCH64_CORE("cortex-a57",  cortexa57, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1)
>> +AARCH64_CORE("cortex-a72",  cortexa72, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1)
>> +AARCH64_CORE("cortex-a73",  cortexa73, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1)
>> 
>> /* Samsung ('S') cores. */
>> -AARCH64_CORE("exynos-m1",   exynosm1,  exynosm1,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1,  0x53, 0x001)
>> +AARCH64_CORE("exynos-m1",   exynosm1,  exynosm1,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1,  0x53, 0x001, -1)
>> 
>> /* Qualcomm ('Q') cores. */
>> -AARCH64_CORE("qdf24xx",     qdf24xx,   cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx,   0x51, 0x800)
>> +AARCH64_CORE("qdf24xx",     qdf24xx,   cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx,   0x51, 0x800, -1)
>> 
>> /* Cavium ('C') cores. */
>> -AARCH64_CORE("thunderx",    thunderx,  thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a1)
>> +AARCH64_CORE("thunderx",      thunderx,      thunderx,  8A,    AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_LSE, thunderx,  0x43, 0x0a0, -1)
>> +AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx,  8A,    AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO,           thunderx,  0x43, 0x0a1, 0)
>> +AARCH64_CORE("thunderxt88",   thunderxt88,   thunderx,  8A,    AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_LSE, thunderx,  0x43, 0x0a1, -1)
> 
> You probably want a comment somewhere here making it clear that the ordering
> of thunderxt88p1 and thunderxt88 must remain as is, or detection will fail
> (-1 will match before 0). Otherwise someone will come along and helpfully
> put these in alphabetical order and cause you trouble...
> 
>> +AARCH64_CORE("thunderxt81",   thunderxt81,   thunderx,  8_1A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_LSE, thunderx,  0x43, 0x0a2, -1)
>> +AARCH64_CORE("thunderxt83",   thunderxt83,   thunderx,  8_1A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_LSE, thunderx,  0x43, 0x0a3, -1)
>> 
>> /* APM ('P') cores. */
>> -AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000)
>> +AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000, -1)
>> 
>> /* V8.1 Architecture Processors.  */
>> 
>> /* Broadcom ('B') cores. */
>> -AARCH64_CORE("vulcan",  vulcan, cortexa57, 8_1A,  AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, vulcan, 0x42, 0x516)
>> +AARCH64_CORE("vulcan",  vulcan, cortexa57, 8_1A,  AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, vulcan, 0x42, 0x516, -1)
>> 
>> /* V8 big.LITTLE implementations.  */
>> 
>> -AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03))
>> -AARCH64_CORE("cortex-a72.cortex-a53",  cortexa72cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03))
>> -AARCH64_CORE("cortex-a73.cortex-a35",  cortexa73cortexa35, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04))
>> -AARCH64_CORE("cortex-a73.cortex-a53",  cortexa73cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03))
>> +AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1)
>> +AARCH64_CORE("cortex-a72.cortex-a53",  cortexa72cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03), -1)
>> +AARCH64_CORE("cortex-a73.cortex-a35",  cortexa73cortexa35, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04), -1)
>> +AARCH64_CORE("cortex-a73.cortex-a53",  cortexa73cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03), -1)
> 
> Why do variants for big.LITTLE get a single variant number, but you track
> two variant numbers in the code below?
> 
> Thanks,
> James
Andrew Pinski Nov. 6, 2016, 8:17 a.m. UTC | #3
On Wed, Nov 2, 2016 at 3:54 AM, James Greenhalgh
<james.greenhalgh@arm.com> wrote:
> On Tue, Nov 01, 2016 at 11:08:53AM -0700, Andrew Pinski wrote:
>> On Tue, Nov 17, 2015 at 2:10 PM, Andrew Pinski <apinski@cavium.com> wrote:
>> > Since ThunderX T88 pass 1 (variant 0) is a ARMv8 part while pass 2 (variant 1)
>> > is an ARMv8.1 part, I needed to add detecting of the variant also for this
>> > difference. Also I simplify a little bit and combined the single core and
>> > arch detecting cases so it would be easier to add variant.
>>
>> Actually it is a bit more complex than what I said here, see below for
>> the full table of options and what are enabled/disabled now.
>>
>> > OK?  Bootstrapped and tested on aarch64-linux-gnu with no regressions.
>> > Tested -mcpu=native on both T88 pass 1 and T88 pass 2 to make sure it is
>> > detecting the two separately.
>>
>>
>> Here is the final patch in this series updated; I changed the cpu name
>> slightly and made sure I updated invoke.texi too.
>>
>> The names are going to match the names in LLVM (worked with our LLVM
>> engineer here at Cavium about the names).
>> Here are the names recorded and
>> -mcpu=thunderx:
>> *        Matches part num 0xA0 (reserved for ThunderX 8x series)
>> *        T88 Pass 2 scheduling
>> *        Hardware prefetching (software prefetching disabled)
>> *        LSE enabled
>> *        no v8.1
>
> This doesn't match the current LLVM proposal
> ( https://reviews.llvm.org/D24540 ) which enables full ARMv8.1-A support
> for -mcpu=thunderx.
>
>> -mcpu=thunderxt88:
>> *        Matches part num 0xA1
>> *        T88 Pass 2 scheduling
>> *        software prefetching enabled
>> *        LSE enabled
>> *        no v8.1
>>
>> -mcpu=thunderxt88p1 (only for GCC):
>> *        Matches part num 0xA1, variant 0
>> *        T88 Pass 1 scheduling
>> *        software prefetching enabled
>> *        no LSE enabled
>> *        no v8.1
>>
>> -mcpu=thunderxt81 and -mcpu=thunderxt83:
>> *        Matches part num 0xA2/0xA3
>> *        T88 Pass 2 scheduling
>> *        Hardware prefetching (software prefetching disabled)
>> *        LSE enabled
>> *        v8.1
>
> This looks like what has been added to LLVM as -mcpu=thunderx.

Yes, I know.  As I tried to mention, we came up with this set after both
submissions happened; next time both myself and my LLVM team will
come to an agreement on names before posting to both LLVM and GCC.

>
>> I have not hooked up software vs hardware prefetching and the
>> scheduler parts (the next patch will do part of that); both ARMv8.1-a
>> and LSE parts are hooked up as those parts are only in
>> aarch64-cores.def.
>>
>> OK?  Bootstrapped and tested on ThunderX T88 and ThunderX T81
>> (aarch64-linux-gnu).
>>
>> Index: common/config/aarch64/aarch64-common.c
>> ===================================================================
>> --- common/config/aarch64/aarch64-common.c    (revision 241727)
>> +++ common/config/aarch64/aarch64-common.c    (working copy)
>> @@ -145,7 +145,7 @@ struct arch_to_arch_name
>>     the default set of architectural feature flags they support.  */
>>  static const struct processor_name_to_arch all_cores[] =
>>  {
>> -#define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART) \
>> +#define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, VARIANT) \
>>    {NAME, AARCH64_ARCH_##ARCH_IDENT, FLAGS},
>>  #include "config/aarch64/aarch64-cores.def"
>>    {"generic", AARCH64_ARCH_8A, AARCH64_FL_FOR_ARCH8},
>> Index: config/aarch64/aarch64-cores.def
>> ===================================================================
>> --- config/aarch64/aarch64-cores.def  (revision 241727)
>> +++ config/aarch64/aarch64-cores.def  (working copy)
>> @@ -21,7 +21,7 @@
>>
>>     Before using #include to read this file, define a macro:
>>
>> -      AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHEDULER_IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART)
>> +      AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHEDULER_IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, VARIANT)
>>
>>     The CORE_NAME is the name of the core, represented as a string constant.
>>     The CORE_IDENT is the name of the core, represented as an identifier.
>> @@ -39,39 +39,45 @@
>>     PART is the part number of the CPU.  On a GNU/Linux system it can be
>>     found in /proc/cpuinfo.  For big.LITTLE systems this should use the
>>     macro AARCH64_BIG_LITTLE where the big part number comes as the first
>> -   argument to the macro and little is the second.  */
>> +   argument to the macro and little is the second.
>> +   VARIANT is the variant of the CPU.  In a GNU/Linux system it can found
>> +   in /proc/cpuinfo.  If this is -1, this means it can match any variant.  */
>>
>>  /* V8 Architecture Processors.  */
>>
>>  /* ARM ('A') cores. */
>> -AARCH64_CORE("cortex-a35",  cortexa35, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04)
>> -AARCH64_CORE("cortex-a53",  cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03)
>> -AARCH64_CORE("cortex-a57",  cortexa57, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07)
>> -AARCH64_CORE("cortex-a72",  cortexa72, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08)
>> -AARCH64_CORE("cortex-a73",  cortexa73, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09)
>> +AARCH64_CORE("cortex-a35",  cortexa35, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1)
>> +AARCH64_CORE("cortex-a53",  cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1)
>> +AARCH64_CORE("cortex-a57",  cortexa57, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1)
>> +AARCH64_CORE("cortex-a72",  cortexa72, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1)
>> +AARCH64_CORE("cortex-a73",  cortexa73, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1)
>>
>>  /* Samsung ('S') cores. */
>> -AARCH64_CORE("exynos-m1",   exynosm1,  exynosm1,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1,  0x53, 0x001)
>> +AARCH64_CORE("exynos-m1",   exynosm1,  exynosm1,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1,  0x53, 0x001, -1)
>>
>>  /* Qualcomm ('Q') cores. */
>> -AARCH64_CORE("qdf24xx",     qdf24xx,   cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx,   0x51, 0x800)
>> +AARCH64_CORE("qdf24xx",     qdf24xx,   cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx,   0x51, 0x800, -1)
>>
>>  /* Cavium ('C') cores. */
>> -AARCH64_CORE("thunderx",    thunderx,  thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a1)
>> +AARCH64_CORE("thunderx",      thunderx,      thunderx,  8A,    AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_LSE, thunderx,  0x43, 0x0a0, -1)
>> +AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx,  8A,    AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO,               thunderx,  0x43, 0x0a1, 0)
>> +AARCH64_CORE("thunderxt88",   thunderxt88,   thunderx,  8A,    AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_LSE, thunderx,  0x43, 0x0a1, -1)
>
> You probably want a comment somewhere here making it clear that the ordering
> of thunderxt88p1 and thunderxt88 must remain as is, or detection will fail
> (-1 will match before 0). Otherwise someone will come along and helpfully
> put these in alphabetical order and cause you trouble...

I will do in the next submission.

>
>> +AARCH64_CORE("thunderxt81",   thunderxt81,   thunderx,  8_1A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_LSE, thunderx,  0x43, 0x0a2, -1)
>> +AARCH64_CORE("thunderxt83",   thunderxt83,   thunderx,  8_1A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_LSE, thunderx,  0x43, 0x0a3, -1)
>>
>>  /* APM ('P') cores. */
>> -AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000)
>> +AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000, -1)
>>
>>  /* V8.1 Architecture Processors.  */
>>
>>  /* Broadcom ('B') cores. */
>> -AARCH64_CORE("vulcan",  vulcan, cortexa57, 8_1A,  AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, vulcan, 0x42, 0x516)
>> +AARCH64_CORE("vulcan",  vulcan, cortexa57, 8_1A,  AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, vulcan, 0x42, 0x516, -1)
>>
>>  /* V8 big.LITTLE implementations.  */
>>
>> -AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03))
>> -AARCH64_CORE("cortex-a72.cortex-a53",  cortexa72cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03))
>> -AARCH64_CORE("cortex-a73.cortex-a35",  cortexa73cortexa35, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04))
>> -AARCH64_CORE("cortex-a73.cortex-a53",  cortexa73cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03))
>> +AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1)
>> +AARCH64_CORE("cortex-a72.cortex-a53",  cortexa72cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03), -1)
>> +AARCH64_CORE("cortex-a73.cortex-a35",  cortexa73cortexa35, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04), -1)
>> +AARCH64_CORE("cortex-a73.cortex-a53",  cortexa73cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03), -1)
>
> Why do variants for big.LITTLE get a single variant number, but you track
> two variant numbers in the code below?

You could in theory only track the last variant.  But I was thinking
rather you cannot have a big.LITTLE where the set of big cores would
be the same and the set of LITTLE cores be the same.
Parsing /proc/cpuinfo is a hard way of getting a good idea of
what the CPU is.
Really we should be using readdir of /sys/devices/system/cpu to get
all cpus (cpuN).  And then read regs/identification/midr_el1 and parse
that.

Note that this will only work on Linux 4.8 and above (maybe 4.9; I can't
remember exactly when it went in).

Thanks,
Andrew

>
> Thanks,
> James
diff mbox

Patch

Index: common/config/aarch64/aarch64-common.c
===================================================================
--- common/config/aarch64/aarch64-common.c	(revision 241727)
+++ common/config/aarch64/aarch64-common.c	(working copy)
@@ -145,7 +145,7 @@  struct arch_to_arch_name
    the default set of architectural feature flags they support.  */
 static const struct processor_name_to_arch all_cores[] =
 {
-#define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART) \
+#define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, VARIANT) \
   {NAME, AARCH64_ARCH_##ARCH_IDENT, FLAGS},
 #include "config/aarch64/aarch64-cores.def"
   {"generic", AARCH64_ARCH_8A, AARCH64_FL_FOR_ARCH8},
Index: config/aarch64/aarch64-cores.def
===================================================================
--- config/aarch64/aarch64-cores.def	(revision 241727)
+++ config/aarch64/aarch64-cores.def	(working copy)
@@ -21,7 +21,7 @@ 
 
    Before using #include to read this file, define a macro:
 
-      AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHEDULER_IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART)
+      AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHEDULER_IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, VARIANT)
 
    The CORE_NAME is the name of the core, represented as a string constant.
    The CORE_IDENT is the name of the core, represented as an identifier.
@@ -39,39 +39,45 @@ 
    PART is the part number of the CPU.  On a GNU/Linux system it can be
    found in /proc/cpuinfo.  For big.LITTLE systems this should use the
    macro AARCH64_BIG_LITTLE where the big part number comes as the first
-   argument to the macro and little is the second.  */
+   argument to the macro and little is the second.
+   VARIANT is the variant of the CPU.  On a GNU/Linux system it can be found
+   in /proc/cpuinfo.  If this is -1, this means it can match any variant.  */
 
 /* V8 Architecture Processors.  */
 
 /* ARM ('A') cores. */
-AARCH64_CORE("cortex-a35",  cortexa35, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04)
-AARCH64_CORE("cortex-a53",  cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03)
-AARCH64_CORE("cortex-a57",  cortexa57, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07)
-AARCH64_CORE("cortex-a72",  cortexa72, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08)
-AARCH64_CORE("cortex-a73",  cortexa73, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09)
+AARCH64_CORE("cortex-a35",  cortexa35, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1)
+AARCH64_CORE("cortex-a53",  cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1)
+AARCH64_CORE("cortex-a57",  cortexa57, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1)
+AARCH64_CORE("cortex-a72",  cortexa72, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1)
+AARCH64_CORE("cortex-a73",  cortexa73, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1)
 
 /* Samsung ('S') cores. */
-AARCH64_CORE("exynos-m1",   exynosm1,  exynosm1,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1,  0x53, 0x001)
+AARCH64_CORE("exynos-m1",   exynosm1,  exynosm1,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1,  0x53, 0x001, -1)
 
 /* Qualcomm ('Q') cores. */
-AARCH64_CORE("qdf24xx",     qdf24xx,   cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx,   0x51, 0x800)
+AARCH64_CORE("qdf24xx",     qdf24xx,   cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx,   0x51, 0x800, -1)
 
 /* Cavium ('C') cores. */
-AARCH64_CORE("thunderx",    thunderx,  thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a1)
+AARCH64_CORE("thunderx",      thunderx,      thunderx,  8A,    AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_LSE, thunderx,  0x43, 0x0a0, -1)
+AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx,  8A,    AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO,		   thunderx,  0x43, 0x0a1, 0)
+AARCH64_CORE("thunderxt88",   thunderxt88,   thunderx,  8A,    AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_LSE, thunderx,  0x43, 0x0a1, -1)
+AARCH64_CORE("thunderxt81",   thunderxt81,   thunderx,  8_1A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_LSE, thunderx,  0x43, 0x0a2, -1)
+AARCH64_CORE("thunderxt83",   thunderxt83,   thunderx,  8_1A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_LSE, thunderx,  0x43, 0x0a3, -1)
 
 /* APM ('P') cores. */
-AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000)
+AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000, -1)
 
 /* V8.1 Architecture Processors.  */
 
 /* Broadcom ('B') cores. */
-AARCH64_CORE("vulcan",  vulcan, cortexa57, 8_1A,  AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, vulcan, 0x42, 0x516)
+AARCH64_CORE("vulcan",  vulcan, cortexa57, 8_1A,  AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, vulcan, 0x42, 0x516, -1)
 
 /* V8 big.LITTLE implementations.  */
 
-AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03))
-AARCH64_CORE("cortex-a72.cortex-a53",  cortexa72cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03))
-AARCH64_CORE("cortex-a73.cortex-a35",  cortexa73cortexa35, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04))
-AARCH64_CORE("cortex-a73.cortex-a53",  cortexa73cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03))
+AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1)
+AARCH64_CORE("cortex-a72.cortex-a53",  cortexa72cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03), -1)
+AARCH64_CORE("cortex-a73.cortex-a35",  cortexa73cortexa35, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04), -1)
+AARCH64_CORE("cortex-a73.cortex-a53",  cortexa73cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03), -1)
 
 #undef AARCH64_CORE
Index: config/aarch64/aarch64-opts.h
===================================================================
--- config/aarch64/aarch64-opts.h	(revision 241727)
+++ config/aarch64/aarch64-opts.h	(working copy)
@@ -25,7 +25,7 @@ 
 /* The various cores that implement AArch64.  */
 enum aarch64_processor
 {
-#define AARCH64_CORE(NAME, INTERNAL_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
+#define AARCH64_CORE(NAME, INTERNAL_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
   INTERNAL_IDENT,
 #include "aarch64-cores.def"
   /* Used to indicate that no processor has been specified.  */
Index: config/aarch64/aarch64-tune.md
===================================================================
--- config/aarch64/aarch64-tune.md	(revision 241727)
+++ config/aarch64/aarch64-tune.md	(working copy)
@@ -1,5 +1,5 @@ 
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from aarch64-cores.def
 (define_attr "tune"
-	"cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,exynosm1,qdf24xx,thunderx,xgene1,vulcan,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53"
+	"cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,exynosm1,qdf24xx,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,vulcan,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53"
 	(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
Index: config/aarch64/aarch64.c
===================================================================
--- config/aarch64/aarch64.c	(revision 241727)
+++ config/aarch64/aarch64.c	(working copy)
@@ -831,7 +831,7 @@  static const struct processor all_archit
 /* Processor cores implementing AArch64.  */
 static const struct processor all_cores[] =
 {
-#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
+#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
   {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH,				\
   all_architectures[AARCH64_ARCH_##ARCH].architecture_version,	\
   FLAGS, &COSTS##_tunings},
Index: config/aarch64/aarch64.h
===================================================================
--- config/aarch64/aarch64.h	(revision 241727)
+++ config/aarch64/aarch64.h	(working copy)
@@ -490,7 +490,7 @@  enum reg_class
 
 enum target_cpus
 {
-#define AARCH64_CORE(NAME, INTERNAL_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
+#define AARCH64_CORE(NAME, INTERNAL_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
   TARGET_CPU_##INTERNAL_IDENT,
 #include "aarch64-cores.def"
   TARGET_CPU_generic
Index: config/aarch64/driver-aarch64.c
===================================================================
--- config/aarch64/driver-aarch64.c	(revision 241727)
+++ config/aarch64/driver-aarch64.c	(working copy)
@@ -48,6 +48,7 @@  struct aarch64_core_data
   const char* arch;
   unsigned char implementer_id; /* Exactly 8 bits */
   unsigned int part_no; /* 12 bits + 12 bits */
+  unsigned variant;
   const unsigned long flags;
 };
 
@@ -55,14 +56,15 @@  struct aarch64_core_data
   (((BIG)&0xFFFu) << 12 | ((LITTLE) & 0xFFFu))
 #define INVALID_IMP ((unsigned char) -1)
 #define INVALID_CORE ((unsigned)-1)
+#define ALL_VARIANTS ((unsigned)-1)
 
-#define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
-  { CORE_NAME, #ARCH, IMP, PART, FLAGS },
+#define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
+  { CORE_NAME, #ARCH, IMP, PART, VARIANT, FLAGS },
 
 static struct aarch64_core_data aarch64_cpu_data[] =
 {
 #include "aarch64-cores.def"
-  { NULL, NULL, INVALID_IMP, INVALID_CORE, 0 }
+  { NULL, NULL, INVALID_IMP, INVALID_CORE, ALL_VARIANTS, 0 }
 };
 
 
@@ -160,7 +162,6 @@  contains_core_p (unsigned *arr, unsigned
 const char *
 host_detect_local_cpu (int argc, const char **argv)
 {
-  const char *arch_id = NULL;
   const char *res = NULL;
   static const int num_exts = ARRAY_SIZE (aarch64_extensions);
   char buf[128];
@@ -172,6 +173,8 @@  host_detect_local_cpu (int argc, const c
   unsigned char imp = INVALID_IMP;
   unsigned int cores[2] = { INVALID_CORE, INVALID_CORE };
   unsigned int n_cores = 0;
+  unsigned int variants[2] = { ALL_VARIANTS, ALL_VARIANTS };
+  unsigned int n_variants = 0;
   bool processed_exts = false;
   const char *ext_string = "";
   unsigned long extension_flags = 0;
@@ -215,6 +218,19 @@  host_detect_local_cpu (int argc, const c
 	    goto not_found;
 	}
 
+      if (strstr (buf, "variant") != NULL)
+	{
+	  unsigned cvariant = parse_field (buf);
+	  if (!contains_core_p (variants, cvariant))
+	    {
+              if (n_variants == 2)
+                goto not_found;
+
+              variants[n_variants++] = cvariant;
+	    }
+          continue;
+        }
+
       if (strstr (buf, "part") != NULL)
 	{
 	  unsigned ccore = parse_field (buf);
@@ -267,33 +283,48 @@  host_detect_local_cpu (int argc, const c
   f = NULL;
 
   /* Weird cpuinfo format that we don't know how to handle.  */
-  if (n_cores == 0 || n_cores > 2 || imp == INVALID_IMP)
+  if (n_cores == 0
+      || n_cores > 2
+      || (n_cores == 1 && n_variants != 1)
+      || imp == INVALID_IMP)
     goto not_found;
 
-  if (arch)
+  /* Simple case, one core type or just looking for the arch. */
+  if (n_cores == 1 || arch)
     {
       /* Search for one of the cores in the list. */
       for (i = 0; aarch64_cpu_data[i].name != NULL; i++)
 	if (aarch64_cpu_data[i].implementer_id == imp
-	    && contains_core_p (cores, aarch64_cpu_data[i].part_no))
-	  {
-	    arch_id = aarch64_cpu_data[i].arch;
-	    break;
-	  }
-      if (!arch_id)
-	goto not_found;
+            && cores[0] == aarch64_cpu_data[i].part_no
+            && (aarch64_cpu_data[i].variant == ALL_VARIANTS
+                || variants[0] == aarch64_cpu_data[i].variant))
+	  break;
+      if (aarch64_cpu_data[i].name == NULL)
+        goto not_found;
 
-      struct aarch64_arch_driver_info* arch_info = get_arch_from_id (arch_id);
+      if (arch)
+	{
+	  const char *arch_id = aarch64_cpu_data[i].arch;
+	  aarch64_arch_driver_info* arch_info = get_arch_from_id (arch_id);
 
-      /* We got some arch indentifier that's not in aarch64-arches.def?  */
-      if (!arch_info)
-	goto not_found;
+	  /* We got some arch indentifier that's not in aarch64-arches.def?  */
+	  if (!arch_info)
+	    goto not_found;
 
-      res = concat ("-march=", arch_info->name, NULL);
-      default_flags = arch_info->flags;
+	  res = concat ("-march=", arch_info->name, NULL);
+	  default_flags = arch_info->flags;
+	}
+      else
+	{
+	  default_flags = aarch64_cpu_data[i].flags;
+	  res = concat ("-m",
+			cpu ? "cpu" : "tune", "=",
+			aarch64_cpu_data[i].name,
+			NULL);
+	}
     }
   /* We have big.LITTLE.  */
-  else if (n_cores == 2)
+  else
     {
       for (i = 0; aarch64_cpu_data[i].name != NULL; i++)
 	{
@@ -311,24 +342,6 @@  host_detect_local_cpu (int argc, const c
       if (!res)
 	goto not_found;
     }
-  /* The simple, non-big.LITTLE case.  */
-  else
-    {
-      int core_idx = -1;
-      for (i = 0; aarch64_cpu_data[i].name != NULL; i++)
-	if (cores[0] == aarch64_cpu_data[i].part_no
-	    && aarch64_cpu_data[i].implementer_id == imp)
-	  {
-	    core_idx = i;
-	    break;
-	  }
-      if (core_idx == -1)
-	goto not_found;
-
-      res = concat ("-m", cpu ? "cpu" : "tune", "=",
-		    aarch64_cpu_data[core_idx].name, NULL);
-      default_flags = aarch64_cpu_data[core_idx].flags;
-    }
 
   if (tune)
     return res;
Index: doc/invoke.texi
===================================================================
--- doc/invoke.texi	(revision 241727)
+++ doc/invoke.texi	(working copy)
@@ -13834,7 +13834,8 @@  Specify the name of the target processor
 performance of the code.  Permissible values for this option are:
 @samp{generic}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a57},
 @samp{cortex-a72}, @samp{cortex-a73}, @samp{exynos-m1}, @samp{qdf24xx},
-@samp{thunderx}, @samp{xgene1}, @samp{vulcan}, @samp{cortex-a57.cortex-a53},
+@samp{thunderx}, @samp{thunderxt88}, @samp{thunderxt88p1}, @samp{thunderxt81},
+@samp{thunderxt83}, @samp{xgene1}, @samp{vulcan}, @samp{cortex-a57.cortex-a53},
 @samp{cortex-a72.cortex-a53}, @samp{cortex-a73.cortex-a35},
 @samp{cortex-a73.cortex-a53}, @samp{native}.