diff mbox series

[v3,aarch64] Add CPU support for Ampere Computing's eMAG.

Message ID 20181120180055.16226-1-christoph.muellner@theobroma-systems.com
State New
Headers show
Series [v3,aarch64] Add CPU support for Ampere Computing's eMAG. | expand

Commit Message

Christoph Muellner Nov. 20, 2018, 6 p.m. UTC
Tested with "make check" and no regressions found.

This patch depends on the struct xgene1_prefetch_tune,
which has been acknowledged already:
https://gcc.gnu.org/ml/gcc-patches/2018-11/msg00985.html

*** gcc/ChangeLog ***

2018-xx-xx  Christoph Muellner <christoph.muellner@theobroma-systems.com>

	* config/aarch64/aarch64-cores.def: Define emag.
	* config/aarch64/aarch64-tune.md: Regenerated with emag.
	* config/aarch64/aarch64.c (emag_tunings): New struct.
	* doc/invoke.texi: Document mtune value.

Signed-off-by: Christoph Muellner <christoph.muellner@theobroma-systems.com>
---
 gcc/config/aarch64/aarch64-cores.def |  3 +++
 gcc/config/aarch64/aarch64-tune.md   |  2 +-
 gcc/config/aarch64/aarch64.c         | 25 +++++++++++++++++++++++++
 gcc/doc/invoke.texi                  |  2 +-
 4 files changed, 30 insertions(+), 2 deletions(-)

Comments

Kyrill Tkachov Nov. 21, 2018, 10:26 a.m. UTC | #1
Hi Christoph,

On 20/11/18 18:00, Christoph Muellner wrote:
> Tested with "make check" and no regressions found.
>
> This patch depends on the struct xgene1_prefetch_tune,
> which has been acknowledged already:
> https://gcc.gnu.org/ml/gcc-patches/2018-11/msg00985.html
>
> *** gcc/ChangeLog ***
>
> 2018-xx-xx  Christoph Muellner <christoph.muellner@theobroma-system.com>
>
> 	* config/aarch64/aarch64-cores.def: Define emag.
> 	* config/aarch64/aarch64-tune.md: Regenerated with emag.
> 	* config/aarch64/aarch64.c (emag_tunings): New struct.
> 	* doc/invoke.texi: Document mtune value.

This looks ok to me but you'll need a maintainer to approve.
You mentioned this depends on your previously approved patches.
Do you have write access or do you need someone to commit them for you?

Thanks,
Kyrill

> Signed-off-by: Christoph Muellner <christoph.muellner@theobroma-systems.com>
> ---
>   gcc/config/aarch64/aarch64-cores.def |  3 +++
>   gcc/config/aarch64/aarch64-tune.md   |  2 +-
>   gcc/config/aarch64/aarch64.c         | 25 +++++++++++++++++++++++++
>   gcc/doc/invoke.texi                  |  2 +-
>   4 files changed, 30 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
> index 1f3ac56..68cca00 100644
> --- a/gcc/config/aarch64/aarch64-cores.def
> +++ b/gcc/config/aarch64/aarch64-cores.def
> @@ -61,6 +61,9 @@ AARCH64_CORE("thunderxt88",   thunderxt88,   thunderx,  8A,  AARCH64_FL_FOR_ARCH
>   AARCH64_CORE("thunderxt81",   thunderxt81,   thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a2, -1)
>   AARCH64_CORE("thunderxt83",   thunderxt83,   thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a3, -1)
>   
> +/* Ampere Computing cores. */
> +AARCH64_CORE("emag",        emag,      xgene1,    8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3)
> +
>   /* APM ('P') cores. */
>   AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000, -1)
>   
> diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
> index fade1d4..2fc7f03 100644
> --- a/gcc/config/aarch64/aarch64-tune.md
> +++ b/gcc/config/aarch64/aarch64-tune.md
> @@ -1,5 +1,5 @@
>   ;; -*- buffer-read-only: t -*-
>   ;; Generated automatically by gentune.sh from aarch64-cores.def
>   (define_attr "tune"
> -	"cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
> +	"cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
>   	(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index f7f88a9..995aafe 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -957,6 +957,31 @@ static const struct tune_params xgene1_tunings =
>     &xgene1_prefetch_tune
>   };
>   
> +static const struct tune_params emag_tunings =
> +{
> +  &xgene1_extra_costs,
> +  &xgene1_addrcost_table,
> +  &xgene1_regmove_cost,
> +  &xgene1_vector_cost,
> +  &generic_branch_cost,
> +  &xgene1_approx_modes,
> +  6, /* memmov_cost  */
> +  4, /* issue_rate  */
> +  AARCH64_FUSE_NOTHING, /* fusible_ops  */
> +  "16",	/* function_align.  */
> +  "16",	/* jump_align.  */
> +  "16",	/* loop_align.  */
> +  2,	/* int_reassoc_width.  */
> +  4,	/* fp_reassoc_width.  */
> +  1,	/* vec_reassoc_width.  */
> +  2,	/* min_div_recip_mul_sf.  */
> +  2,	/* min_div_recip_mul_df.  */
> +  17,	/* max_case_values.  */
> +  tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
> +  (AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS),	/* tune_flags.  */
> +  &xgene1_prefetch_tune
> +};
> +
>   static const struct tune_params qdf24xx_tunings =
>   {
>     &qdf24xx_extra_costs,
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index e016dce..ac81fb2 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -15288,7 +15288,7 @@ Specify the name of the target processor for which GCC should tune the
>   performance of the code.  Permissible values for this option are:
>   @samp{generic}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a55},
>   @samp{cortex-a57}, @samp{cortex-a72}, @samp{cortex-a73}, @samp{cortex-a75},
> -@samp{cortex-a76}, @samp{ares}, @samp{exynos-m1}, @samp{falkor},
> +@samp{cortex-a76}, @samp{ares}, @samp{exynos-m1}, @samp{emag}, @samp{falkor},
>   @samp{qdf24xx}, @samp{saphira}, @samp{phecda}, @samp{xgene1}, @samp{vulcan},
>   @samp{thunderx}, @samp{thunderxt88}, @samp{thunderxt88p1}, @samp{thunderxt81},
>   @samp{tsv110}, @samp{thunderxt83}, @samp{thunderx2t99},
Christoph Muellner Nov. 21, 2018, 11:15 a.m. UTC | #2
> On 21.11.2018, at 11:26, Kyrill Tkachov <kyrylo.tkachov@foss.arm.com> wrote:
> 
> Hi Christoph,
> 
> On 20/11/18 18:00, Christoph Muellner wrote:
>> Tested with "make check" and no regressions found.
>> 
>> This patch depends on the struct xgene1_prefetch_tune,
>> which has been acknowledged already:
>> https://gcc.gnu.org/ml/gcc-patches/2018-11/msg00985.html
>> 
>> *** gcc/ChangeLog ***
>> 
>> 2018-xx-xx  Christoph Muellner <christoph.muellner@theobroma-system.com>
>> 
>> 	* config/aarch64/aarch64-cores.def: Define emag.
>> 	* config/aarch64/aarch64-tune.md: Regenerated with emag.
>> 	* config/aarch64/aarch64.c (emag_tunings): New struct.
>> 	* doc/invoke.texi: Document mtune value.
> 
> This looks ok to me but you'll need a maintainer to approve.
> You mentioned this depends on your previously approved patches.
> Do you have write access or do you need someone to commit them for you?

I don't have write access.
But I have already contacted somebody with write access to get my ACK'ed changes in.

Thanks,
Christoph

> 
> Thanks,
> Kyrill
> 
>> Signed-off-by: Christoph Muellner <christoph.muellner@theobroma-systems.com>
>> ---
>>  gcc/config/aarch64/aarch64-cores.def |  3 +++
>>  gcc/config/aarch64/aarch64-tune.md   |  2 +-
>>  gcc/config/aarch64/aarch64.c         | 25 +++++++++++++++++++++++++
>>  gcc/doc/invoke.texi                  |  2 +-
>>  4 files changed, 30 insertions(+), 2 deletions(-)
>> 
>> diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
>> index 1f3ac56..68cca00 100644
>> --- a/gcc/config/aarch64/aarch64-cores.def
>> +++ b/gcc/config/aarch64/aarch64-cores.def
>> @@ -61,6 +61,9 @@ AARCH64_CORE("thunderxt88",   thunderxt88,   thunderx,  8A,  AARCH64_FL_FOR_ARCH
>>  AARCH64_CORE("thunderxt81",   thunderxt81,   thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a2, -1)
>>  AARCH64_CORE("thunderxt83",   thunderxt83,   thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a3, -1)
>>  
>> +/* Ampere Computing cores. */
>> +AARCH64_CORE("emag",        emag,      xgene1,    8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3)
>> +
>>  /* APM ('P') cores. */
>>  AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000, -1)
>>  
>> diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
>> index fade1d4..2fc7f03 100644
>> --- a/gcc/config/aarch64/aarch64-tune.md
>> +++ b/gcc/config/aarch64/aarch64-tune.md
>> @@ -1,5 +1,5 @@
>>  ;; -*- buffer-read-only: t -*-
>>  ;; Generated automatically by gentune.sh from aarch64-cores.def
>>  (define_attr "tune"
>> -	"cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
>> +	"cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
>>  	(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
>> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
>> index f7f88a9..995aafe 100644
>> --- a/gcc/config/aarch64/aarch64.c
>> +++ b/gcc/config/aarch64/aarch64.c
>> @@ -957,6 +957,31 @@ static const struct tune_params xgene1_tunings =
>>    &xgene1_prefetch_tune
>>  };
>>  
>> +static const struct tune_params emag_tunings =
>> +{
>> +  &xgene1_extra_costs,
>> +  &xgene1_addrcost_table,
>> +  &xgene1_regmove_cost,
>> +  &xgene1_vector_cost,
>> +  &generic_branch_cost,
>> +  &xgene1_approx_modes,
>> +  6, /* memmov_cost  */
>> +  4, /* issue_rate  */
>> +  AARCH64_FUSE_NOTHING, /* fusible_ops  */
>> +  "16",	/* function_align.  */
>> +  "16",	/* jump_align.  */
>> +  "16",	/* loop_align.  */
>> +  2,	/* int_reassoc_width.  */
>> +  4,	/* fp_reassoc_width.  */
>> +  1,	/* vec_reassoc_width.  */
>> +  2,	/* min_div_recip_mul_sf.  */
>> +  2,	/* min_div_recip_mul_df.  */
>> +  17,	/* max_case_values.  */
>> +  tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
>> +  (AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS),	/* tune_flags.  */
>> +  &xgene1_prefetch_tune
>> +};
>> +
>>  static const struct tune_params qdf24xx_tunings =
>>  {
>>    &qdf24xx_extra_costs,
>> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
>> index e016dce..ac81fb2 100644
>> --- a/gcc/doc/invoke.texi
>> +++ b/gcc/doc/invoke.texi
>> @@ -15288,7 +15288,7 @@ Specify the name of the target processor for which GCC should tune the
>>  performance of the code.  Permissible values for this option are:
>>  @samp{generic}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a55},
>>  @samp{cortex-a57}, @samp{cortex-a72}, @samp{cortex-a73}, @samp{cortex-a75},
>> -@samp{cortex-a76}, @samp{ares}, @samp{exynos-m1}, @samp{falkor},
>> +@samp{cortex-a76}, @samp{ares}, @samp{exynos-m1}, @samp{emag}, @samp{falkor},
>>  @samp{qdf24xx}, @samp{saphira}, @samp{phecda}, @samp{xgene1}, @samp{vulcan},
>>  @samp{thunderx}, @samp{thunderxt88}, @samp{thunderxt88p1}, @samp{thunderxt81},
>>  @samp{tsv110}, @samp{thunderxt83}, @samp{thunderx2t99},
>
Philipp Tomsich Nov. 21, 2018, 12:26 p.m. UTC | #3
This is currently slowed down by the speed of subversion (as my subversion tree
was outdated).  So it should only be a matter of days ... ;-)

> On 21.11.2018, at 12:15, Christoph Müllner <christoph.muellner@theobroma-systems.com> wrote:
> 
>> 
>> On 21.11.2018, at 11:26, Kyrill Tkachov <kyrylo.tkachov@foss.arm.com> wrote:
>> 
>> Hi Christoph,
>> 
>> On 20/11/18 18:00, Christoph Muellner wrote:
>>> Tested with "make check" and no regressions found.
>>> 
>>> This patch depends on the struct xgene1_prefetch_tune,
>>> which has been acknowledged already:
>>> https://gcc.gnu.org/ml/gcc-patches/2018-11/msg00985.html
>>> 
>>> *** gcc/ChangeLog ***
>>> 
>>> 2018-xx-xx  Christoph Muellner <christoph.muellner@theobroma-system.com>
>>> 
>>> 	* config/aarch64/aarch64-cores.def: Define emag.
>>> 	* config/aarch64/aarch64-tune.md: Regenerated with emag.
>>> 	* config/aarch64/aarch64.c (emag_tunings): New struct.
>>> 	* doc/invoke.texi: Document mtune value.
>> 
>> This looks ok to me but you'll need a maintainer to approve.
>> You mentioned this depends on your previously approved patches.
>> Do you have write access or do you need someone to commit them for you?
> 
> I'd don't have write access.
> But I have already contacted somebody with write access to get my ACK'ed changes in.
> 
> Thanks,
> Christoph
> 
>> 
>> Thanks,
>> Kyrill
>> 
>>> Signed-off-by: Christoph Muellner <christoph.muellner@theobroma-systems.com>
>>> ---
>>> gcc/config/aarch64/aarch64-cores.def |  3 +++
>>> gcc/config/aarch64/aarch64-tune.md   |  2 +-
>>> gcc/config/aarch64/aarch64.c         | 25 +++++++++++++++++++++++++
>>> gcc/doc/invoke.texi                  |  2 +-
>>> 4 files changed, 30 insertions(+), 2 deletions(-)
>>> 
>>> diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
>>> index 1f3ac56..68cca00 100644
>>> --- a/gcc/config/aarch64/aarch64-cores.def
>>> +++ b/gcc/config/aarch64/aarch64-cores.def
>>> @@ -61,6 +61,9 @@ AARCH64_CORE("thunderxt88",   thunderxt88,   thunderx,  8A,  AARCH64_FL_FOR_ARCH
>>> AARCH64_CORE("thunderxt81",   thunderxt81,   thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a2, -1)
>>> AARCH64_CORE("thunderxt83",   thunderxt83,   thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a3, -1)
>>> +/* Ampere Computing cores. */
>>> +AARCH64_CORE("emag",        emag,      xgene1,    8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3)
>>> +
>>> /* APM ('P') cores. */
>>> AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000, -1)
>>> diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
>>> index fade1d4..2fc7f03 100644
>>> --- a/gcc/config/aarch64/aarch64-tune.md
>>> +++ b/gcc/config/aarch64/aarch64-tune.md
>>> @@ -1,5 +1,5 @@
>>> ;; -*- buffer-read-only: t -*-
>>> ;; Generated automatically by gentune.sh from aarch64-cores.def
>>> (define_attr "tune"
>>> -	"cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
>>> +	"cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
>>> 	(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
>>> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
>>> index f7f88a9..995aafe 100644
>>> --- a/gcc/config/aarch64/aarch64.c
>>> +++ b/gcc/config/aarch64/aarch64.c
>>> @@ -957,6 +957,31 @@ static const struct tune_params xgene1_tunings =
>>>   &xgene1_prefetch_tune
>>> };
>>> +static const struct tune_params emag_tunings =
>>> +{
>>> +  &xgene1_extra_costs,
>>> +  &xgene1_addrcost_table,
>>> +  &xgene1_regmove_cost,
>>> +  &xgene1_vector_cost,
>>> +  &generic_branch_cost,
>>> +  &xgene1_approx_modes,
>>> +  6, /* memmov_cost  */
>>> +  4, /* issue_rate  */
>>> +  AARCH64_FUSE_NOTHING, /* fusible_ops  */
>>> +  "16",	/* function_align.  */
>>> +  "16",	/* jump_align.  */
>>> +  "16",	/* loop_align.  */
>>> +  2,	/* int_reassoc_width.  */
>>> +  4,	/* fp_reassoc_width.  */
>>> +  1,	/* vec_reassoc_width.  */
>>> +  2,	/* min_div_recip_mul_sf.  */
>>> +  2,	/* min_div_recip_mul_df.  */
>>> +  17,	/* max_case_values.  */
>>> +  tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
>>> +  (AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS),	/* tune_flags.  */
>>> +  &xgene1_prefetch_tune
>>> +};
>>> +
>>> static const struct tune_params qdf24xx_tunings =
>>> {
>>>   &qdf24xx_extra_costs,
>>> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
>>> index e016dce..ac81fb2 100644
>>> --- a/gcc/doc/invoke.texi
>>> +++ b/gcc/doc/invoke.texi
>>> @@ -15288,7 +15288,7 @@ Specify the name of the target processor for which GCC should tune the
>>> performance of the code.  Permissible values for this option are:
>>> @samp{generic}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a55},
>>> @samp{cortex-a57}, @samp{cortex-a72}, @samp{cortex-a73}, @samp{cortex-a75},
>>> -@samp{cortex-a76}, @samp{ares}, @samp{exynos-m1}, @samp{falkor},
>>> +@samp{cortex-a76}, @samp{ares}, @samp{exynos-m1}, @samp{emag}, @samp{falkor},
>>> @samp{qdf24xx}, @samp{saphira}, @samp{phecda}, @samp{xgene1}, @samp{vulcan},
>>> @samp{thunderx}, @samp{thunderxt88}, @samp{thunderxt88p1}, @samp{thunderxt81},
>>> @samp{tsv110}, @samp{thunderxt83}, @samp{thunderx2t99},
Richard Earnshaw (lists) Nov. 21, 2018, 4:44 p.m. UTC | #4
On 20/11/2018 18:00, Christoph Muellner wrote:
> Tested with "make check" and no regressions found.
> 
> This patch depends on the struct xgene1_prefetch_tune,
> which has been acknowledged already:
> https://gcc.gnu.org/ml/gcc-patches/2018-11/msg00985.html
> 
> *** gcc/ChangeLog ***
> 
> 2018-xx-xx  Christoph Muellner <christoph.muellner@theobroma-system.com>
> 
> 	* config/aarch64/aarch64-cores.def: Define emag.
> 	* config/aarch64/aarch64-tune.md: Regenerated with emag.
> 	* config/aarch64/aarch64.c (emag_tunings): New struct.
> 	* doc/invoke.texi: Document mtune value.

OK.

R.

> 
> Signed-off-by: Christoph Muellner <christoph.muellner@theobroma-systems.com>
> ---
>  gcc/config/aarch64/aarch64-cores.def |  3 +++
>  gcc/config/aarch64/aarch64-tune.md   |  2 +-
>  gcc/config/aarch64/aarch64.c         | 25 +++++++++++++++++++++++++
>  gcc/doc/invoke.texi                  |  2 +-
>  4 files changed, 30 insertions(+), 2 deletions(-)
> 
> diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
> index 1f3ac56..68cca00 100644
> --- a/gcc/config/aarch64/aarch64-cores.def
> +++ b/gcc/config/aarch64/aarch64-cores.def
> @@ -61,6 +61,9 @@ AARCH64_CORE("thunderxt88",   thunderxt88,   thunderx,  8A,  AARCH64_FL_FOR_ARCH
>  AARCH64_CORE("thunderxt81",   thunderxt81,   thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a2, -1)
>  AARCH64_CORE("thunderxt83",   thunderxt83,   thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a3, -1)
>  
> +/* Ampere Computing cores. */
> +AARCH64_CORE("emag",        emag,      xgene1,    8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3)
> +
>  /* APM ('P') cores. */
>  AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000, -1)
>  
> diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
> index fade1d4..2fc7f03 100644
> --- a/gcc/config/aarch64/aarch64-tune.md
> +++ b/gcc/config/aarch64/aarch64-tune.md
> @@ -1,5 +1,5 @@
>  ;; -*- buffer-read-only: t -*-
>  ;; Generated automatically by gentune.sh from aarch64-cores.def
>  (define_attr "tune"
> -	"cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
> +	"cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
>  	(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index f7f88a9..995aafe 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -957,6 +957,31 @@ static const struct tune_params xgene1_tunings =
>    &xgene1_prefetch_tune
>  };
>  
> +static const struct tune_params emag_tunings =
> +{
> +  &xgene1_extra_costs,
> +  &xgene1_addrcost_table,
> +  &xgene1_regmove_cost,
> +  &xgene1_vector_cost,
> +  &generic_branch_cost,
> +  &xgene1_approx_modes,
> +  6, /* memmov_cost  */
> +  4, /* issue_rate  */
> +  AARCH64_FUSE_NOTHING, /* fusible_ops  */
> +  "16",	/* function_align.  */
> +  "16",	/* jump_align.  */
> +  "16",	/* loop_align.  */
> +  2,	/* int_reassoc_width.  */
> +  4,	/* fp_reassoc_width.  */
> +  1,	/* vec_reassoc_width.  */
> +  2,	/* min_div_recip_mul_sf.  */
> +  2,	/* min_div_recip_mul_df.  */
> +  17,	/* max_case_values.  */
> +  tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
> +  (AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS),	/* tune_flags.  */
> +  &xgene1_prefetch_tune
> +};
> +
>  static const struct tune_params qdf24xx_tunings =
>  {
>    &qdf24xx_extra_costs,
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index e016dce..ac81fb2 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -15288,7 +15288,7 @@ Specify the name of the target processor for which GCC should tune the
>  performance of the code.  Permissible values for this option are:
>  @samp{generic}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a55},
>  @samp{cortex-a57}, @samp{cortex-a72}, @samp{cortex-a73}, @samp{cortex-a75},
> -@samp{cortex-a76}, @samp{ares}, @samp{exynos-m1}, @samp{falkor},
> +@samp{cortex-a76}, @samp{ares}, @samp{exynos-m1}, @samp{emag}, @samp{falkor},
>  @samp{qdf24xx}, @samp{saphira}, @samp{phecda}, @samp{xgene1}, @samp{vulcan},
>  @samp{thunderx}, @samp{thunderxt88}, @samp{thunderxt88p1}, @samp{thunderxt81},
>  @samp{tsv110}, @samp{thunderxt83}, @samp{thunderx2t99},
>
Andrew Pinski Nov. 22, 2018, 7:54 a.m. UTC | #5
One small comment.

On Tue, Nov 20, 2018 at 10:01 AM Christoph Muellner
<christoph.muellner@theobroma-systems.com> wrote:
>
> Tested with "make check" and no regressions found.
>
> This patch depends on the struct xgene1_prefetch_tune,
> which has been acknowledged already:
> https://gcc.gnu.org/ml/gcc-patches/2018-11/msg00985.html
>
> *** gcc/ChangeLog ***
>
> 2018-xx-xx  Christoph Muellner <christoph.muellner@theobroma-system.com>
>
>         * config/aarch64/aarch64-cores.def: Define emag.
>         * config/aarch64/aarch64-tune.md: Regenerated with emag.
>         * config/aarch64/aarch64.c (emag_tunings): New struct.
>         * doc/invoke.texi: Document mtune value.
>
> Signed-off-by: Christoph Muellner <christoph.muellner@theobroma-systems.com>
> ---
>  gcc/config/aarch64/aarch64-cores.def |  3 +++
>  gcc/config/aarch64/aarch64-tune.md   |  2 +-
>  gcc/config/aarch64/aarch64.c         | 25 +++++++++++++++++++++++++
>  gcc/doc/invoke.texi                  |  2 +-
>  4 files changed, 30 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
> index 1f3ac56..68cca00 100644
> --- a/gcc/config/aarch64/aarch64-cores.def
> +++ b/gcc/config/aarch64/aarch64-cores.def
> @@ -61,6 +61,9 @@ AARCH64_CORE("thunderxt88",   thunderxt88,   thunderx,  8A,  AARCH64_FL_FOR_ARCH
>  AARCH64_CORE("thunderxt81",   thunderxt81,   thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a2, -1)
>  AARCH64_CORE("thunderxt83",   thunderxt83,   thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a3, -1)
>
> +/* Ampere Computing cores. */
> +AARCH64_CORE("emag",        emag,      xgene1,    8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3)

I think you should add a comment explaining why this ordering is required,
like the one above for thunderxt88p1.

Thanks,
Andrew Pinski

> +
>  /* APM ('P') cores. */
>  AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000, -1)
>
> diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
> index fade1d4..2fc7f03 100644
> --- a/gcc/config/aarch64/aarch64-tune.md
> +++ b/gcc/config/aarch64/aarch64-tune.md
> @@ -1,5 +1,5 @@
>  ;; -*- buffer-read-only: t -*-
>  ;; Generated automatically by gentune.sh from aarch64-cores.def
>  (define_attr "tune"
> -       "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
> +       "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
>         (const (symbol_ref "((enum attr_tune) aarch64_tune)")))
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index f7f88a9..995aafe 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -957,6 +957,31 @@ static const struct tune_params xgene1_tunings =
>    &xgene1_prefetch_tune
>  };
>
> +static const struct tune_params emag_tunings =
> +{
> +  &xgene1_extra_costs,
> +  &xgene1_addrcost_table,
> +  &xgene1_regmove_cost,
> +  &xgene1_vector_cost,
> +  &generic_branch_cost,
> +  &xgene1_approx_modes,
> +  6, /* memmov_cost  */
> +  4, /* issue_rate  */
> +  AARCH64_FUSE_NOTHING, /* fusible_ops  */
> +  "16",        /* function_align.  */
> +  "16",        /* jump_align.  */
> +  "16",        /* loop_align.  */
> +  2,   /* int_reassoc_width.  */
> +  4,   /* fp_reassoc_width.  */
> +  1,   /* vec_reassoc_width.  */
> +  2,   /* min_div_recip_mul_sf.  */
> +  2,   /* min_div_recip_mul_df.  */
> +  17,  /* max_case_values.  */
> +  tune_params::AUTOPREFETCHER_OFF,     /* autoprefetcher_model.  */
> +  (AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS),       /* tune_flags.  */
> +  &xgene1_prefetch_tune
> +};
> +
>  static const struct tune_params qdf24xx_tunings =
>  {
>    &qdf24xx_extra_costs,
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index e016dce..ac81fb2 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -15288,7 +15288,7 @@ Specify the name of the target processor for which GCC should tune the
>  performance of the code.  Permissible values for this option are:
>  @samp{generic}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a55},
>  @samp{cortex-a57}, @samp{cortex-a72}, @samp{cortex-a73}, @samp{cortex-a75},
> -@samp{cortex-a76}, @samp{ares}, @samp{exynos-m1}, @samp{falkor},
> +@samp{cortex-a76}, @samp{ares}, @samp{exynos-m1}, @samp{emag}, @samp{falkor},
>  @samp{qdf24xx}, @samp{saphira}, @samp{phecda}, @samp{xgene1}, @samp{vulcan},
>  @samp{thunderx}, @samp{thunderxt88}, @samp{thunderxt88p1}, @samp{thunderxt81},
>  @samp{tsv110}, @samp{thunderxt83}, @samp{thunderx2t99},
> --
> 2.9.5
>
Christoph Muellner Nov. 22, 2018, 8 a.m. UTC | #6
On 11/22/18 8:54 AM, Andrew Pinski wrote:
> One small comment.
> 
> On Tue, Nov 20, 2018 at 10:01 AM Christoph Muellner
> <christoph.muellner@theobroma-systems.com> wrote:
>>
>> Tested with "make check" and no regressions found.
>>
>> This patch depends on the struct xgene1_prefetch_tune,
>> which has been acknowledged already:
>> https://gcc.gnu.org/ml/gcc-patches/2018-11/msg00985.html
>>
>> *** gcc/ChangeLog ***
>>
>> 2018-xx-xx  Christoph Muellner <christoph.muellner@theobroma-system.com>
>>
>>         * config/aarch64/aarch64-cores.def: Define emag.
>>         * config/aarch64/aarch64-tune.md: Regenerated with emag.
>>         * config/aarch64/aarch64.c (emag_tunings): New struct.
>>         * doc/invoke.texi: Document mtune value.
>>
>> Signed-off-by: Christoph Muellner <christoph.muellner@theobroma-systems.com>
>> ---
>>  gcc/config/aarch64/aarch64-cores.def |  3 +++
>>  gcc/config/aarch64/aarch64-tune.md   |  2 +-
>>  gcc/config/aarch64/aarch64.c         | 25 +++++++++++++++++++++++++
>>  gcc/doc/invoke.texi                  |  2 +-
>>  4 files changed, 30 insertions(+), 2 deletions(-)
>>
>> diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
>> index 1f3ac56..68cca00 100644
>> --- a/gcc/config/aarch64/aarch64-cores.def
>> +++ b/gcc/config/aarch64/aarch64-cores.def
>> @@ -61,6 +61,9 @@ AARCH64_CORE("thunderxt88",   thunderxt88,   thunderx,  8A,  AARCH64_FL_FOR_ARCH
>>  AARCH64_CORE("thunderxt81",   thunderxt81,   thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a2, -1)
>>  AARCH64_CORE("thunderxt83",   thunderxt83,   thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a3, -1)
>>
>> +/* Ampere Computing cores. */
>> +AARCH64_CORE("emag",        emag,      xgene1,    8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3)
> 
> I think you should add a comment to say why this order is required
> like above for thunderxt88p1.

Ok, will do.

Thanks,
Christoph

> 
> Thanks,
> Andrew Pinski
> 
>> +
>>  /* APM ('P') cores. */
>>  AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000, -1)
>>
>> diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
>> index fade1d4..2fc7f03 100644
>> --- a/gcc/config/aarch64/aarch64-tune.md
>> +++ b/gcc/config/aarch64/aarch64-tune.md
>> @@ -1,5 +1,5 @@
>>  ;; -*- buffer-read-only: t -*-
>>  ;; Generated automatically by gentune.sh from aarch64-cores.def
>>  (define_attr "tune"
>> -       "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
>> +       "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
>>         (const (symbol_ref "((enum attr_tune) aarch64_tune)")))
>> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
>> index f7f88a9..995aafe 100644
>> --- a/gcc/config/aarch64/aarch64.c
>> +++ b/gcc/config/aarch64/aarch64.c
>> @@ -957,6 +957,31 @@ static const struct tune_params xgene1_tunings =
>>    &xgene1_prefetch_tune
>>  };
>>
>> +static const struct tune_params emag_tunings =
>> +{
>> +  &xgene1_extra_costs,
>> +  &xgene1_addrcost_table,
>> +  &xgene1_regmove_cost,
>> +  &xgene1_vector_cost,
>> +  &generic_branch_cost,
>> +  &xgene1_approx_modes,
>> +  6, /* memmov_cost  */
>> +  4, /* issue_rate  */
>> +  AARCH64_FUSE_NOTHING, /* fusible_ops  */
>> +  "16",        /* function_align.  */
>> +  "16",        /* jump_align.  */
>> +  "16",        /* loop_align.  */
>> +  2,   /* int_reassoc_width.  */
>> +  4,   /* fp_reassoc_width.  */
>> +  1,   /* vec_reassoc_width.  */
>> +  2,   /* min_div_recip_mul_sf.  */
>> +  2,   /* min_div_recip_mul_df.  */
>> +  17,  /* max_case_values.  */
>> +  tune_params::AUTOPREFETCHER_OFF,     /* autoprefetcher_model.  */
>> +  (AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS),       /* tune_flags.  */
>> +  &xgene1_prefetch_tune
>> +};
>> +
>>  static const struct tune_params qdf24xx_tunings =
>>  {
>>    &qdf24xx_extra_costs,
>> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
>> index e016dce..ac81fb2 100644
>> --- a/gcc/doc/invoke.texi
>> +++ b/gcc/doc/invoke.texi
>> @@ -15288,7 +15288,7 @@ Specify the name of the target processor for which GCC should tune the
>>  performance of the code.  Permissible values for this option are:
>>  @samp{generic}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a55},
>>  @samp{cortex-a57}, @samp{cortex-a72}, @samp{cortex-a73}, @samp{cortex-a75},
>> -@samp{cortex-a76}, @samp{ares}, @samp{exynos-m1}, @samp{falkor},
>> +@samp{cortex-a76}, @samp{ares}, @samp{exynos-m1}, @samp{emag}, @samp{falkor},
>>  @samp{qdf24xx}, @samp{saphira}, @samp{phecda}, @samp{xgene1}, @samp{vulcan},
>>  @samp{thunderx}, @samp{thunderxt88}, @samp{thunderxt88p1}, @samp{thunderxt81},
>>  @samp{tsv110}, @samp{thunderxt83}, @samp{thunderx2t99},
>> --
>> 2.9.5
>>
Kyrill Tkachov Nov. 26, 2018, 5:41 p.m. UTC | #7
Hi Christoph,

On 22/11/18 08:00, Christoph Müllner wrote:
>
> On 11/22/18 8:54 AM, Andrew Pinski wrote:
>> One small comment.
>>
>> On Tue, Nov 20, 2018 at 10:01 AM Christoph Muellner
>> <christoph.muellner@theobroma-systems.com> wrote:
>>> Tested with "make check" and no regressions found.
>>>
>>> This patch depends on the struct xgene1_prefetch_tune,
>>> which has been acknowledged already:
>>> https://gcc.gnu.org/ml/gcc-patches/2018-11/msg00985.html
>>>
>>> *** gcc/ChangeLog ***
>>>
>>> 2018-xx-xx  Christoph Muellner <christoph.muellner@theobroma-systems.com>
>>>
>>>          * config/aarch64/aarch64-cores.def: Define emag.
>>>          * config/aarch64/aarch64-tune.md: Regenerated with emag.
>>>          * config/aarch64/aarch64.c (emag_tunings): New struct.
>>>          * doc/invoke.texi: Document mtune value.
>>>
>>> Signed-off-by: Christoph Muellner <christoph.muellner@theobroma-systems.com>
>>> ---
>>>   gcc/config/aarch64/aarch64-cores.def |  3 +++
>>>   gcc/config/aarch64/aarch64-tune.md   |  2 +-
>>>   gcc/config/aarch64/aarch64.c         | 25 +++++++++++++++++++++++++
>>>   gcc/doc/invoke.texi                  |  2 +-
>>>   4 files changed, 30 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
>>> index 1f3ac56..68cca00 100644
>>> --- a/gcc/config/aarch64/aarch64-cores.def
>>> +++ b/gcc/config/aarch64/aarch64-cores.def
>>> @@ -61,6 +61,9 @@ AARCH64_CORE("thunderxt88",   thunderxt88,   thunderx,  8A,  AARCH64_FL_FOR_ARCH
>>>   AARCH64_CORE("thunderxt81",   thunderxt81,   thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a2, -1)
>>>   AARCH64_CORE("thunderxt83",   thunderxt83,   thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a3, -1)
>>>
>>> +/* Ampere Computing cores. */
>>> +AARCH64_CORE("emag",        emag,      xgene1,    8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3)
>> I think you should add a comment to say why this order is required
>> like above for thunderxt88p1.
> Ok, will do.

I see this has now been committed.
Can you please create a patch for the changes.html page (https://gcc.gnu.org/gcc-9/changes.html)
mentioning the new CPU support? There are examples on that page of the kind of appropriate wording.
You can find details of how to do this at https://gcc.gnu.org/about.html

Thanks,
Kyrill

> Thanks,
> Christoph
>
>> Thanks,
>> Andrew Pinski
>>
>>> +
>>>   /* APM ('P') cores. */
>>>   AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000, -1)
>>>
>>> diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
>>> index fade1d4..2fc7f03 100644
>>> --- a/gcc/config/aarch64/aarch64-tune.md
>>> +++ b/gcc/config/aarch64/aarch64-tune.md
>>> @@ -1,5 +1,5 @@
>>>   ;; -*- buffer-read-only: t -*-
>>>   ;; Generated automatically by gentune.sh from aarch64-cores.def
>>>   (define_attr "tune"
>>> -       "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
>>> +       "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
>>>          (const (symbol_ref "((enum attr_tune) aarch64_tune)")))
>>> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
>>> index f7f88a9..995aafe 100644
>>> --- a/gcc/config/aarch64/aarch64.c
>>> +++ b/gcc/config/aarch64/aarch64.c
>>> @@ -957,6 +957,31 @@ static const struct tune_params xgene1_tunings =
>>>     &xgene1_prefetch_tune
>>>   };
>>>
>>> +static const struct tune_params emag_tunings =
>>> +{
>>> +  &xgene1_extra_costs,
>>> +  &xgene1_addrcost_table,
>>> +  &xgene1_regmove_cost,
>>> +  &xgene1_vector_cost,
>>> +  &generic_branch_cost,
>>> +  &xgene1_approx_modes,
>>> +  6, /* memmov_cost  */
>>> +  4, /* issue_rate  */
>>> +  AARCH64_FUSE_NOTHING, /* fusible_ops  */
>>> +  "16",        /* function_align.  */
>>> +  "16",        /* jump_align.  */
>>> +  "16",        /* loop_align.  */
>>> +  2,   /* int_reassoc_width.  */
>>> +  4,   /* fp_reassoc_width.  */
>>> +  1,   /* vec_reassoc_width.  */
>>> +  2,   /* min_div_recip_mul_sf.  */
>>> +  2,   /* min_div_recip_mul_df.  */
>>> +  17,  /* max_case_values.  */
>>> +  tune_params::AUTOPREFETCHER_OFF,     /* autoprefetcher_model.  */
>>> +  (AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS),       /* tune_flags.  */
>>> +  &xgene1_prefetch_tune
>>> +};
>>> +
>>>   static const struct tune_params qdf24xx_tunings =
>>>   {
>>>     &qdf24xx_extra_costs,
>>> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
>>> index e016dce..ac81fb2 100644
>>> --- a/gcc/doc/invoke.texi
>>> +++ b/gcc/doc/invoke.texi
>>> @@ -15288,7 +15288,7 @@ Specify the name of the target processor for which GCC should tune the
>>>   performance of the code.  Permissible values for this option are:
>>>   @samp{generic}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a55},
>>>   @samp{cortex-a57}, @samp{cortex-a72}, @samp{cortex-a73}, @samp{cortex-a75},
>>> -@samp{cortex-a76}, @samp{ares}, @samp{exynos-m1}, @samp{falkor},
>>> +@samp{cortex-a76}, @samp{ares}, @samp{exynos-m1}, @samp{emag}, @samp{falkor},
>>>   @samp{qdf24xx}, @samp{saphira}, @samp{phecda}, @samp{xgene1}, @samp{vulcan},
>>>   @samp{thunderx}, @samp{thunderxt88}, @samp{thunderxt88p1}, @samp{thunderxt81},
>>>   @samp{tsv110}, @samp{thunderxt83}, @samp{thunderx2t99},
>>> --
>>> 2.9.5
>>>
Gerald Pfeifer Dec. 29, 2018, 2:16 a.m. UTC | #8
On Mon, 26 Nov 2018, Kyrill Tkachov wrote:
> Hi Christoph,
:
> Can you please create a patch for the changes.html page
> (https://gcc.gnu.org/gcc-9/changes.html)
> mentioning the new CPU support? There are examples on that page of the kind of
> appropriate wording.
> You can find details of how to do this at https://gcc.gnu.org/about.html

Hear, hear, Kyrill! :-)

Christoph, I haven't seen a change yet.  If I've missed it, or you
need help, please give me a shout.

Gerald
diff mbox series

Patch

diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 1f3ac56..68cca00 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -61,6 +61,9 @@  AARCH64_CORE("thunderxt88",   thunderxt88,   thunderx,  8A,  AARCH64_FL_FOR_ARCH
 AARCH64_CORE("thunderxt81",   thunderxt81,   thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a2, -1)
 AARCH64_CORE("thunderxt83",   thunderxt83,   thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  0x43, 0x0a3, -1)
 
+/* Ampere Computing cores. */
+AARCH64_CORE("emag",        emag,      xgene1,    8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3)
+
 /* APM ('P') cores. */
 AARCH64_CORE("xgene1",      xgene1,    xgene1,    8A,  AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000, -1)
 
diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
index fade1d4..2fc7f03 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@ 
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from aarch64-cores.def
 (define_attr "tune"
-	"cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
+	"cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
 	(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index f7f88a9..995aafe 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -957,6 +957,31 @@  static const struct tune_params xgene1_tunings =
   &xgene1_prefetch_tune
 };
 
+static const struct tune_params emag_tunings =
+{
+  &xgene1_extra_costs,
+  &xgene1_addrcost_table,
+  &xgene1_regmove_cost,
+  &xgene1_vector_cost,
+  &generic_branch_cost,
+  &xgene1_approx_modes,
+  6, /* memmov_cost  */
+  4, /* issue_rate  */
+  AARCH64_FUSE_NOTHING, /* fusible_ops  */
+  "16",	/* function_align.  */
+  "16",	/* jump_align.  */
+  "16",	/* loop_align.  */
+  2,	/* int_reassoc_width.  */
+  4,	/* fp_reassoc_width.  */
+  1,	/* vec_reassoc_width.  */
+  2,	/* min_div_recip_mul_sf.  */
+  2,	/* min_div_recip_mul_df.  */
+  17,	/* max_case_values.  */
+  tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
+  (AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS),	/* tune_flags.  */
+  &xgene1_prefetch_tune
+};
+
 static const struct tune_params qdf24xx_tunings =
 {
   &qdf24xx_extra_costs,
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index e016dce..ac81fb2 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -15288,7 +15288,7 @@  Specify the name of the target processor for which GCC should tune the
 performance of the code.  Permissible values for this option are:
 @samp{generic}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a55},
 @samp{cortex-a57}, @samp{cortex-a72}, @samp{cortex-a73}, @samp{cortex-a75},
-@samp{cortex-a76}, @samp{ares}, @samp{exynos-m1}, @samp{falkor},
+@samp{cortex-a76}, @samp{ares}, @samp{exynos-m1}, @samp{emag}, @samp{falkor},
 @samp{qdf24xx}, @samp{saphira}, @samp{phecda}, @samp{xgene1}, @samp{vulcan},
 @samp{thunderx}, @samp{thunderxt88}, @samp{thunderxt88p1}, @samp{thunderxt81},
 @samp{tsv110}, @samp{thunderxt83}, @samp{thunderx2t99},