[RFC,aarch64] Add HiSilicon tsv110 CPU support.
diff mbox series

Message ID 1526978418-62290-2-git-send-email-zhangshaokun@hisilicon.com
State New
Headers show
Series
  • [RFC,aarch64] Add HiSilicon tsv110 CPU support.
Related show

Commit Message

Shaokun Zhang May 22, 2018, 8:40 a.m. UTC
This patch adds a new mcpu for HiSilicon: tsv110.

---
 gcc/ChangeLog                            |   9 +++
 gcc/config/aarch64/aarch64-cores.def     |   5 ++
 gcc/config/aarch64/aarch64-cost-tables.h | 103 +++++++++++++++++++++++++++++++
 gcc/config/aarch64/aarch64-tune.md       |   2 +-
 gcc/config/aarch64/aarch64.c             |  79 ++++++++++++++++++++++++
 gcc/doc/invoke.texi                      |   2 +-
 6 files changed, 198 insertions(+), 2 deletions(-)

Comments

Kyrill Tkachov May 22, 2018, 10:52 a.m. UTC | #1
Hi Shaokun,

On 22/05/18 09:40, Shaokun Zhang wrote:
> This patch adds HiSilicon's an mcpu: tsv110.
>
> ---
>  gcc/ChangeLog                            |   9 +++
>  gcc/config/aarch64/aarch64-cores.def     |   5 ++
>  gcc/config/aarch64/aarch64-cost-tables.h | 103 +++++++++++++++++++++++++++++++
>  gcc/config/aarch64/aarch64-tune.md       |   2 +-
>  gcc/config/aarch64/aarch64.c             |  79 ++++++++++++++++++++++++
>  gcc/doc/invoke.texi                      |   2 +-
>  6 files changed, 198 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
> index cec2892..5d44966 100644
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -1,3 +1,12 @@
> +2018-05-22  Shaokun Zhang <zhangshaokun@hisilicon.com>
> +            Bo Zhou  <zbo.zhou@hisilicon.com>
> +
> +       * config/aarch64/aarch64-cores.def (tsv110): New CPU.
> +       * config/aarch64/aarch64-tune.md: Regenerated.
> +       * doc/invoke.texi (AArch61 Options/-mtune): Add "tsv110".

typo: AArch64.

> +       * gcc/config/aarch64/aarch64.c (tsv110_tunings): New tuning table.
> +       * gcc/config/aarch64/aarch64-cost-tables.h: Add "tsv110" extra costs.

Please start the path with config/.

> +
>  2018-05-21  Michael Meissner <meissner@linux.ibm.com>
>
>          PR target/85657
> diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
> index 33b96ca..db7a412 100644
> --- a/gcc/config/aarch64/aarch64-cores.def
> +++ b/gcc/config/aarch64/aarch64-cores.def
> @@ -91,6 +91,11 @@ AARCH64_CORE("cortex-a75",  cortexa75, cortexa57, 8_2A,  AARCH64_FL_FOR_ARCH8_2
>  /* Qualcomm ('Q') cores. */
>  AARCH64_CORE("saphira",     saphira,    falkor,    8_3A, AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira,   0x51, 0xC01, -1)
>
> +/* ARMv8.4-A Architecture Processors.  */
> +
> +/* HiSilicon ('H') cores. */
> +AARCH64_CORE("tsv110",     tsv110,    tsv110,    8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110,   0x48, 0xd01, -1)
> +

The third field is the scheduler model to use when optimising.
Since there is no tsv110 scheduling model, using the name "tsv110"
in the third field will generally give pretty poor schedules.
I recommend you specify a scheduling model that most closely matches your core
for the time being. But I don't think it's required and I wouldn't let it hold
up the patch.

You'll need approval from an aarch64 maintainer (cc'ed some for you).

Thanks,
Kyrill

>  /* ARMv8-A big.LITTLE implementations.  */
>
>  AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1)
> diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h
> index a455c62..b6890d6 100644
> --- a/gcc/config/aarch64/aarch64-cost-tables.h
> +++ b/gcc/config/aarch64/aarch64-cost-tables.h
> @@ -334,4 +334,107 @@ const struct cpu_cost_table thunderx2t99_extra_costs =
>    }
>  };
>
> +const struct cpu_cost_table tsv110_extra_costs =
> +{
> +  /* ALU */
> +  {
> +    0,                 /* arith.  */
> +    0,                 /* logical.  */
> +    0,                 /* shift.  */
> +    0,                 /* shift_reg.  */
> +    COSTS_N_INSNS (1), /* arith_shift.  */
> +    COSTS_N_INSNS (1), /* arith_shift_reg.  */
> +    COSTS_N_INSNS (1), /* log_shift.  */
> +    COSTS_N_INSNS (1), /* log_shift_reg.  */
> +    0,                 /* extend.  */
> +    COSTS_N_INSNS (1), /* extend_arith.  */
> +    0,                 /* bfi.  */
> +    0,                 /* bfx.  */
> +    0,                 /* clz.  */
> +    0,                /* rev.  */
> +    0,                 /* non_exec.  */
> +    true               /* non_exec_costs_exec.  */
> +  },
> +  {
> +    /* MULT SImode */
> +    {
> +      COSTS_N_INSNS (2),       /* simple.  */
> +      COSTS_N_INSNS (2),       /* flag_setting.  */
> +      COSTS_N_INSNS (2),       /* extend.  */
> +      COSTS_N_INSNS (2),       /* add.  */
> +      COSTS_N_INSNS (2),       /* extend_add.  */
> +      COSTS_N_INSNS (11)       /* idiv.  */
> +    },
> +    /* MULT DImode */
> +    {
> +      COSTS_N_INSNS (3),       /* simple.  */
> +      0,                       /* flag_setting (N/A).  */
> +      COSTS_N_INSNS (3),       /* extend.  */
> +      COSTS_N_INSNS (3),       /* add.  */
> +      COSTS_N_INSNS (3),       /* extend_add.  */
> +      COSTS_N_INSNS (19)       /* idiv.  */
> +    }
> +  },
> +  /* LD/ST */
> +  {
> +    COSTS_N_INSNS (3),         /* load.  */
> +    COSTS_N_INSNS (4),         /* load_sign_extend.  */
> +    COSTS_N_INSNS (3),         /* ldrd.  */
> +    COSTS_N_INSNS (3),         /* ldm_1st.  */
> +    1,                         /* ldm_regs_per_insn_1st. */
> +    2,                         /* ldm_regs_per_insn_subsequent.  */
> +    COSTS_N_INSNS (4),         /* loadf.  */
> +    COSTS_N_INSNS (4),         /* loadd.  */
> +    COSTS_N_INSNS (4),         /* load_unaligned.  */
> +    0,                         /* store.  */
> +    0,                         /* strd.  */
> +    0,                         /* stm_1st.  */
> +    1,                         /* stm_regs_per_insn_1st. */
> +    2,                         /* stm_regs_per_insn_subsequent.  */
> +    0,                         /* storef.  */
> +    0,                         /* stored.  */
> +    COSTS_N_INSNS (1),         /* store_unaligned.  */
> +    COSTS_N_INSNS (4),         /* loadv.  */
> +    COSTS_N_INSNS (4)          /* storev.  */
> +  },
> +  {
> +    /* FP SFmode */
> +    {
> +      COSTS_N_INSNS (10),      /* div.  */
> +      COSTS_N_INSNS (4),       /* mult.  */
> +      COSTS_N_INSNS (4),       /* mult_addsub.  */
> +      COSTS_N_INSNS (4),       /* fma.  */
> +      COSTS_N_INSNS (4),       /* addsub.  */
> +      COSTS_N_INSNS (1),       /* fpconst.  */
> +      COSTS_N_INSNS (1),       /* neg.  */
> +      COSTS_N_INSNS (1),       /* compare.  */
> +      COSTS_N_INSNS (2),       /* widen.  */
> +      COSTS_N_INSNS (2),       /* narrow.  */
> +      COSTS_N_INSNS (2),       /* toint.  */
> +      COSTS_N_INSNS (1),       /* fromint.  */
> +      COSTS_N_INSNS (2)        /* roundint.  */
> +    },
> +    /* FP DFmode */
> +    {
> +      COSTS_N_INSNS (17),      /* div.  */
> +      COSTS_N_INSNS (4),       /* mult.  */
> +      COSTS_N_INSNS (6),       /* mult_addsub.  */
> +      COSTS_N_INSNS (6),       /* fma.  */
> +      COSTS_N_INSNS (3),       /* addsub.  */
> +      COSTS_N_INSNS (1),       /* fpconst.  */
> +      COSTS_N_INSNS (1),       /* neg.  */
> +      COSTS_N_INSNS (1),       /* compare.  */
> +      COSTS_N_INSNS (2),       /* widen.  */
> +      COSTS_N_INSNS (2),       /* narrow.  */
> +      COSTS_N_INSNS (2),       /* toint.  */
> +      COSTS_N_INSNS (1),       /* fromint.  */
> +      COSTS_N_INSNS (2)        /* roundint.  */
> +    }
> +  },
> +  /* Vector */
> +  {
> +    COSTS_N_INSNS (1)  /* alu.  */
> +  }
> +};
> +
>  #endif
> diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
> index 7b3a746..a10f2e7 100644
> --- a/gcc/config/aarch64/aarch64-tune.md
> +++ b/gcc/config/aarch64/aarch64-tune.md
> @@ -1,5 +1,5 @@
>  ;; -*- buffer-read-only: t -*-
>  ;; Generated automatically by gentune.sh from aarch64-cores.def
>  (define_attr "tune"
> - "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55"
> + "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,saphira,tsv110,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55"
>          (const (symbol_ref "((enum attr_tune) aarch64_tune)")))
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 6bf6c05..0788c14 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -266,6 +266,22 @@ static const struct cpu_addrcost_table generic_addrcost_table =
>    0 /* imm_offset  */
>  };
>
> +static const struct cpu_addrcost_table tsv110_addrcost_table =
> +{
> +    {
> +      1, /* hi  */
> +      0, /* si  */
> +      0, /* di  */
> +      1, /* ti  */
> +    },
> +  0, /* pre_modify  */
> +  0, /* post_modify  */
> +  0, /* register_offset  */
> +  1, /* register_sextend  */
> +  1, /* register_zextend  */
> +  0 /* imm_offset  */
> +};
> +
>  static const struct cpu_addrcost_table exynosm1_addrcost_table =
>  {
>      {
> @@ -344,6 +360,16 @@ static const struct cpu_regmove_cost cortexa53_regmove_cost =
>    2 /* FP2FP  */
>  };
>
> +static const struct cpu_regmove_cost tsv110_regmove_cost =
> +{
> +  1, /* GP2GP  */
> +  /* Avoid the use of slow int<->fp moves for spilling by setting
> +     their cost higher than memmov_cost.  */
> +  2, /* GP2FP  */
> +  3, /* FP2GP  */
> +  2  /* FP2FP  */
> +};
> +
>  static const struct cpu_regmove_cost exynosm1_regmove_cost =
>  {
>    1, /* GP2GP  */
> @@ -450,6 +476,25 @@ static const struct cpu_vector_cost cortexa57_vector_cost =
>    1 /* cond_not_taken_branch_cost  */
>  };
>
> +static const struct cpu_vector_cost tsv110_vector_cost =
> +{
> +  1, /* scalar_int_stmt_cost  */
> +  1, /* scalar_fp_stmt_cost  */
> +  5, /* scalar_load_cost  */
> +  1, /* scalar_store_cost  */
> +  2, /* vec_int_stmt_cost  */
> +  2, /* vec_fp_stmt_cost  */
> +  2, /* vec_permute_cost  */
> +  3, /* vec_to_scalar_cost  */
> +  2, /* scalar_to_vec_cost  */
> +  5, /* vec_align_load_cost  */
> +  5, /* vec_unalign_load_cost  */
> +  1, /* vec_unalign_store_cost  */
> +  1, /* vec_store_cost  */
> +  1, /* cond_taken_branch_cost  */
> +  1 /* cond_not_taken_branch_cost  */
> +};
> +
>  static const struct cpu_vector_cost exynosm1_vector_cost =
>  {
>    1, /* scalar_int_stmt_cost  */
> @@ -550,6 +595,15 @@ static const cpu_prefetch_tune generic_prefetch_tune =
>    -1                   /* default_opt_level  */
>  };
>
> +static const cpu_prefetch_tune tsv110_prefetch_tune =
> +{
> +  0,                   /* num_slots  */
> +  64,                  /* l1_cache_size  */
> +  64,                  /* l1_cache_line_size  */
> +  512,                 /* l2_cache_size  */
> +  -1                   /* default_opt_level  */
> +};
> +
>  static const cpu_prefetch_tune exynosm1_prefetch_tune =
>  {
>    0,                   /* num_slots  */
> @@ -751,6 +805,31 @@ static const struct tune_params cortexa73_tunings =
>  };
>
>
> +static const struct tune_params tsv110_tunings =
> +{
> +  &tsv110_extra_costs,
> +  &tsv110_addrcost_table,
> +  &tsv110_regmove_cost,
> +  &tsv110_vector_cost,
> +  &generic_branch_cost,
> +  &generic_approx_modes,
> +  4, /* memmov_cost  */
> +  4, /* issue_rate  */
> +  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH
> +   | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops  */
> +  16,  /* function_align.  */
> +  4,   /* jump_align.  */
> +  8,   /* loop_align.  */
> +  2,   /* int_reassoc_width.  */
> +  4,   /* fp_reassoc_width.  */
> +  1,   /* vec_reassoc_width.  */
> +  2,   /* min_div_recip_mul_sf.  */
> +  2,   /* min_div_recip_mul_df.  */
> +  0,   /* max_case_values.  */
> +  tune_params::AUTOPREFETCHER_WEAK,    /* autoprefetcher_model.  */
> +  (AARCH64_EXTRA_TUNE_NONE),   /* tune_flags.  */
> +  &tsv110_prefetch_tune
> +};
>
>  static const struct tune_params exynosm1_tunings =
>  {
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index beba295..55fcd42 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -14713,7 +14713,7 @@ performance of the code. Permissible values for this option are:
>  @samp{generic}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a55},
>  @samp{cortex-a57}, @samp{cortex-a72}, @samp{cortex-a73}, @samp{cortex-a75},
>  @samp{exynos-m1}, @samp{falkor}, @samp{qdf24xx}, @samp{saphira},
> -@samp{xgene1}, @samp{vulcan}, @samp{thunderx},
> +@samp{xgene1}, @samp{vulcan}, @samp{thunderx}, @samp{tsv110},
>  @samp{thunderxt88}, @samp{thunderxt88p1}, @samp{thunderxt81},
>  @samp{thunderxt83}, @samp{thunderx2t99}, @samp{cortex-a57.cortex-a53},
>  @samp{cortex-a72.cortex-a53}, @samp{cortex-a73.cortex-a35},
> -- 
> 2.7.4
>
Shaokun Zhang May 23, 2018, 4:54 a.m. UTC | #2
Hi Kyrill,

On 2018/5/22 18:52, Kyrill Tkachov wrote:
> Hi Shaokun,
> 
> On 22/05/18 09:40, Shaokun Zhang wrote:
>> This patch adds HiSilicon's an mcpu: tsv110.
>>
>> ---
>>  gcc/ChangeLog                            |   9 +++
>>  gcc/config/aarch64/aarch64-cores.def     |   5 ++
>>  gcc/config/aarch64/aarch64-cost-tables.h | 103 +++++++++++++++++++++++++++++++
>>  gcc/config/aarch64/aarch64-tune.md       |   2 +-
>>  gcc/config/aarch64/aarch64.c             |  79 ++++++++++++++++++++++++
>>  gcc/doc/invoke.texi                      |   2 +-
>>  6 files changed, 198 insertions(+), 2 deletions(-)
>>
>> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
>> index cec2892..5d44966 100644
>> --- a/gcc/ChangeLog
>> +++ b/gcc/ChangeLog
>> @@ -1,3 +1,12 @@
>> +2018-05-22  Shaokun Zhang <zhangshaokun@hisilicon.com>
>> +            Bo Zhou  <zbo.zhou@hisilicon.com>
>> +
>> +       * config/aarch64/aarch64-cores.def (tsv110): New CPU.
>> +       * config/aarch64/aarch64-tune.md: Regenerated.
>> +       * doc/invoke.texi (AArch61 Options/-mtune): Add "tsv110".
> 
> typo: AArch64.
> 

Good catch, my mistake.

>> +       * gcc/config/aarch64/aarch64.c (tsv110_tunings): New tuning table.
>> +       * gcc/config/aarch64/aarch64-cost-tables.h: Add "tsv110" extra costs.
> 
> Please start the path with config/.
> 

Sure, will remove gcc/ in the next version.

>> +
>>  2018-05-21  Michael Meissner <meissner@linux.ibm.com>
>>
>>          PR target/85657
>> diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
>> index 33b96ca..db7a412 100644
>> --- a/gcc/config/aarch64/aarch64-cores.def
>> +++ b/gcc/config/aarch64/aarch64-cores.def
>> @@ -91,6 +91,11 @@ AARCH64_CORE("cortex-a75",  cortexa75, cortexa57, 8_2A,  AARCH64_FL_FOR_ARCH8_2
>>  /* Qualcomm ('Q') cores. */
>>  AARCH64_CORE("saphira",     saphira,    falkor,    8_3A, AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira,   0x51, 0xC01, -1)
>>
>> +/* ARMv8.4-A Architecture Processors.  */
>> +
>> +/* HiSilicon ('H') cores. */
>> +AARCH64_CORE("tsv110",     tsv110,    tsv110,    8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110,   0x48, 0xd01, -1)
>> +
> 
> The third field is the scheduler model to use when optimising.
> Since there is no tsv110 scheduling model, using the name "tsv110"
> in the third field will generally give pretty poor schedules.
> I recommend you specify an scheduling model that most closely matches your core
> for the time being. But I don't think it's required and I wouldn't let it hold

I checked it again: cortexa57 most closely matches tsv110. Thanks for your
suggestion.
If I choose cortexa57, can I still add the tsv110_tunings, which will use tsv110's
pipeline features, as in the rest of the patch below, or should I only use the generic features?

> up the patch.
> 
> You'll need approval from an aarch64 maintainer (cc'ed some for you).
> 

Good, thanks for your nice guidance.

Thanks,
Shaokun

> Thanks,
> Kyrill
> 
>>  /* ARMv8-A big.LITTLE implementations.  */
>>
>>  AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1)
>> diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h
>> index a455c62..b6890d6 100644
>> --- a/gcc/config/aarch64/aarch64-cost-tables.h
>> +++ b/gcc/config/aarch64/aarch64-cost-tables.h
>> @@ -334,4 +334,107 @@ const struct cpu_cost_table thunderx2t99_extra_costs =
>>    }
>>  };
>>
>> +const struct cpu_cost_table tsv110_extra_costs =
>> +{
>> +  /* ALU */
>> +  {
>> +    0,                 /* arith.  */
>> +    0,                 /* logical.  */
>> +    0,                 /* shift.  */
>> +    0,                 /* shift_reg.  */
>> +    COSTS_N_INSNS (1), /* arith_shift.  */
>> +    COSTS_N_INSNS (1), /* arith_shift_reg.  */
>> +    COSTS_N_INSNS (1), /* log_shift.  */
>> +    COSTS_N_INSNS (1), /* log_shift_reg.  */
>> +    0,                 /* extend.  */
>> +    COSTS_N_INSNS (1), /* extend_arith.  */
>> +    0,                 /* bfi.  */
>> +    0,                 /* bfx.  */
>> +    0,                 /* clz.  */
>> +    0,                /* rev.  */
>> +    0,                 /* non_exec.  */
>> +    true               /* non_exec_costs_exec.  */
>> +  },
>> +  {
>> +    /* MULT SImode */
>> +    {
>> +      COSTS_N_INSNS (2),       /* simple.  */
>> +      COSTS_N_INSNS (2),       /* flag_setting.  */
>> +      COSTS_N_INSNS (2),       /* extend.  */
>> +      COSTS_N_INSNS (2),       /* add.  */
>> +      COSTS_N_INSNS (2),       /* extend_add.  */
>> +      COSTS_N_INSNS (11)       /* idiv.  */
>> +    },
>> +    /* MULT DImode */
>> +    {
>> +      COSTS_N_INSNS (3),       /* simple.  */
>> +      0,                       /* flag_setting (N/A).  */
>> +      COSTS_N_INSNS (3),       /* extend.  */
>> +      COSTS_N_INSNS (3),       /* add.  */
>> +      COSTS_N_INSNS (3),       /* extend_add.  */
>> +      COSTS_N_INSNS (19)       /* idiv.  */
>> +    }
>> +  },
>> +  /* LD/ST */
>> +  {
>> +    COSTS_N_INSNS (3),         /* load.  */
>> +    COSTS_N_INSNS (4),         /* load_sign_extend.  */
>> +    COSTS_N_INSNS (3),         /* ldrd.  */
>> +    COSTS_N_INSNS (3),         /* ldm_1st.  */
>> +    1,                         /* ldm_regs_per_insn_1st. */
>> +    2,                         /* ldm_regs_per_insn_subsequent.  */
>> +    COSTS_N_INSNS (4),         /* loadf.  */
>> +    COSTS_N_INSNS (4),         /* loadd.  */
>> +    COSTS_N_INSNS (4),         /* load_unaligned.  */
>> +    0,                         /* store.  */
>> +    0,                         /* strd.  */
>> +    0,                         /* stm_1st.  */
>> +    1,                         /* stm_regs_per_insn_1st. */
>> +    2,                         /* stm_regs_per_insn_subsequent.  */
>> +    0,                         /* storef.  */
>> +    0,                         /* stored.  */
>> +    COSTS_N_INSNS (1),         /* store_unaligned.  */
>> +    COSTS_N_INSNS (4),         /* loadv.  */
>> +    COSTS_N_INSNS (4)          /* storev.  */
>> +  },
>> +  {
>> +    /* FP SFmode */
>> +    {
>> +      COSTS_N_INSNS (10),      /* div.  */
>> +      COSTS_N_INSNS (4),       /* mult.  */
>> +      COSTS_N_INSNS (4),       /* mult_addsub.  */
>> +      COSTS_N_INSNS (4),       /* fma.  */
>> +      COSTS_N_INSNS (4),       /* addsub.  */
>> +      COSTS_N_INSNS (1),       /* fpconst.  */
>> +      COSTS_N_INSNS (1),       /* neg.  */
>> +      COSTS_N_INSNS (1),       /* compare.  */
>> +      COSTS_N_INSNS (2),       /* widen.  */
>> +      COSTS_N_INSNS (2),       /* narrow.  */
>> +      COSTS_N_INSNS (2),       /* toint.  */
>> +      COSTS_N_INSNS (1),       /* fromint.  */
>> +      COSTS_N_INSNS (2)        /* roundint.  */
>> +    },
>> +    /* FP DFmode */
>> +    {
>> +      COSTS_N_INSNS (17),      /* div.  */
>> +      COSTS_N_INSNS (4),       /* mult.  */
>> +      COSTS_N_INSNS (6),       /* mult_addsub.  */
>> +      COSTS_N_INSNS (6),       /* fma.  */
>> +      COSTS_N_INSNS (3),       /* addsub.  */
>> +      COSTS_N_INSNS (1),       /* fpconst.  */
>> +      COSTS_N_INSNS (1),       /* neg.  */
>> +      COSTS_N_INSNS (1),       /* compare.  */
>> +      COSTS_N_INSNS (2),       /* widen.  */
>> +      COSTS_N_INSNS (2),       /* narrow.  */
>> +      COSTS_N_INSNS (2),       /* toint.  */
>> +      COSTS_N_INSNS (1),       /* fromint.  */
>> +      COSTS_N_INSNS (2)        /* roundint.  */
>> +    }
>> +  },
>> +  /* Vector */
>> +  {
>> +    COSTS_N_INSNS (1)  /* alu.  */
>> +  }
>> +};
>> +
>>  #endif
>> diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
>> index 7b3a746..a10f2e7 100644
>> --- a/gcc/config/aarch64/aarch64-tune.md
>> +++ b/gcc/config/aarch64/aarch64-tune.md
>> @@ -1,5 +1,5 @@
>>  ;; -*- buffer-read-only: t -*-
>>  ;; Generated automatically by gentune.sh from aarch64-cores.def
>>  (define_attr "tune"
>> - "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55"
>> + "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,saphira,tsv110,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55"
>>          (const (symbol_ref "((enum attr_tune) aarch64_tune)")))
>> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
>> index 6bf6c05..0788c14 100644
>> --- a/gcc/config/aarch64/aarch64.c
>> +++ b/gcc/config/aarch64/aarch64.c
>> @@ -266,6 +266,22 @@ static const struct cpu_addrcost_table generic_addrcost_table =
>>    0 /* imm_offset  */
>>  };
>>
>> +static const struct cpu_addrcost_table tsv110_addrcost_table =
>> +{
>> +    {
>> +      1, /* hi  */
>> +      0, /* si  */
>> +      0, /* di  */
>> +      1, /* ti  */
>> +    },
>> +  0, /* pre_modify  */
>> +  0, /* post_modify  */
>> +  0, /* register_offset  */
>> +  1, /* register_sextend  */
>> +  1, /* register_zextend  */
>> +  0 /* imm_offset  */
>> +};
>> +
>>  static const struct cpu_addrcost_table exynosm1_addrcost_table =
>>  {
>>      {
>> @@ -344,6 +360,16 @@ static const struct cpu_regmove_cost cortexa53_regmove_cost =
>>    2 /* FP2FP  */
>>  };
>>
>> +static const struct cpu_regmove_cost tsv110_regmove_cost =
>> +{
>> +  1, /* GP2GP  */
>> +  /* Avoid the use of slow int<->fp moves for spilling by setting
>> +     their cost higher than memmov_cost.  */
>> +  2, /* GP2FP  */
>> +  3, /* FP2GP  */
>> +  2  /* FP2FP  */
>> +};
>> +
>>  static const struct cpu_regmove_cost exynosm1_regmove_cost =
>>  {
>>    1, /* GP2GP  */
>> @@ -450,6 +476,25 @@ static const struct cpu_vector_cost cortexa57_vector_cost =
>>    1 /* cond_not_taken_branch_cost  */
>>  };
>>
>> +static const struct cpu_vector_cost tsv110_vector_cost =
>> +{
>> +  1, /* scalar_int_stmt_cost  */
>> +  1, /* scalar_fp_stmt_cost  */
>> +  5, /* scalar_load_cost  */
>> +  1, /* scalar_store_cost  */
>> +  2, /* vec_int_stmt_cost  */
>> +  2, /* vec_fp_stmt_cost  */
>> +  2, /* vec_permute_cost  */
>> +  3, /* vec_to_scalar_cost  */
>> +  2, /* scalar_to_vec_cost  */
>> +  5, /* vec_align_load_cost  */
>> +  5, /* vec_unalign_load_cost  */
>> +  1, /* vec_unalign_store_cost  */
>> +  1, /* vec_store_cost  */
>> +  1, /* cond_taken_branch_cost  */
>> +  1 /* cond_not_taken_branch_cost  */
>> +};
>> +
>>  static const struct cpu_vector_cost exynosm1_vector_cost =
>>  {
>>    1, /* scalar_int_stmt_cost  */
>> @@ -550,6 +595,15 @@ static const cpu_prefetch_tune generic_prefetch_tune =
>>    -1                   /* default_opt_level  */
>>  };
>>
>> +static const cpu_prefetch_tune tsv110_prefetch_tune =
>> +{
>> +  0,                   /* num_slots  */
>> +  64,                  /* l1_cache_size  */
>> +  64,                  /* l1_cache_line_size  */
>> +  512,                 /* l2_cache_size  */
>> +  -1                   /* default_opt_level  */
>> +};
>> +
>>  static const cpu_prefetch_tune exynosm1_prefetch_tune =
>>  {
>>    0,                   /* num_slots  */
>> @@ -751,6 +805,31 @@ static const struct tune_params cortexa73_tunings =
>>  };
>>
>>
>> +static const struct tune_params tsv110_tunings =
>> +{
>> +  &tsv110_extra_costs,
>> +  &tsv110_addrcost_table,
>> +  &tsv110_regmove_cost,
>> +  &tsv110_vector_cost,
>> +  &generic_branch_cost,
>> +  &generic_approx_modes,
>> +  4, /* memmov_cost  */
>> +  4, /* issue_rate  */
>> +  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH
>> +   | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops  */
>> +  16,  /* function_align.  */
>> +  4,   /* jump_align.  */
>> +  8,   /* loop_align.  */
>> +  2,   /* int_reassoc_width.  */
>> +  4,   /* fp_reassoc_width.  */
>> +  1,   /* vec_reassoc_width.  */
>> +  2,   /* min_div_recip_mul_sf.  */
>> +  2,   /* min_div_recip_mul_df.  */
>> +  0,   /* max_case_values.  */
>> +  tune_params::AUTOPREFETCHER_WEAK,    /* autoprefetcher_model.  */
>> +  (AARCH64_EXTRA_TUNE_NONE),   /* tune_flags.  */
>> +  &tsv110_prefetch_tune
>> +};
>>
>>  static const struct tune_params exynosm1_tunings =
>>  {
>> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
>> index beba295..55fcd42 100644
>> --- a/gcc/doc/invoke.texi
>> +++ b/gcc/doc/invoke.texi
>> @@ -14713,7 +14713,7 @@ performance of the code. Permissible values for this option are:
>>  @samp{generic}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a55},
>>  @samp{cortex-a57}, @samp{cortex-a72}, @samp{cortex-a73}, @samp{cortex-a75},
>>  @samp{exynos-m1}, @samp{falkor}, @samp{qdf24xx}, @samp{saphira},
>> -@samp{xgene1}, @samp{vulcan}, @samp{thunderx},
>> +@samp{xgene1}, @samp{vulcan}, @samp{thunderx}, @samp{tsv110},
>>  @samp{thunderxt88}, @samp{thunderxt88p1}, @samp{thunderxt81},
>>  @samp{thunderxt83}, @samp{thunderx2t99}, @samp{cortex-a57.cortex-a53},
>>  @samp{cortex-a72.cortex-a53}, @samp{cortex-a73.cortex-a35},
>> -- 
>> 2.7.4
>>
> 
> 
> .
>
Kyrill Tkachov May 23, 2018, 8:08 a.m. UTC | #3
On 23/05/18 05:54, Zhangshaokun wrote:
> Hi Kyrill,
>
> On 2018/5/22 18:52, Kyrill Tkachov wrote:
>> Hi Shaokun,
>>
>> On 22/05/18 09:40, Shaokun Zhang wrote:
>>> This patch adds HiSilicon's an mcpu: tsv110.
>>>
>>> ---
>>>   gcc/ChangeLog                            |   9 +++
>>>   gcc/config/aarch64/aarch64-cores.def     |   5 ++
>>>   gcc/config/aarch64/aarch64-cost-tables.h | 103 +++++++++++++++++++++++++++++++
>>>   gcc/config/aarch64/aarch64-tune.md       |   2 +-
>>>   gcc/config/aarch64/aarch64.c             |  79 ++++++++++++++++++++++++
>>>   gcc/doc/invoke.texi                      |   2 +-
>>>   6 files changed, 198 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
>>> index cec2892..5d44966 100644
>>> --- a/gcc/ChangeLog
>>> +++ b/gcc/ChangeLog
>>> @@ -1,3 +1,12 @@
>>> +2018-05-22  Shaokun Zhang <zhangshaokun@hisilicon.com>
>>> +            Bo Zhou  <zbo.zhou@hisilicon.com>
>>> +
>>> +       * config/aarch64/aarch64-cores.def (tsv110): New CPU.
>>> +       * config/aarch64/aarch64-tune.md: Regenerated.
>>> +       * doc/invoke.texi (AArch61 Options/-mtune): Add "tsv110".
>> typo: AArch64.
>>
> Good catch, my mistake.
>
>>> +       * gcc/config/aarch64/aarch64.c (tsv110_tunings): New tuning table.
>>> +       * gcc/config/aarch64/aarch64-cost-tables.h: Add "tsv110" extra costs.
>> Please start the path with config/.
>>
> Sure, Will remove gcc/ next version.
>
>>> +
>>>   2018-05-21  Michael Meissner <meissner@linux.ibm.com>
>>>
>>>           PR target/85657
>>> diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
>>> index 33b96ca..db7a412 100644
>>> --- a/gcc/config/aarch64/aarch64-cores.def
>>> +++ b/gcc/config/aarch64/aarch64-cores.def
>>> @@ -91,6 +91,11 @@ AARCH64_CORE("cortex-a75",  cortexa75, cortexa57, 8_2A,  AARCH64_FL_FOR_ARCH8_2
>>>   /* Qualcomm ('Q') cores. */
>>>   AARCH64_CORE("saphira",     saphira,    falkor,    8_3A, AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira,   0x51, 0xC01, -1)
>>>
>>> +/* ARMv8.4-A Architecture Processors.  */
>>> +
>>> +/* HiSilicon ('H') cores. */
>>> +AARCH64_CORE("tsv110",     tsv110,    tsv110,    8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110,   0x48, 0xd01, -1)
>>> +
>> The third field is the scheduler model to use when optimising.
>> Since there is no tsv110 scheduling model, using the name "tsv110"
>> in the third field will generally give pretty poor schedules.
>> I recommend you specify an scheduling model that most closely matches your core
>> for the time being. But I don't think it's required and I wouldn't let it hold
> I checked it again, cortexa57 is most closely matches tsv110 and thanks your
> suggestion.
> If i choose cortexa57, can i add the tsv110_tunings which will use tsv110's
> pipeline features, like the rest patch as follow or only use generic feature?

If you use cortexa57 for the scheduling model (the 3rd field) you should still
use tsv110_tunings in the 6th field as this will specify other important parameters
like instruction selection costs, fusion capabilities, alignment requirements etc.

Thanks,
Kyrill

>
>> up the patch.
>>
>> You'll need approval from an aarch64 maintainer (cc'ed some for you).
>>
> Good, thanks for your nice guidance.
>
> Thanks,
> Shaokun
>
>> Thanks,
>> Kyrill
>>
>>>   /* ARMv8-A big.LITTLE implementations.  */
>>>
>>>   AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1)
>>> diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h
>>> index a455c62..b6890d6 100644
>>> --- a/gcc/config/aarch64/aarch64-cost-tables.h
>>> +++ b/gcc/config/aarch64/aarch64-cost-tables.h
>>> @@ -334,4 +334,107 @@ const struct cpu_cost_table thunderx2t99_extra_costs =
>>>     }
>>>   };
>>>
>>> +const struct cpu_cost_table tsv110_extra_costs =
>>> +{
>>> +  /* ALU */
>>> +  {
>>> +    0,                 /* arith.  */
>>> +    0,                 /* logical.  */
>>> +    0,                 /* shift.  */
>>> +    0,                 /* shift_reg.  */
>>> +    COSTS_N_INSNS (1), /* arith_shift.  */
>>> +    COSTS_N_INSNS (1), /* arith_shift_reg.  */
>>> +    COSTS_N_INSNS (1), /* log_shift.  */
>>> +    COSTS_N_INSNS (1), /* log_shift_reg.  */
>>> +    0,                 /* extend.  */
>>> +    COSTS_N_INSNS (1), /* extend_arith.  */
>>> +    0,                 /* bfi.  */
>>> +    0,                 /* bfx.  */
>>> +    0,                 /* clz.  */
>>> +    0,                /* rev.  */
>>> +    0,                 /* non_exec.  */
>>> +    true               /* non_exec_costs_exec.  */
>>> +  },
>>> +  {
>>> +    /* MULT SImode */
>>> +    {
>>> +      COSTS_N_INSNS (2),       /* simple.  */
>>> +      COSTS_N_INSNS (2),       /* flag_setting.  */
>>> +      COSTS_N_INSNS (2),       /* extend.  */
>>> +      COSTS_N_INSNS (2),       /* add.  */
>>> +      COSTS_N_INSNS (2),       /* extend_add.  */
>>> +      COSTS_N_INSNS (11)       /* idiv.  */
>>> +    },
>>> +    /* MULT DImode */
>>> +    {
>>> +      COSTS_N_INSNS (3),       /* simple.  */
>>> +      0,                       /* flag_setting (N/A).  */
>>> +      COSTS_N_INSNS (3),       /* extend.  */
>>> +      COSTS_N_INSNS (3),       /* add.  */
>>> +      COSTS_N_INSNS (3),       /* extend_add.  */
>>> +      COSTS_N_INSNS (19)       /* idiv.  */
>>> +    }
>>> +  },
>>> +  /* LD/ST */
>>> +  {
>>> +    COSTS_N_INSNS (3),         /* load.  */
>>> +    COSTS_N_INSNS (4),         /* load_sign_extend.  */
>>> +    COSTS_N_INSNS (3),         /* ldrd.  */
>>> +    COSTS_N_INSNS (3),         /* ldm_1st.  */
>>> +    1,                         /* ldm_regs_per_insn_1st. */
>>> +    2,                         /* ldm_regs_per_insn_subsequent.  */
>>> +    COSTS_N_INSNS (4),         /* loadf.  */
>>> +    COSTS_N_INSNS (4),         /* loadd.  */
>>> +    COSTS_N_INSNS (4),         /* load_unaligned.  */
>>> +    0,                         /* store.  */
>>> +    0,                         /* strd.  */
>>> +    0,                         /* stm_1st.  */
>>> +    1,                         /* stm_regs_per_insn_1st. */
>>> +    2,                         /* stm_regs_per_insn_subsequent.  */
>>> +    0,                         /* storef.  */
>>> +    0,                         /* stored.  */
>>> +    COSTS_N_INSNS (1),         /* store_unaligned.  */
>>> +    COSTS_N_INSNS (4),         /* loadv.  */
>>> +    COSTS_N_INSNS (4)          /* storev.  */
>>> +  },
>>> +  {
>>> +    /* FP SFmode */
>>> +    {
>>> +      COSTS_N_INSNS (10),      /* div.  */
>>> +      COSTS_N_INSNS (4),       /* mult.  */
>>> +      COSTS_N_INSNS (4),       /* mult_addsub.  */
>>> +      COSTS_N_INSNS (4),       /* fma.  */
>>> +      COSTS_N_INSNS (4),       /* addsub.  */
>>> +      COSTS_N_INSNS (1),       /* fpconst.  */
>>> +      COSTS_N_INSNS (1),       /* neg.  */
>>> +      COSTS_N_INSNS (1),       /* compare.  */
>>> +      COSTS_N_INSNS (2),       /* widen.  */
>>> +      COSTS_N_INSNS (2),       /* narrow.  */
>>> +      COSTS_N_INSNS (2),       /* toint.  */
>>> +      COSTS_N_INSNS (1),       /* fromint.  */
>>> +      COSTS_N_INSNS (2)        /* roundint.  */
>>> +    },
>>> +    /* FP DFmode */
>>> +    {
>>> +      COSTS_N_INSNS (17),      /* div.  */
>>> +      COSTS_N_INSNS (4),       /* mult.  */
>>> +      COSTS_N_INSNS (6),       /* mult_addsub.  */
>>> +      COSTS_N_INSNS (6),       /* fma.  */
>>> +      COSTS_N_INSNS (3),       /* addsub.  */
>>> +      COSTS_N_INSNS (1),       /* fpconst.  */
>>> +      COSTS_N_INSTS (1),       /* neg.  */
>>> +      COSTS_N_INSTS (1),       /* compare.  */
>>> +      COSTS_N_INSNS (2),       /* widen.  */
>>> +      COSTS_N_INSNS (2),       /* narrow.  */
>>> +      COSTS_N_INSNS (2),       /* toint.  */
>>> +      COSTS_N_INSNS (1),       /* fromint.  */
>>> +      COSTS_N_INSNS (2)        /* roundint.  */
>>> +    }
>>> +  },
>>> +  /* Vector */
>>> +  {
>>> +    COSTS_N_INSNS (1)  /* alu.  */
>>> +  }
>>> +};
>>> +
>>>   #endif
>>> diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
>>> index 7b3a746..a10f2e7 100644
>>> --- a/gcc/config/aarch64/aarch64-tune.md
>>> +++ b/gcc/config/aarch64/aarch64-tune.md
>>> @@ -1,5 +1,5 @@
>>>   ;; -*- buffer-read-only: t -*-
>>>   ;; Generated automatically by gentune.sh from aarch64-cores.def
>>>   (define_attr "tune"
>>> - "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55"
>>> + "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,saphira,tsv110,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55"
>>>           (const (symbol_ref "((enum attr_tune) aarch64_tune)")))
>>> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
>>> index 6bf6c05..0788c14 100644
>>> --- a/gcc/config/aarch64/aarch64.c
>>> +++ b/gcc/config/aarch64/aarch64.c
>>> @@ -266,6 +266,22 @@ static const struct cpu_addrcost_table generic_addrcost_table =
>>>     0 /* imm_offset  */
>>>   };
>>>
>>> +static const struct cpu_addrcost_table tsv110_addrcost_table =
>>> +{
>>> +    {
>>> +      1, /* hi  */
>>> +      0, /* si  */
>>> +      0, /* di  */
>>> +      1, /* ti  */
>>> +    },
>>> +  0, /* pre_modify  */
>>> +  0, /* post_modify  */
>>> +  0, /* register_offset  */
>>> +  1, /* register_sextend  */
>>> +  1, /* register_zextend  */
>>> +  0 /* imm_offset  */
>>> +};
>>> +
>>>   static const struct cpu_addrcost_table exynosm1_addrcost_table =
>>>   {
>>>       {
>>> @@ -344,6 +360,16 @@ static const struct cpu_regmove_cost cortexa53_regmove_cost =
>>>     2 /* FP2FP  */
>>>   };
>>>
>>> +static const struct cpu_regmove_cost tsv110_regmove_cost =
>>> +{
>>> +  1, /* GP2GP  */
>>> +  /* Avoid the use of slow int<->fp moves for spilling by setting
>>> +     their cost higher than memmov_cost.  */
>>> +  2, /* GP2FP  */
>>> +  3, /* FP2GP  */
>>> +  2  /* FP2FP  */
>>> +};
>>> +
>>>   static const struct cpu_regmove_cost exynosm1_regmove_cost =
>>>   {
>>>     1, /* GP2GP  */
>>> @@ -450,6 +476,25 @@ static const struct cpu_vector_cost cortexa57_vector_cost =
>>>     1 /* cond_not_taken_branch_cost  */
>>>   };
>>>
>>> +static const struct cpu_vector_cost tsv110_vector_cost =
>>> +{
>>> +  1, /* scalar_int_stmt_cost  */
>>> +  1, /* scalar_fp_stmt_cost  */
>>> +  5, /* scalar_load_cost  */
>>> +  1, /* scalar_store_cost  */
>>> +  2, /* vec_int_stmt_cost  */
>>> +  2, /* vec_fp_stmt_cost  */
>>> +  2, /* vec_permute_cost  */
>>> +  3, /* vec_to_scalar_cost  */
>>> +  2, /* scalar_to_vec_cost  */
>>> +  5, /* vec_align_load_cost  */
>>> +  5, /* vec_unalign_load_cost  */
>>> +  1, /* vec_unalign_store_cost  */
>>> +  1, /* vec_store_cost  */
>>> +  1, /* cond_taken_branch_cost  */
>>> +  1 /* cond_not_taken_branch_cost  */
>>> +};
>>> +
>>>   static const struct cpu_vector_cost exynosm1_vector_cost =
>>>   {
>>>     1, /* scalar_int_stmt_cost  */
>>> @@ -550,6 +595,15 @@ static const cpu_prefetch_tune generic_prefetch_tune =
>>>     -1                   /* default_opt_level  */
>>>   };
>>>
>>> +static const cpu_prefetch_tune tsv110_prefetch_tune =
>>> +{
>>> +  0,                   /* num_slots  */
>>> +  64,                  /* l1_cache_size  */
>>> +  64,                  /* l1_cache_line_size  */
>>> +  512,                 /* l2_cache_size  */
>>> +  -1                   /* default_opt_level  */
>>> +};
>>> +
>>>   static const cpu_prefetch_tune exynosm1_prefetch_tune =
>>>   {
>>>     0,                   /* num_slots  */
>>> @@ -751,6 +805,31 @@ static const struct tune_params cortexa73_tunings =
>>>   };
>>>
>>>
>>> +static const struct tune_params tsv110_tunings =
>>> +{
>>> +  &tsv110_extra_costs,
>>> +  &tsv110_addrcost_table,
>>> +  &tsv110_regmove_cost,
>>> +  &tsv110_vector_cost,
>>> +  &generic_branch_cost,
>>> +  &generic_approx_modes,
>>> +  4, /* memmov_cost  */
>>> +  4, /* issue_rate  */
>>> +  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH
>>> +   | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops  */
>>> +  16,  /* function_align.  */
>>> +  4,   /* jump_align.  */
>>> +  8,   /* loop_align.  */
>>> +  2,   /* int_reassoc_width.  */
>>> +  4,   /* fp_reassoc_width.  */
>>> +  1,   /* vec_reassoc_width.  */
>>> +  2,   /* min_div_recip_mul_sf.  */
>>> +  2,   /* min_div_recip_mul_df.  */
>>> +  0,   /* max_case_values.  */
>>> +  tune_params::AUTOPREFETCHER_WEAK,    /* autoprefetcher_model.  */
>>> +  (AARCH64_EXTRA_TUNE_NONE),   /* tune_flags.  */
>>> +  &tsv110_prefetch_tune
>>> +};
>>>
>>>   static const struct tune_params exynosm1_tunings =
>>>   {
>>> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
>>> index beba295..55fcd42 100644
>>> --- a/gcc/doc/invoke.texi
>>> +++ b/gcc/doc/invoke.texi
>>> @@ -14713,7 +14713,7 @@ performance of the code. Permissible values for this option are:
>>>   @samp{generic}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a55},
>>>   @samp{cortex-a57}, @samp{cortex-a72}, @samp{cortex-a73}, @samp{cortex-a75},
>>>   @samp{exynos-m1}, @samp{falkor}, @samp{qdf24xx}, @samp{saphira},
>>> -@samp{xgene1}, @samp{vulcan}, @samp{thunderx},
>>> +@samp{xgene1}, @samp{vulcan}, @samp{thunderx}, @samp{tsv110},
>>>   @samp{thunderxt88}, @samp{thunderxt88p1}, @samp{thunderxt81},
>>>   @samp{thunderxt83}, @samp{thunderx2t99}, @samp{cortex-a57.cortex-a53},
>>>   @samp{cortex-a72.cortex-a53}, @samp{cortex-a73.cortex-a35},
>>> -- 
>>> 2.7.4
>>>
>>
>> .
>>
Shaokun Zhang May 23, 2018, 9:03 a.m. UTC | #4
Hi Kyrill,

On 2018/5/23 16:08, Kyrill Tkachov wrote:
> 
> On 23/05/18 05:54, Zhangshaokun wrote:
>> Hi Kyrill,
>>
>> On 2018/5/22 18:52, Kyrill Tkachov wrote:
>>> Hi Shaokun,
>>>
>>> On 22/05/18 09:40, Shaokun Zhang wrote:
>>>> This patch adds HiSilicon's an mcpu: tsv110.
>>>>
>>>> ---
>>>>   gcc/ChangeLog                            |   9 +++
>>>>   gcc/config/aarch64/aarch64-cores.def     |   5 ++
>>>>   gcc/config/aarch64/aarch64-cost-tables.h | 103 +++++++++++++++++++++++++++++++
>>>>   gcc/config/aarch64/aarch64-tune.md       |   2 +-
>>>>   gcc/config/aarch64/aarch64.c             |  79 ++++++++++++++++++++++++
>>>>   gcc/doc/invoke.texi                      |   2 +-
>>>>   6 files changed, 198 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
>>>> index cec2892..5d44966 100644
>>>> --- a/gcc/ChangeLog
>>>> +++ b/gcc/ChangeLog
>>>> @@ -1,3 +1,12 @@
>>>> +2018-05-22  Shaokun Zhang <zhangshaokun@hisilicon.com>
>>>> +            Bo Zhou  <zbo.zhou@hisilicon.com>
>>>> +
>>>> +       * config/aarch64/aarch64-cores.def (tsv110): New CPU.
>>>> +       * config/aarch64/aarch64-tune.md: Regenerated.
>>>> +       * doc/invoke.texi (AArch61 Options/-mtune): Add "tsv110".
>>> typo: AArch64.
>>>
>> Good catch, my mistake.
>>
>>>> +       * gcc/config/aarch64/aarch64.c (tsv110_tunings): New tuning table.
>>>> +       * gcc/config/aarch64/aarch64-cost-tables.h: Add "tsv110" extra costs.
>>> Please start the path with config/.
>>>
>> Sure, Will remove gcc/ next version.
>>
>>>> +
>>>>   2018-05-21  Michael Meissner <meissner@linux.ibm.com>
>>>>
>>>>           PR target/85657
>>>> diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
>>>> index 33b96ca..db7a412 100644
>>>> --- a/gcc/config/aarch64/aarch64-cores.def
>>>> +++ b/gcc/config/aarch64/aarch64-cores.def
>>>> @@ -91,6 +91,11 @@ AARCH64_CORE("cortex-a75",  cortexa75, cortexa57, 8_2A,  AARCH64_FL_FOR_ARCH8_2
>>>>   /* Qualcomm ('Q') cores. */
>>>>   AARCH64_CORE("saphira",     saphira,    falkor,    8_3A, AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira,   0x51, 0xC01, -1)
>>>>
>>>> +/* ARMv8.4-A Architecture Processors.  */
>>>> +
>>>> +/* HiSilicon ('H') cores. */
>>>> +AARCH64_CORE("tsv110",     tsv110,    tsv110,    8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110,   0x48, 0xd01, -1)
>>>> +
>>> The third field is the scheduler model to use when optimising.
>>> Since there is no tsv110 scheduling model, using the name "tsv110"
>>> in the third field will generally give pretty poor schedules.
>>> I recommend you specify an scheduling model that most closely matches your core
>>> for the time being. But I don't think it's required and I wouldn't let it hold
>> I checked it again, cortexa57 is most closely matches tsv110 and thanks your
>> suggestion.
>> If i choose cortexa57, can i add the tsv110_tunings which will use tsv110's
>> pipeline features, like the rest patch as follow or only use generic feature?
> 
> If you use cortexa57 for the scheduling model (the 3rd field) you should still
> use tsv110_tunings in the 6th field as this will specify other important parameters
> like instruction selection costs, fusion capabilities, alignment requirements etc.
> 

Thanks for your comments; I will wait for the other maintainers' comments and prepare the next version.
One more question: do you have any thoughts on the issue raised in my cover letter about skipping
DC CVAU for HiSilicon tsv110 when synchronizing the icache and dcache?

Thanks,
Shaokun

> Thanks,
> Kyrill
> 
>>
>>> up the patch.
>>>
>>> You'll need approval from an aarch64 maintainer (cc'ed some for you).
>>>
>> Good, thanks for your nice guidance.
>>
>> Thanks,
>> Shaokun
>>
>>> Thanks,
>>> Kyrill
>>>
>>>>   /* ARMv8-A big.LITTLE implementations.  */
>>>>
>>>>   AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1)
>>>> diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h
>>>> index a455c62..b6890d6 100644
>>>> --- a/gcc/config/aarch64/aarch64-cost-tables.h
>>>> +++ b/gcc/config/aarch64/aarch64-cost-tables.h
>>>> @@ -334,4 +334,107 @@ const struct cpu_cost_table thunderx2t99_extra_costs =
>>>>     }
>>>>   };
>>>>
>>>> +const struct cpu_cost_table tsv110_extra_costs =
>>>> +{
>>>> +  /* ALU */
>>>> +  {
>>>> +    0,                 /* arith.  */
>>>> +    0,                 /* logical.  */
>>>> +    0,                 /* shift.  */
>>>> +    0,                 /* shift_reg.  */
>>>> +    COSTS_N_INSNS (1), /* arith_shift.  */
>>>> +    COSTS_N_INSNS (1), /* arith_shift_reg.  */
>>>> +    COSTS_N_INSNS (1), /* log_shift.  */
>>>> +    COSTS_N_INSNS (1), /* log_shift_reg.  */
>>>> +    0,                 /* extend.  */
>>>> +    COSTS_N_INSNS (1), /* extend_arith.  */
>>>> +    0,                 /* bfi.  */
>>>> +    0,                 /* bfx.  */
>>>> +    0,                 /* clz.  */
>>>> +    0,                /* rev.  */
>>>> +    0,                 /* non_exec.  */
>>>> +    true               /* non_exec_costs_exec.  */
>>>> +  },
>>>> +  {
>>>> +    /* MULT SImode */
>>>> +    {
>>>> +      COSTS_N_INSNS (2),       /* simple.  */
>>>> +      COSTS_N_INSNS (2),       /* flag_setting.  */
>>>> +      COSTS_N_INSNS (2),       /* extend.  */
>>>> +      COSTS_N_INSNS (2),       /* add.  */
>>>> +      COSTS_N_INSNS (2),       /* extend_add.  */
>>>> +      COSTS_N_INSNS (11)       /* idiv.  */
>>>> +    },
>>>> +    /* MULT DImode */
>>>> +    {
>>>> +      COSTS_N_INSNS (3),       /* simple.  */
>>>> +      0,                       /* flag_setting (N/A).  */
>>>> +      COSTS_N_INSNS (3),       /* extend.  */
>>>> +      COSTS_N_INSNS (3),       /* add.  */
>>>> +      COSTS_N_INSNS (3),       /* extend_add.  */
>>>> +      COSTS_N_INSNS (19)       /* idiv.  */
>>>> +    }
>>>> +  },
>>>> +  /* LD/ST */
>>>> +  {
>>>> +    COSTS_N_INSNS (3),         /* load.  */
>>>> +    COSTS_N_INSNS (4),         /* load_sign_extend.  */
>>>> +    COSTS_N_INSNS (3),         /* ldrd.  */
>>>> +    COSTS_N_INSNS (3),         /* ldm_1st.  */
>>>> +    1,                         /* ldm_regs_per_insn_1st. */
>>>> +    2,                         /* ldm_regs_per_insn_subsequent.  */
>>>> +    COSTS_N_INSNS (4),         /* loadf.  */
>>>> +    COSTS_N_INSNS (4),         /* loadd.  */
>>>> +    COSTS_N_INSNS (4),         /* load_unaligned.  */
>>>> +    0,                         /* store.  */
>>>> +    0,                         /* strd.  */
>>>> +    0,                         /* stm_1st.  */
>>>> +    1,                         /* stm_regs_per_insn_1st. */
>>>> +    2,                         /* stm_regs_per_insn_subsequent.  */
>>>> +    0,                         /* storef.  */
>>>> +    0,                         /* stored.  */
>>>> +    COSTS_N_INSNS (1),         /* store_unaligned.  */
>>>> +    COSTS_N_INSNS (4),         /* loadv.  */
>>>> +    COSTS_N_INSNS (4)          /* storev.  */
>>>> +  },
>>>> +  {
>>>> +    /* FP SFmode */
>>>> +    {
>>>> +      COSTS_N_INSNS (10),      /* div.  */
>>>> +      COSTS_N_INSNS (4),       /* mult.  */
>>>> +      COSTS_N_INSNS (4),       /* mult_addsub.  */
>>>> +      COSTS_N_INSNS (4),       /* fma.  */
>>>> +      COSTS_N_INSNS (4),       /* addsub.  */
>>>> +      COSTS_N_INSNS (1),       /* fpconst.  */
>>>> +      COSTS_N_INSNS (1),       /* neg.  */
>>>> +      COSTS_N_INSNS (1),       /* compare.  */
>>>> +      COSTS_N_INSNS (2),       /* widen.  */
>>>> +      COSTS_N_INSNS (2),       /* narrow.  */
>>>> +      COSTS_N_INSNS (2),       /* toint.  */
>>>> +      COSTS_N_INSNS (1),       /* fromint.  */
>>>> +      COSTS_N_INSNS (2)        /* roundint.  */
>>>> +    },
>>>> +    /* FP DFmode */
>>>> +    {
>>>> +      COSTS_N_INSNS (17),      /* div.  */
>>>> +      COSTS_N_INSNS (4),       /* mult.  */
>>>> +      COSTS_N_INSNS (6),       /* mult_addsub.  */
>>>> +      COSTS_N_INSNS (6),       /* fma.  */
>>>> +      COSTS_N_INSNS (3),       /* addsub.  */
>>>> +      COSTS_N_INSNS (1),       /* fpconst.  */
>>>> +      COSTS_N_INSTS (1),       /* neg.  */
>>>> +      COSTS_N_INSTS (1),       /* compare.  */
>>>> +      COSTS_N_INSNS (2),       /* widen.  */
>>>> +      COSTS_N_INSNS (2),       /* narrow.  */
>>>> +      COSTS_N_INSNS (2),       /* toint.  */
>>>> +      COSTS_N_INSNS (1),       /* fromint.  */
>>>> +      COSTS_N_INSNS (2)        /* roundint.  */
>>>> +    }
>>>> +  },
>>>> +  /* Vector */
>>>> +  {
>>>> +    COSTS_N_INSNS (1)  /* alu.  */
>>>> +  }
>>>> +};
>>>> +
>>>>   #endif
>>>> diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
>>>> index 7b3a746..a10f2e7 100644
>>>> --- a/gcc/config/aarch64/aarch64-tune.md
>>>> +++ b/gcc/config/aarch64/aarch64-tune.md
>>>> @@ -1,5 +1,5 @@
>>>>   ;; -*- buffer-read-only: t -*-
>>>>   ;; Generated automatically by gentune.sh from aarch64-cores.def
>>>>   (define_attr "tune"
>>>> - "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55"
>>>> + "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,saphira,tsv110,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55"
>>>>           (const (symbol_ref "((enum attr_tune) aarch64_tune)")))
>>>> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
>>>> index 6bf6c05..0788c14 100644
>>>> --- a/gcc/config/aarch64/aarch64.c
>>>> +++ b/gcc/config/aarch64/aarch64.c
>>>> @@ -266,6 +266,22 @@ static const struct cpu_addrcost_table generic_addrcost_table =
>>>>     0 /* imm_offset  */
>>>>   };
>>>>
>>>> +static const struct cpu_addrcost_table tsv110_addrcost_table =
>>>> +{
>>>> +    {
>>>> +      1, /* hi  */
>>>> +      0, /* si  */
>>>> +      0, /* di  */
>>>> +      1, /* ti  */
>>>> +    },
>>>> +  0, /* pre_modify  */
>>>> +  0, /* post_modify  */
>>>> +  0, /* register_offset  */
>>>> +  1, /* register_sextend  */
>>>> +  1, /* register_zextend  */
>>>> +  0 /* imm_offset  */
>>>> +};
>>>> +
>>>>   static const struct cpu_addrcost_table exynosm1_addrcost_table =
>>>>   {
>>>>       {
>>>> @@ -344,6 +360,16 @@ static const struct cpu_regmove_cost cortexa53_regmove_cost =
>>>>     2 /* FP2FP  */
>>>>   };
>>>>
>>>> +static const struct cpu_regmove_cost tsv110_regmove_cost =
>>>> +{
>>>> +  1, /* GP2GP  */
>>>> +  /* Avoid the use of slow int<->fp moves for spilling by setting
>>>> +     their cost higher than memmov_cost.  */
>>>> +  2, /* GP2FP  */
>>>> +  3, /* FP2GP  */
>>>> +  2  /* FP2FP  */
>>>> +};
>>>> +
>>>>   static const struct cpu_regmove_cost exynosm1_regmove_cost =
>>>>   {
>>>>     1, /* GP2GP  */
>>>> @@ -450,6 +476,25 @@ static const struct cpu_vector_cost cortexa57_vector_cost =
>>>>     1 /* cond_not_taken_branch_cost  */
>>>>   };
>>>>
>>>> +static const struct cpu_vector_cost tsv110_vector_cost =
>>>> +{
>>>> +  1, /* scalar_int_stmt_cost  */
>>>> +  1, /* scalar_fp_stmt_cost  */
>>>> +  5, /* scalar_load_cost  */
>>>> +  1, /* scalar_store_cost  */
>>>> +  2, /* vec_int_stmt_cost  */
>>>> +  2, /* vec_fp_stmt_cost  */
>>>> +  2, /* vec_permute_cost  */
>>>> +  3, /* vec_to_scalar_cost  */
>>>> +  2, /* scalar_to_vec_cost  */
>>>> +  5, /* vec_align_load_cost  */
>>>> +  5, /* vec_unalign_load_cost  */
>>>> +  1, /* vec_unalign_store_cost  */
>>>> +  1, /* vec_store_cost  */
>>>> +  1, /* cond_taken_branch_cost  */
>>>> +  1 /* cond_not_taken_branch_cost  */
>>>> +};
>>>> +
>>>>   static const struct cpu_vector_cost exynosm1_vector_cost =
>>>>   {
>>>>     1, /* scalar_int_stmt_cost  */
>>>> @@ -550,6 +595,15 @@ static const cpu_prefetch_tune generic_prefetch_tune =
>>>>     -1                   /* default_opt_level  */
>>>>   };
>>>>
>>>> +static const cpu_prefetch_tune tsv110_prefetch_tune =
>>>> +{
>>>> +  0,                   /* num_slots  */
>>>> +  64,                  /* l1_cache_size  */
>>>> +  64,                  /* l1_cache_line_size  */
>>>> +  512,                 /* l2_cache_size  */
>>>> +  -1                   /* default_opt_level  */
>>>> +};
>>>> +
>>>>   static const cpu_prefetch_tune exynosm1_prefetch_tune =
>>>>   {
>>>>     0,                   /* num_slots  */
>>>> @@ -751,6 +805,31 @@ static const struct tune_params cortexa73_tunings =
>>>>   };
>>>>
>>>>
>>>> +static const struct tune_params tsv110_tunings =
>>>> +{
>>>> +  &tsv110_extra_costs,
>>>> +  &tsv110_addrcost_table,
>>>> +  &tsv110_regmove_cost,
>>>> +  &tsv110_vector_cost,
>>>> +  &generic_branch_cost,
>>>> +  &generic_approx_modes,
>>>> +  4, /* memmov_cost  */
>>>> +  4, /* issue_rate  */
>>>> +  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH
>>>> +   | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops  */
>>>> +  16,  /* function_align.  */
>>>> +  4,   /* jump_align.  */
>>>> +  8,   /* loop_align.  */
>>>> +  2,   /* int_reassoc_width.  */
>>>> +  4,   /* fp_reassoc_width.  */
>>>> +  1,   /* vec_reassoc_width.  */
>>>> +  2,   /* min_div_recip_mul_sf.  */
>>>> +  2,   /* min_div_recip_mul_df.  */
>>>> +  0,   /* max_case_values.  */
>>>> +  tune_params::AUTOPREFETCHER_WEAK,    /* autoprefetcher_model.  */
>>>> +  (AARCH64_EXTRA_TUNE_NONE),   /* tune_flags.  */
>>>> +  &tsv110_prefetch_tune
>>>> +};
>>>>
>>>>   static const struct tune_params exynosm1_tunings =
>>>>   {
>>>> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
>>>> index beba295..55fcd42 100644
>>>> --- a/gcc/doc/invoke.texi
>>>> +++ b/gcc/doc/invoke.texi
>>>> @@ -14713,7 +14713,7 @@ performance of the code. Permissible values for this option are:
>>>>   @samp{generic}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a55},
>>>>   @samp{cortex-a57}, @samp{cortex-a72}, @samp{cortex-a73}, @samp{cortex-a75},
>>>>   @samp{exynos-m1}, @samp{falkor}, @samp{qdf24xx}, @samp{saphira},
>>>> -@samp{xgene1}, @samp{vulcan}, @samp{thunderx},
>>>> +@samp{xgene1}, @samp{vulcan}, @samp{thunderx}, @samp{tsv110},
>>>>   @samp{thunderxt88}, @samp{thunderxt88p1}, @samp{thunderxt81},
>>>>   @samp{thunderxt83}, @samp{thunderx2t99}, @samp{cortex-a57.cortex-a53},
>>>>   @samp{cortex-a72.cortex-a53}, @samp{cortex-a73.cortex-a35},
>>>> -- 
>>>> 2.7.4
>>>>
>>>
>>> .
>>>
> 
> 
> .
>

Patch
diff mbox series

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index cec2892..5d44966 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@ 
+2018-05-22  Shaokun Zhang  <zhangshaokun@hisilicon.com>
+            Bo Zhou  <zbo.zhou@hisilicon.com>
+
+	* config/aarch64/aarch64-cores.def (tsv110): New CPU.
+	* config/aarch64/aarch64-tune.md: Regenerated.
+	* doc/invoke.texi (AArch64 Options/-mtune): Add "tsv110".
+	* config/aarch64/aarch64.c (tsv110_tunings): New tuning table.
+	* config/aarch64/aarch64-cost-tables.h: Add "tsv110" extra costs.
+
 2018-05-21  Michael Meissner  <meissner@linux.ibm.com>
 
 	PR target/85657
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 33b96ca..db7a412 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -91,6 +91,11 @@  AARCH64_CORE("cortex-a75",  cortexa75, cortexa57, 8_2A,  AARCH64_FL_FOR_ARCH8_2
 /* Qualcomm ('Q') cores. */
 AARCH64_CORE("saphira",     saphira,    falkor,    8_3A,  AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira,   0x51, 0xC01, -1)
 
+/* ARMv8.4-A Architecture Processors.  */
+
+/* HiSilicon ('H') cores. */
+AARCH64_CORE("tsv110",     tsv110,    tsv110,    8_4A,  AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110,   0x48, 0xd01, -1)
+
 /* ARMv8-A big.LITTLE implementations.  */
 
 AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1)
diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h
index a455c62..b6890d6 100644
--- a/gcc/config/aarch64/aarch64-cost-tables.h
+++ b/gcc/config/aarch64/aarch64-cost-tables.h
@@ -334,4 +334,107 @@  const struct cpu_cost_table thunderx2t99_extra_costs =
   }
 };
 
+const struct cpu_cost_table tsv110_extra_costs =
+{
+  /* ALU */
+  {
+    0,                 /* arith.  */
+    0,                 /* logical.  */
+    0,                 /* shift.  */
+    0,                 /* shift_reg.  */
+    COSTS_N_INSNS (1), /* arith_shift.  */
+    COSTS_N_INSNS (1), /* arith_shift_reg.  */
+    COSTS_N_INSNS (1), /* log_shift.  */
+    COSTS_N_INSNS (1), /* log_shift_reg.  */
+    0,                 /* extend.  */
+    COSTS_N_INSNS (1), /* extend_arith.  */
+    0,                 /* bfi.  */
+    0,                 /* bfx.  */
+    0,                 /* clz.  */
+    0,	               /* rev.  */
+    0,                 /* non_exec.  */
+    true               /* non_exec_costs_exec.  */
+  },
+  {
+    /* MULT SImode */
+    {
+      COSTS_N_INSNS (2),       /* simple.  */
+      COSTS_N_INSNS (2),       /* flag_setting.  */
+      COSTS_N_INSNS (2),       /* extend.  */
+      COSTS_N_INSNS (2),       /* add.  */
+      COSTS_N_INSNS (2),       /* extend_add.  */
+      COSTS_N_INSNS (11)       /* idiv.  */
+    },
+    /* MULT DImode */
+    {
+      COSTS_N_INSNS (3),       /* simple.  */
+      0,                       /* flag_setting (N/A).  */
+      COSTS_N_INSNS (3),       /* extend.  */
+      COSTS_N_INSNS (3),       /* add.  */
+      COSTS_N_INSNS (3),       /* extend_add.  */
+      COSTS_N_INSNS (19)       /* idiv.  */
+    }
+  },
+  /* LD/ST */
+  {
+    COSTS_N_INSNS (3),         /* load.  */
+    COSTS_N_INSNS (4),         /* load_sign_extend.  */
+    COSTS_N_INSNS (3),         /* ldrd.  */
+    COSTS_N_INSNS (3),         /* ldm_1st.  */
+    1,                         /* ldm_regs_per_insn_1st.  */
+    2,                         /* ldm_regs_per_insn_subsequent.  */
+    COSTS_N_INSNS (4),         /* loadf.  */
+    COSTS_N_INSNS (4),         /* loadd.  */
+    COSTS_N_INSNS (4),         /* load_unaligned.  */
+    0,                         /* store.  */
+    0,                         /* strd.  */
+    0,                         /* stm_1st.  */
+    1,                         /* stm_regs_per_insn_1st.  */
+    2,                         /* stm_regs_per_insn_subsequent.  */
+    0,                         /* storef.  */
+    0,                         /* stored.  */
+    COSTS_N_INSNS (1),         /* store_unaligned.  */
+    COSTS_N_INSNS (4),         /* loadv.  */
+    COSTS_N_INSNS (4)          /* storev.  */
+  },
+  {
+    /* FP SFmode */
+    {
+      COSTS_N_INSNS (10),      /* div.  */
+      COSTS_N_INSNS (4),       /* mult.  */
+      COSTS_N_INSNS (4),       /* mult_addsub.  */
+      COSTS_N_INSNS (4),       /* fma.  */
+      COSTS_N_INSNS (4),       /* addsub.  */
+      COSTS_N_INSNS (1),       /* fpconst.  */
+      COSTS_N_INSNS (1),       /* neg.  */
+      COSTS_N_INSNS (1),       /* compare.  */
+      COSTS_N_INSNS (2),       /* widen.  */
+      COSTS_N_INSNS (2),       /* narrow.  */
+      COSTS_N_INSNS (2),       /* toint.  */
+      COSTS_N_INSNS (1),       /* fromint.  */
+      COSTS_N_INSNS (2)        /* roundint.  */
+    },
+    /* FP DFmode */
+    {
+      COSTS_N_INSNS (17),      /* div.  */
+      COSTS_N_INSNS (4),       /* mult.  */
+      COSTS_N_INSNS (6),       /* mult_addsub.  */
+      COSTS_N_INSNS (6),       /* fma.  */
+      COSTS_N_INSNS (3),       /* addsub.  */
+      COSTS_N_INSNS (1),       /* fpconst.  */
+      COSTS_N_INSNS (1),       /* neg.  */
+      COSTS_N_INSNS (1),       /* compare.  */
+      COSTS_N_INSNS (2),       /* widen.  */
+      COSTS_N_INSNS (2),       /* narrow.  */
+      COSTS_N_INSNS (2),       /* toint.  */
+      COSTS_N_INSNS (1),       /* fromint.  */
+      COSTS_N_INSNS (2)        /* roundint.  */
+    }
+  },
+  /* Vector */
+  {
+    COSTS_N_INSNS (1)  /* alu.  */
+  }
+};
+
 #endif
diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
index 7b3a746..a10f2e7 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@ 
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from aarch64-cores.def
 (define_attr "tune"
-	"cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55"
+	"cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,xgene1,falkor,qdf24xx,exynosm1,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,saphira,tsv110,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55"
 	(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 6bf6c05..0788c14 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -266,6 +266,22 @@  static const struct cpu_addrcost_table generic_addrcost_table =
   0 /* imm_offset  */
 };
 
+static const struct cpu_addrcost_table tsv110_addrcost_table =
+{
+    { /* Nested sub-table, positional: hi, si, di, ti.  */
+      1, /* hi  */
+      0, /* si  */
+      0, /* di  */
+      1, /* ti  */
+    },
+  0, /* pre_modify  */
+  0, /* post_modify  */
+  0, /* register_offset  */
+  1, /* register_sextend  */
+  1, /* register_zextend  */
+  0 /* imm_offset  */
+};
+
 static const struct cpu_addrcost_table exynosm1_addrcost_table =
 {
     {
@@ -344,6 +360,16 @@  static const struct cpu_regmove_cost cortexa53_regmove_cost =
   2 /* FP2FP  */
 };
 
+static const struct cpu_regmove_cost tsv110_regmove_cost =
+{
+  1, /* GP2GP  */
+  /* NOTE(review): the intent is to avoid slow int<->fp moves for spilling,
+     but 2 and 3 are below tsv110_tunings' memmov_cost of 4 -- confirm.  */
+  2, /* GP2FP  */
+  3, /* FP2GP  */
+  2  /* FP2FP  */
+};
+
 static const struct cpu_regmove_cost exynosm1_regmove_cost =
 {
   1, /* GP2GP  */
@@ -450,6 +476,25 @@  static const struct cpu_vector_cost cortexa57_vector_cost =
   1 /* cond_not_taken_branch_cost  */
 };
 
+static const struct cpu_vector_cost tsv110_vector_cost =
+{ /* Loads are costed at 5 and stores at 1, scalar and vector alike.  */
+  1, /* scalar_int_stmt_cost  */
+  1, /* scalar_fp_stmt_cost  */
+  5, /* scalar_load_cost  */
+  1, /* scalar_store_cost  */
+  2, /* vec_int_stmt_cost  */
+  2, /* vec_fp_stmt_cost  */
+  2, /* vec_permute_cost  */
+  3, /* vec_to_scalar_cost  */
+  2, /* scalar_to_vec_cost  */
+  5, /* vec_align_load_cost  */
+  5, /* vec_unalign_load_cost  */
+  1, /* vec_unalign_store_cost  */
+  1, /* vec_store_cost  */
+  1, /* cond_taken_branch_cost  */
+  1 /* cond_not_taken_branch_cost  */
+};
+
 static const struct cpu_vector_cost exynosm1_vector_cost =
 {
   1, /* scalar_int_stmt_cost  */
@@ -550,6 +595,15 @@  static const cpu_prefetch_tune generic_prefetch_tune =
   -1			/* default_opt_level  */
 };
 
+static const cpu_prefetch_tune tsv110_prefetch_tune =
+{
+  0,			/* num_slots  */
+  64,			/* l1_cache_size (presumably KB -- confirm)  */
+  64,			/* l1_cache_line_size (presumably bytes -- confirm)  */
+  512,			/* l2_cache_size (presumably KB -- confirm)  */
+  -1			/* default_opt_level  */
+};
+
 static const cpu_prefetch_tune exynosm1_prefetch_tune =
 {
   0,			/* num_slots  */
@@ -751,6 +805,31 @@  static const struct tune_params cortexa73_tunings =
 };
 
 
+static const struct tune_params tsv110_tunings =
+{
+  &tsv110_extra_costs, /* tsv110-specific table.  */
+  &tsv110_addrcost_table, /* tsv110-specific table.  */
+  &tsv110_regmove_cost, /* tsv110-specific table.  */
+  &tsv110_vector_cost, /* tsv110-specific table.  */
+  &generic_branch_cost, /* Reuses the generic table.  */
+  &generic_approx_modes, /* Reuses the generic table.  */
+  4, /* memmov_cost  */
+  4, /* issue_rate  */
+  (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH
+   | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops  */
+  16,	/* function_align.  */
+  4,	/* jump_align.  */
+  8,	/* loop_align.  */
+  2,	/* int_reassoc_width.  */
+  4,	/* fp_reassoc_width.  */
+  1,	/* vec_reassoc_width.  */
+  2,	/* min_div_recip_mul_sf.  */
+  2,	/* min_div_recip_mul_df.  */
+  0,	/* max_case_values.  */
+  tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
+  (AARCH64_EXTRA_TUNE_NONE),	/* tune_flags.  */
+  &tsv110_prefetch_tune
+};
 
 static const struct tune_params exynosm1_tunings =
 {
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index beba295..55fcd42 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -14713,7 +14713,7 @@  performance of the code.  Permissible values for this option are:
 @samp{generic}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a55},
 @samp{cortex-a57}, @samp{cortex-a72}, @samp{cortex-a73}, @samp{cortex-a75},
 @samp{exynos-m1}, @samp{falkor}, @samp{qdf24xx}, @samp{saphira},
-@samp{xgene1}, @samp{vulcan}, @samp{thunderx},
+@samp{xgene1}, @samp{vulcan}, @samp{thunderx}, @samp{tsv110},
 @samp{thunderxt88}, @samp{thunderxt88p1}, @samp{thunderxt81},
 @samp{thunderxt83}, @samp{thunderx2t99}, @samp{cortex-a57.cortex-a53},
 @samp{cortex-a72.cortex-a53}, @samp{cortex-a73.cortex-a35},