diff mbox series

[x86_64] : Zhaoxin shijidadao enablement

Message ID 20240527083253.23868-1-MayShao-oc@zhaoxin.com
State New
Headers show
Series [x86_64] : Zhaoxin shijidadao enablement | expand

Commit Message

MayShao May 27, 2024, 8:32 a.m. UTC
From: mayshao <mayshao-oc@zhaoxin.com>

Hi all:
    This patch enables -march/-mtune=shijidadao; costs and tunings are set according to the characteristics of the processor.

    Bootstrapped and regtested on x86_64.

    Ok for trunk?
BR
Mayshao
gcc/ChangeLog:

	* common/config/i386/cpuinfo.h (get_zhaoxin_cpu): Recognize shijidadao.
	* common/config/i386/i386-common.cc: Add shijidadao.
	* common/config/i386/i386-cpuinfo.h (enum processor_subtypes):
	Add ZHAOXIN_FAM7H_SHIJIDADAO.
	* config.gcc: Add shijidadao.
	* config/i386/driver-i386.cc (host_detect_local_cpu):
	Let -march=native recognize shijidadao processors.
	* config/i386/i386-c.cc (ix86_target_macros_internal): Add shijidadao.
	* config/i386/i386-options.cc (m_ZHAOXIN): Add m_SHIJIDADAO.
	(m_SHIJIDADAO): New definition.
	* config/i386/i386.h (enum processor_type): Add PROCESSOR_SHIJIDADAO.
	* config/i386/x86-tune-costs.h (struct processor_costs):
	Add shijidadao_cost.
	* config/i386/x86-tune-sched.cc (ix86_issue_rate): Add shijidadao.
	(ix86_adjust_cost): Ditto.
	* config/i386/x86-tune.def (X86_TUNE_USE_GATHER_2PARTS): Add m_SHIJIDADAO.
	(X86_TUNE_USE_GATHER_4PARTS): Ditto.
	(X86_TUNE_USE_GATHER_8PARTS): Ditto.
	(X86_TUNE_AVOID_128FMA_CHAINS): Ditto.
	* doc/extend.texi: Add details about shijidadao.
	* doc/invoke.texi: Ditto.

gcc/testsuite/ChangeLog:

	* g++.target/i386/mv32.C: Handle new -march.
	* gcc.target/i386/funcspec-56.inc: Ditto.
---
 gcc/common/config/i386/cpuinfo.h              |   8 +-
 gcc/common/config/i386/i386-common.cc         |   8 +-
 gcc/common/config/i386/i386-cpuinfo.h         |   1 +
 gcc/config.gcc                                |  14 ++-
 gcc/config/i386/driver-i386.cc                |  11 +-
 gcc/config/i386/i386-c.cc                     |   7 ++
 gcc/config/i386/i386-options.cc               |   4 +-
 gcc/config/i386/i386.h                        |   1 +
 gcc/config/i386/x86-tune-costs.h              | 116 ++++++++++++++++++
 gcc/config/i386/x86-tune-sched.cc             |   2 +
 gcc/config/i386/x86-tune.def                  |   8 +-
 gcc/doc/extend.texi                           |   3 +
 gcc/doc/invoke.texi                           |   6 +
 gcc/testsuite/g++.target/i386/mv32.C          |   6 +
 gcc/testsuite/gcc.target/i386/funcspec-56.inc |   2 +
 15 files changed, 183 insertions(+), 14 deletions(-)

Comments

Uros Bizjak May 28, 2024, 6:15 a.m. UTC | #1
On Mon, May 27, 2024 at 10:33 AM MayShao <MayShao-oc@zhaoxin.com> wrote:
>
> From: mayshao <mayshao-oc@zhaoxin.com>
>
> Hi all:
>     This patch enables -march/-mtune=shijidadao, costs and tunings are set according to the characteristics of the processor.
>
>     Bootstrapped /regtested X86_64.
>
>     Ok for trunk?

OK.

Thanks,
Uros.

> BR
> Mayshao
> gcc/ChangeLog:
>
>         * common/config/i386/cpuinfo.h (get_zhaoxin_cpu): Recognize shijidadao.
>         * common/config/i386/i386-common.cc: Add shijidadao.
>         * common/config/i386/i386-cpuinfo.h (enum processor_subtypes):
>         Add ZHAOXIN_FAM7H_SHIJIDADAO.
>         * config.gcc: Add shijidadao.
>         * config/i386/driver-i386.cc (host_detect_local_cpu):
>         Let -march=native recognize shijidadao processors.
>         * config/i386/i386-c.cc (ix86_target_macros_internal): Add shijidadao.
>         * config/i386/i386-options.cc (m_ZHAOXIN): Add m_SHIJIDADAO.
>         (m_SHIJIDADAO): New definition.
>         * config/i386/i386.h (enum processor_type): Add PROCESSOR_SHIJIDADAO.
>         * config/i386/x86-tune-costs.h (struct processor_costs):
>         Add shijidadao_cost.
>         * config/i386/x86-tune-sched.cc (ix86_issue_rate): Add shijidadao.
>         (ix86_adjust_cost): Ditto.
>         * config/i386/x86-tune.def (X86_TUNE_USE_GATHER_2PARTS): Add m_SHIJIDADAO.
>         (X86_TUNE_USE_GATHER_4PARTS): Ditto.
>         (X86_TUNE_USE_GATHER_8PARTS): Ditto.
>         (X86_TUNE_AVOID_128FMA_CHAINS): Ditto.
>         * doc/extend.texi: Add details about shijidadao.
>         * doc/invoke.texi: Ditto.
>
> gcc/testsuite/ChangeLog:
>
>         * g++.target/i386/mv32.C: Handle new -march
>         * gcc.target/i386/funcspec-56.inc: Ditto.
> ---
>  gcc/common/config/i386/cpuinfo.h              |   8 +-
>  gcc/common/config/i386/i386-common.cc         |   8 +-
>  gcc/common/config/i386/i386-cpuinfo.h         |   1 +
>  gcc/config.gcc                                |  14 ++-
>  gcc/config/i386/driver-i386.cc                |  11 +-
>  gcc/config/i386/i386-c.cc                     |   7 ++
>  gcc/config/i386/i386-options.cc               |   4 +-
>  gcc/config/i386/i386.h                        |   1 +
>  gcc/config/i386/x86-tune-costs.h              | 116 ++++++++++++++++++
>  gcc/config/i386/x86-tune-sched.cc             |   2 +
>  gcc/config/i386/x86-tune.def                  |   8 +-
>  gcc/doc/extend.texi                           |   3 +
>  gcc/doc/invoke.texi                           |   6 +
>  gcc/testsuite/g++.target/i386/mv32.C          |   6 +
>  gcc/testsuite/gcc.target/i386/funcspec-56.inc |   2 +
>  15 files changed, 183 insertions(+), 14 deletions(-)
>
> diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
> index 4610bf6d6a4..936039725ab 100644
> --- a/gcc/common/config/i386/cpuinfo.h
> +++ b/gcc/common/config/i386/cpuinfo.h
> @@ -667,12 +667,18 @@ get_zhaoxin_cpu (struct __processor_model *cpu_model,
>           reset_cpu_feature (cpu_model, cpu_features2, FEATURE_F16C);
>           cpu_model->__cpu_subtype = ZHAOXIN_FAM7H_LUJIAZUI;
>         }
> -     else if (model >= 0x5b)
> +     else if (model == 0x5b)
>         {
>           cpu = "yongfeng";
>           CHECK___builtin_cpu_is ("yongfeng");
>           cpu_model->__cpu_subtype = ZHAOXIN_FAM7H_YONGFENG;
>         }
> +     else if (model >= 0x6b)
> +       {
> +         cpu = "shijidadao";
> +         CHECK___builtin_cpu_is ("shijidadao");
> +         cpu_model->__cpu_subtype = ZHAOXIN_FAM7H_SHIJIDADAO;
> +       }
>        break;
>      default:
>        break;
> diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
> index 895e5fa662d..eb3f94c529c 100644
> --- a/gcc/common/config/i386/i386-common.cc
> +++ b/gcc/common/config/i386/i386-common.cc
> @@ -2066,6 +2066,7 @@ const char *const processor_names[] =
>    "intel",
>    "lujiazui",
>    "yongfeng",
> +  "shijidadao",
>    "geode",
>    "k6",
>    "athlon",
> @@ -2271,10 +2272,13 @@ const pta processor_alias_table[] =
>        | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR, 0, P_NONE},
>    {"lujiazui", PROCESSOR_LUJIAZUI, CPU_LUJIAZUI,
>         PTA_LUJIAZUI,
> -       M_CPU_SUBTYPE (ZHAOXIN_FAM7H_LUJIAZUI), P_NONE},
> +       M_CPU_SUBTYPE (ZHAOXIN_FAM7H_LUJIAZUI), P_PROC_BMI},
>    {"yongfeng", PROCESSOR_YONGFENG, CPU_YONGFENG,
>         PTA_YONGFENG,
> -       M_CPU_SUBTYPE (ZHAOXIN_FAM7H_YONGFENG), P_NONE},
> +       M_CPU_SUBTYPE (ZHAOXIN_FAM7H_YONGFENG), P_PROC_AVX2},
> +  {"shijidadao", PROCESSOR_SHIJIDADAO, CPU_YONGFENG,
> +       PTA_YONGFENG,
> +       M_CPU_SUBTYPE (ZHAOXIN_FAM7H_SHIJIDADAO), P_PROC_AVX2},
>    {"k8", PROCESSOR_K8, CPU_K8,
>      PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
>        | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR, 0, P_NONE},
> diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
> index 9edad96d4fd..fa3b76f4931 100644
> --- a/gcc/common/config/i386/i386-cpuinfo.h
> +++ b/gcc/common/config/i386/i386-cpuinfo.h
> @@ -104,6 +104,7 @@ enum processor_subtypes
>    INTEL_COREI7_PANTHERLAKE,
>    ZHAOXIN_FAM7H_YONGFENG,
>    AMDFAM1AH_ZNVER5,
> +  ZHAOXIN_FAM7H_SHIJIDADAO,
>    CPU_SUBTYPE_MAX
>  };
>
> diff --git a/gcc/config.gcc b/gcc/config.gcc
> index a37113bd00a..91a3a3152dc 100644
> --- a/gcc/config.gcc
> +++ b/gcc/config.gcc
> @@ -711,9 +711,9 @@ atom slm nehalem westmere sandybridge ivybridge haswell broadwell bonnell \
>  silvermont skylake-avx512 cannonlake icelake-client icelake-server \
>  skylake goldmont goldmont-plus tremont cascadelake tigerlake cooperlake \
>  sapphirerapids alderlake rocketlake eden-x2 nano nano-1000 nano-2000 nano-3000 \
> -nano-x2 eden-x4 nano-x4 lujiazui yongfeng x86-64 x86-64-v2 x86-64-v3 x86-64-v4 \
> -sierraforest graniterapids graniterapids-d grandridge arrowlake arrowlake-s \
> -clearwaterforest pantherlake native"
> +nano-x2 eden-x4 nano-x4 lujiazui yongfeng shijidadao x86-64 x86-64-v2 \
> +x86-64-v3 x86-64-v4 sierraforest graniterapids graniterapids-d grandridge \
> +arrowlake arrowlake-s clearwaterforest pantherlake native"
>
>  # Additional x86 processors supported by --with-cpu=.  Each processor
>  # MUST be separated by exactly one space.
> @@ -3855,6 +3855,10 @@ case ${target} in
>         arch=yongfeng
>         cpu=yongfeng
>         ;;
> +      shijidadao-*)
> +       arch=shijidadao
> +       cpu=shijidadao
> +       ;;
>        pentium2-*)
>         arch=pentium2
>         cpu=pentium2
> @@ -3980,6 +3984,10 @@ case ${target} in
>         arch=yongfeng
>         cpu=yongfeng
>         ;;
> +      shijidadao-*)
> +       arch=shijidadao
> +       cpu=shijidadao
> +       ;;
>        nocona-*)
>         arch=nocona
>         cpu=nocona
> diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc
> index 0176d8b6cd2..11470eaea12 100644
> --- a/gcc/config/i386/driver-i386.cc
> +++ b/gcc/config/i386/driver-i386.cc
> @@ -558,10 +558,12 @@ const char *host_detect_local_cpu (int argc, const char **argv)
>        switch (family)
>         {
>         case 7:
> -         if (model == 0x3b)
> -           processor = PROCESSOR_LUJIAZUI;
> -         else if (model >= 0x5b)
> +         if (model >= 0x6b)
> +           processor = PROCESSOR_SHIJIDADAO;
> +         else if (model == 0x5b)
>             processor = PROCESSOR_YONGFENG;
> +         else if (model == 0x3b)
> +           processor = PROCESSOR_LUJIAZUI;
>           break;
>         default:
>           break;
> @@ -853,6 +855,9 @@ const char *host_detect_local_cpu (int argc, const char **argv)
>      case PROCESSOR_YONGFENG:
>        cpu = "yongfeng";
>        break;
> +    case PROCESSOR_SHIJIDADAO:
> +      cpu = "shijidadao";
> +      break;
>
>      default:
>        /* Use something reasonable.  */
> diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
> index 7b0ad9e9181..403475d5b6b 100644
> --- a/gcc/config/i386/i386-c.cc
> +++ b/gcc/config/i386/i386-c.cc
> @@ -156,6 +156,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
>        def_or_undef (parse_in, "__yongfeng");
>        def_or_undef (parse_in, "__yongfeng__");
>        break;
> +    case PROCESSOR_SHIJIDADAO:
> +      def_or_undef (parse_in, "__shijidadao");
> +      def_or_undef (parse_in, "__shijidadao__");
> +      break;
>      case PROCESSOR_PENTIUM4:
>        def_or_undef (parse_in, "__pentium4");
>        def_or_undef (parse_in, "__pentium4__");
> @@ -386,6 +390,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
>      case PROCESSOR_YONGFENG:
>        def_or_undef (parse_in, "__tune_yongfeng__");
>         break;
> +    case PROCESSOR_SHIJIDADAO:
> +      def_or_undef (parse_in, "__tune_shijidadao__");
> +       break;
>      case PROCESSOR_PENTIUM4:
>        def_or_undef (parse_in, "__tune_pentium4__");
>        break;
> diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
> index 78602a17f7e..2af869d0ecf 100644
> --- a/gcc/config/i386/i386-options.cc
> +++ b/gcc/config/i386/i386-options.cc
> @@ -155,7 +155,8 @@ along with GCC; see the file COPYING3.  If not see
>
>  #define m_LUJIAZUI (HOST_WIDE_INT_1U<<PROCESSOR_LUJIAZUI)
>  #define m_YONGFENG (HOST_WIDE_INT_1U<<PROCESSOR_YONGFENG)
> -#define m_ZHAOXIN  (m_LUJIAZUI | m_YONGFENG)
> +#define m_SHIJIDADAO (HOST_WIDE_INT_1U<<PROCESSOR_SHIJIDADAO)
> +#define m_ZHAOXIN  (m_LUJIAZUI | m_YONGFENG | m_SHIJIDADAO)
>
>  #define m_GEODE (HOST_WIDE_INT_1U<<PROCESSOR_GEODE)
>  #define m_K6 (HOST_WIDE_INT_1U<<PROCESSOR_K6)
> @@ -793,6 +794,7 @@ static const struct processor_costs *processor_cost_table[] =
>    &intel_cost,
>    &lujiazui_cost,
>    &yongfeng_cost,
> +  &shijidadao_cost,
>    &geode_cost,
>    &k6_cost,
>    &athlon_cost,
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 359a8408263..d8c225cacdd 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -2301,6 +2301,7 @@ enum processor_type
>    PROCESSOR_INTEL,
>    PROCESSOR_LUJIAZUI,
>    PROCESSOR_YONGFENG,
> +  PROCESSOR_SHIJIDADAO,
>    PROCESSOR_GEODE,
>    PROCESSOR_K6,
>    PROCESSOR_ATHLON,
> diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
> index 65d7d1f7e42..1060b0685a8 100644
> --- a/gcc/config/i386/x86-tune-costs.h
> +++ b/gcc/config/i386/x86-tune-costs.h
> @@ -3644,6 +3644,122 @@ struct processor_costs yongfeng_cost = {
>    2,                                   /* Small unroll factor.  */
>  };
>
> +/* shijidadao_cost should produce code tuned for ZHAOXIN shijidadao CPU.  */
> +static stringop_algs shijidadao_memcpy[2] = {
> +  {libcall, {{8, unrolled_loop, true}, {256, unrolled_loop, false},
> +                        {-1, libcall, false}}},
> +  {libcall, {{10, loop, true}, {256, unrolled_loop, false},
> +                        {-1, libcall, false}}}};
> +static stringop_algs shijidadao_memset[2] = {
> +  {libcall, {{4, loop, true}, {128, unrolled_loop, false},
> +                        {-1, libcall, false}}},
> +  {libcall, {{1, rep_prefix_4_byte, false}, {14, loop, true},
> +                        {1024, vector_loop, false},
> +                        {-1, libcall, false}}}};
> +static const
> +struct processor_costs shijidadao_cost = {
> +  {
> +  /* Start of register allocator costs.  integer->integer move cost is 2.  */
> +  8,                           /* cost for loading QImode using movzbl.  */
> +  {8, 8, 8},                   /* cost of loading integer registers
> +                                          in QImode, HImode and SImode.
> +                                          Relative to reg-reg move (2).  */
> +  {8, 8, 8},                   /* cost of storing integer registers.  */
> +  2,                                   /* cost of reg,reg fld/fst.  */
> +  {8, 8, 8},                   /* cost of loading fp registers
> +                               in SFmode, DFmode and XFmode.  */
> +  {8, 8, 8},                   /* cost of storing fp registers
> +                               in SFmode, DFmode and XFmode.  */
> +  2,                           /* cost of moving MMX register.  */
> +  {8, 8},                      /* cost of loading MMX registers
> +                               in SImode and DImode.  */
> +  {8, 8},                      /* cost of storing MMX registers
> +                               in SImode and DImode.  */
> +  2, 3, 4,                     /* cost of moving XMM,YMM,ZMM register.  */
> +  {8, 8, 8, 10, 15},   /* cost of loading SSE registers
> +                               in 32,64,128,256 and 512-bit.  */
> +  {8, 8, 8, 10, 15},   /* cost of storing SSE registers
> +                               in 32,64,128,256 and 512-bit.  */
> +  8, 8,                                /* SSE->integer and integer->SSE moves.  */
> +  8, 8,                                /* mask->integer and integer->mask moves.  */
> +  {8, 8, 8},           /* cost of loading mask register
> +                               in QImode, HImode, SImode.  */
> +  {8, 8, 8},           /* cost of storing mask register
> +                               in QImode, HImode, SImode.  */
> +  2,                           /* cost of moving mask register.  */
> +  /* End of register allocator costs.  */
> +  },
> +
> +  COSTS_N_INSNS (1),                   /* cost of an add instruction.  */
> +  COSTS_N_INSNS (1),                   /* cost of a lea instruction.  */
> +  COSTS_N_INSNS (1),                   /* variable shift costs.  */
> +  COSTS_N_INSNS (1),                   /* constant shift costs.  */
> +  {COSTS_N_INSNS (2),                  /* cost of starting multiply for QI.  */
> +   COSTS_N_INSNS (3),                  /*                               HI.  */
> +   COSTS_N_INSNS (2),                  /*                               SI.  */
> +   COSTS_N_INSNS (2),                  /*                               DI.  */
> +   COSTS_N_INSNS (3)},         /*                               other.  */
> +  0,                           /* cost of multiply per each bit set.  */
> +  {COSTS_N_INSNS (9),                  /* cost of a divide/mod for QI.  */
> +   COSTS_N_INSNS (10),                 /*                          HI.  */
> +   COSTS_N_INSNS (9),                  /*                          SI.  */
> +   COSTS_N_INSNS (50),                 /*                          DI.  */
> +   COSTS_N_INSNS (50)},                /*                          other.  */
> +  COSTS_N_INSNS (1),                   /* cost of movsx.  */
> +  COSTS_N_INSNS (1),                   /* cost of movzx.  */
> +  8,                                   /* "large" insn.  */
> +  17,                                  /* MOVE_RATIO.  */
> +  6,                                   /* CLEAR_RATIO.  */
> +  {8, 8, 8},                           /* cost of loading integer registers
> +                                          in QImode, HImode and SImode.
> +                                          Relative to reg-reg move (2).  */
> +  {8, 8, 8},                   /* cost of storing integer registers.  */
> +  {8, 8, 8, 12, 15},                   /* cost of loading SSE register
> +                               in 32bit, 64bit, 128bit, 256bit and 512bit.  */
> +  {8, 8, 8, 12, 15},                   /* cost of storing SSE register
> +                               in 32bit, 64bit, 128bit, 256bit and 512bit.  */
> +  {8, 8, 8, 12, 15},                   /* cost of unaligned loads.  */
> +  {8, 8, 8, 12, 15},                   /* cost of unaligned stores.  */
> +  2, 3, 4,                     /* cost of moving XMM,YMM,ZMM register.  */
> +  8,                           /* cost of moving SSE register to integer.  */
> +  18, 6,                               /* Gather load static, per_elt.  */
> +  18, 6,                               /* Gather store static, per_elt.  */
> +  32,                                  /* size of l1 cache.  */
> +  256,                                 /* size of l2 cache.  */
> +  64,                                  /* size of prefetch block.  */
> +  12,                                  /* number of parallel prefetches.  */
> +  3,                                   /* Branch cost.  */
> +  COSTS_N_INSNS (3),                   /* cost of FADD and FSUB insns.  */
> +  COSTS_N_INSNS (3),                   /* cost of FMUL instruction.  */
> +  COSTS_N_INSNS (13),                  /* cost of FDIV instruction.  */
> +  COSTS_N_INSNS (2),                   /* cost of FABS instruction.  */
> +  COSTS_N_INSNS (2),                   /* cost of FCHS instruction.  */
> +  COSTS_N_INSNS (44),                  /* cost of FSQRT instruction.  */
> +
> +  COSTS_N_INSNS (1),                   /* cost of cheap SSE instruction.  */
> +  COSTS_N_INSNS (3),                   /* cost of ADDSS/SD SUBSS/SD insns.  */
> +  COSTS_N_INSNS (3),                   /* cost of MULSS instruction.  */
> +  COSTS_N_INSNS (3),                   /* cost of MULSD instruction.  */
> +  COSTS_N_INSNS (5),                   /* cost of FMA SS instruction.  */
> +  COSTS_N_INSNS (5),                   /* cost of FMA SD instruction.  */
> +  COSTS_N_INSNS (11),                  /* cost of DIVSS instruction.  */
> +  COSTS_N_INSNS (14),                  /* cost of DIVSD instruction.  */
> +  COSTS_N_INSNS (11),                  /* cost of SQRTSS instruction.  */
> +  COSTS_N_INSNS (18),                  /* cost of SQRTSD instruction.  */
> +  4, 4, 4, 4,                          /* reassoc int, fp, vec_int, vec_fp.  */
> +  shijidadao_memcpy,
> +  shijidadao_memset,
> +  COSTS_N_INSNS (3),                   /* cond_taken_branch_cost.  */
> +  COSTS_N_INSNS (1),                   /* cond_not_taken_branch_cost.  */
> +  "16:11:8",                           /* Loop alignment.  */
> +  "16:11:8",                           /* Jump alignment.  */
> +  "0:0:8",                             /* Label alignment.  */
> +  "16",                                /* Func alignment.  */
> +  4,                                   /* Small unroll limit.  */
> +  2,                                   /* Small unroll factor.  */
> +};
> +
> +
>
>  /* Generic should produce code tuned for Core-i7 (and newer chips)
>     and btver1 (and newer chips).  */
> diff --git a/gcc/config/i386/x86-tune-sched.cc b/gcc/config/i386/x86-tune-sched.cc
> index f70846e628e..d77298b0e34 100644
> --- a/gcc/config/i386/x86-tune-sched.cc
> +++ b/gcc/config/i386/x86-tune-sched.cc
> @@ -79,6 +79,7 @@ ix86_issue_rate (void)
>      case PROCESSOR_CANNONLAKE:
>      case PROCESSOR_ALDERLAKE:
>      case PROCESSOR_YONGFENG:
> +    case PROCESSOR_SHIJIDADAO:
>      case PROCESSOR_GENERIC:
>        return 4;
>
> @@ -446,6 +447,7 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
>        break;
>
>      case PROCESSOR_YONGFENG:
> +    case PROCESSOR_SHIJIDADAO:
>        /* Stack engine allows to execute push&pop instructions in parallel.  */
>        if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
>           && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
> diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
> index 0fa1484b48d..dbb2543c68f 100644
> --- a/gcc/config/i386/x86-tune.def
> +++ b/gcc/config/i386/x86-tune.def
> @@ -477,7 +477,7 @@ DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes",
>     elements.  */
>  DEF_TUNE (X86_TUNE_USE_GATHER_2PARTS, "use_gather_2parts",
>           ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_CORE_HYBRID
> -           | m_YONGFENG | m_CORE_ATOM | m_GENERIC | m_GDS))
> +           | m_YONGFENG | m_SHIJIDADAO | m_CORE_ATOM | m_GENERIC | m_GDS))
>
>  /* X86_TUNE_USE_SCATTER_2PARTS: Use scater instructions for vectors with 2
>     elements.  */
> @@ -488,7 +488,7 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_2PARTS, "use_scatter_2parts",
>     elements.  */
>  DEF_TUNE (X86_TUNE_USE_GATHER_4PARTS, "use_gather_4parts",
>           ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_CORE_HYBRID
> -           | m_YONGFENG | m_CORE_ATOM | m_GENERIC | m_GDS))
> +           | m_YONGFENG | m_SHIJIDADAO | m_CORE_ATOM | m_GENERIC | m_GDS))
>
>  /* X86_TUNE_USE_SCATTER_4PARTS: Use scater instructions for vectors with 4
>     elements.  */
> @@ -499,7 +499,7 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_4PARTS, "use_scatter_4parts",
>     elements.  */
>  DEF_TUNE (X86_TUNE_USE_GATHER_8PARTS, "use_gather_8parts",
>           ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_CORE_HYBRID | m_CORE_ATOM
> -           | m_YONGFENG | m_GENERIC | m_GDS))
> +           | m_YONGFENG | m_SHIJIDADAO | m_GENERIC | m_GDS))
>
>  /* X86_TUNE_USE_SCATTER: Use scater instructions for vectors with 8 or more
>     elements.  */
> @@ -509,7 +509,7 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_8PARTS, "use_scatter_8parts",
>  /* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or
>     smaller FMA chain.  */
>  DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4
> -          | m_YONGFENG | m_GENERIC)
> +          | m_YONGFENG | m_SHIJIDADAO | m_GENERIC)
>
>  /* X86_TUNE_AVOID_256FMA_CHAINS: Avoid creating loops with tight 256bit or
>     smaller FMA chain.  */
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index 8786249fb6f..11bb675830e 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -26153,6 +26153,9 @@ ZHAOXIN lujiazui CPU.
>  @item yongfeng
>  ZHAOXIN yongfeng CPU.
>
> +@item shijidadao
> +ZHAOXIN shijidadao CPU.
> +
>  @item amdfam10h
>  AMD Family 10h CPU.
>
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 218901c0b20..82bf07a83bb 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -34799,6 +34799,12 @@ SSE4.2, AVX, POPCNT, AES, PCLMUL, RDRND, XSAVE, XSAVEOPT, FSGSBASE, CX16,
>  ABM, BMI, BMI2, F16C, FXSR, RDSEED, AVX2, FMA, SHA, LZCNT
>  instruction set support.
>
> +@item shijidadao
> +ZHAOXIN shijidadao CPU with x86-64, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1,
> +SSE4.2, AVX, POPCNT, AES, PCLMUL, RDRND, XSAVE, XSAVEOPT, FSGSBASE, CX16,
> +ABM, BMI, BMI2, F16C, FXSR, RDSEED, AVX2, FMA, SHA, LZCNT
> +instruction set support.
> +
>  @item geode
>  AMD Geode embedded processor with MMX and 3DNow!@: instruction set support.
>  @end table
> diff --git a/gcc/testsuite/g++.target/i386/mv32.C b/gcc/testsuite/g++.target/i386/mv32.C
> index 6c993218d01..b311c35baa3 100644
> --- a/gcc/testsuite/g++.target/i386/mv32.C
> +++ b/gcc/testsuite/g++.target/i386/mv32.C
> @@ -21,6 +21,10 @@ int __attribute__ ((target("arch=yongfeng"))) foo () {
>    return 2;
>  }
>
> +int __attribute__ ((target("arch=shijidadao"))) foo () {
> +  return 3;
> +}
> +
>  int main ()
>  {
>    int val = foo ();
> @@ -29,6 +33,8 @@ int main ()
>      assert (val == 1);
>    else if (__builtin_cpu_is ("yongfeng"))
>      assert (val == 2);
> +  else if (__builtin_cpu_is ("shijidadao"))
> +    assert (val == 3);
>    else
>      assert (val == 0);
>
> diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
> index 2a50f5bf67c..c4dc89367ef 100644
> --- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc
> +++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
> @@ -208,6 +208,7 @@ extern void test_arch_arrowlake_s (void)    __attribute__((__target__("arch=arrowla
>  extern void test_arch_pantherlake (void)       __attribute__((__target__("arch=pantherlake")));
>  extern void test_arch_lujiazui (void)          __attribute__((__target__("arch=lujiazui")));
>  extern void test_arch_yongfeng (void)          __attribute__((__target__("arch=yongfeng")));
> +extern void test_arch_shijidadao (void)                __attribute__((__target__("arch=shijidadao")));
>  extern void test_arch_k8 (void)                        __attribute__((__target__("arch=k8")));
>  extern void test_arch_k8_sse3 (void)           __attribute__((__target__("arch=k8-sse3")));
>  extern void test_arch_opteron (void)           __attribute__((__target__("arch=opteron")));
> @@ -233,6 +234,7 @@ extern void test_tune_corei7_avx (void)             __attribute__((__target__("tune=corei7-
>  extern void test_tune_core_avx2 (void)         __attribute__((__target__("tune=core-avx2")));
>  extern void test_tune_lujiazui (void)          __attribute__((__target__("tune=lujiazui")));
>  extern void test_tune_yongfeng (void)          __attribute__((__target__("tune=yongfeng")));
> +extern void test_tune_shijidadao (void)                __attribute__((__target__("tune=shijidadao")));
>  extern void test_tune_k8 (void)                        __attribute__((__target__("tune=k8")));
>  extern void test_tune_k8_sse3 (void)           __attribute__((__target__("tune=k8-sse3")));
>  extern void test_tune_opteron (void)           __attribute__((__target__("tune=opteron")));
> --
> 2.27.0
>
diff mbox series

Patch

diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index 4610bf6d6a4..936039725ab 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ b/gcc/common/config/i386/cpuinfo.h
@@ -667,12 +667,18 @@  get_zhaoxin_cpu (struct __processor_model *cpu_model,
 	  reset_cpu_feature (cpu_model, cpu_features2, FEATURE_F16C);
 	  cpu_model->__cpu_subtype = ZHAOXIN_FAM7H_LUJIAZUI;
 	}
-     else if (model >= 0x5b)
+     else if (model == 0x5b)
 	{
 	  cpu = "yongfeng";
 	  CHECK___builtin_cpu_is ("yongfeng");
 	  cpu_model->__cpu_subtype = ZHAOXIN_FAM7H_YONGFENG;
 	}
+     else if (model >= 0x6b)
+	{
+	  cpu = "shijidadao";
+	  CHECK___builtin_cpu_is ("shijidadao");
+	  cpu_model->__cpu_subtype = ZHAOXIN_FAM7H_SHIJIDADAO;
+	}
       break;
     default:
       break;
diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
index 895e5fa662d..eb3f94c529c 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -2066,6 +2066,7 @@  const char *const processor_names[] =
   "intel",
   "lujiazui",
   "yongfeng",
+  "shijidadao",
   "geode",
   "k6",
   "athlon",
@@ -2271,10 +2272,13 @@  const pta processor_alias_table[] =
       | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR, 0, P_NONE},
   {"lujiazui", PROCESSOR_LUJIAZUI, CPU_LUJIAZUI,
 	PTA_LUJIAZUI,
-	M_CPU_SUBTYPE (ZHAOXIN_FAM7H_LUJIAZUI), P_NONE},
+	M_CPU_SUBTYPE (ZHAOXIN_FAM7H_LUJIAZUI), P_PROC_BMI},
   {"yongfeng", PROCESSOR_YONGFENG, CPU_YONGFENG,
 	PTA_YONGFENG,
-	M_CPU_SUBTYPE (ZHAOXIN_FAM7H_YONGFENG), P_NONE},
+	M_CPU_SUBTYPE (ZHAOXIN_FAM7H_YONGFENG), P_PROC_AVX2},
+  {"shijidadao", PROCESSOR_SHIJIDADAO, CPU_YONGFENG,
+	PTA_YONGFENG,
+	M_CPU_SUBTYPE (ZHAOXIN_FAM7H_SHIJIDADAO), P_PROC_AVX2},
   {"k8", PROCESSOR_K8, CPU_K8,
     PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
       | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR, 0, P_NONE},
diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h
index 9edad96d4fd..fa3b76f4931 100644
--- a/gcc/common/config/i386/i386-cpuinfo.h
+++ b/gcc/common/config/i386/i386-cpuinfo.h
@@ -104,6 +104,7 @@  enum processor_subtypes
   INTEL_COREI7_PANTHERLAKE,
   ZHAOXIN_FAM7H_YONGFENG,
   AMDFAM1AH_ZNVER5,
+  ZHAOXIN_FAM7H_SHIJIDADAO,
   CPU_SUBTYPE_MAX
 };
 
diff --git a/gcc/config.gcc b/gcc/config.gcc
index a37113bd00a..91a3a3152dc 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -711,9 +711,9 @@  atom slm nehalem westmere sandybridge ivybridge haswell broadwell bonnell \
 silvermont skylake-avx512 cannonlake icelake-client icelake-server \
 skylake goldmont goldmont-plus tremont cascadelake tigerlake cooperlake \
 sapphirerapids alderlake rocketlake eden-x2 nano nano-1000 nano-2000 nano-3000 \
-nano-x2 eden-x4 nano-x4 lujiazui yongfeng x86-64 x86-64-v2 x86-64-v3 x86-64-v4 \
-sierraforest graniterapids graniterapids-d grandridge arrowlake arrowlake-s \
-clearwaterforest pantherlake native"
+nano-x2 eden-x4 nano-x4 lujiazui yongfeng shijidadao x86-64 x86-64-v2 \
+x86-64-v3 x86-64-v4 sierraforest graniterapids graniterapids-d grandridge \
+arrowlake arrowlake-s clearwaterforest pantherlake native"
 
 # Additional x86 processors supported by --with-cpu=.  Each processor
 # MUST be separated by exactly one space.
@@ -3855,6 +3855,10 @@  case ${target} in
 	arch=yongfeng
 	cpu=yongfeng
 	;;
+      shijidadao-*)
+	arch=shijidadao
+	cpu=shijidadao
+	;;
       pentium2-*)
 	arch=pentium2
 	cpu=pentium2
@@ -3980,6 +3984,10 @@  case ${target} in
 	arch=yongfeng
 	cpu=yongfeng
 	;;
+      shijidadao-*)
+	arch=shijidadao
+	cpu=shijidadao
+	;;
       nocona-*)
 	arch=nocona
 	cpu=nocona
diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc
index 0176d8b6cd2..11470eaea12 100644
--- a/gcc/config/i386/driver-i386.cc
+++ b/gcc/config/i386/driver-i386.cc
@@ -558,10 +558,12 @@  const char *host_detect_local_cpu (int argc, const char **argv)
       switch (family)
 	{
 	case 7:
-	  if (model == 0x3b)
-	    processor = PROCESSOR_LUJIAZUI;
-	  else if (model >= 0x5b)
+	  if (model >= 0x6b)
+	    processor = PROCESSOR_SHIJIDADAO;
+	  else if (model == 0x5b)
 	    processor = PROCESSOR_YONGFENG;
+	  else if (model == 0x3b)
+	    processor = PROCESSOR_LUJIAZUI;
 	  break;
 	default:
 	  break;
@@ -853,6 +855,9 @@  const char *host_detect_local_cpu (int argc, const char **argv)
     case PROCESSOR_YONGFENG:
       cpu = "yongfeng";
       break;
+    case PROCESSOR_SHIJIDADAO:
+      cpu = "shijidadao";
+      break;
 
     default:
       /* Use something reasonable.  */
diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
index 7b0ad9e9181..403475d5b6b 100644
--- a/gcc/config/i386/i386-c.cc
+++ b/gcc/config/i386/i386-c.cc
@@ -156,6 +156,10 @@  ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
       def_or_undef (parse_in, "__yongfeng");
       def_or_undef (parse_in, "__yongfeng__");
       break;
+    case PROCESSOR_SHIJIDADAO:
+      def_or_undef (parse_in, "__shijidadao");
+      def_or_undef (parse_in, "__shijidadao__");
+      break;
     case PROCESSOR_PENTIUM4:
       def_or_undef (parse_in, "__pentium4");
       def_or_undef (parse_in, "__pentium4__");
@@ -386,6 +390,9 @@  ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
     case PROCESSOR_YONGFENG:
       def_or_undef (parse_in, "__tune_yongfeng__");
        break;
+    case PROCESSOR_SHIJIDADAO:
+      def_or_undef (parse_in, "__tune_shijidadao__");
+       break;
     case PROCESSOR_PENTIUM4:
       def_or_undef (parse_in, "__tune_pentium4__");
       break;
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index 78602a17f7e..2af869d0ecf 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -155,7 +155,8 @@  along with GCC; see the file COPYING3.  If not see
 
 #define m_LUJIAZUI (HOST_WIDE_INT_1U<<PROCESSOR_LUJIAZUI)
 #define m_YONGFENG (HOST_WIDE_INT_1U<<PROCESSOR_YONGFENG)
-#define m_ZHAOXIN  (m_LUJIAZUI | m_YONGFENG)
+#define m_SHIJIDADAO (HOST_WIDE_INT_1U<<PROCESSOR_SHIJIDADAO)
+#define m_ZHAOXIN  (m_LUJIAZUI | m_YONGFENG | m_SHIJIDADAO)
 
 #define m_GEODE (HOST_WIDE_INT_1U<<PROCESSOR_GEODE)
 #define m_K6 (HOST_WIDE_INT_1U<<PROCESSOR_K6)
@@ -793,6 +794,7 @@  static const struct processor_costs *processor_cost_table[] =
   &intel_cost,
   &lujiazui_cost,
   &yongfeng_cost,
+  &shijidadao_cost,
   &geode_cost,
   &k6_cost,
   &athlon_cost,
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 359a8408263..d8c225cacdd 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2301,6 +2301,7 @@  enum processor_type
   PROCESSOR_INTEL,
   PROCESSOR_LUJIAZUI,
   PROCESSOR_YONGFENG,
+  PROCESSOR_SHIJIDADAO,
   PROCESSOR_GEODE,
   PROCESSOR_K6,
   PROCESSOR_ATHLON,
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index 65d7d1f7e42..1060b0685a8 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -3644,6 +3644,122 @@  struct processor_costs yongfeng_cost = {
   2,					/* Small unroll factor.  */
 };
 
+/* shijidadao_cost should produce code tuned for ZHAOXIN shijidadao CPU.  */
+static stringop_algs shijidadao_memcpy[2] = {
+  {libcall, {{8, unrolled_loop, true}, {256, unrolled_loop, false},
+			 {-1, libcall, false}}},
+  {libcall, {{10, loop, true}, {256, unrolled_loop, false},
+			 {-1, libcall, false}}}};
+static stringop_algs shijidadao_memset[2] = {
+  {libcall, {{4, loop, true}, {128, unrolled_loop, false},
+			 {-1, libcall, false}}},
+  {libcall, {{1, rep_prefix_4_byte, false}, {14, loop, true},
+			 {1024, vector_loop, false},
+			 {-1, libcall, false}}}};
+static const
+struct processor_costs shijidadao_cost = {
+  {
+  /* Start of register allocator costs.  integer->integer move cost is 2.  */
+  8,				/* cost for loading QImode using movzbl.  */
+  {8, 8, 8},			/* cost of loading integer registers
+					   in QImode, HImode and SImode.
+					   Relative to reg-reg move (2).  */
+  {8, 8, 8},			/* cost of storing integer registers.  */
+  2,					/* cost of reg,reg fld/fst.  */
+  {8, 8, 8},			/* cost of loading fp registers
+				in SFmode, DFmode and XFmode.  */
+  {8, 8, 8},			/* cost of storing fp registers
+				in SFmode, DFmode and XFmode.  */
+  2,				/* cost of moving MMX register.  */
+  {8, 8},			/* cost of loading MMX registers
+				in SImode and DImode.  */
+  {8, 8},			/* cost of storing MMX registers
+				in SImode and DImode.  */
+  2, 3, 4,			/* cost of moving XMM,YMM,ZMM register.  */
+  {8, 8, 8, 10, 15},	/* cost of loading SSE registers
+				in 32,64,128,256 and 512-bit.  */
+  {8, 8, 8, 10, 15},	/* cost of storing SSE registers
+				in 32,64,128,256 and 512-bit.  */
+  8, 8,				/* SSE->integer and integer->SSE moves.  */
+  8, 8,				/* mask->integer and integer->mask moves.  */
+  {8, 8, 8},		/* cost of loading mask register
+				in QImode, HImode, SImode.  */
+  {8, 8, 8},		/* cost of storing mask register
+				in QImode, HImode, SImode.  */
+  2,				/* cost of moving mask register.  */
+  /* End of register allocator costs.  */
+  },
+
+  COSTS_N_INSNS (1),			/* cost of an add instruction.  */
+  COSTS_N_INSNS (1),		        /* cost of a lea instruction.  */
+  COSTS_N_INSNS (1),			/* variable shift costs.  */
+  COSTS_N_INSNS (1),			/* constant shift costs.  */
+  {COSTS_N_INSNS (2),			/* cost of starting multiply for QI.  */
+   COSTS_N_INSNS (3),			/*				 HI.  */
+   COSTS_N_INSNS (2),			/*				 SI.  */
+   COSTS_N_INSNS (2),			/*				 DI.  */
+   COSTS_N_INSNS (3)},		/*				 other.  */
+  0,				/* cost of multiply per each bit set.  */
+  {COSTS_N_INSNS (9),			/* cost of a divide/mod for QI.  */
+   COSTS_N_INSNS (10),			/*			    HI.  */
+   COSTS_N_INSNS (9),			/*			    SI.  */
+   COSTS_N_INSNS (50),			/*			    DI.  */
+   COSTS_N_INSNS (50)},		/*			    other.  */
+  COSTS_N_INSNS (1),			/* cost of movsx.  */
+  COSTS_N_INSNS (1),			/* cost of movzx.  */
+  8,					/* "large" insn.  */
+  17,					/* MOVE_RATIO.  */
+  6,					/* CLEAR_RATIO.  */
+  {8, 8, 8},				/* cost of loading integer registers
+					   in QImode, HImode and SImode.
+					   Relative to reg-reg move (2).  */
+  {8, 8, 8},			/* cost of storing integer registers.  */
+  {8, 8, 8, 12, 15},			/* cost of loading SSE register
+				in 32bit, 64bit, 128bit, 256bit and 512bit.  */
+  {8, 8, 8, 12, 15},			/* cost of storing SSE register
+				in 32bit, 64bit, 128bit, 256bit and 512bit.  */
+  {8, 8, 8, 12, 15},			/* cost of unaligned loads.  */
+  {8, 8, 8, 12, 15},			/* cost of unaligned stores.  */
+  2, 3, 4,			/* cost of moving XMM,YMM,ZMM register.  */
+  8,				/* cost of moving SSE register to integer.  */
+  18, 6,				/* Gather load static, per_elt.  */
+  18, 6,				/* Gather store static, per_elt.  */
+  32,				  	/* size of l1 cache.  */
+  256,					/* size of l2 cache.  */
+  64,					/* size of prefetch block.  */
+  12,					/* number of parallel prefetches.  */
+  3,					/* Branch cost.  */
+  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
+  COSTS_N_INSNS (3),			/* cost of FMUL instruction.  */
+  COSTS_N_INSNS (13),			/* cost of FDIV instruction.  */
+  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
+  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
+  COSTS_N_INSNS (44),			/* cost of FSQRT instruction.  */
+
+  COSTS_N_INSNS (1),			/* cost of cheap SSE instruction.  */
+  COSTS_N_INSNS (3),			/* cost of ADDSS/SD SUBSS/SD insns.  */
+  COSTS_N_INSNS (3),			/* cost of MULSS instruction.  */
+  COSTS_N_INSNS (3),			/* cost of MULSD instruction.  */
+  COSTS_N_INSNS (5),			/* cost of FMA SS instruction.  */
+  COSTS_N_INSNS (5),			/* cost of FMA SD instruction.  */
+  COSTS_N_INSNS (11),			/* cost of DIVSS instruction.  */
+  COSTS_N_INSNS (14),			/* cost of DIVSD instruction.  */
+  COSTS_N_INSNS (11),			/* cost of SQRTSS instruction.  */
+  COSTS_N_INSNS (18),			/* cost of SQRTSD instruction.  */
+  4, 4, 4, 4,				/* reassoc int, fp, vec_int, vec_fp.  */
+  shijidadao_memcpy,
+  shijidadao_memset,
+  COSTS_N_INSNS (3),			/* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),			/* cond_not_taken_branch_cost.  */
+  "16:11:8",				/* Loop alignment.  */
+  "16:11:8",				/* Jump alignment.  */
+  "0:0:8",				/* Label alignment.  */
+  "16",				/* Func alignment.  */
+  4,					/* Small unroll limit.  */
+  2,					/* Small unroll factor.  */
+};
+
+
 
 /* Generic should produce code tuned for Core-i7 (and newer chips)
    and btver1 (and newer chips).  */
diff --git a/gcc/config/i386/x86-tune-sched.cc b/gcc/config/i386/x86-tune-sched.cc
index f70846e628e..d77298b0e34 100644
--- a/gcc/config/i386/x86-tune-sched.cc
+++ b/gcc/config/i386/x86-tune-sched.cc
@@ -79,6 +79,7 @@  ix86_issue_rate (void)
     case PROCESSOR_CANNONLAKE:
     case PROCESSOR_ALDERLAKE:
     case PROCESSOR_YONGFENG:
+    case PROCESSOR_SHIJIDADAO:
     case PROCESSOR_GENERIC:
       return 4;
 
@@ -446,6 +447,7 @@  ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
       break;
 
     case PROCESSOR_YONGFENG:
+    case PROCESSOR_SHIJIDADAO:
       /* Stack engine allows to execute push&pop instructions in parallel.  */
       if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
 	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 0fa1484b48d..dbb2543c68f 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -477,7 +477,7 @@  DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes",
    elements.  */
 DEF_TUNE (X86_TUNE_USE_GATHER_2PARTS, "use_gather_2parts",
 	  ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_CORE_HYBRID
-	    | m_YONGFENG | m_CORE_ATOM | m_GENERIC | m_GDS))
+	    | m_YONGFENG | m_SHIJIDADAO | m_CORE_ATOM | m_GENERIC | m_GDS))
 
 /* X86_TUNE_USE_SCATTER_2PARTS: Use scater instructions for vectors with 2
    elements.  */
@@ -488,7 +488,7 @@  DEF_TUNE (X86_TUNE_USE_SCATTER_2PARTS, "use_scatter_2parts",
    elements.  */
 DEF_TUNE (X86_TUNE_USE_GATHER_4PARTS, "use_gather_4parts",
 	  ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_CORE_HYBRID
-	    | m_YONGFENG | m_CORE_ATOM | m_GENERIC | m_GDS))
+	    | m_YONGFENG | m_SHIJIDADAO | m_CORE_ATOM | m_GENERIC | m_GDS))
 
 /* X86_TUNE_USE_SCATTER_4PARTS: Use scater instructions for vectors with 4
    elements.  */
@@ -499,7 +499,7 @@  DEF_TUNE (X86_TUNE_USE_SCATTER_4PARTS, "use_scatter_4parts",
    elements.  */
 DEF_TUNE (X86_TUNE_USE_GATHER_8PARTS, "use_gather_8parts",
 	  ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_CORE_HYBRID | m_CORE_ATOM
-	    | m_YONGFENG | m_GENERIC | m_GDS))
+	    | m_YONGFENG | m_SHIJIDADAO | m_GENERIC | m_GDS))
 
 /* X86_TUNE_USE_SCATTER: Use scater instructions for vectors with 8 or more
    elements.  */
@@ -509,7 +509,7 @@  DEF_TUNE (X86_TUNE_USE_SCATTER_8PARTS, "use_scatter_8parts",
 /* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or
    smaller FMA chain.  */
 DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4
-          | m_YONGFENG | m_GENERIC)
+          | m_YONGFENG | m_SHIJIDADAO | m_GENERIC)
 
 /* X86_TUNE_AVOID_256FMA_CHAINS: Avoid creating loops with tight 256bit or
    smaller FMA chain.  */
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 8786249fb6f..11bb675830e 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -26153,6 +26153,9 @@  ZHAOXIN lujiazui CPU.
 @item yongfeng
 ZHAOXIN yongfeng CPU.
 
+@item shijidadao
+ZHAOXIN shijidadao CPU.
+
 @item amdfam10h
 AMD Family 10h CPU.
 
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 218901c0b20..82bf07a83bb 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -34799,6 +34799,12 @@  SSE4.2, AVX, POPCNT, AES, PCLMUL, RDRND, XSAVE, XSAVEOPT, FSGSBASE, CX16,
 ABM, BMI, BMI2, F16C, FXSR, RDSEED, AVX2, FMA, SHA, LZCNT
 instruction set support.
 
+@item shijidadao
+ZHAOXIN shijidadao CPU with x86-64, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1,
+SSE4.2, AVX, POPCNT, AES, PCLMUL, RDRND, XSAVE, XSAVEOPT, FSGSBASE, CX16,
+ABM, BMI, BMI2, F16C, FXSR, RDSEED, AVX2, FMA, SHA, LZCNT
+instruction set support.
+
 @item geode
 AMD Geode embedded processor with MMX and 3DNow!@: instruction set support.
 @end table
diff --git a/gcc/testsuite/g++.target/i386/mv32.C b/gcc/testsuite/g++.target/i386/mv32.C
index 6c993218d01..b311c35baa3 100644
--- a/gcc/testsuite/g++.target/i386/mv32.C
+++ b/gcc/testsuite/g++.target/i386/mv32.C
@@ -21,6 +21,10 @@  int __attribute__ ((target("arch=yongfeng"))) foo () {
   return 2;
 }
 
+int __attribute__ ((target("arch=shijidadao"))) foo () {
+  return 3;
+}
+
 int main ()
 {
   int val = foo ();
@@ -29,6 +33,8 @@  int main ()
     assert (val == 1);
   else if (__builtin_cpu_is ("yongfeng"))
     assert (val == 2);
+  else if (__builtin_cpu_is ("shijidadao"))
+    assert (val == 3);
   else
     assert (val == 0);
 
diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
index 2a50f5bf67c..c4dc89367ef 100644
--- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc
+++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc
@@ -208,6 +208,7 @@  extern void test_arch_arrowlake_s (void)	__attribute__((__target__("arch=arrowla
 extern void test_arch_pantherlake (void)	__attribute__((__target__("arch=pantherlake")));
 extern void test_arch_lujiazui (void)		__attribute__((__target__("arch=lujiazui")));
 extern void test_arch_yongfeng (void)		__attribute__((__target__("arch=yongfeng")));
+extern void test_arch_shijidadao (void)		__attribute__((__target__("arch=shijidadao")));
 extern void test_arch_k8 (void)			__attribute__((__target__("arch=k8")));
 extern void test_arch_k8_sse3 (void)		__attribute__((__target__("arch=k8-sse3")));
 extern void test_arch_opteron (void)		__attribute__((__target__("arch=opteron")));
@@ -233,6 +234,7 @@  extern void test_tune_corei7_avx (void)		__attribute__((__target__("tune=corei7-
 extern void test_tune_core_avx2 (void)		__attribute__((__target__("tune=core-avx2")));
 extern void test_tune_lujiazui (void)		__attribute__((__target__("tune=lujiazui")));
 extern void test_tune_yongfeng (void)		__attribute__((__target__("tune=yongfeng")));
+extern void test_tune_shijidadao (void)		__attribute__((__target__("tune=shijidadao")));
 extern void test_tune_k8 (void)			__attribute__((__target__("tune=k8")));
 extern void test_tune_k8_sse3 (void)		__attribute__((__target__("tune=k8-sse3")));
 extern void test_tune_opteron (void)		__attribute__((__target__("tune=opteron")));