Message ID | AANLkTi=CgGQLzBbOkwB4rKZYTcwmKCKLdif942P_0X5F@mail.gmail.com |
---|---|
State | New |
Headers | show |
On Fri, Oct 15, 2010 at 1:45 PM, Quentin Neill <quentin.neill.gnu@gmail.com> wrote: > These patches add support for upcoming bdver2 AMD processors: > BMI (Bit Manipulation Instructions) > TBM (Trailing Bit Manipulation) > FMA3 (three operand FMA) instructions > > The public specifications for BMI and TBM are in progress (they are > today available under NDA). They will appear in one of the AMD64 > Architecture Programmer's Manual Volumes 3-6. I can post the > mnemonics definitions if needed. The FMA3 specification is documented > in http://software.intel.com/en-us/avx/ > > > 2010-10-15 Quentin Neill <quentin.neill.gnu@amd.com> > > gcc/ > * config.gcc (i[34567]86-*-*): Include tbmintrin.h. > (x86_64-*-*): Likewise. > > * config/i386/cpuid.h: Define TBM bit. > > * config/i386/driver-i386.c (host_detect_local_cpu): Define > and set has_tbm. > > * config/i386/i386-c.c (ix86_target_macros_internal): Check > isa_flag for TBM. > > * config/i386/i386.c (OPTION_MASK_ISA_TBM_SET): New. > (OPTION_MASK_ISA_TBM_UNSET): New. > (ix86_handle_option): Handle -mtbm. > (isa_opts): Add -mtbm. > (enum pta_flags): Add PTA_TBM. > (ix86_option_override_internal): Add TBM support. > (ix86_valid_target_attribute_inner_p): Handle -mtbm. > (IX86_BUILTIN_BEXTRI32): New for TBM intrinsic. > (IX86_BUILTIN_BEXTRI64): Likewise. > (IX86_BUILTIN_BLCFILL32): Likewise. > (IX86_BUILTIN_BLCFILL64): Likewise. > (IX86_BUILTIN_BLCI32): Likewise. > (IX86_BUILTIN_BLCI64): Likewise. > (IX86_BUILTIN_BLCIC32): Likewise. > (IX86_BUILTIN_BLCIC64): Likewise. > (IX86_BUILTIN_BLCMSK32): Likewise. > (IX86_BUILTIN_BLCMSK64): Likewise. > (IX86_BUILTIN_BLCS32): Likewise. > (IX86_BUILTIN_BLCS64): Likewise. > (IX86_BUILTIN_BLSFILL32): Likewise. > (IX86_BUILTIN_BLSFILL64): Likewise. > (IX86_BUILTIN_BLSIC32): Likewise. > (IX86_BUILTIN_BLSIC64): Likewise. > (IX86_BUILTIN_T1MSKC32): Likewise. > (IX86_BUILTIN_T1MSKC64): Likewise. > (IX86_BUILTIN_TZMSK32): Likewise. > (IX86_BUILTIN_TZMSK64): Likewise. 
> (bdesc_args): Add TBM intrinsics. > (ix86_expand_builtin): Add TBM specific cases for > BEXTR immediate operands. > > * config/i386/i386.h (TARGET_TBM): New for TBM. > > * config/i386/i386.md (UNSPEC_BEXTRI): New for TBM. > (UNSPEC_BLCFILL): Likewise. > (UNSPEC_BLCI): Likewise. > (UNSPEC_BLCIC): Likewise. > (UNSPEC_BLCMSK): Likewise. > (UNSPEC_BLCS): Likewise. > (UNSPEC_BLSFILL): Likewise. > (UNSPEC_BLSIC): Likewise. > (UNSPEC_T1MSKC): Likewise. > (UNSPEC_TZMSK): Likewise. > (tbm_bextri<mode>): Likewise. > (tbm_blcfill<mode>): Likewise. > (tbm_blci<mode>): Likewise. > (tbm_blcic<mode>): Likewise. > (tbm_blcmsk<mode>): Likewise. > (tbm_blcs<mode>): Likewise. > (tbm_blsfill<mode>): Likewise. > (tbm_blsic<mode>): Likewise. > (tbm_t1mskc<mode>): Likewise. > (tbm_tzmsk<mode>): Likewise. > (bsr_rex64): Likewise. > > * config/i386/i386.opt: Add -mtbm. > > * config/i386/tbmintrin.h (__bextri_u32): New. > (__blcfill_u32): Likewise. > (__blsfill_u32): Likewise. > (__blcs_u32): Likewise. > (__tzmsk_u32): Likewise. > (__blcic_u32): Likewise. > (__blsic_u32): Likewise. > (__t1mskc_u32): Likewise. > (__blcmsk_u32): Likewise. > (__blci_u32): Likewise. > (__bextri_u64): Likewise. > (__blcfill_u64): Likewise. > (__blsfill_u64): Likewise. > (__blcs_u64): Likewise. > (__tzmsk_u64): Likewise. > (__blcic_u64): Likewise. > (__blsic_u64): Likewise. > (__t1mskc_u64): Likewise. > (__blcmsk_u64): Likewise. > (__blci_u64): Likewise. > > * config/i386/x86intrin.h: Add TBM check and tbmintrin.h. > > * doc/invoke.texi: Document -mtbm. > > * doc/extend.texi: Document TBM built-in functions. > > gcc/testsuite/ > > * g++.dg/other/i386-2.C: Add -mtbm. > > * g++.dg/other/i386-3.C: Likewise. > > * gcc.target/i386/funcspec-5.c: Add tbm and no-tbm targets. > > * gcc.target/i386/funcspec-6.c: Likewise. > > * gcc.target/i386/sse-12.c: Add -mtbm. > > * gcc.target/i386/sse-13.c: Add -mtbm and test immediate > operand intrinsics. > > * gcc.target/i386/sse-14.c: Likewise. 
> > * gcc.target/i386/sse-22.c: Likewise. > > * gcc.target/i386/sse-23.c: Likewise. > > * gcc.target/i386/tbm-1.c: New file. > > * gcc.target/i386/tbm-2.c: Likewise. > > > diff --git a/gcc/config.gcc b/gcc/config.gcc > index 4034241..f923990 100644 > --- a/gcc/config.gcc > +++ b/gcc/config.gcc > @@ -318,7 +318,7 @@ i[34567]86-*-*) > nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h > immintrin.h x86intrin.h avxintrin.h xopintrin.h > ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h > - abmintrin.h bmiintrin.h" > + abmintrin.h bmiintrin.h tbmintrin.h" > ;; > x86_64-*-*) > cpu_type=i386 > @@ -329,7 +329,7 @@ x86_64-*-*) > nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h > immintrin.h x86intrin.h avxintrin.h xopintrin.h > ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h > - abmintrin.h bmiintrin.h" > + abmintrin.h bmiintrin.h tbmintrin.h" > need_64bit_hwint=yes > ;; > ia64-*-*) > diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h > index 0f1af7f..e9d0fab 100644 > --- a/gcc/config/i386/cpuid.h > +++ b/gcc/config/i386/cpuid.h > @@ -54,6 +54,7 @@ > #define bit_XOP (1 << 11) > #define bit_LWP (1 << 15) > #define bit_FMA4 (1 << 16) > +#define bit_TBM (1 << 21) > > /* %edx */ > #define bit_LM (1 << 29) > diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c > index a7d6808..15d3284 100644 > --- a/gcc/config/i386/driver-i386.c > +++ b/gcc/config/i386/driver-i386.c > @@ -397,7 +397,7 @@ const char *host_detect_local_cpu (int argc, const > char **argv) > unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0; > unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0; > unsigned int has_fma4 = 0, has_xop = 0; > - unsigned int has_bmi = 0; > + unsigned int has_bmi = 0, has_tbm = 0; > > bool arch; > > @@ -464,6 +464,7 @@ const char *host_detect_local_cpu (int argc, const > char **argv) > has_lwp = ecx & bit_LWP; > has_fma4 = ecx & bit_FMA4; > has_xop = ecx & bit_XOP; > + has_tbm = ecx & bit_TBM; > > has_longmode = edx & bit_LM; > has_3dnowp 
= edx & bit_3DNOWP; > diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c > index e84347c..666e77e 100644 > --- a/gcc/config/i386/i386-c.c > +++ b/gcc/config/i386/i386-c.c > @@ -246,6 +246,8 @@ ix86_target_macros_internal (int isa_flag, > def_or_undef (parse_in, "__ABM__"); > if (isa_flag & OPTION_MASK_ISA_BMI) > def_or_undef (parse_in, "__BMI__"); > + if (isa_flag & OPTION_MASK_ISA_TBM) > + def_or_undef (parse_in, "__TBM__"); > if (isa_flag & OPTION_MASK_ISA_POPCNT) > def_or_undef (parse_in, "__POPCNT__"); > if (isa_flag & OPTION_MASK_ISA_FSGSBASE) > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > index e003ee7..ac0772e 100644 > --- a/gcc/config/i386/i386.c > +++ b/gcc/config/i386/i386.c > @@ -2080,6 +2080,7 @@ static int ix86_isa_flags_explicit; > (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT) > > #define OPTION_MASK_ISA_BMI_SET OPTION_MASK_ISA_BMI > +#define OPTION_MASK_ISA_TBM_SET OPTION_MASK_ISA_TBM > > #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT > #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16 > @@ -2136,6 +2137,7 @@ static int ix86_isa_flags_explicit; > #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL > #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM > #define OPTION_MASK_ISA_BMI_UNSET OPTION_MASK_ISA_BMI > +#define OPTION_MASK_ISA_TBM_UNSET OPTION_MASK_ISA_TBM > #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT > #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16 > #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF > @@ -2446,6 +2448,20 @@ ix86_handle_option (size_t code, const char > *arg ATTRIBUTE_UNUSED, int value) > } > return true; > > + case OPT_mtbm: > + if (value) > + { > + ix86_isa_flags |= OPTION_MASK_ISA_TBM_SET; > + ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_SET; > + } > + else > + { > + ix86_isa_flags &= ~OPTION_MASK_ISA_TBM_UNSET; > + ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_UNSET; > + } > + return true; > + > + > case OPT_mpopcnt: > if (value) > { 
> @@ -2615,6 +2631,7 @@ ix86_target_string (int isa, int flags, const > char *arch, const char *tune, > { "-mmmx", OPTION_MASK_ISA_MMX }, > { "-mabm", OPTION_MASK_ISA_ABM }, > { "-mbmi", OPTION_MASK_ISA_BMI }, > + { "-mtbm", OPTION_MASK_ISA_TBM }, > { "-mpopcnt", OPTION_MASK_ISA_POPCNT }, > { "-mmovbe", OPTION_MASK_ISA_MOVBE }, > { "-mcrc32", OPTION_MASK_ISA_CRC32 }, > @@ -2871,6 +2888,7 @@ ix86_option_override_internal (bool main_args_p) > PTA_RDRND = 1 << 25, > PTA_F16C = 1 << 26, > PTA_BMI = 1 << 27, > + PTA_TBM = 1 << 28, > /* if this reaches 32, need to widen struct pta flags below */ > }; > > @@ -3206,6 +3224,9 @@ ix86_option_override_internal (bool main_args_p) > if (processor_alias_table[i].flags & PTA_BMI > && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI)) > ix86_isa_flags |= OPTION_MASK_ISA_BMI; > + if (processor_alias_table[i].flags & PTA_TBM > + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM)) > + ix86_isa_flags |= OPTION_MASK_ISA_TBM; > if (processor_alias_table[i].flags & PTA_CX16 > && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16)) > ix86_isa_flags |= OPTION_MASK_ISA_CX16; > @@ -3951,6 +3972,7 @@ ix86_valid_target_attribute_inner_p (tree args, > char *p_strings[]) > IX86_ATTR_ISA ("3dnow", OPT_m3dnow), > IX86_ATTR_ISA ("abm", OPT_mabm), > IX86_ATTR_ISA ("bmi", OPT_mbmi), > + IX86_ATTR_ISA ("tbm", OPT_mtbm), > IX86_ATTR_ISA ("aes", OPT_maes), > IX86_ATTR_ISA ("avx", OPT_mavx), > IX86_ATTR_ISA ("mmx", OPT_mmmx), > @@ -22992,6 +23014,28 @@ enum ix86_builtins > IX86_BUILTIN_TZCNT32, > IX86_BUILTIN_TZCNT64, > > + /* TBM instructions. 
*/ > + IX86_BUILTIN_BEXTRI32, > + IX86_BUILTIN_BEXTRI64, > + IX86_BUILTIN_BLCFILL32, > + IX86_BUILTIN_BLCFILL64, > + IX86_BUILTIN_BLCI32, > + IX86_BUILTIN_BLCI64, > + IX86_BUILTIN_BLCIC32, > + IX86_BUILTIN_BLCIC64, > + IX86_BUILTIN_BLCMSK32, > + IX86_BUILTIN_BLCMSK64, > + IX86_BUILTIN_BLCS32, > + IX86_BUILTIN_BLCS64, > + IX86_BUILTIN_BLSFILL32, > + IX86_BUILTIN_BLSFILL64, > + IX86_BUILTIN_BLSIC32, > + IX86_BUILTIN_BLSIC64, > + IX86_BUILTIN_T1MSKC32, > + IX86_BUILTIN_T1MSKC64, > + IX86_BUILTIN_TZMSK32, > + IX86_BUILTIN_TZMSK64, > + > /* FSGSBASE instructions. */ > IX86_BUILTIN_RDFSBASE32, > IX86_BUILTIN_RDFSBASE64, > @@ -23946,6 +23990,28 @@ static const struct builtin_description bdesc_args[] = > { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_blsrsi, > "__builtin_ia32_blsr_u32", IX86_BUILTIN_BLSR32, UNKNOWN, (int) > UINT_FTYPE_UINT }, > { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_blsrdi, > "__builtin_ia32_blsr_u64", IX86_BUILTIN_BLSR64, UNKNOWN, (int) > UINT64_FTYPE_UINT64 }, > > + /* TBM */ > + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextrisi, > "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) > UINT_FTYPE_UINT_UINT }, > + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextridi, > "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) > UINT64_FTYPE_UINT64_UINT64 }, > + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcfillsi, > "__builtin_ia32_blcfill_u32", IX86_BUILTIN_BLCFILL32, UNKNOWN, (int) > UINT_FTYPE_UINT }, > + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcfilldi, > "__builtin_ia32_blcfill_u64", IX86_BUILTIN_BLCFILL64, UNKNOWN, (int) > UINT64_FTYPE_UINT64 }, > + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcisi, > "__builtin_ia32_blci_u32", IX86_BUILTIN_BLCI32, UNKNOWN, (int) > UINT_FTYPE_UINT }, > + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcidi, > "__builtin_ia32_blci_u64", IX86_BUILTIN_BLCI64, UNKNOWN, (int) > UINT64_FTYPE_UINT64 }, > + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcicsi, > "__builtin_ia32_blcic_u32", IX86_BUILTIN_BLCIC32, UNKNOWN, (int) > UINT_FTYPE_UINT }, > + { 
OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcicdi, > "__builtin_ia32_blcic_u64", IX86_BUILTIN_BLCIC64, UNKNOWN, (int) > UINT64_FTYPE_UINT64 }, > + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcmsksi, > "__builtin_ia32_blcmsk_u32", IX86_BUILTIN_BLCMSK32, UNKNOWN, (int) > UINT_FTYPE_UINT }, > + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcmskdi, > "__builtin_ia32_blcmsk_u64", IX86_BUILTIN_BLCMSK64, UNKNOWN, (int) > UINT64_FTYPE_UINT64 }, > + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcssi, > "__builtin_ia32_blcs_u32", IX86_BUILTIN_BLCS32, UNKNOWN, (int) > UINT_FTYPE_UINT }, > + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcsdi, > "__builtin_ia32_blcs_u64", IX86_BUILTIN_BLCS64, UNKNOWN, (int) > UINT64_FTYPE_UINT64 }, > + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blsfillsi, > "__builtin_ia32_blsfill_u32", IX86_BUILTIN_BLSFILL32, UNKNOWN, (int) > UINT_FTYPE_UINT }, > + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blsfilldi, > "__builtin_ia32_blsfill_u64", IX86_BUILTIN_BLSFILL64, UNKNOWN, (int) > UINT64_FTYPE_UINT64 }, > + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blsicsi, > "__builtin_ia32_blsic_u32", IX86_BUILTIN_BLSIC32, UNKNOWN, (int) > UINT_FTYPE_UINT }, > + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blsicdi, > "__builtin_ia32_blsic_u64", IX86_BUILTIN_BLSIC64, UNKNOWN, (int) > UINT64_FTYPE_UINT64 }, > + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_t1mskcsi, > "__builtin_ia32_t1mskc_u32", IX86_BUILTIN_T1MSKC32, UNKNOWN, (int) > UINT_FTYPE_UINT }, > + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_t1mskcdi, > "__builtin_ia32_t1mskc_u64", IX86_BUILTIN_T1MSKC64, UNKNOWN, (int) > UINT64_FTYPE_UINT64 }, > + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_tzmsksi, > "__builtin_ia32_tzmsk_u32", IX86_BUILTIN_TZMSK32, UNKNOWN, (int) > UINT_FTYPE_UINT }, > + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_tzmskdi, > "__builtin_ia32_tzmsk_u64", IX86_BUILTIN_TZMSK64, UNKNOWN, (int) > UINT64_FTYPE_UINT64 }, > + > /* F16C */ > { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, > "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) > V4SF_FTYPE_V8HI }, > { OPTION_MASK_ISA_F16C, 
CODE_FOR_vcvtph2ps256, > "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, > (int) V8SF_FTYPE_V8HI }, > @@ -26057,6 +26123,25 @@ ix86_expand_builtin (tree exp, rtx target, > rtx subtarget ATTRIBUTE_UNUSED, > emit_insn (gen_lwp_slwpcb (target)); > return target; > > + case IX86_BUILTIN_BEXTRI32: > + case IX86_BUILTIN_BEXTRI64: > + arg0 = CALL_EXPR_ARG (exp, 0); > + arg1 = CALL_EXPR_ARG (exp, 1); > + op0 = expand_normal (arg0); > + op1 = expand_normal (arg1); > + icode = (fcode == IX86_BUILTIN_BEXTRI32 > + ? CODE_FOR_tbm_bextrisi > + : CODE_FOR_tbm_bextridi); > + if (!CONST_INT_P (op1)) > + { > + error ("last argument must be an immediate"); > + enum machine_mode tmode = insn_data[icode].operand[0].mode; > + return gen_reg_rtx(tmode); > + } > + pat = GEN_FCN (icode) (target, op0, op1); > + if (pat) emit_insn (pat); > + return target; > + > default: > break; > } > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > index 4fba57d..3518bec 100644 > --- a/gcc/config/i386/i386.h > +++ b/gcc/config/i386/i386.h > @@ -60,6 +60,7 @@ see the files COPYING3 and COPYING.RUNTIME > respectively. If not, see > #define TARGET_ROUND OPTION_ISA_ROUND > #define TARGET_ABM OPTION_ISA_ABM > #define TARGET_BMI OPTION_ISA_BMI > +#define TARGET_TBM OPTION_ISA_TBM > #define TARGET_POPCNT OPTION_ISA_POPCNT > #define TARGET_SAHF OPTION_ISA_SAHF > #define TARGET_MOVBE OPTION_ISA_MOVBE > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index 967886d..ab588e2 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -237,6 +237,18 @@ > UNSPEC_BLSMSK > UNSPEC_BLSR > UNSPEC_TZCNT > + > + ;; For TBM support > + UNSPEC_BEXTRI > + UNSPEC_BLCFILL > + UNSPEC_BLCI > + UNSPEC_BLCIC > + UNSPEC_BLCMSK > + UNSPEC_BLCS > + UNSPEC_BLSFILL > + UNSPEC_BLSIC > + UNSPEC_T1MSKC > + UNSPEC_TZMSK > ]) > > (define_c_enum "unspecv" [ > @@ -11916,6 +11928,99 @@ > [(set_attr "type" "bitmanip") > (set_attr "mode" "<MODE>")]) > > +;; TBM instructions. 
> +(define_insn "tbm_bextri<mode>" > + [(set (match_operand:SWI48 0 "register_operand" "=r") > + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm") > + (match_operand:SI 2 "const_0_to_31_operand" "n")] > + UNSPEC_BEXTRI))] > + "TARGET_TBM" > + "bextr\t{%2, %1, %0|%0, %1, %2}" > + [(set_attr "type" "bitmanip") > + (set_attr "mode" "<MODE>")]) > + > +(define_insn "tbm_blcfill<mode>" > + [(set (match_operand:SWI48 0 "register_operand" "=r") > + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] > + UNSPEC_BLCFILL))] > + "TARGET_TBM" > + "blcfill\t{%1, %0|%0, %1}" > + [(set_attr "type" "bitmanip") > + (set_attr "mode" "<MODE>")]) > + > +(define_insn "tbm_blci<mode>" > + [(set (match_operand:SWI48 0 "register_operand" "=r") > + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] > + UNSPEC_BLCI))] > + "TARGET_TBM" > + "blci\t{%1, %0|%0, %1}" > + [(set_attr "type" "bitmanip") > + (set_attr "mode" "<MODE>")]) > + > +(define_insn "tbm_blcic<mode>" > + [(set (match_operand:SWI48 0 "register_operand" "=r") > + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] > + UNSPEC_BLCIC))] > + "TARGET_TBM" > + "blcic\t{%1, %0|%0, %1}" > + [(set_attr "type" "bitmanip") > + (set_attr "mode" "<MODE>")]) > + > +(define_insn "tbm_blcmsk<mode>" > + [(set (match_operand:SWI48 0 "register_operand" "=r") > + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] > + UNSPEC_BLCMSK))] > + "TARGET_TBM" > + "blcmsk\t{%1, %0|%0, %1}" > + [(set_attr "type" "bitmanip") > + (set_attr "mode" "<MODE>")]) > + > +(define_insn "tbm_blcs<mode>" > + [(set (match_operand:SWI48 0 "register_operand" "=r") > + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] > + UNSPEC_BLCS))] > + "TARGET_TBM" > + "blcs\t{%1, %0|%0, %1}" > + [(set_attr "type" "bitmanip") > + (set_attr "mode" "<MODE>")]) > + > +(define_insn "tbm_blsfill<mode>" > + [(set (match_operand:SWI48 0 "register_operand" "=r") > + (unspec:SWI48 
[(match_operand:SWI48 1 "nonimmediate_operand" "rm")] > + UNSPEC_BLSFILL))] > + "TARGET_TBM" > + "blsfill\t{%1, %0|%0, %1}" > + [(set_attr "type" "bitmanip") > + (set_attr "mode" "<MODE>")]) > + > +(define_insn "tbm_blsic<mode>" > + [(set (match_operand:SWI48 0 "register_operand" "=r") > + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] > + UNSPEC_BLSIC))] > + "TARGET_TBM" > + "blsic\t{%1, %0|%0, %1}" > + [(set_attr "type" "bitmanip") > + (set_attr "mode" "<MODE>")]) > + > +(define_insn "tbm_t1mskc<mode>" > + [(set (match_operand:SWI48 0 "register_operand" "=r") > + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] > + UNSPEC_T1MSKC))] > + "TARGET_TBM" > + "t1mskc\t{%1, %0|%0, %1}" > + [(set_attr "type" "bitmanip") > + (set_attr "mode" "<MODE>")]) > + > +(define_insn "tbm_tzmsk<mode>" > + [(set (match_operand:SWI48 0 "register_operand" "=r") > + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] > + UNSPEC_TZMSK))] > + "TARGET_TBM" > + "tzmsk\t{%1, %0|%0, %1}" > + [(set_attr "type" "bitmanip") > + (set_attr "mode" "<MODE>")]) > + > + > (define_insn "bsr_rex64" > [(set (match_operand:DI 0 "register_operand" "=r") > (minus:DI (const_int 63) > diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt > index d808804..33014f4 100644 > --- a/gcc/config/i386/i386.opt > +++ b/gcc/config/i386/i386.opt > @@ -357,6 +357,10 @@ mbmi > Target Report Mask(ISA_BMI) Var(ix86_isa_flags) Save > Support BMI built-in functions and code generation > > +mtbm > +Target Report Mask(ISA_TBM) Var(ix86_isa_flags) Save > +Support TBM built-in functions and code generation > + > mcx16 > Target Report Mask(ISA_CX16) Var(ix86_isa_flags) Save > Support code generation of cmpxchg16b instruction. 
> diff --git a/gcc/config/i386/tbmintrin.h b/gcc/config/i386/tbmintrin.h > new file mode 100644 > index 0000000..7a623ef > --- /dev/null > +++ b/gcc/config/i386/tbmintrin.h > @@ -0,0 +1,171 @@ > +/* Copyright (C) 2010 Free Software Foundation, Inc. > + > + This file is part of GCC. > + > + GCC is free software; you can redistribute it and/or modify > + it under the terms of the GNU General Public License as published by > + the Free Software Foundation; either version 3, or (at your option) > + any later version. > + > + GCC is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + GNU General Public License for more details. > + > + Under Section 7 of GPL version 3, you are granted additional > + permissions described in the GCC Runtime Library Exception, version > + 3.1, as published by the Free Software Foundation. > + > + You should have received a copy of the GNU General Public License and > + a copy of the GCC Runtime Library Exception along with this program; > + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see > + <http://www.gnu.org/licenses/>. */ > + > +#ifndef _X86INTRIN_H_INCLUDED > +# error "Never use <tbmintrin.h> directly; include <x86intrin.h> instead." 
> +#endif > + > +#ifndef __TBM__ > +# error "TBM instruction set not enabled" > +#endif /* __TBM__ */ > + > +#ifndef _TBMINTRIN_H_INCLUDED > +#define _TBMINTRIN_H_INCLUDED > + > +#ifdef __OPTIMIZE__ > +extern __inline unsigned int __attribute__((__gnu_inline__, > __always_inline__, __artificial__)) > +__bextri_u32 (unsigned int __X, const unsigned int __I) > +{ > + return __builtin_ia32_bextri_u32 (__X, __I); > +} > +#else > +#define __bextri_u32 (X, I) \ > + ((unsigned int)__builtin_ia32_bextri_u32 ((unsigned int)(X), \ > + (unsigned int)(I)) > +#endif /*__OPTIMIZE__ */ > + > +extern __inline unsigned int __attribute__((__gnu_inline__, > __always_inline__, __artificial__)) > +__blcfill_u32 (unsigned int __X) > +{ > + return __builtin_ia32_blcfill_u32 (__X); > +} > + > +extern __inline unsigned int __attribute__((__gnu_inline__, > __always_inline__, __artificial__)) > +__blsfill_u32 (unsigned int __X) > +{ > + return __builtin_ia32_blsfill_u32 (__X); > +} > + > +extern __inline unsigned int __attribute__((__gnu_inline__, > __always_inline__, __artificial__)) > +__blcs_u32 (unsigned int __X) > +{ > + return __builtin_ia32_blcs_u32 (__X); > +} > + > +extern __inline unsigned int __attribute__((__gnu_inline__, > __always_inline__, __artificial__)) > +__tzmsk_u32 (unsigned int __X) > +{ > + return __builtin_ia32_tzmsk_u32 (__X); > +} > + > +extern __inline unsigned int __attribute__((__gnu_inline__, > __always_inline__, __artificial__)) > +__blcic_u32 (unsigned int __X) > +{ > + return __builtin_ia32_blcic_u32 (__X); > +} > + > +extern __inline unsigned int __attribute__((__gnu_inline__, > __always_inline__, __artificial__)) > +__blsic_u32 (unsigned int __X) > +{ > + return __builtin_ia32_blsic_u32 (__X); > +} > + > +extern __inline unsigned int __attribute__((__gnu_inline__, > __always_inline__, __artificial__)) > +__t1mskc_u32 (unsigned int __X) > +{ > + return __builtin_ia32_t1mskc_u32 (__X); > +} > + > +extern __inline unsigned int __attribute__((__gnu_inline__, > 
__always_inline__, __artificial__)) > +__blcmsk_u32 (unsigned int __X) > +{ > + return __builtin_ia32_blcmsk_u32 (__X); > +} > + > +extern __inline unsigned int __attribute__((__gnu_inline__, > __always_inline__, __artificial__)) > +__blci_u32 (unsigned int __X) > +{ > + return __builtin_ia32_blci_u32 (__X); > +} > + > + > +#ifdef __x86_64__ > +#ifdef __OPTIMIZE__ > +extern __inline unsigned long long __attribute__((__gnu_inline__, > __always_inline__, __artificial__)) > +__bextri_u64 (unsigned long long __X, const unsigned int __Y) > +{ > + return __builtin_ia32_bextri_u64 (__X, __Y); > +} > +#else > +#define __bextri_u64 (X, I) > \ > + ((unsigned long long)__builtin_ia32_bextri_u64 ((unsigned long > long)(X), \ > + (unsigned > long long)(I)) > +#endif /*__OPTIMIZE__ */ > + > +extern __inline unsigned long long __attribute__((__gnu_inline__, > __always_inline__, __artificial__)) > +__blcfill_u64 (unsigned long long __X) > +{ > + return __builtin_ia32_blcfill_u64 (__X); > +} > + > +extern __inline unsigned long long __attribute__((__gnu_inline__, > __always_inline__, __artificial__)) > +__blsfill_u64 (unsigned long long __X) > +{ > + return __builtin_ia32_blsfill_u64 (__X); > +} > + > +extern __inline unsigned long long __attribute__((__gnu_inline__, > __always_inline__, __artificial__)) > +__blcs_u64 (unsigned long long __X) > +{ > + return __builtin_ia32_blcs_u64 (__X); > +} > + > +extern __inline unsigned long long __attribute__((__gnu_inline__, > __always_inline__, __artificial__)) > +__tzmsk_u64 (unsigned long long __X) > +{ > + return __builtin_ia32_tzmsk_u64 (__X); > +} > + > +extern __inline unsigned long long __attribute__((__gnu_inline__, > __always_inline__, __artificial__)) > +__blcic_u64 (unsigned long long __X) > +{ > + return __builtin_ia32_blcic_u64 (__X); > +} > + > +extern __inline unsigned long long __attribute__((__gnu_inline__, > __always_inline__, __artificial__)) > +__blsic_u64 (unsigned long long __X) > +{ > + return __builtin_ia32_blsic_u64 
(__X); > +} > + > +extern __inline unsigned long long __attribute__((__gnu_inline__, > __always_inline__, __artificial__)) > +__t1mskc_u64 (unsigned long long __X) > +{ > + return __builtin_ia32_t1mskc_u64 (__X); > +} > + > +extern __inline unsigned long long __attribute__((__gnu_inline__, > __always_inline__, __artificial__)) > +__blcmsk_u64 (unsigned long long __X) > +{ > + return __builtin_ia32_blcmsk_u64 (__X); > +} > + > +extern __inline unsigned long long __attribute__((__gnu_inline__, > __always_inline__, __artificial__)) > +__blci_u64 (unsigned long long __X) > +{ > + return __builtin_ia32_blci_u64 (__X); > +} > +#endif /* __x86_64__ */ > + > +#endif /* _TBMINTRIN_H_INCLUDED */ > + > diff --git a/gcc/config/i386/x86intrin.h b/gcc/config/i386/x86intrin.h > index 9a7366b..07074ae 100644 > --- a/gcc/config/i386/x86intrin.h > +++ b/gcc/config/i386/x86intrin.h > @@ -85,6 +85,10 @@ > #include <bmiintrin.h> > #endif > > +#ifdef __BMI__ > +#include <tbmintrin.h> > +#endif > + > #ifdef __POPCNT__ > #include <popcntintrin.h> > #endif > diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi > index a7be54b..e6ba951 100644 > --- a/gcc/doc/extend.texi > +++ b/gcc/doc/extend.texi > @@ -9370,6 +9370,31 @@ unsigned int __builtin_ia32_blsi_u32 (unsigned int); > unsigned long long __builtin_ia32_blsi_u64 (unsigned long long); > @end smallexample > > +The following built-in functions are available when @option{-mtbm} is used. > +All of them generate the machine instruction that is part of the name. 
> +@smallexample > +unsigned int __builtin_ia32_bextri_u32 (unsigned int, unsigned int); > +unsigned long long __builtin_ia32_bextri_u64 (unsigned long long, > const unsigned long long); > +unsigned int __builtin_ia32_blcfill_u32 (unsigned int); > +unsigned long long __builtin_ia32_blcfill_u64 (unsigned long long); > +unsigned int __builtin_ia32_blsfill_u32 (unsigned int); > +unsigned long long __builtin_ia32_blsfill_u64 (unsigned long long); > +unsigned int __builtin_ia32_blcs_u32 (unsigned int); > +unsigned long long __builtin_ia32_blcs_u64 (unsigned long long); > +unsigned int __builtin_ia32_tzmsk_u32 (unsigned int); > +unsigned long long __builtin_ia32_tzmsk_u64 (unsigned long long); > +unsigned int __builtin_ia32_blcic_u32 (unsigned int); > +unsigned long long __builtin_ia32_blcic_u64 (unsigned long long); > +unsigned int __builtin_ia32_blsic_u32 (unsigned int); > +unsigned long long __builtin_ia32_blsic_u64 (unsigned long long); > +unsigned int __builtin_ia32_t1mskc_u32 (unsigned int); > +unsigned long long __builtin_ia32_t1mskc_u64 (unsigned long long); > +unsigned int __builtin_ia32_blcmsk_u32 (unsigned int); > +unsigned long long __builtin_ia32_blcmsk_u64 (unsigned long long); > +unsigned int __builtin_ia32_blci_u32 (unsigned int); > +unsigned long long __builtin_ia32_blci_u64 (unsigned long long); > +@end smallexample > + > The following built-in functions are available when @option{-m3dnow} is used. > All of them generate the machine instruction that is part of the name. > > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi > index 071ad27..d4eaea6 100644 > --- a/gcc/doc/invoke.texi > +++ b/gcc/doc/invoke.texi > @@ -597,7 +597,7 @@ Objective-C and Objective-C++ Dialects}. 
> -mcld -mcx16 -msahf -mmovbe -mcrc32 -mrecip @gol > -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx @gol > -maes -mpclmul -mfsgsbase -mrdrnd -mf16c -mfused-madd @gol > --msse4a -m3dnow -mpopcnt -mabm -mbmi -mfma4 -mxop -mlwp @gol > +-msse4a -m3dnow -mpopcnt -mabm -mbmi -mtbm -mfma4 -mxop -mlwp @gol > -mthreads -mno-align-stringops -minline-all-stringops @gol > -minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol > -mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol > @@ -12420,6 +12420,8 @@ preferred alignment to > @option{-mpreferred-stack-boundary=2}. > @itemx -mno-abm > @itemx -mbmi > @itemx -mno-bmi > +@itemx -mtbm > +@itemx -mno-tbm > @opindex mmmx > @opindex mno-mmx > @opindex msse > diff --git a/gcc/testsuite/g++.dg/other/i386-2.C > b/gcc/testsuite/g++.dg/other/i386-2.C > index f0a382a..6f7ca84 100644 > --- a/gcc/testsuite/g++.dg/other/i386-2.C > +++ b/gcc/testsuite/g++.dg/other/i386-2.C > @@ -1,9 +1,9 @@ > /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ > -/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -mavx -mxop > -maes -mpclmul -mpopcnt -mabm -mbmi -mlwp -mfsgsbase -mrdrnd -mf16c" } > */ > +/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -mavx -mxop > -maes -mpclmul -mpopcnt -mabm -mbmi -mtbm -mlwp -mfsgsbase -mrdrnd > -mf16c" } */ > > /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, xopintrin.h, abmintrin.h, > - bmiintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h are usable with > - -O -pedantic-errors. */ > + bmiintrin.h, tbmintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h > + are usable with -O -pedantic-errors. 
*/ > > #include <x86intrin.h> > > diff --git a/gcc/testsuite/g++.dg/other/i386-3.C > b/gcc/testsuite/g++.dg/other/i386-3.C > index 4b27372..fe2a097 100644 > --- a/gcc/testsuite/g++.dg/other/i386-3.C > +++ b/gcc/testsuite/g++.dg/other/i386-3.C > @@ -1,8 +1,8 @@ > /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ > -/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -mavx > -mxop -maes -mpclmul -mpopcnt -mabm -mbmi -mlwp -mfsgsbase -mrdrnd > -mf16c" } */ > +/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -mavx > -mxop -maes -mpclmul -mpopcnt -mabm -mbmi -mtbm -mlwp -mfsgsbase > -mrdrnd -mf16c" } */ > > /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, xopintrin.h, abmintrin.h, > - bmiintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h are usable with > + bmiintrin.h, tbmintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h are usable with > -O -fkeep-inline-functions. */ > > #include <x86intrin.h> > diff --git a/gcc/testsuite/gcc.target/i386/funcspec-5.c > b/gcc/testsuite/gcc.target/i386/funcspec-5.c > index 5e07d85..1e18dcf 100644 > --- a/gcc/testsuite/gcc.target/i386/funcspec-5.c > +++ b/gcc/testsuite/gcc.target/i386/funcspec-5.c > @@ -19,6 +19,7 @@ extern void test_sse4_2 > (void) __attribute__((__target__("sse4.2"))); > extern void test_sse4a (void) __attribute__((__target__("sse4a"))); > extern void test_fma4 (void) __attribute__((__target__("fma4"))); > extern void test_ssse3 (void) __attribute__((__target__("ssse3"))); > +extern void test_tbm (void) __attribute__((__target__("tbm"))); > > extern void test_no_abm (void) __attribute__((__target__("no-abm"))); > extern void test_no_aes (void) __attribute__((__target__("no-aes"))); > @@ -36,6 +37,7 @@ extern void test_no_sse4_2 > (void) __attribute__((__target__("no-sse4.2"))); > extern void test_no_sse4a (void) __attribute__((__target__("no-sse4a"))); > extern void test_no_fma4 (void) __attribute__((__target__("no-fma4"))); > extern void test_no_ssse3 (void) __attribute__((__target__("no-ssse3"))); > 
+extern void test_no_tbm (void) __attribute__((__target__("no-tbm"))); > > extern void test_arch_i386 (void) __attribute__((__target__("arch=i386"))); > extern void test_arch_i486 (void) __attribute__((__target__("arch=i486"))); > diff --git a/gcc/testsuite/gcc.target/i386/funcspec-6.c > b/gcc/testsuite/gcc.target/i386/funcspec-6.c > index 81c831c..92a3cb5 100644 > --- a/gcc/testsuite/gcc.target/i386/funcspec-6.c > +++ b/gcc/testsuite/gcc.target/i386/funcspec-6.c > @@ -19,6 +19,7 @@ extern void test_sse4_2 > (void) __attribute__((__target__("sse4.2"))); > extern void test_sse4a (void) __attribute__((__target__("sse4a"))); > extern void test_fma4 (void) __attribute__((__target__("fma4"))); > extern void test_ssse3 (void) __attribute__((__target__("ssse3"))); > +extern void test_tbm (void) __attribute__((__target__("tbm"))); > > extern void test_no_abm (void) __attribute__((__target__("no-abm"))); > extern void test_no_aes (void) __attribute__((__target__("no-aes"))); > @@ -36,6 +37,7 @@ extern void test_no_sse4_2 > (void) __attribute__((__target__("no-sse4.2"))); > extern void test_no_sse4a (void) __attribute__((__target__("no-sse4a"))); > extern void test_no_fma4 (void) __attribute__((__target__("no-fma4"))); > extern void test_no_ssse3 (void) __attribute__((__target__("no-ssse3"))); > +extern void test_no_tbm (void) __attribute__((__target__("no-tbm"))); > > extern void test_arch_nocona > (void) __attribute__((__target__("arch=nocona"))); > extern void test_arch_core2 (void) __attribute__((__target__("arch=core2"))); > diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c > b/gcc/testsuite/gcc.target/i386/sse-12.c > index d59777b..eee7b29 100644 > --- a/gcc/testsuite/gcc.target/i386/sse-12.c > +++ b/gcc/testsuite/gcc.target/i386/sse-12.c > @@ -1,9 +1,9 @@ > /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, xopintrin.h, mm3dnow.h, > - fma4intrin.h, abmintrin.h, bmiintrin.h, lwpintrin.h, > + fma4intrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, > 
popcntintrin.h and mm_malloc.h are usable > with -O -std=c89 -pedantic-errors. */ > /* { dg-do compile } */ > -/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -m3dnow -mavx > -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mbmi -mlwp -mfsgsbase > -mrdrnd -mf16c" } */ > +/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -m3dnow -mavx > -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mbmi -mtbm -mlwp > -mfsgsbase -mrdrnd -mf16c" } */ > > #include <x86intrin.h> > > diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c > b/gcc/testsuite/gcc.target/i386/sse-13.c > index 01809d0..3fb7eff 100644 > --- a/gcc/testsuite/gcc.target/i386/sse-13.c > +++ b/gcc/testsuite/gcc.target/i386/sse-13.c > @@ -1,13 +1,13 @@ > /* { dg-do compile } */ > -/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 > -m3dnow -mavx -mxop -maes -mpclmul -mpopcnt -mabm -mlwp -mfsgsbase > -mrdrnd -mf16c" } */ > +/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 > -m3dnow -mavx -mxop -maes -mpclmul -mpopcnt -mabm -mtbm -mlwp > -mfsgsbase -mrdrnd -mf16c" } */ > > #include <mm_malloc.h> > > /* Test that the intrinsics compile with optimization. All of them > are defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h, > - xopintrin.h, abmintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h > - that reference the proper builtin functions. Defining away > - "extern" and "__inline" results in all of them being compiled as > + xopintrin.h, abmintrin.h, tbmintrin.h, lwpintrin.h, popcntintrin.h > + and mm3dnow.h that reference the proper builtin functions. Defining > + away "extern" and "__inline" results in all of them being compiled as > proper functions. 
*/ > > #define extern > @@ -141,4 +141,8 @@ > #define __builtin_ia32_lwpins32 (D2, D1, F) __builtin_ia32_lwpins32 (D2, D1, 1) > #define __builtin_ia32_lwpins64 (D2, D1, F) __builtin_ia32_lwpins64 (D2, D1, 1) > > +/* tbmintrin.h */ > +#define __builtin_ia32_bextri_u32 (X, Y) __builtin_ia32_bextri_u32 (X, 1) > +#define __builtin_ia32_bextri_u64 (X, Y) __builtin_ia32_bextri_u64 (X, 1) > + > #include <x86intrin.h> > diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c > b/gcc/testsuite/gcc.target/i386/sse-14.c > index d256e68..41bde1b 100644 > --- a/gcc/testsuite/gcc.target/i386/sse-14.c > +++ b/gcc/testsuite/gcc.target/i386/sse-14.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 > -m3dnow -mavx -mxop -msse4a -maes -mpclmul -mpopcnt -mabm -mlwp > -mfsgsbase -mrdrnd -mf16c" } */ > +/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 > -m3dnow -mavx -mxop -msse4a -maes -mpclmul -mpopcnt -mabm -mtbm -mlwp > -mfsgsbase -mrdrnd -mf16c" } */ > > #include <mm_malloc.h> > > @@ -177,3 +177,9 @@ test_2 ( __lwpins32, unsigned char, unsigned int, > unsigned int, 1) > test_2 ( __lwpval64, void, unsigned long long, unsigned int, 1) > test_2 ( __lwpins64, unsigned char, unsigned long long, unsigned int, 1) > #endif > + > +/* tbmintrin.h */ > +test_1 ( __bextri_u32, unsigned int, unsigned int, 1) > +#ifdef __x86_64__ > +test_1 ( __bextri_u64, unsigned long long, unsigned long long, 1) > +#endif > diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c > b/gcc/testsuite/gcc.target/i386/sse-22.c > index bb0472d..e940ef8 100644 > --- a/gcc/testsuite/gcc.target/i386/sse-22.c > +++ b/gcc/testsuite/gcc.target/i386/sse-22.c > @@ -6,10 +6,10 @@ > > /* Test that the intrinsics compile without optimization. All of them > are defined as inline functions in {,x,e,p,t,s,w,a}mmintrin.h, > - xopintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h that > - reference the proper builtin functions. 
Defining away "extern" and > - "__inline" results in all of them being compiled as proper > - functions. */ > + xopintrin.h, abmintrin.h, tbmintrin.h, lwpintrin.h, popcntintrin.h > + and mm3dnow.h that reference the proper builtin functions. Defining > + away "extern" and "__inline" results in all of them being compiled as > + proper functions. */ > > #define extern > #define __inline > @@ -39,7 +39,7 @@ > > > #ifndef DIFFERENT_PRAGMAS > -#pragma GCC target > ("mmx,3dnow,sse,sse2,sse3,ssse3,sse4.1,sse4.2,sse4a,aes,pclmul,xop,popcnt,abm,lwp,fsgsbase,rdrnd,f16c") > +#pragma GCC target > ("mmx,3dnow,sse,sse2,sse3,ssse3,sse4.1,sse4.2,sse4a,aes,pclmul,xop,popcnt,abm,tbm,lwp,fsgsbase,rdrnd,f16c") > #endif > > /* Following intrinsics require immediate arguments. They > @@ -188,3 +188,14 @@ test_2 ( __lwpins64, unsigned char, unsigned long > long, unsigned int, 1) > test_1 (_cvtss_sh, unsigned short, float, 1) > test_1 (_mm_cvtps_ph, __m128i, __m128, 1) > test_1 (_mm256_cvtps_ph, __m128i, __m256, 1) > + > + > +/* tbmintrin.h (TBM). */ > +#ifdef DIFFERENT_PRAGMAS > +#pragma GCC target ("tbm") > +#endif > +#include <tbmintrin.h> > +test_1 ( __bextri_u32, unsigned int, unsigned int, 1) > +#ifdef __x86_64__ > +test_1 ( __bextri_u64, unsigned long long, unsigned long long, 1) > +#endif > diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c > b/gcc/testsuite/gcc.target/i386/sse-23.c > index 0e15bb2..6a7b854 100644 > --- a/gcc/testsuite/gcc.target/i386/sse-23.c > +++ b/gcc/testsuite/gcc.target/i386/sse-23.c > @@ -5,7 +5,7 @@ > > /* Test that the intrinsics compile with optimization. All of them > are defined as inline functions in {,x,e,p,t,s,w,a}mmintrin.h, > - xopintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h that > + xopintrin.h, lwpintrin.h, tbmintrin.h, popcntintrin.h and mm3dnow.h that > reference the proper builtin functions. Defining away "extern" and > "__inline" results in all of them being compiled as proper > functions. 
*/ > @@ -141,7 +141,11 @@ > #define __builtin_ia32_lwpins32 (D2, D1, F) __builtin_ia32_lwpins32 (D2, D1, 1) > #define __builtin_ia32_lwpins64 (D2, D1, F) __builtin_ia32_lwpins64 (D2, D1, 1) > > -#pragma GCC target > ("3dnow,sse4,sse4a,aes,pclmul,xop,abm,popcnt,lwp,fsgsbase,rdrnd,f16c") > +/* tbmintrin.h */ > +#define __builtin_ia32_bextri_u32 (X, Y) __builtin_ia32_bextr_u32 (X, 1) > +#define __builtin_ia32_bextri_u64 (X, Y) __builtin_ia32_bextr_u64 (X, 1) > + > +#pragma GCC target > ("3dnow,sse4,sse4a,aes,pclmul,xop,abm,popcnt,lwp,tbm,fsgsbase,rdrnd,f16c") > #include <wmmintrin.h> > #include <smmintrin.h> > #include <mm3dnow.h> > diff --git a/gcc/testsuite/gcc.target/i386/tbm-1.c > b/gcc/testsuite/gcc.target/i386/tbm-1.c > new file mode 100644 > index 0000000..4dddafc > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/tbm-1.c > @@ -0,0 +1,74 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mtbm" } */ > +/* { dg-final { scan-assembler "bextr\[^\\n]*(%|)eax" } } */ > +/* { dg-final { scan-assembler "blcfill\[^\\n]*(%|)eax" } } */ > +/* { dg-final { scan-assembler "blci\[^\\n]*(%|)eax" } } */ > +/* { dg-final { scan-assembler "blcic\[^\\n]*(%|)eax" } } */ > +/* { dg-final { scan-assembler "blcmsk\[^\\n]*(%|)eax" } } */ > +/* { dg-final { scan-assembler "blcs\[^\\n]*(%|)eax" } } */ > +/* { dg-final { scan-assembler "blsfill\[^\\n]*(%|)eax" } } */ > +/* { dg-final { scan-assembler "blsic\[^\\n]*(%|)eax" } } */ > +/* { dg-final { scan-assembler "t1mskc\[^\\n]*(%|)eax" } } */ > +/* { dg-final { scan-assembler "tzmsk\[^\\n]*(%|)eax" } } */ > + > +#include <x86intrin.h> > + > +unsigned int > +func_bextri32 (unsigned int X) > +{ > + return __bextri_u32 (X, 1); > +} > + > +unsigned int > +func_blcfill32 (unsigned int X) > +{ > + return __blcfill_u32 (X); > +} > + > +unsigned int > +func_blci32 (unsigned int X) > +{ > + return __blci_u32 (X); > +} > + > +unsigned int > +func_blcic32 (unsigned int X) > +{ > + return __blcic_u32 (X); > +} > + > +unsigned int > 
+func_blcmsk32 (unsigned int X) > +{ > + return __blcmsk_u32 (X); > +} > + > +unsigned int > +func_blcs32 (unsigned int X) > +{ > + return __blcs_u32 (X); > +} > + > +unsigned int > +func_blsfill32 (unsigned int X) > +{ > + return __blsfill_u32 (X); > +} > + > +unsigned int > +func_blsic32 (unsigned int X) > +{ > + return __blsic_u32 (X); > +} > + > +unsigned int > +func_t1mskc32 (unsigned int X) > +{ > + return __t1mskc_u32 (X); > +} > + > +unsigned int > +func_tzmsk32 (unsigned int X) > +{ > + return __tzmsk_u32 (X); > +} > diff --git a/gcc/testsuite/gcc.target/i386/tbm-2.c > b/gcc/testsuite/gcc.target/i386/tbm-2.c > new file mode 100644 > index 0000000..e3ba375 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/tbm-2.c > @@ -0,0 +1,74 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target lp64 } */ > +/* { dg-options "-O2 -mtbm" } */ > +/* { dg-final { scan-assembler "bextr\[^\\n]*(%|)rax" } } */ > +/* { dg-final { scan-assembler "blci\[^\\n]*(%|)rax" } } */ > +/* { dg-final { scan-assembler "blcic\[^\\n]*(%|)rax" } } */ > +/* { dg-final { scan-assembler "blcmsk\[^\\n]*(%|)rax" } } */ > +/* { dg-final { scan-assembler "blcs\[^\\n]*(%|)rax" } } */ > +/* { dg-final { scan-assembler "blsfill\[^\\n]*(%|)rax" } } */ > +/* { dg-final { scan-assembler "blsic\[^\\n]*(%|)rax" } } */ > +/* { dg-final { scan-assembler "t1mskc\[^\\n]*(%|)rax" } } */ > +/* { dg-final { scan-assembler "tzmsk\[^\\n]*(%|)rax" } } */ > + > +#include <x86intrin.h> > + > +unsigned long long > +func_bextri64 (unsigned long long X) > +{ > + return __bextri_u64 (X, 1); > +} > + > +unsigned long long > +func_blcfill64 (unsigned long long X) > +{ > + return __blcfill_u64 (X); > +} > + > +unsigned long long > +func_blci64 (unsigned long long X) > +{ > + return __blci_u64 (X); > +} > + > +unsigned long long > +func_blcic64 (unsigned long long X) > +{ > + return __blcic_u64 (X); > +} > + > +unsigned long long > +func_blcmsk64 (unsigned long long X) > +{ > + return __blcmsk_u64 (X); > +} > + 
> +unsigned long long > +func_blcs64 (unsigned long long X) > +{ > + return __blcs_u64 (X); > +} > + > +unsigned long long > +func_blsfill64 (unsigned long long X) > +{ > + return __blsfill_u64 (X); > +} > + > +unsigned long long > +func_blsic64 (unsigned long long X) > +{ > + return __blsic_u64 (X); > +} > + > +unsigned long long > +func_t1mskc64 (unsigned long long X) > +{ > + return __t1mskc_u64 (X); > +} > + > +unsigned long long > +func_tzmsk64 (unsigned long long X) > +{ > + return __tzmsk_u64 (X); > +} > This patch also passes "make check -k RUNTESTFLAGS=i386.exp" on x86-64; I am still working on a full bootstrap test. Ok to commit? -- Quentin
diff --git a/gcc/config.gcc b/gcc/config.gcc index 4034241..f923990 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -318,7 +318,7 @@ i[34567]86-*-*) nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h immintrin.h x86intrin.h avxintrin.h xopintrin.h ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h - abmintrin.h bmiintrin.h" + abmintrin.h bmiintrin.h tbmintrin.h" ;; x86_64-*-*) cpu_type=i386 @@ -329,7 +329,7 @@ x86_64-*-*) nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h immintrin.h x86intrin.h avxintrin.h xopintrin.h ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h - abmintrin.h bmiintrin.h" + abmintrin.h bmiintrin.h tbmintrin.h" need_64bit_hwint=yes ;; ia64-*-*) diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index 0f1af7f..e9d0fab 100644 --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -54,6 +54,7 @@ #define bit_XOP (1 << 11) #define bit_LWP (1 << 15) #define bit_FMA4 (1 << 16) +#define bit_TBM (1 << 21) /* %edx */ #define bit_LM (1 << 29) diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c index a7d6808..15d3284 100644 --- a/gcc/config/i386/driver-i386.c +++ b/gcc/config/i386/driver-i386.c @@ -397,7 +397,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0; unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0; unsigned int has_fma4 = 0, has_xop = 0; - unsigned int has_bmi = 0; + unsigned int has_bmi = 0, has_tbm = 0; bool arch; @@ -464,6 +464,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) has_lwp = ecx & bit_LWP; has_fma4 = ecx & bit_FMA4; has_xop = ecx & bit_XOP; + has_tbm = ecx & bit_TBM; has_longmode = edx & bit_LM; has_3dnowp = edx & bit_3DNOWP; diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c index e84347c..666e77e 100644 --- a/gcc/config/i386/i386-c.c +++ b/gcc/config/i386/i386-c.c @@ -246,6 +246,8 @@ ix86_target_macros_internal (int isa_flag, def_or_undef (parse_in, "__ABM__"); if 
(isa_flag & OPTION_MASK_ISA_BMI) def_or_undef (parse_in, "__BMI__"); + if (isa_flag & OPTION_MASK_ISA_TBM) + def_or_undef (parse_in, "__TBM__"); if (isa_flag & OPTION_MASK_ISA_POPCNT) def_or_undef (parse_in, "__POPCNT__"); if (isa_flag & OPTION_MASK_ISA_FSGSBASE) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index e003ee7..ac0772e 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -2080,6 +2080,7 @@ static int ix86_isa_flags_explicit; (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT) #define OPTION_MASK_ISA_BMI_SET OPTION_MASK_ISA_BMI +#define OPTION_MASK_ISA_TBM_SET OPTION_MASK_ISA_TBM #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16 @@ -2136,6 +2137,7 @@ static int ix86_isa_flags_explicit; #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM #define OPTION_MASK_ISA_BMI_UNSET OPTION_MASK_ISA_BMI +#define OPTION_MASK_ISA_TBM_UNSET OPTION_MASK_ISA_TBM #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF @@ -2446,6 +2448,20 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value) } return true; + case OPT_mtbm: + if (value) + { + ix86_isa_flags |= OPTION_MASK_ISA_TBM_SET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_SET; + } + else + { + ix86_isa_flags &= ~OPTION_MASK_ISA_TBM_UNSET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_UNSET; + } + return true; + + case OPT_mpopcnt: if (value) { @@ -2615,6 +2631,7 @@ ix86_target_string (int isa, int flags, const char *arch, const char *tune, { "-mmmx", OPTION_MASK_ISA_MMX }, { "-mabm", OPTION_MASK_ISA_ABM }, { "-mbmi", OPTION_MASK_ISA_BMI }, + { "-mtbm", OPTION_MASK_ISA_TBM }, { "-mpopcnt", OPTION_MASK_ISA_POPCNT }, { "-mmovbe", OPTION_MASK_ISA_MOVBE }, { "-mcrc32", OPTION_MASK_ISA_CRC32 }, @@ -2871,6 +2888,7 @@ 
ix86_option_override_internal (bool main_args_p) PTA_RDRND = 1 << 25, PTA_F16C = 1 << 26, PTA_BMI = 1 << 27, + PTA_TBM = 1 << 28, /* if this reaches 32, need to widen struct pta flags below */ }; @@ -3206,6 +3224,9 @@ ix86_option_override_internal (bool main_args_p) if (processor_alias_table[i].flags & PTA_BMI && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI)) ix86_isa_flags |= OPTION_MASK_ISA_BMI; + if (processor_alias_table[i].flags & PTA_TBM + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM)) + ix86_isa_flags |= OPTION_MASK_ISA_TBM; if (processor_alias_table[i].flags & PTA_CX16 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16)) ix86_isa_flags |= OPTION_MASK_ISA_CX16; @@ -3951,6 +3972,7 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[]) IX86_ATTR_ISA ("3dnow", OPT_m3dnow), IX86_ATTR_ISA ("abm", OPT_mabm), IX86_ATTR_ISA ("bmi", OPT_mbmi), + IX86_ATTR_ISA ("tbm", OPT_mtbm), IX86_ATTR_ISA ("aes", OPT_maes), IX86_ATTR_ISA ("avx", OPT_mavx), IX86_ATTR_ISA ("mmx", OPT_mmmx), @@ -22992,6 +23014,28 @@ enum ix86_builtins IX86_BUILTIN_TZCNT32, IX86_BUILTIN_TZCNT64, + /* TBM instructions. */ + IX86_BUILTIN_BEXTRI32, + IX86_BUILTIN_BEXTRI64, + IX86_BUILTIN_BLCFILL32, + IX86_BUILTIN_BLCFILL64, + IX86_BUILTIN_BLCI32, + IX86_BUILTIN_BLCI64, + IX86_BUILTIN_BLCIC32, + IX86_BUILTIN_BLCIC64, + IX86_BUILTIN_BLCMSK32, + IX86_BUILTIN_BLCMSK64, + IX86_BUILTIN_BLCS32, + IX86_BUILTIN_BLCS64, + IX86_BUILTIN_BLSFILL32, + IX86_BUILTIN_BLSFILL64, + IX86_BUILTIN_BLSIC32, + IX86_BUILTIN_BLSIC64, + IX86_BUILTIN_T1MSKC32, + IX86_BUILTIN_T1MSKC64, + IX86_BUILTIN_TZMSK32, + IX86_BUILTIN_TZMSK64, + /* FSGSBASE instructions. 
*/ IX86_BUILTIN_RDFSBASE32, IX86_BUILTIN_RDFSBASE64, @@ -23946,6 +23990,28 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_blsrsi, "__builtin_ia32_blsr_u32", IX86_BUILTIN_BLSR32, UNKNOWN, (int) UINT_FTYPE_UINT }, { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_blsrdi, "__builtin_ia32_blsr_u64", IX86_BUILTIN_BLSR64, UNKNOWN, (int) UINT64_FTYPE_UINT64 }, + /* TBM */ + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextrisi, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT }, + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextridi, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 }, + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcfillsi, "__builtin_ia32_blcfill_u32", IX86_BUILTIN_BLCFILL32, UNKNOWN, (int) UINT_FTYPE_UINT }, + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcfilldi, "__builtin_ia32_blcfill_u64", IX86_BUILTIN_BLCFILL64, UNKNOWN, (int) UINT64_FTYPE_UINT64 }, + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcisi, "__builtin_ia32_blci_u32", IX86_BUILTIN_BLCI32, UNKNOWN, (int) UINT_FTYPE_UINT }, + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcidi, "__builtin_ia32_blci_u64", IX86_BUILTIN_BLCI64, UNKNOWN, (int) UINT64_FTYPE_UINT64 }, + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcicsi, "__builtin_ia32_blcic_u32", IX86_BUILTIN_BLCIC32, UNKNOWN, (int) UINT_FTYPE_UINT }, + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcicdi, "__builtin_ia32_blcic_u64", IX86_BUILTIN_BLCIC64, UNKNOWN, (int) UINT64_FTYPE_UINT64 }, + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcmsksi, "__builtin_ia32_blcmsk_u32", IX86_BUILTIN_BLCMSK32, UNKNOWN, (int) UINT_FTYPE_UINT }, + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcmskdi, "__builtin_ia32_blcmsk_u64", IX86_BUILTIN_BLCMSK64, UNKNOWN, (int) UINT64_FTYPE_UINT64 }, + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcssi, "__builtin_ia32_blcs_u32", IX86_BUILTIN_BLCS32, UNKNOWN, (int) UINT_FTYPE_UINT }, + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blcsdi, "__builtin_ia32_blcs_u64", IX86_BUILTIN_BLCS64, UNKNOWN, (int) 
UINT64_FTYPE_UINT64 }, + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blsfillsi, "__builtin_ia32_blsfill_u32", IX86_BUILTIN_BLSFILL32, UNKNOWN, (int) UINT_FTYPE_UINT }, + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blsfilldi, "__builtin_ia32_blsfill_u64", IX86_BUILTIN_BLSFILL64, UNKNOWN, (int) UINT64_FTYPE_UINT64 }, + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blsicsi, "__builtin_ia32_blsic_u32", IX86_BUILTIN_BLSIC32, UNKNOWN, (int) UINT_FTYPE_UINT }, + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_blsicdi, "__builtin_ia32_blsic_u64", IX86_BUILTIN_BLSIC64, UNKNOWN, (int) UINT64_FTYPE_UINT64 }, + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_t1mskcsi, "__builtin_ia32_t1mskc_u32", IX86_BUILTIN_T1MSKC32, UNKNOWN, (int) UINT_FTYPE_UINT }, + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_t1mskcdi, "__builtin_ia32_t1mskc_u64", IX86_BUILTIN_T1MSKC64, UNKNOWN, (int) UINT64_FTYPE_UINT64 }, + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_tzmsksi, "__builtin_ia32_tzmsk_u32", IX86_BUILTIN_TZMSK32, UNKNOWN, (int) UINT_FTYPE_UINT }, + { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_tzmskdi, "__builtin_ia32_tzmsk_u64", IX86_BUILTIN_TZMSK64, UNKNOWN, (int) UINT64_FTYPE_UINT64 }, + /* F16C */ { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI }, { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI }, @@ -26057,6 +26123,25 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, emit_insn (gen_lwp_slwpcb (target)); return target; + case IX86_BUILTIN_BEXTRI32: + case IX86_BUILTIN_BEXTRI64: + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + icode = (fcode == IX86_BUILTIN_BEXTRI32 + ? 
CODE_FOR_tbm_bextrisi + : CODE_FOR_tbm_bextridi); + if (!CONST_INT_P (op1)) + { + error ("last argument must be an immediate"); + enum machine_mode tmode = insn_data[icode].operand[0].mode; + return gen_reg_rtx(tmode); + } + pat = GEN_FCN (icode) (target, op0, op1); + if (pat) emit_insn (pat); + return target; + default: break; } diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 4fba57d..3518bec 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -60,6 +60,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define TARGET_ROUND OPTION_ISA_ROUND #define TARGET_ABM OPTION_ISA_ABM #define TARGET_BMI OPTION_ISA_BMI +#define TARGET_TBM OPTION_ISA_TBM #define TARGET_POPCNT OPTION_ISA_POPCNT #define TARGET_SAHF OPTION_ISA_SAHF #define TARGET_MOVBE OPTION_ISA_MOVBE diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 967886d..ab588e2 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -237,6 +237,18 @@ UNSPEC_BLSMSK UNSPEC_BLSR UNSPEC_TZCNT + + ;; For TBM support + UNSPEC_BEXTRI + UNSPEC_BLCFILL + UNSPEC_BLCI + UNSPEC_BLCIC + UNSPEC_BLCMSK + UNSPEC_BLCS + UNSPEC_BLSFILL + UNSPEC_BLSIC + UNSPEC_T1MSKC + UNSPEC_TZMSK ]) (define_c_enum "unspecv" [ @@ -11916,6 +11928,99 @@ [(set_attr "type" "bitmanip") (set_attr "mode" "<MODE>")]) +;; TBM instructions. 
+(define_insn "tbm_bextri<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm") + (match_operand:SI 2 "const_0_to_31_operand" "n")] + UNSPEC_BEXTRI))] + "TARGET_TBM" + "bextr\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +(define_insn "tbm_blcfill<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] + UNSPEC_BLCFILL))] + "TARGET_TBM" + "blcfill\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +(define_insn "tbm_blci<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] + UNSPEC_BLCI))] + "TARGET_TBM" + "blci\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +(define_insn "tbm_blcic<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] + UNSPEC_BLCIC))] + "TARGET_TBM" + "blcic\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +(define_insn "tbm_blcmsk<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] + UNSPEC_BLCMSK))] + "TARGET_TBM" + "blcmsk\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +(define_insn "tbm_blcs<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] + UNSPEC_BLCS))] + "TARGET_TBM" + "blcs\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +(define_insn "tbm_blsfill<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] + UNSPEC_BLSFILL))] + "TARGET_TBM" + "blsfill\t{%1, %0|%0, %1}" + [(set_attr 
"type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +(define_insn "tbm_blsic<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] + UNSPEC_BLSIC))] + "TARGET_TBM" + "blsic\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +(define_insn "tbm_t1mskc<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] + UNSPEC_T1MSKC))] + "TARGET_TBM" + "t1mskc\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +(define_insn "tbm_tzmsk<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] + UNSPEC_TZMSK))] + "TARGET_TBM" + "tzmsk\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + + (define_insn "bsr_rex64" [(set (match_operand:DI 0 "register_operand" "=r") (minus:DI (const_int 63) diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index d808804..33014f4 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -357,6 +357,10 @@ mbmi Target Report Mask(ISA_BMI) Var(ix86_isa_flags) Save Support BMI built-in functions and code generation +mtbm +Target Report Mask(ISA_TBM) Var(ix86_isa_flags) Save +Support TBM built-in functions and code generation + mcx16 Target Report Mask(ISA_CX16) Var(ix86_isa_flags) Save Support code generation of cmpxchg16b instruction. diff --git a/gcc/config/i386/tbmintrin.h b/gcc/config/i386/tbmintrin.h new file mode 100644 index 0000000..7a623ef --- /dev/null +++ b/gcc/config/i386/tbmintrin.h @@ -0,0 +1,171 @@ +/* Copyright (C) 2010 Free Software Foundation, Inc. + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _X86INTRIN_H_INCLUDED +# error "Never use <tbmintrin.h> directly; include <x86intrin.h> instead." 
+#endif + +#ifndef __TBM__ +# error "TBM instruction set not enabled" +#endif /* __TBM__ */ + +#ifndef _TBMINTRIN_H_INCLUDED +#define _TBMINTRIN_H_INCLUDED + +#ifdef __OPTIMIZE__ +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__bextri_u32 (unsigned int __X, const unsigned int __I) +{ + return __builtin_ia32_bextri_u32 (__X, __I); +} +#else +#define __bextri_u32 (X, I) \ + ((unsigned int)__builtin_ia32_bextri_u32 ((unsigned int)(X), \ + (unsigned int)(I)) +#endif /*__OPTIMIZE__ */ + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__blcfill_u32 (unsigned int __X) +{ + return __builtin_ia32_blcfill_u32 (__X); +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__blsfill_u32 (unsigned int __X) +{ + return __builtin_ia32_blsfill_u32 (__X); +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__blcs_u32 (unsigned int __X) +{ + return __builtin_ia32_blcs_u32 (__X); +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__tzmsk_u32 (unsigned int __X) +{ + return __builtin_ia32_tzmsk_u32 (__X); +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__blcic_u32 (unsigned int __X) +{ + return __builtin_ia32_blcic_u32 (__X); +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__blsic_u32 (unsigned int __X) +{ + return __builtin_ia32_blsic_u32 (__X); +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__t1mskc_u32 (unsigned int __X) +{ + return __builtin_ia32_t1mskc_u32 (__X); +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__blcmsk_u32 (unsigned int __X) +{ + return __builtin_ia32_blcmsk_u32 (__X); +} + +extern __inline unsigned 
int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__blci_u32 (unsigned int __X) +{ + return __builtin_ia32_blci_u32 (__X); +} + + +#ifdef __x86_64__ +#ifdef __OPTIMIZE__ +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__bextri_u64 (unsigned long long __X, const unsigned int __Y) +{ + return __builtin_ia32_bextri_u64 (__X, __Y); +} +#else +#define __bextri_u64(X, I) \ + ((unsigned long long)__builtin_ia32_bextri_u64 ((unsigned long long)(X), \ + (unsigned long long)(I))) +#endif /*__OPTIMIZE__ */ + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__blcfill_u64 (unsigned long long __X) +{ + return __builtin_ia32_blcfill_u64 (__X); +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__blsfill_u64 (unsigned long long __X) +{ + return __builtin_ia32_blsfill_u64 (__X); +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__blcs_u64 (unsigned long long __X) +{ + return __builtin_ia32_blcs_u64 (__X); +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__tzmsk_u64 (unsigned long long __X) +{ + return __builtin_ia32_tzmsk_u64 (__X); +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__blcic_u64 (unsigned long long __X) +{ + return __builtin_ia32_blcic_u64 (__X); +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__blsic_u64 (unsigned long long __X) +{ + return __builtin_ia32_blsic_u64 (__X); +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__t1mskc_u64 (unsigned long long __X) +{ + return __builtin_ia32_t1mskc_u64 (__X); +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) +__blcmsk_u64 (unsigned long long __X) +{ + return __builtin_ia32_blcmsk_u64 (__X); +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__blci_u64 (unsigned long long __X) +{ + return __builtin_ia32_blci_u64 (__X); +} +#endif /* __x86_64__ */ + +#endif /* _TBMINTRIN_H_INCLUDED */ + diff --git a/gcc/config/i386/x86intrin.h b/gcc/config/i386/x86intrin.h index 9a7366b..07074ae 100644 --- a/gcc/config/i386/x86intrin.h +++ b/gcc/config/i386/x86intrin.h @@ -85,6 +85,10 @@ #include <bmiintrin.h> #endif +#ifdef __TBM__ +#include <tbmintrin.h> +#endif + #ifdef __POPCNT__ #include <popcntintrin.h> #endif diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index a7be54b..e6ba951 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -9370,6 +9370,31 @@ unsigned int __builtin_ia32_blsi_u32 (unsigned int); unsigned long long __builtin_ia32_blsi_u64 (unsigned long long); @end smallexample +The following built-in functions are available when @option{-mtbm} is used. +All of them generate the machine instruction that is part of the name. 
+@smallexample +unsigned int __builtin_ia32_bextri_u32 (unsigned int, unsigned int); +unsigned long long __builtin_ia32_bextri_u64 (unsigned long long, const unsigned long long); +unsigned int __builtin_ia32_blcfill_u32 (unsigned int); +unsigned long long __builtin_ia32_blcfill_u64 (unsigned long long); +unsigned int __builtin_ia32_blsfill_u32 (unsigned int); +unsigned long long __builtin_ia32_blsfill_u64 (unsigned long long); +unsigned int __builtin_ia32_blcs_u32 (unsigned int); +unsigned long long __builtin_ia32_blcs_u64 (unsigned long long); +unsigned int __builtin_ia32_tzmsk_u32 (unsigned int); +unsigned long long __builtin_ia32_tzmsk_u64 (unsigned long long); +unsigned int __builtin_ia32_blcic_u32 (unsigned int); +unsigned long long __builtin_ia32_blcic_u64 (unsigned long long); +unsigned int __builtin_ia32_blsic_u32 (unsigned int); +unsigned long long __builtin_ia32_blsic_u64 (unsigned long long); +unsigned int __builtin_ia32_t1mskc_u32 (unsigned int); +unsigned long long __builtin_ia32_t1mskc_u64 (unsigned long long); +unsigned int __builtin_ia32_blcmsk_u32 (unsigned int); +unsigned long long __builtin_ia32_blcmsk_u64 (unsigned long long); +unsigned int __builtin_ia32_blci_u32 (unsigned int); +unsigned long long __builtin_ia32_blci_u64 (unsigned long long); +@end smallexample + The following built-in functions are available when @option{-m3dnow} is used. All of them generate the machine instruction that is part of the name. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 071ad27..d4eaea6 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -597,7 +597,7 @@ Objective-C and Objective-C++ Dialects}. 
-mcld -mcx16 -msahf -mmovbe -mcrc32 -mrecip @gol -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx @gol -maes -mpclmul -mfsgsbase -mrdrnd -mf16c -mfused-madd @gol --msse4a -m3dnow -mpopcnt -mabm -mbmi -mfma4 -mxop -mlwp @gol +-msse4a -m3dnow -mpopcnt -mabm -mbmi -mtbm -mfma4 -mxop -mlwp @gol -mthreads -mno-align-stringops -minline-all-stringops @gol -minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol -mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol @@ -12420,6 +12420,8 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. @itemx -mno-abm @itemx -mbmi @itemx -mno-bmi +@itemx -mtbm +@itemx -mno-tbm @opindex mmmx @opindex mno-mmx @opindex msse diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C index f0a382a..6f7ca84 100644 --- a/gcc/testsuite/g++.dg/other/i386-2.C +++ b/gcc/testsuite/g++.dg/other/i386-2.C @@ -1,9 +1,9 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -mavx -mxop -maes -mpclmul -mpopcnt -mabm -mbmi -mlwp -mfsgsbase -mrdrnd -mf16c" } */ +/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -mavx -mxop -maes -mpclmul -mpopcnt -mabm -mbmi -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, xopintrin.h, abmintrin.h, - bmiintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h are usable with - -O -pedantic-errors. */ + bmiintrin.h, tbmintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h + are usable with -O -pedantic-errors. 
*/ #include <x86intrin.h> diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C index 4b27372..fe2a097 100644 --- a/gcc/testsuite/g++.dg/other/i386-3.C +++ b/gcc/testsuite/g++.dg/other/i386-3.C @@ -1,8 +1,8 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -mavx -mxop -maes -mpclmul -mpopcnt -mabm -mbmi -mlwp -mfsgsbase -mrdrnd -mf16c" } */ +/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -mavx -mxop -maes -mpclmul -mpopcnt -mabm -mbmi -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, xopintrin.h, abmintrin.h, - bmiintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h are usable with + bmiintrin.h, tbmintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h are usable with -O -fkeep-inline-functions. */ #include <x86intrin.h> diff --git a/gcc/testsuite/gcc.target/i386/funcspec-5.c b/gcc/testsuite/gcc.target/i386/funcspec-5.c index 5e07d85..1e18dcf 100644 --- a/gcc/testsuite/gcc.target/i386/funcspec-5.c +++ b/gcc/testsuite/gcc.target/i386/funcspec-5.c @@ -19,6 +19,7 @@ extern void test_sse4_2 (void) __attribute__((__target__("sse4.2"))); extern void test_sse4a (void) __attribute__((__target__("sse4a"))); extern void test_fma4 (void) __attribute__((__target__("fma4"))); extern void test_ssse3 (void) __attribute__((__target__("ssse3"))); +extern void test_tbm (void) __attribute__((__target__("tbm"))); extern void test_no_abm (void) __attribute__((__target__("no-abm"))); extern void test_no_aes (void) __attribute__((__target__("no-aes"))); @@ -36,6 +37,7 @@ extern void test_no_sse4_2 (void) __attribute__((__target__("no-sse4.2"))); extern void test_no_sse4a (void) __attribute__((__target__("no-sse4a"))); extern void test_no_fma4 (void) __attribute__((__target__("no-fma4"))); extern void test_no_ssse3 (void) __attribute__((__target__("no-ssse3"))); +extern void test_no_tbm (void) __attribute__((__target__("no-tbm"))); extern void 
test_arch_i386 (void) __attribute__((__target__("arch=i386"))); extern void test_arch_i486 (void) __attribute__((__target__("arch=i486"))); diff --git a/gcc/testsuite/gcc.target/i386/funcspec-6.c b/gcc/testsuite/gcc.target/i386/funcspec-6.c index 81c831c..92a3cb5 100644 --- a/gcc/testsuite/gcc.target/i386/funcspec-6.c +++ b/gcc/testsuite/gcc.target/i386/funcspec-6.c @@ -19,6 +19,7 @@ extern void test_sse4_2 (void) __attribute__((__target__("sse4.2"))); extern void test_sse4a (void) __attribute__((__target__("sse4a"))); extern void test_fma4 (void) __attribute__((__target__("fma4"))); extern void test_ssse3 (void) __attribute__((__target__("ssse3"))); +extern void test_tbm (void) __attribute__((__target__("tbm"))); extern void test_no_abm (void) __attribute__((__target__("no-abm"))); extern void test_no_aes (void) __attribute__((__target__("no-aes"))); @@ -36,6 +37,7 @@ extern void test_no_sse4_2 (void) __attribute__((__target__("no-sse4.2"))); extern void test_no_sse4a (void) __attribute__((__target__("no-sse4a"))); extern void test_no_fma4 (void) __attribute__((__target__("no-fma4"))); extern void test_no_ssse3 (void) __attribute__((__target__("no-ssse3"))); +extern void test_no_tbm (void) __attribute__((__target__("no-tbm"))); extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona"))); extern void test_arch_core2 (void) __attribute__((__target__("arch=core2"))); diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c index d59777b..eee7b29 100644 --- a/gcc/testsuite/gcc.target/i386/sse-12.c +++ b/gcc/testsuite/gcc.target/i386/sse-12.c @@ -1,9 +1,9 @@ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, xopintrin.h, mm3dnow.h, - fma4intrin.h, abmintrin.h, bmiintrin.h, lwpintrin.h, + fma4intrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, popcntintrin.h and mm_malloc.h are usable with -O -std=c89 -pedantic-errors. 
*/ /* { dg-do compile } */ -/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mbmi -mlwp -mfsgsbase -mrdrnd -mf16c" } */ +/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mbmi -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */ #include <x86intrin.h> diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index 01809d0..3fb7eff 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -1,13 +1,13 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mxop -maes -mpclmul -mpopcnt -mabm -mlwp -mfsgsbase -mrdrnd -mf16c" } */ +/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mxop -maes -mpclmul -mpopcnt -mabm -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */ #include <mm_malloc.h> /* Test that the intrinsics compile with optimization. All of them are defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h, - xopintrin.h, abmintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h - that reference the proper builtin functions. Defining away - "extern" and "__inline" results in all of them being compiled as + xopintrin.h, abmintrin.h, tbmintrin.h, lwpintrin.h, popcntintrin.h + and mm3dnow.h that reference the proper builtin functions. Defining + away "extern" and "__inline" results in all of them being compiled as proper functions. 
*/ #define extern @@ -141,4 +141,8 @@ #define __builtin_ia32_lwpins32 (D2, D1, F) __builtin_ia32_lwpins32 (D2, D1, 1) #define __builtin_ia32_lwpins64 (D2, D1, F) __builtin_ia32_lwpins64 (D2, D1, 1) +/* tbmintrin.h */ +#define __builtin_ia32_bextri_u32(X, Y) __builtin_ia32_bextri_u32 (X, 1) +#define __builtin_ia32_bextri_u64(X, Y) __builtin_ia32_bextri_u64 (X, 1) + #include <x86intrin.h> diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c index d256e68..41bde1b 100644 --- a/gcc/testsuite/gcc.target/i386/sse-14.c +++ b/gcc/testsuite/gcc.target/i386/sse-14.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mxop -msse4a -maes -mpclmul -mpopcnt -mabm -mlwp -mfsgsbase -mrdrnd -mf16c" } */ +/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mxop -msse4a -maes -mpclmul -mpopcnt -mabm -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */ #include <mm_malloc.h> @@ -177,3 +177,9 @@ test_2 ( __lwpins32, unsigned char, unsigned int, unsigned int, 1) test_2 ( __lwpval64, void, unsigned long long, unsigned int, 1) test_2 ( __lwpins64, unsigned char, unsigned long long, unsigned int, 1) #endif + +/* tbmintrin.h */ +test_1 ( __bextri_u32, unsigned int, unsigned int, 1) +#ifdef __x86_64__ +test_1 ( __bextri_u64, unsigned long long, unsigned long long, 1) +#endif diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c index bb0472d..e940ef8 100644 --- a/gcc/testsuite/gcc.target/i386/sse-22.c +++ b/gcc/testsuite/gcc.target/i386/sse-22.c @@ -6,10 +6,10 @@ /* Test that the intrinsics compile without optimization. All of them are defined as inline functions in {,x,e,p,t,s,w,a}mmintrin.h, - xopintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h that - reference the proper builtin functions. Defining away "extern" and - "__inline" results in all of them being compiled as proper - functions. 
*/ + xopintrin.h, abmintrin.h, tbmintrin.h, lwpintrin.h, popcntintrin.h + and mm3dnow.h that reference the proper builtin functions. Defining + away "extern" and "__inline" results in all of them being compiled as + proper functions. */ #define extern #define __inline @@ -39,7 +39,7 @@ #ifndef DIFFERENT_PRAGMAS -#pragma GCC target ("mmx,3dnow,sse,sse2,sse3,ssse3,sse4.1,sse4.2,sse4a,aes,pclmul,xop,popcnt,abm,lwp,fsgsbase,rdrnd,f16c") +#pragma GCC target ("mmx,3dnow,sse,sse2,sse3,ssse3,sse4.1,sse4.2,sse4a,aes,pclmul,xop,popcnt,abm,tbm,lwp,fsgsbase,rdrnd,f16c") #endif /* Following intrinsics require immediate arguments. They @@ -188,3 +188,14 @@ test_2 ( __lwpins64, unsigned char, unsigned long long, unsigned int, 1) test_1 (_cvtss_sh, unsigned short, float, 1) test_1 (_mm_cvtps_ph, __m128i, __m128, 1) test_1 (_mm256_cvtps_ph, __m128i, __m256, 1) + + +/* tbmintrin.h (TBM). */ +#ifdef DIFFERENT_PRAGMAS +#pragma GCC target ("tbm") +#endif +#include <tbmintrin.h> +test_1 ( __bextri_u32, unsigned int, unsigned int, 1) +#ifdef __x86_64__ +test_1 ( __bextri_u64, unsigned long long, unsigned long long, 1) +#endif diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index 0e15bb2..6a7b854 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -5,7 +5,7 @@ /* Test that the intrinsics compile with optimization. All of them are defined as inline functions in {,x,e,p,t,s,w,a}mmintrin.h, - xopintrin.h, lwpintrin.h, popcntintrin.h and mm3dnow.h that + xopintrin.h, lwpintrin.h, tbmintrin.h, popcntintrin.h and mm3dnow.h that reference the proper builtin functions. Defining away "extern" and "__inline" results in all of them being compiled as proper functions. 
*/ @@ -141,7 +141,11 @@ #define __builtin_ia32_lwpins32 (D2, D1, F) __builtin_ia32_lwpins32 (D2, D1, 1) #define __builtin_ia32_lwpins64 (D2, D1, F) __builtin_ia32_lwpins64 (D2, D1, 1) -#pragma GCC target ("3dnow,sse4,sse4a,aes,pclmul,xop,abm,popcnt,lwp,fsgsbase,rdrnd,f16c") +/* tbmintrin.h */ +#define __builtin_ia32_bextri_u32(X, Y) __builtin_ia32_bextri_u32 (X, 1) +#define __builtin_ia32_bextri_u64(X, Y) __builtin_ia32_bextri_u64 (X, 1) + +#pragma GCC target ("3dnow,sse4,sse4a,aes,pclmul,xop,abm,popcnt,lwp,tbm,fsgsbase,rdrnd,f16c") #include <wmmintrin.h> #include <smmintrin.h> #include <mm3dnow.h> diff --git a/gcc/testsuite/gcc.target/i386/tbm-1.c b/gcc/testsuite/gcc.target/i386/tbm-1.c new file mode 100644 index 0000000..4dddafc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/tbm-1.c @@ -0,0 +1,74 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mtbm" } */ +/* { dg-final { scan-assembler "bextr\[^\\n]*(%|)eax" } } */ +/* { dg-final { scan-assembler "blcfill\[^\\n]*(%|)eax" } } */ +/* { dg-final { scan-assembler "blci\[^\\n]*(%|)eax" } } */ +/* { dg-final { scan-assembler "blcic\[^\\n]*(%|)eax" } } */ +/* { dg-final { scan-assembler "blcmsk\[^\\n]*(%|)eax" } } */ +/* { dg-final { scan-assembler "blcs\[^\\n]*(%|)eax" } } */ +/* { dg-final { scan-assembler "blsfill\[^\\n]*(%|)eax" } } */ +/* { dg-final { scan-assembler "blsic\[^\\n]*(%|)eax" } } */ +/* { dg-final { scan-assembler "t1mskc\[^\\n]*(%|)eax" } } */ +/* { dg-final { scan-assembler "tzmsk\[^\\n]*(%|)eax" } } */ + +#include <x86intrin.h> + +unsigned int +func_bextri32 (unsigned int X) +{ + return __bextri_u32 (X, 1); +} + +unsigned int +func_blcfill32 (unsigned int X) +{ + return __blcfill_u32 (X); +} + +unsigned int +func_blci32 (unsigned int X) +{ + return __blci_u32 (X); +} + +unsigned int +func_blcic32 (unsigned int X) +{ + return __blcic_u32 (X); +} + +unsigned int +func_blcmsk32 (unsigned int X) +{ + return __blcmsk_u32 (X); +} + +unsigned int +func_blcs32 (unsigned int X) +{ + return __blcs_u32 
(X); +} + +unsigned int +func_blsfill32 (unsigned int X) +{ + return __blsfill_u32 (X); +} + +unsigned int +func_blsic32 (unsigned int X) +{ + return __blsic_u32 (X); +} + +unsigned int +func_t1mskc32 (unsigned int X) +{ + return __t1mskc_u32 (X); +} + +unsigned int +func_tzmsk32 (unsigned int X) +{ + return __tzmsk_u32 (X); +} diff --git a/gcc/testsuite/gcc.target/i386/tbm-2.c b/gcc/testsuite/gcc.target/i386/tbm-2.c new file mode 100644 index 0000000..e3ba375 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/tbm-2.c @@ -0,0 +1,75 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O2 -mtbm" } */ +/* { dg-final { scan-assembler "bextr\[^\\n]*(%|)rax" } } */ +/* { dg-final { scan-assembler "blcfill\[^\\n]*(%|)rax" } } */ +/* { dg-final { scan-assembler "blci\[^\\n]*(%|)rax" } } */ +/* { dg-final { scan-assembler "blcic\[^\\n]*(%|)rax" } } */ +/* { dg-final { scan-assembler "blcmsk\[^\\n]*(%|)rax" } } */ +/* { dg-final { scan-assembler "blcs\[^\\n]*(%|)rax" } } */ +/* { dg-final { scan-assembler "blsfill\[^\\n]*(%|)rax" } } */ +/* { dg-final { scan-assembler "blsic\[^\\n]*(%|)rax" } } */ +/* { dg-final { scan-assembler "t1mskc\[^\\n]*(%|)rax" } } */ +/* { dg-final { scan-assembler "tzmsk\[^\\n]*(%|)rax" } } */ + +#include <x86intrin.h> + +unsigned long long +func_bextri64 (unsigned long long X) +{ + return __bextri_u64 (X, 1); +} + +unsigned long long +func_blcfill64 (unsigned long long X) +{ + return __blcfill_u64 (X); +} + +unsigned long long +func_blci64 (unsigned long long X) +{ + return __blci_u64 (X); +} + +unsigned long long +func_blcic64 (unsigned long long X) +{ + return __blcic_u64 (X); +} + +unsigned long long +func_blcmsk64 (unsigned long long X) +{ + return __blcmsk_u64 (X); +} + +unsigned long long +func_blcs64 (unsigned long long X) +{ + return __blcs_u64 (X); +} + +unsigned long long +func_blsfill64 (unsigned long long X) +{ + return __blsfill_u64 (X); +} + +unsigned long long +func_blsic64 (unsigned long long X) +{ + return __blsic_u64 (X); +} + +unsigned long long 
+func_t1mskc64 (unsigned long long X) +{ + return __t1mskc_u64 (X); +} + +unsigned long long +func_tzmsk64 (unsigned long long X) +{ + return __tzmsk_u64 (X); +}