Message ID | 1534524362.5679.11.camel@us.ibm.com |
---|---|
State | New |
Headers | show |
Series | [rs6000] Add builtins for accessing the FPSCR | expand |
GCC maintainers: > In addition to listing > the builtin, I added a C style comment to describe the builtin a > little. I don't see any of the other builtins documented like this. > But I felt some explanation of the builtins were > helpful. Suggestions > on a better way to add the comments on the builtins would be > appreciated. > I spent some more time looking at the documentation file and decided my first approach really isn't correct. The needed comments should be placed in a paragraph below the list of builtins. I updated the patch with this change to the doc/extend.texi file. Carl Love ------------------------------------------------------------------- gcc/ChangeLog: 2018-08-17 Carl Love <cel@us.ibm.com> * config/rs6000/rs6000-builtin.def: Add definitions for __builtin_mffsl, __builtin_mtfsb0, __builtin_mtfsb1, __builtin_set_fpscr_rn, __builtin_set_fpscr_drn. * config/rs6000/rs6000.c: Add functions rs6000_expand_mtfsb0_mtfsb1_builtin, rs6000_expand_set_fpscr_rn_builtin, rs6000_expand_set_fpscr_drn_builtin. Add case statement entries for the new builtins. * config/rs6000/rs6000.md: Add define_insn for rs6000_mtfsb0_si, rs6000_mtfsb1_si, rs6000_mffscrn, rs6000_mffscdrn. Add define_expand for rs6000_set_fpscr_rn and rs6000_set_fpscr_drn. * doc/extend.texi: Add documentation for the builtins. gcc/testsuite/ChangeLog: 2018-08-16 Carl Love <cel@us.ibm.com> * testsuite/gcc.target/powerpc/test_mffsl-p9.c: New file. * testsuite/gcc.target/powerpc/test_fpscr_builtins.c: New file. * testsuite/gcc.target/powerpc/test_fpscr_builtins_error.c: New file. 
--- gcc/config/rs6000/rs6000-builtin.def | 23 ++ gcc/config/rs6000/rs6000.c | 151 ++++++++++ gcc/config/rs6000/rs6000.md | 149 ++++++++- gcc/doc/extend.texi | 36 ++- .../gcc.target/powerpc/test_fpscr_builtins.c | 282 ++++++++++++++++++ .../powerpc/test_fpscr_builtins_error.c | 26 ++ .../gcc.target/powerpc/test_mffsl-p9.c | 36 +++ 7 files changed, 701 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/test_fpscr_builtins.c create mode 100644 gcc/testsuite/gcc.target/powerpc/test_fpscr_builtins_error.c create mode 100644 gcc/testsuite/gcc.target/powerpc/test_mffsl-p9.c diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index f79968154..a50236e77 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -2486,11 +2486,34 @@ BU_SPECIAL_X (RS6000_BUILTIN_MFTB, "__builtin_ppc_mftb", BU_SPECIAL_X (RS6000_BUILTIN_MFFS, "__builtin_mffs", RS6000_BTM_ALWAYS, RS6000_BTC_MISC) +BU_SPECIAL_X (RS6000_BUILTIN_MFFSL, "__builtin_mffsl", + RS6000_BTM_ALWAYS, RS6000_BTC_MISC) + RS6000_BUILTIN_X (RS6000_BUILTIN_MTFSF, "__builtin_mtfsf", RS6000_BTM_ALWAYS, RS6000_BTC_MISC | RS6000_BTC_UNARY | RS6000_BTC_VOID, CODE_FOR_rs6000_mtfsf) +RS6000_BUILTIN_X (RS6000_BUILTIN_MTFSB0_SI, "__builtin_mtfsb0", + RS6000_BTM_ALWAYS, + RS6000_BTC_MISC | RS6000_BTC_UNARY, + CODE_FOR_rs6000_mtfsb0_si) + +RS6000_BUILTIN_X (RS6000_BUILTIN_MTFSB1_SI, "__builtin_mtfsb1", + RS6000_BTM_ALWAYS, + RS6000_BTC_MISC | RS6000_BTC_UNARY, + CODE_FOR_rs6000_mtfsb1_si) + +RS6000_BUILTIN_X (RS6000_BUILTIN_SET_FPSCR_RN, "__builtin_set_fpscr_rn", + RS6000_BTM_ALWAYS, + RS6000_BTC_MISC | RS6000_BTC_UNARY, + CODE_FOR_rs6000_set_fpscr_rn) + +RS6000_BUILTIN_X (RS6000_BUILTIN_SET_FPSCR_DRN, "__builtin_set_fpscr_drn", + RS6000_BTM_ALWAYS, + RS6000_BTC_MISC | RS6000_BTC_UNARY, + CODE_FOR_rs6000_set_fpscr_drn) + BU_SPECIAL_X (RS6000_BUILTIN_CPU_INIT, "__builtin_cpu_init", RS6000_BTM_ALWAYS, RS6000_BTC_MISC) diff --git 
a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index aa707b255..7db9c10a9 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -13356,6 +13356,113 @@ rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp) return NULL_RTX; } +static rtx +rs6000_expand_mtfsb0_mtfsb1_builtin (enum insn_code icode, tree exp) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + rtx op0 = expand_normal (arg0); + machine_mode mode0 = insn_data[icode].operand[0].mode; + + if (icode == CODE_FOR_nothing) + /* Builtin not supported on this processor. */ + return 0; + + /* If we got invalid arguments bail out before generating bad rtl. */ + if (arg0 == error_mark_node) + return const0_rtx; + + /* Only allow bit numbers 0 to 31. */ + if (GET_CODE (op0) != CONST_INT || INTVAL (op0) < 0 || INTVAL (op0) > 31) + { + error ("Argument must be a constant between 0 and 31."); + return const0_rtx; + } + + if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + pat = GEN_FCN (icode) (op0); + if (! pat) + return const0_rtx; + emit_insn (pat); + + return NULL_RTX; +} + +static rtx +rs6000_expand_set_fpscr_rn_builtin (enum insn_code icode, tree exp) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + rtx op0 = expand_normal (arg0); + machine_mode mode0 = insn_data[icode].operand[0].mode; + + if (icode == CODE_FOR_nothing) + /* Builtin not supported on this processor. */ + return 0; + + /* If we got invalid arguments bail out before generating bad rtl. */ + if (arg0 == error_mark_node) + return const0_rtx; + + /* If the argument is a constant, check the range. Agrument can only be a + 2-bit value. Unfortunately, can't check the range of the value at + compile time if the argument is a variable. + */ + if (GET_CODE (op0) == CONST_INT && (INTVAL (op0) < 0 || INTVAL (op0) > 3)) + { + error ("Argument must be a value between 0 and 3."); + return const0_rtx; + } + + if (! 
(*insn_data[icode].operand[0].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + pat = GEN_FCN (icode) (op0); + if (! pat) + return const0_rtx; + emit_insn (pat); + + return NULL_RTX; +} +static rtx +rs6000_expand_set_fpscr_drn_builtin (enum insn_code icode, tree exp) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + rtx op0 = expand_normal (arg0); + machine_mode mode0 = insn_data[icode].operand[0].mode; + + if (icode == CODE_FOR_nothing) + /* Builtin not supported on this processor. */ + return 0; + + /* If we got invalid arguments bail out before generating bad rtl. */ + if (arg0 == error_mark_node) + return const0_rtx; + + /* If the argument is a constant, check the range. Agrument can only be a + 3-bit value. Unfortunately, can't check the range of the value at + compile time if the argument is a variable. + */ + if (GET_CODE (op0) == CONST_INT && (INTVAL (op0) < 0 || INTVAL (op0) > 7)) + { + error ("Argument must be a value between 0 and 7."); + return const0_rtx; + } + + if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + pat = GEN_FCN (icode) (op0); + if (! 
pat) + return const0_rtx; + emit_insn (pat); + + return NULL_RTX; +} + static rtx rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target) { @@ -15987,6 +16094,26 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, case RS6000_BUILTIN_MFFS: return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target); + case RS6000_BUILTIN_MTFSB0_SI: + return rs6000_expand_mtfsb0_mtfsb1_builtin (CODE_FOR_rs6000_mtfsb0_si, + exp); + + case RS6000_BUILTIN_MTFSB1_SI: + return rs6000_expand_mtfsb0_mtfsb1_builtin (CODE_FOR_rs6000_mtfsb1_si, + exp); + + case RS6000_BUILTIN_SET_FPSCR_RN: + return rs6000_expand_set_fpscr_rn_builtin (CODE_FOR_rs6000_set_fpscr_rn, + exp); + + case RS6000_BUILTIN_SET_FPSCR_DRN: + return + rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn, + exp); + + case RS6000_BUILTIN_MFFSL: + return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target); + case RS6000_BUILTIN_MTFSF: return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp); @@ -16370,6 +16497,30 @@ rs6000_init_builtins (void) ftype = build_function_type_list (double_type_node, NULL_TREE); def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS); + ftype = build_function_type_list (double_type_node, NULL_TREE); + def_builtin ("__builtin_mffsl", ftype, RS6000_BUILTIN_MFFSL); + + ftype = build_function_type_list (void_type_node, + intSI_type_node, + NULL_TREE); + + def_builtin ("__builtin_mtfsb0", ftype, RS6000_BUILTIN_MTFSB0_SI); + + ftype = build_function_type_list (void_type_node, + intSI_type_node, + NULL_TREE); + def_builtin ("__builtin_mtfsb1", ftype, RS6000_BUILTIN_MTFSB1_SI); + + ftype = build_function_type_list (void_type_node, + intDI_type_node, + NULL_TREE); + def_builtin ("__builtin_set_fpscr_rn", ftype, RS6000_BUILTIN_SET_FPSCR_RN); + + ftype = build_function_type_list (void_type_node, + intDI_type_node, + NULL_TREE); + def_builtin ("__builtin_set_fpscr_drn", ftype, RS6000_BUILTIN_SET_FPSCR_DRN); + ftype = 
build_function_type_list (void_type_node, intSI_type_node, double_type_node, NULL_TREE); diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index d70b01b8c..7714aacd8 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -163,7 +163,13 @@ UNSPECV_MFTB ; move from time base UNSPECV_NLGR ; non-local goto receiver UNSPECV_MFFS ; Move from FPSCR - UNSPECV_MTFSF ; Move to FPSCR Fields + UNSPECV_MFFSL ; Move from FPSCR light instruction version + UNSPECV_MFFSCRN ; Move from FPSCR float rounding mode + UNSPECV_MFFSCDRN ; Move from FPSCR decimal float rounding mode + UNSPECV_MTFSF ; Move to FPSCR Fields 8 to 15 + UNSPECV_MTFSF_L0W1 ; Move to FPSCR Fields 0 to 7 + UNSPECV_MTFSFB0 ; Set FPSCR Field bit to 0 + UNSPECV_MTFSFB1 ; Set FPSCR Field bit to 1 UNSPECV_SPLIT_STACK_RETURN ; A camouflaged return UNSPECV_SPEC_BARRIER ; Speculation barrier ]) @@ -5823,6 +5829,115 @@ xscvdpuxds %x0,%x1" [(set_attr "type" "fp")]) +(define_insn "rs6000_mtfsb0_si" + [(use (match_operand:SI 0 "short_cint_operand" "n")) + (unspec_volatile:SI [(const_int 0)] UNSPECV_MTFSFB0)] + "TARGET_HARD_FLOAT" + "mtfsb0 %0") + +(define_insn "rs6000_mtfsb1_si" + [(use (match_operand:SI 0 "short_cint_operand" "n")) + (unspec_volatile:SI [(const_int 0)] UNSPECV_MTFSFB1)] + "TARGET_HARD_FLOAT" + "mtfsb1 %0") + +(define_insn "rs6000_mffscrn" + [(set (match_operand:DF 0 "gpc_reg_operand" "=d") + (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSCRN)) + (use (match_operand:DF 1 "gpc_reg_operand" "d"))] + "TARGET_HARD_FLOAT" + "mffscrn %0,%1") + +(define_insn "rs6000_mffscdrn" + [(set (match_operand:DF 0 "gpc_reg_operand" "=d") + (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSCDRN)) + (use (match_operand:DF 1 "gpc_reg_operand" "d"))] + "TARGET_HARD_FLOAT" + "mffscdrn %0,%1") + +(define_expand "rs6000_set_fpscr_rn" + [(match_operand:DI 0 "gpc_reg_operand")] + "TARGET_HARD_FLOAT" +{ + rtx tmp_df = gen_reg_rtx (DFmode); + + /* The floating point rounding control bits are 
FPSCR[62:63]. Put the + new rounding mode bits from operands[0][62:63] into FPSCR[62:63]. */ + if (TARGET_P9_VECTOR) + { + rtx src_df = gen_reg_rtx (DImode); + + src_df = simplify_gen_subreg (DFmode, operands[0], DImode, 0); + emit_insn (gen_rs6000_mffscrn (tmp_df, src_df)); + } + else + { + rtx tmp_rn = gen_reg_rtx (DImode); + rtx tmp_di = gen_reg_rtx (DImode); + + /* Extract new RN mode from operand. */ + emit_insn (gen_anddi3_mask (tmp_rn, operands[0], GEN_INT (0x3))); + + /* Insert new RN mode into FSCPR. */ + emit_insn (gen_rs6000_mffs (tmp_df)); + tmp_di = simplify_gen_subreg (DImode, tmp_df, DFmode, 0); + emit_insn (gen_anddi3_mask (tmp_di, tmp_di, GEN_INT (0xFFFFFFFC))); + emit_insn (gen_iordi3 (tmp_di, tmp_di, tmp_rn)); + + /* Need to write to field k=15. The fields are [0:15]. Hence with L=0, + W=0, FLM_i must be equal to 8, 16 = i + 8*(1-W). FLM is an 8 bit + field[0:7]. Need to set the bit that corresponds to the value of i + that you want [0:7]. + */ + tmp_df = simplify_gen_subreg (DFmode, tmp_di, DImode, 0); + emit_insn (gen_rs6000_mtfsf (GEN_INT (0x01), tmp_df)); + } + DONE; +}) + +(define_expand "rs6000_set_fpscr_drn" + [(match_operand:DI 0 "gpc_reg_operand")] + "TARGET_HARD_FLOAT" +{ + rtx tmp_df = gen_reg_rtx (DFmode); + + /* The decimal floating point rounding control bits are FPSCR[29:31]. Put the + new rounding mode bits from operands[0][61:63] into FPSCR[29:31]. */ + + if (TARGET_P9_VECTOR) + { + rtx src_df = gen_reg_rtx (DFmode); + + emit_insn (gen_ashldi3 (operands[0], operands[0], GEN_INT (32))); + src_df = simplify_gen_subreg (DFmode, operands[0], DImode, 0); + emit_insn (gen_rs6000_mffscdrn (tmp_df, src_df)); + } + else + { + rtx tmp_rn = gen_reg_rtx (DImode); + rtx tmp_di = gen_reg_rtx (DImode); + + /* Extract new DRN mode from operand. */ + emit_insn (gen_anddi3_mask (tmp_rn, operands[0], GEN_INT (0x7))); + emit_insn (gen_ashldi3 (tmp_rn, tmp_rn, GEN_INT (32))); + + /* Insert new RN mode into FSCPR. 
*/ + emit_insn (gen_rs6000_mffs (tmp_df)); + tmp_di = simplify_gen_subreg (DImode, tmp_df, DFmode, 0); + emit_insn (gen_anddi3_mask (tmp_di, tmp_di, GEN_INT (0xFFF8FFFFFFFF))); + emit_insn (gen_iordi3 (tmp_di, tmp_di, tmp_rn)); + + /* Need to write to field k=7. The fields are [0:15]. Hence with L=0, + W=1, FLM_i must be equal to 7, 16 = i + 8*(1-W). FLM is an 8 bit + field[0:7]. Need to set the bit that corresponds to the value of i + that you want [0:7]. + */ + tmp_df = simplify_gen_subreg (DFmode, tmp_di, DImode, 0); + emit_insn (gen_rs6000_mtfsf_L0W1 (GEN_INT (0x01), tmp_df)); + } + DONE; +}) + ;; Here, we use (set (reg) (unspec:DI [(fix:SI ...)] UNSPEC_FCTIWZ)) ;; rather than (set (subreg:SI (reg)) (fix:SI ...)) ;; because the first makes it clear that operand 0 is not live @@ -13602,6 +13717,31 @@ }) +;; The ISA 3.0 mffsl instruction is a lower latency instruction +;; for reading the FPSCR +(define_insn "rs6000_mffsl0" + [(set (match_operand:DF 0 "gpc_reg_operand" "=d") + (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSL))] + "TARGET_HARD_FLOAT && TARGET_P9_MISC" + "mffsl %0") + +(define_expand "rs6000_mffsl" + [(set (match_operand:DF 0 "gpc_reg_operand") + (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSL))] + "TARGET_HARD_FLOAT && TARGET_P9_MISC" +{ + /* If the low latency mffsl instruction (ISA 3.0) is available use it, + otherwise fall back to the older mffs instruction which does the same + thing but with a little more latency. 
*/ + + if (TARGET_P9_VECTOR) + emit_insn (gen_rs6000_mffsl0 (operands[0])); + else + emit_insn (gen_rs6000_mffs (operands[0])); + + DONE; +}) + (define_insn "rs6000_mffs" [(set (match_operand:DF 0 "gpc_reg_operand" "=d") (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFS))] @@ -13615,6 +13755,13 @@ "TARGET_HARD_FLOAT" "mtfsf %0,%1") +(define_insn "rs6000_mtfsf_L0W1" + [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "i") + (match_operand:DF 1 "gpc_reg_operand" "d")] + UNSPECV_MTFSF_L0W1)] + "TARGET_HARD_FLOAT" + "mtfsf %0,%1,0,1") + ;; Power8 fusion support for fusing an addis instruction with a D-form load of ;; a GPR. The addis instruction must be adjacent to the load, and use the same diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 130f6a671..e654f41be 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -15745,6 +15745,10 @@ uint64_t __builtin_ppc_get_timebase (); unsigned long __builtin_ppc_mftb (); __ibm128 __builtin_unpack_ibm128 (__ibm128, int); __ibm128 __builtin_pack_ibm128 (double, double); +double __builtin_mffs(void); +void __builtin_mtfsb0(const int); +void __builtin_mtfsb1(const int); +void __builtin_set_fpscr_rn(int); @end smallexample The @code{__builtin_ppc_get_timebase} and @code{__builtin_ppc_mftb} @@ -15753,7 +15757,19 @@ functions generate instructions to read the Time Base Register. The instructions and always returns the 64 bits of the Time Base Register. The @code{__builtin_ppc_mftb} function always generates one instruction and returns the Time Base Register value as an unsigned long, throwing away -the most significant word on 32-bit environments. +the most significant word on 32-bit environments. The @code{__builtin_mffs} +return the value of the FPSCR register. Note, ISA 3.0 supports the +@code{__builtin_mffsl()} which is a lower latency version of this builtin. The +@code{__builtin_mtfsb0} and @code{__builtin_mtfsb1} take the bit to change +as an argument. The valid bit range is between 0 and 31. 
The builtins map to +the @code{mtfsb0} and @code{mtfsb1} instructions which take the argument and +add 32. Hence these instructions only modify the FPSCR[32:63] bits by +changing the specified bit to a zero or one respectively. The +@code{__builtin_set_fpscr_rn} builtin allows changing both of the floating +point rounding mode bits. The argument is a 2-bit value. The argument can +either be a const int or stored in a variable. The builtin uses the ISA 3.0 +instruction @code{mffscrn} if available, otherwise it reads the FPSCR, masks +the current rounding mode bits out and OR's in the new value. @node Basic PowerPC Built-in Functions Available on ISA 2.05 @subsubsection Basic PowerPC Built-in Functions Available on ISA 2.05 @@ -15789,6 +15805,7 @@ The following built-in functions are available when hardware decimal floating point (@option{-mhard-dfp}) is available: @smallexample +void __builtin_set_fpscr_drn(int); _Decimal64 __builtin_ddedpd (int, _Decimal64); _Decimal128 __builtin_ddedpdq (int, _Decimal128); _Decimal64 __builtin_denbcd (int, _Decimal64); @@ -15803,6 +15820,14 @@ long long __builtin_dxex (_Decimal64); long long __builtin_dxexq (_Decimal128); _Decimal128 __builtin_pack_dec128 (unsigned long long, unsigned long long); unsigned long long __builtin_unpack_dec128 (_Decimal128, int); + +The @code{__builtin_set_fpscr_drn} builtin allows changing the three decimal +floating point rounding mode bits. The argument is a 3-bit value. The +argument can either be a const int or the value can be stored in a variable. +The builtin uses the ISA 3.0 instruction @code{mffscdrn} if available. +Otherwise the builtin reads the FPSCR, masks the current decimal rounding +mode bits out and OR's in the new value. 
+ @end smallexample The following functions require @option{-mhard-float}, @@ -16004,6 +16029,9 @@ int __builtin_dfp_dtstsfi_ov (unsigned int comparison, _Decimal64 value); int __builtin_dfp_dtstsfi_ov (unsigned int comparison, _Decimal128 value); int __builtin_dfp_dtstsfi_ov_dd (unsigned int comparison, _Decimal64 value); int __builtin_dfp_dtstsfi_ov_td (unsigned int comparison, _Decimal128 value); + +double __builtin_mffsl(void); + @end smallexample The @code{__builtin_byte_in_set} function requires a 64-bit environment supporting ISA 3.0 or later. This function returns @@ -16055,6 +16083,12 @@ The @code{__builtin_dfp_dtstsfi_ov_dd} and require that the type of the @code{value} argument be @code{__Decimal64} and @code{__Decimal128} respectively. +The @code{__builtin_mffsl} uses the ISA 3.0 @code{mffsl} instruction to read +the FPSCR. The instruction is a lower latency version of the @code{mffs} +instruction. If the @code{mffsl} instruction is not available, then the +builtin uses the older @code{mffs} instruction to read the FPSCR. 
+ + @node PowerPC AltiVec/VSX Built-in Functions @subsection PowerPC AltiVec/VSX Built-in Functions diff --git a/gcc/testsuite/gcc.target/powerpc/test_fpscr_builtins.c b/gcc/testsuite/gcc.target/powerpc/test_fpscr_builtins.c new file mode 100644 index 000000000..4f77078c6 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/test_fpscr_builtins.c @@ -0,0 +1,282 @@ +/* { dg-do run { target { powerpc64*-*-* && lp64 } } } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-pedantic" } */ + +#include <altivec.h> + +#define DEBUG 1 + +#ifdef DEBUG +#include <stdio.h> +#endif + +#define RN_MASK 0x3LL /* RN field mask */ +#define DRN_MASK 0x700000000LL /* DRN field mask */ + +void abort (void); + +int main () +{ + int i; + int val, bit; + double fpscr_val; + union blah { + double d; + unsigned long long ll; + } conv_val; + + unsigned long long ll_value; + register double f14; + + /* __builtin_set_fpscr_rn() builtin can take a const or a variable + value between 0 and 3 as the argument. + __builtin_set_fpscr_drn() builtin can take a const or a variable + value between 0 and 7 as the argument. + __builtin_mtfsb0 and __builtin_mtfsb1 argument must be a constant + 30 or 31. + */ + + /* Test reading the FPSCR register */ + asm volatile ("mffs %0" : "=f"(f14)); + conv_val.d = f14; + + if (conv_val.d != __builtin_mffs()) + { +#ifdef DEBUG + printf("ERROR, __builtin_mffs() returned 0x%llx, not the expecected value 0x%llx\n", + __builtin_mffs(), conv_val.d); +#else + abort(); +#endif + } + + /* Test float rounding mode builtin with const value argument. 
*/ + __builtin_set_fpscr_rn(3); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & RN_MASK; + + if (ll_value != 3) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_rn(3) returned 0x%llx, not the expecected value 0x%x\n", + ll_value, 3); +#else + abort(); +#endif + } + + val = 2; + __builtin_set_fpscr_rn(val); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & RN_MASK; + + if (ll_value != val) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_rn(val=%d) returned 0x%llx, not the expecected value 0x%x\n", + val, ll_value, val); +#else + abort(); +#endif + } + + /* Reset to 0 for testing */ + val = 0; + __builtin_set_fpscr_rn(val); + + __builtin_mtfsb1(31); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & 0x1LL; + + if (ll_value != 1) + { +#ifdef DEBUG + printf("ERROR, __builtin_mtfsb1(31) did not set the bit to a 1.\n"); +#else + abort(); +#endif + } + + __builtin_mtfsb0(31); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & 0x1LL; + + if (ll_value != 0) + { +#ifdef DEBUG + printf("ERROR, __builtin_mtfsb0(31) did not set the bit to a 0.\n"); +#else + abort(); +#endif + } + + __builtin_mtfsb1(30); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & 0x2LL; + + if (ll_value != 2) + { +#ifdef DEBUG + printf("ERROR, __builtin_mtfsb1(31) did not set the bit to a 1.\n"); +#else + abort(); +#endif + } + + __builtin_mtfsb0(30); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & 0x2LL; + + if (ll_value != 0) + { +#ifdef DEBUG + printf("ERROR, __builtin_mtfsb1(31) did not set the bit to a 0.\n"); +#else + abort(); +#endif + } + + __builtin_mtfsb1(0); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & (0x1LL << (31-0)); + + if (ll_value != (0x1LL << (31-0))) + { +#ifdef DEBUG + printf("ERROR, __builtin_mtfsb1(0) did not set the bit to a 1.\n"); +#else + abort(); +#endif + } + + __builtin_mtfsb0(0); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & (0x1LL << (31-0)); + + if (ll_value != 0) + { 
+#ifdef DEBUG + printf("ERROR, __builtin_mtfsb0(0) did not set the bit to a 0.\n"); +#else + abort(); +#endif + } + + + /* Test builtin float rounding mode with variable as argument. */ + val = 0; + __builtin_set_fpscr_rn(val); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & RN_MASK; + + if (ll_value != val) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_rn(val=%d) did not set rounding mode to %x.\n", + val, val); +#else + abort(); +#endif + } + + val = 3; + __builtin_set_fpscr_rn(val); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & RN_MASK; + + if (ll_value != val) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_rn(val=%d) did not set rounding mode to %x.\n", + val, val); +#else + abort(); +#endif + } + + /* Test builtin decimal float rounding mode with const argument. */ + __builtin_set_fpscr_drn(7); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & DRN_MASK; + + if (ll_value != 0x700000000) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_drn(7) did not set rounding mode to 7.\n"); +#else + abort(); +#endif + } + + __builtin_set_fpscr_drn(2); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & DRN_MASK; + + if (ll_value != 0x200000000) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_drn(2) did not set rounding mode to 2.\n"); +#else + abort(); +#endif + } + + __builtin_set_fpscr_drn(5); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & DRN_MASK; + + if (ll_value != 0x500000000) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_drn(5) did not set rounding mode to 5.\n"); +#else + abort(); +#endif + } + + /* Test builtin decimal float rounding mode with variable as argument. 
*/ + val = 7; + __builtin_set_fpscr_drn(val); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & DRN_MASK; + + if (ll_value != ((unsigned long long)val << 32)) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_drn(val=%d) did not set rounding mode to %d.\n", + val, val); +#else + abort(); +#endif + } + + val = 0; + __builtin_set_fpscr_drn(val); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & DRN_MASK; + + if (ll_value != ((unsigned long long)val << 32)) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_drn(val=%d) did not set rounding mode to %d.\n", + val, val); +#else + abort(); +#endif + } + + val = 2; + __builtin_set_fpscr_drn(val); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & DRN_MASK; + + if (ll_value != ((unsigned long long)val << 32)) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_drn(val=%d) did not set rounding mode to %d.\n", + val, val); +#else + abort(); +#endif + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/test_fpscr_builtins_error.c b/gcc/testsuite/gcc.target/powerpc/test_fpscr_builtins_error.c new file mode 100644 index 000000000..10de0be44 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/test_fpscr_builtins_error.c @@ -0,0 +1,26 @@ +/* { dg-do compile { target powerpc*-*-* } } */ + +#include <altivec.h> + +int main () +{ + + /* Test builin with out of range arguments. Can only test for constant + int arguments. The builtins __builtin_set_fpscr_rn(), + __builtin_set_fpscr_drn() also support a variable as an argument but + can't test variable value at compile time. */ + + __builtin_mtfsb0(-1); /* { dg-error "Argument must be a constant between 0 and 31." } */ + __builtin_mtfsb0(32); /* { dg-error "Argument must be a constant between 0 and 31." } */ + + __builtin_mtfsb1(-1); /* { dg-error "Argument must be a constant between 0 and 31." } */ + __builtin_mtfsb1(32); /* { dg-error "Argument must be a constant between 0 and 31." 
} */ + + __builtin_set_fpscr_rn(-1); /* { dg-error "Argument must be a value between 0 and 3." } */ + __builtin_set_fpscr_rn(4); /* { dg-error "Argument must be a value between 0 and 3." } */ + + __builtin_set_fpscr_drn(-1); /* { dg-error "Argument must be a value between 0 and 7." } */ + __builtin_set_fpscr_drn(8); /* { dg-error "Argument must be a value between 0 and 7." } */ + +} + diff --git a/gcc/testsuite/gcc.target/powerpc/test_mffsl-p9.c b/gcc/testsuite/gcc.target/powerpc/test_mffsl-p9.c new file mode 100644 index 000000000..dc4f863ca --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/test_mffsl-p9.c @@ -0,0 +1,36 @@ +/* { dg-do run { target { powerpc*-*-linux* && { lp64 && p9vector_hw } } } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-options "-pedantic -mcpu=power9" } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ + +#include <altivec.h> + +#ifdef DEBUG +#include <stdio.h> +#endif + +void abort (void); + +int main () +{ + + register double f14; + union blah { + double d; + unsigned long long ll; + } conv_val; + + /* Test reading the FPSCR register. */ + asm volatile ("mffs %0" : "=f"(f14)); + conv_val.d = f14; + + if (conv_val.d != __builtin_mffsl()) + { +#ifdef DEBUG + printf("ERROR, __builtin_mffsl() returned 0x%llx, not the expecected value 0x%llx\n", + __builtin_mffsl(), conv_val.d); +#else + abort(); +#endif + } +}
Hi Carl, On Fri, Aug 17, 2018 at 11:46:06AM -0700, Carl Love wrote: > > In addition to listing > > the builtin, I added a C style comment to describe the builtin a > > little. I don't see any of the other builtins documented like this. > > But I felt some explanation of the builtins were > > helpful. Suggestions > > on a better way to add the comments on the builtins would be > > appreciated. I think this is fine. > * config/rs6000/rs6000-builtin.def: Add definitions for __builtin_mffsl, > __builtin_mtfsb0, __builtin_mtfsb1, __builtin_set_fpscr_rn, > __builtin_set_fpscr_drn. * config/rs6000/rs6000-builtin.def (__builtin_mffsl): New. (__builtin_mtfsb0): New. (__builtin_mtfsb1): New. (__builtin_set_fpscr_rn): New. (__builtin_set_fpscr_drn): New. or * config/rs6000/rs6000-builtin.def (__builtin_mffsl, __builtin_mtfsb0, __builtin_mtfsb1, __builtin_set_fpscr_rn, __builtin_set_fpscr_drn): New. > * config/rs6000.c: Add functions rs6000_expand_mtfsb0_mtfsb1_builtin, > rs6000_expand_set_fpscr_rn_builtin, rs6000_expand_set_fpscr_drn_builtin. Same here (and further on). > Add case statement entries for the new builtins. To what function(s)? > * testsuite/gcc.target/powerpc/test_mffsl-p9.c: New file. > * testsuite/gcc.target/powerpc/test_fpscr_builtins.c: New file. > * testsuite/gcc.target/powerpc/test_fpscr_builtins_error.c: New file. testsuite/ has its own changelog. Entries in there do not include "testsuite/". > --- a/gcc/config/rs6000/rs6000-builtin.def > +++ b/gcc/config/rs6000/rs6000-builtin.def > @@ -2486,11 +2486,34 @@ BU_SPECIAL_X (RS6000_BUILTIN_MFTB, "__builtin_ppc_mftb", > BU_SPECIAL_X (RS6000_BUILTIN_MFFS, "__builtin_mffs", > RS6000_BTM_ALWAYS, RS6000_BTC_MISC) > > +BU_SPECIAL_X (RS6000_BUILTIN_MFFSL, "__builtin_mffsl", > + RS6000_BTM_ALWAYS, RS6000_BTC_MISC) Should this be RS6000_BTM_MISC_P9 (or similar) instead? Same for the other ISA 3.0 ops. 
> +static rtx > +rs6000_expand_mtfsb0_mtfsb1_builtin (enum insn_code icode, tree exp) I'd call this rs6000_expand_mtfsb_builtin, but please use whichever you think is clearest. > + /* Only allow bit numbers 0 to 31. */ > + if (GET_CODE (op0) != CONST_INT || INTVAL (op0) < 0 || INTVAL (op0) > 31) if (!u5bit_cint_operand (op0, VOIDmode)) should do the trick I think. > + { > + error ("Argument must be a constant between 0 and 31."); > + return const0_rtx; > + } > + > + if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) > + op0 = copy_to_mode_reg (mode0, op0); Is this correct? It must be a constant integer already, and if it fails, copying it into a register is surely not the right thing to do. > + /* If the argument is a constant, check the range. Agrument can only be a > + 2-bit value. Unfortunately, can't check the range of the value at > + compile time if the argument is a variable. > + */ > + if (GET_CODE (op0) == CONST_INT && (INTVAL (op0) < 0 || INTVAL (op0) > 3)) const_0_to_3_operand > + /* Builtin not supported on this processor. */ > + return 0; > + > + /* If we got invalid arguments bail out before generating bad rtl. */ > + if (arg0 == error_mark_node) > + return const0_rtx; > + > + /* If the argument is a constant, check the range. Agrument can only be a > + 3-bit value. Unfortunately, can't check the range of the value at > + compile time if the argument is a variable. > + */ > + if (GET_CODE (op0) == CONST_INT && (INTVAL (op0) < 0 || INTVAL (op0) > 7)) (Typo, "argument"). const_0_to_7_operand or u3bit_cint_operand (both exist, and they are identical. Hrm.) 
> > @@ -16370,6 +16497,30 @@ rs6000_init_builtins (void) > ftype = build_function_type_list (double_type_node, NULL_TREE); > def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS); > > + ftype = build_function_type_list (double_type_node, NULL_TREE); > + def_builtin ("__builtin_mffsl", ftype, RS6000_BUILTIN_MFFSL); > + > + ftype = build_function_type_list (void_type_node, > + intSI_type_node, > + NULL_TREE); > + > + def_builtin ("__builtin_mtfsb0", ftype, RS6000_BUILTIN_MTFSB0_SI); No blank line between ftype and def_builtin please? > +(define_insn "rs6000_mtfsb0_si" Why the _si? Won't just rs6000_mtfsb0 do? > + [(use (match_operand:SI 0 "short_cint_operand" "n")) > + (unspec_volatile:SI [(const_int 0)] UNSPECV_MTFSFB0)] UNSPECV_MTFSB0 please. operands[0] should be an argument of the unspec... so something like (define_insn "rs6000_mtfsb0" [(unspec_volatile [(match_operand:SI 0 "u5bit_cint_operand" "n")] UNSPECV_MTFSB0)] "TARGET_HARD_FLOAT" "mtfsb0 %0") (and you should set the "type" attribute to something useful, ideally). > +(define_insn "rs6000_mffscrn" > + [(set (match_operand:DF 0 "gpc_reg_operand" "=d") > + (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSCRN)) > + (use (match_operand:DF 1 "gpc_reg_operand" "d"))] > + "TARGET_HARD_FLOAT" > + "mffscrn %0,%1") (define_insn "rs6000_mffscrn" [(set (match_operand:DF 0 "gpc_reg_operand" "=d") (unspec_volatile:DF [(match_operand:DF 1 "gpc_reg_operand" "d")] UNSPECV_MFFSCRN))] "TARGET_HARD_FLOAT" "mffscrn %0,%1") (you also need a check for ISA 3.0). > +(define_expand "rs6000_set_fpscr_rn" > + [(match_operand:DI 0 "gpc_reg_operand")] > + "TARGET_HARD_FLOAT" > +{ > + rtx tmp_df = gen_reg_rtx (DFmode); > + > + /* The floating point rounding control bits are FPSCR[62:63]. Put the > + new rounding mode bits from operands[0][62:63] into FPSCR[62:63]. */ > + if (TARGET_P9_VECTOR) It does not depend on vector stuff (say, you use -mcpu=power9 -mno-altivec). 
> + { > + rtx tmp_rn = gen_reg_rtx (DImode); > + rtx tmp_di = gen_reg_rtx (DImode); > + > + /* Extract new RN mode from operand. */ > + emit_insn (gen_anddi3_mask (tmp_rn, operands[0], GEN_INT (0x3))); This doesn't work for -m32 afaics. Either disallow it, or make it work? > + /* Insert new RN mode into FSCPR. */ > + emit_insn (gen_rs6000_mffs (tmp_df)); > + tmp_di = simplify_gen_subreg (DImode, tmp_df, DFmode, 0); > + emit_insn (gen_anddi3_mask (tmp_di, tmp_di, GEN_INT (0xFFFFFFFC))); > + emit_insn (gen_iordi3 (tmp_di, tmp_di, tmp_rn)); > + > + /* Need to write to field k=15. The fields are [0:15]. Hence with L=0, > + W=0, FLM_i must be equal to 8, 16 = i + 8*(1-W). FLM is an 8 bit > + field[0:7]. Need to set the bit that corresponds to the value of i > + that you want [0:7]. > + */ (The */ should not go on a new line). The derivation isn't super clear to me, but 1 is the correct mask, yes. > + tmp_df = simplify_gen_subreg (DFmode, tmp_di, DImode, 0); > + emit_insn (gen_rs6000_mtfsf (GEN_INT (0x01), tmp_df)); > +(define_expand "rs6000_set_fpscr_drn" > + /* Insert new RN mode into FSCPR. */ > + emit_insn (gen_rs6000_mffs (tmp_df)); > + tmp_di = simplify_gen_subreg (DImode, tmp_df, DFmode, 0); > + emit_insn (gen_anddi3_mask (tmp_di, tmp_di, GEN_INT (0xFFF8FFFFFFFF))); > + emit_insn (gen_iordi3 (tmp_di, tmp_di, tmp_rn)); Why is this masking off the top 16 bits of the original contents? That seems wrong. > +;; The ISA 3.0 mffsl instruction is a lower latency instruction > +;; for reading the FPSCR For reading _a part_ of the FPSCR, the other bits are set to 0 in the result. This matters, because otherwise we should just use __builtin_mffs always; but it does not do the same thing, so we cannot. > +(define_insn "rs6000_mffsl0" > + [(set (match_operand:DF 0 "gpc_reg_operand" "=d") > + (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSL))] > + "TARGET_HARD_FLOAT && TARGET_P9_MISC" > + "mffsl %0") (Please use a better name than that "0"... We have used "_hw" before). 
> +(define_expand "rs6000_mffsl" > + [(set (match_operand:DF 0 "gpc_reg_operand") > + (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSL))] > + "TARGET_HARD_FLOAT && TARGET_P9_MISC" You don't want the latter... > +{ > + /* If the low latency mffsl instruction (ISA 3.0) is available use it, > + otherwise fall back to the older mffs instruction which does the same > + thing but with a little more latency. */ > + > + if (TARGET_P9_VECTOR) ... but you want it here (instead of the _VECTOR). > + emit_insn (gen_rs6000_mffsl0 (operands[0])); > + else > + emit_insn (gen_rs6000_mffs (operands[0])); > +(define_insn "rs6000_mtfsf_L0W1" > + [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "i") > + (match_operand:DF 1 "gpc_reg_operand" "d")] > + UNSPECV_MTFSF_L0W1)] > + "TARGET_HARD_FLOAT" > + "mtfsf %0,%1,0,1") Maybe name it rs6000_mtsfs_high? L0W1 reads like "low" :-) > --- a/gcc/doc/extend.texi > +++ b/gcc/doc/extend.texi > @@ -15745,6 +15745,10 @@ uint64_t __builtin_ppc_get_timebase (); > unsigned long __builtin_ppc_mftb (); > __ibm128 __builtin_unpack_ibm128 (__ibm128, int); > __ibm128 __builtin_pack_ibm128 (double, double); > +double __builtin_mffs(void); > +void __builtin_mtfsb0(const int); > +void __builtin_mtfsb1(const int); > +void __builtin_set_fpscr_rn(int); > @end smallexample (space before opening paren) > +++ b/gcc/testsuite/gcc.target/powerpc/test_fpscr_builtins.c > @@ -0,0 +1,282 @@ > +/* { dg-do run { target { powerpc64*-*-* && lp64 } } } */ > +/* { dg-require-effective-target lp64 } */ You have "lp64" in the selector already, repeating it here doesn't do anything. > +/* { dg-options "-pedantic" } */ Why is this? Segher
Segher: This is an updated patch to add the following builtins: __builtin_mffsl, __builtin_set_fpscr_rn, __builtin_set_fpscr_drn, __builtin_mtfsb0, __builtin_mtfsb1. I have addressed your comments with regards to the change log entries. I have also addressed the various comments about the code, function names etc. I have also addressed your comment about the builtins not working in 32-bit mode. The builtins __builtin_mffsl, __builtin_set_fpscr_rn, __builtin_mtfsb0, __builtin_mtfsb1 are supported in 32-bit mode. Builtin __builtin_set_fpscr_drn is only supported in 64-bit mode. Note, rs6000_mffsl, builtins __builtin_set_fpscr_rn, __builtin_set_fpscr_drn use the ISA 3.0 instructions if compiling for ISA 3.0 or beyond. Otherwise, they use logical operations to emulate the ISA 3.0 instructions. The tests for the __builtin_set_fpscr_drn builtin were separated into separate files since they are only supported in 64-bit mode. The patch has been tested on powerpc64le-unknown-linux-gnu (Power 8 LE 64-bit mode only) powerpc64-unknown-linux-gnu (Power 8 BE 32-bit and 64-bit modes) powerpc64le-unknown-linux-gnu (Power 9 LE 64-bit mode only) With no regressions. Please let me know if the patch looks OK for trunk. Carl Love -------------------------------------------------------------------- gcc/ChangeLog: 2018-09-17 Carl Love <cel@us.ibm.com> * config/rs6000/rs6000-builtin.def (__builtin_mffsl): New. (__builtin_mtfsb0): New. (__builtin_mtfsb1): New. ( __builtin_set_fpscr_rn): New. (__builtin_set_fpscr_drn): New. * config/rs6000.c (rs6000_expand_mtfsb0_mtfsb1_builtin): Add. (rs6000_expand_set_fpscr_rn_builtin): Add. (rs6000_expand_set_fpscr_drn_builtin): Add. (rs6000_expand_builtin): Add case statement entries for RS6000_BUILTIN_MTFSB0, RS6000_BUILTIN_MTFSB1, RS6000_BUILTIN_SET_FPSCR_RN, RS6000_BUILTIN_SET_FPSCR_DRN, RS6000_BUILTIN_MFFSL. 
(rs6000_init_builtins): Add ftype initialization and def_builtin calls for __builtin_mffsl, __builtin_mtfsb0, __builtin_mtfsb1, __builtin_set_fpscr_rn, __builtin_set_fpscr_drn. * config/rs6000.md (rs6000_mtfsb0, rs6000_mtfsb1, rs6000_mffscrn, rs6000_mffscdrn): Add define_insn. (rs6000_set_fpscr_rn, rs6000_set_fpscr_drn): Add define_expand. * doc/extend.texi: Add documentation for the builtins. gcc/testsuite/ChangeLog: 2018-09-17 Carl Love <cel@us.ibm.com> * gcc.target/powerpc/test_mffsl-p9.c: New file. * gcc.target/powerpc/test_fpscr_rn_builtin.c: New file. * gcc.target/powerpc/test_fpscr_drn_builtin.c: New file. * gcc.target/powerpc/test_fpscr_rn_builtin_error.c: New file. * gcc.target/powerpc/test_fpscr_drn_builtin_error.c: New file. --- gcc/config/rs6000/rs6000-builtin.def | 23 +++ gcc/config/rs6000/rs6000.c | 148 ++++++++++++++++ gcc/config/rs6000/rs6000.md | 160 ++++++++++++++++- gcc/doc/extend.texi | 36 +++- .../gcc.target/powerpc/test_fpscr_drn_builtin.c | 116 +++++++++++++ .../powerpc/test_fpscr_drn_builtin_error.c | 17 ++ .../gcc.target/powerpc/test_fpscr_rn_builtin.c | 190 +++++++++++++++++++++ .../powerpc/test_fpscr_rn_builtin_error.c | 22 +++ gcc/testsuite/gcc.target/powerpc/test_mffsl.c | 34 ++++ 9 files changed, 744 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin.c create mode 100644 gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin_error.c create mode 100644 gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin.c create mode 100644 gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin_error.c create mode 100644 gcc/testsuite/gcc.target/powerpc/test_mffsl.c diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index f799681..9e960eb 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -2486,11 +2486,34 @@ BU_SPECIAL_X (RS6000_BUILTIN_MFTB, "__builtin_ppc_mftb", BU_SPECIAL_X (RS6000_BUILTIN_MFFS, 
"__builtin_mffs", RS6000_BTM_ALWAYS, RS6000_BTC_MISC) +BU_SPECIAL_X (RS6000_BUILTIN_MFFSL, "__builtin_mffsl", + RS6000_BTM_ALWAYS, RS6000_BTC_MISC) + RS6000_BUILTIN_X (RS6000_BUILTIN_MTFSF, "__builtin_mtfsf", RS6000_BTM_ALWAYS, RS6000_BTC_MISC | RS6000_BTC_UNARY | RS6000_BTC_VOID, CODE_FOR_rs6000_mtfsf) +RS6000_BUILTIN_X (RS6000_BUILTIN_MTFSB0, "__builtin_mtfsb0", + RS6000_BTM_ALWAYS, + RS6000_BTC_MISC | RS6000_BTC_UNARY, + CODE_FOR_rs6000_mtfsb0) + +RS6000_BUILTIN_X (RS6000_BUILTIN_MTFSB1, "__builtin_mtfsb1", + RS6000_BTM_ALWAYS, + RS6000_BTC_MISC | RS6000_BTC_UNARY, + CODE_FOR_rs6000_mtfsb1) + +RS6000_BUILTIN_X (RS6000_BUILTIN_SET_FPSCR_RN, "__builtin_set_fpscr_rn", + RS6000_BTM_ALWAYS, + RS6000_BTC_MISC | RS6000_BTC_UNARY, + CODE_FOR_rs6000_set_fpscr_rn) + +RS6000_BUILTIN_X (RS6000_BUILTIN_SET_FPSCR_DRN, "__builtin_set_fpscr_drn", + RS6000_BTM_ALWAYS, + RS6000_BTC_MISC | RS6000_BTM_64BIT | RS6000_BTC_UNARY, + CODE_FOR_rs6000_set_fpscr_drn) + BU_SPECIAL_X (RS6000_BUILTIN_CPU_INIT, "__builtin_cpu_init", RS6000_BTM_ALWAYS, RS6000_BTC_MISC) diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 2b736d7..2c39f80 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -13600,6 +13600,113 @@ rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp) } static rtx +rs6000_expand_mtfsb_builtin (enum insn_code icode, tree exp) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + rtx op0 = expand_normal (arg0); + + if (icode == CODE_FOR_nothing) + /* Builtin not supported on this processor. */ + return 0; + + /* If we got invalid arguments bail out before generating bad rtl. */ + if (arg0 == error_mark_node) + return const0_rtx; + + /* Only allow bit numbers 0 to 31. */ + if (!u5bit_cint_operand (op0, VOIDmode)) + { + error ("Argument must be a constant between 0 and 31."); + return const0_rtx; + } + + pat = GEN_FCN (icode) (op0); + if (! 
pat) + return const0_rtx; + emit_insn (pat); + + return NULL_RTX; +} + +static rtx +rs6000_expand_set_fpscr_rn_builtin (enum insn_code icode, tree exp) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + rtx op0 = expand_normal (arg0); + machine_mode mode0 = insn_data[icode].operand[0].mode; + + if (icode == CODE_FOR_nothing) + /* Builtin not supported on this processor. */ + return 0; + + /* If we got invalid arguments bail out before generating bad rtl. */ + if (arg0 == error_mark_node) + return const0_rtx; + + /* If the argument is a constant, check the range. Argument can only be a + 2-bit value. Unfortunately, can't check the range of the value at + compile time if the argument is a variable. */ + if (GET_CODE (op0) == CONST_INT && !const_0_to_3_operand(op0, VOIDmode)) + { + error ("Argument must be a value between 0 and 3."); + return const0_rtx; + } + + if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + pat = GEN_FCN (icode) (op0); + if (! pat) + return const0_rtx; + emit_insn (pat); + + return NULL_RTX; +} +static rtx +rs6000_expand_set_fpscr_drn_builtin (enum insn_code icode, tree exp) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + rtx op0 = expand_normal (arg0); + machine_mode mode0 = insn_data[icode].operand[0].mode; + + if (TARGET_32BIT) + /* Builtin not supported in 32-bit mode. */ + fatal_error (input_location, + "__builtin_set_fpscr_drn is not supported in 32-bit mode."); + + if (icode == CODE_FOR_nothing) + /* Builtin not supported on this processor. */ + return 0; + + /* If we got invalid arguments bail out before generating bad rtl. */ + if (arg0 == error_mark_node) + return const0_rtx; + + /* If the argument is a constant, check the range. Agrument can only be a + 3-bit value. Unfortunately, can't check the range of the value at + compile time if the argument is a variable. 
+ */ + if (GET_CODE (op0) == CONST_INT && !const_0_to_7_operand(op0, VOIDmode )) + { + error ("Argument must be a value between 0 and 7."); + return const0_rtx; + } + + if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + pat = GEN_FCN (icode) (op0); + if (! pat) + return const0_rtx; + emit_insn (pat); + + return NULL_RTX; +} + +static rtx rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target) { rtx pat; @@ -16069,6 +16176,24 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, case RS6000_BUILTIN_MFFS: return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target); + case RS6000_BUILTIN_MTFSB0: + return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb0, exp); + + case RS6000_BUILTIN_MTFSB1: + return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb1, exp); + + case RS6000_BUILTIN_SET_FPSCR_RN: + return rs6000_expand_set_fpscr_rn_builtin (CODE_FOR_rs6000_set_fpscr_rn, + exp); + + case RS6000_BUILTIN_SET_FPSCR_DRN: + return + rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn, + exp); + + case RS6000_BUILTIN_MFFSL: + return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target); + case RS6000_BUILTIN_MTFSF: return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp); @@ -16452,6 +16577,29 @@ rs6000_init_builtins (void) ftype = build_function_type_list (double_type_node, NULL_TREE); def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS); + ftype = build_function_type_list (double_type_node, NULL_TREE); + def_builtin ("__builtin_mffsl", ftype, RS6000_BUILTIN_MFFSL); + + ftype = build_function_type_list (void_type_node, + intSI_type_node, + NULL_TREE); + def_builtin ("__builtin_mtfsb0", ftype, RS6000_BUILTIN_MTFSB0); + + ftype = build_function_type_list (void_type_node, + intSI_type_node, + NULL_TREE); + def_builtin ("__builtin_mtfsb1", ftype, RS6000_BUILTIN_MTFSB1); + + ftype = build_function_type_list (void_type_node, + intDI_type_node, 
+ NULL_TREE); + def_builtin ("__builtin_set_fpscr_rn", ftype, RS6000_BUILTIN_SET_FPSCR_RN); + + ftype = build_function_type_list (void_type_node, + intDI_type_node, + NULL_TREE); + def_builtin ("__builtin_set_fpscr_drn", ftype, RS6000_BUILTIN_SET_FPSCR_DRN); + ftype = build_function_type_list (void_type_node, intSI_type_node, double_type_node, NULL_TREE); diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 68ba5fd..e2c0142 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -164,7 +164,13 @@ UNSPECV_MFTB ; move from time base UNSPECV_NLGR ; non-local goto receiver UNSPECV_MFFS ; Move from FPSCR - UNSPECV_MTFSF ; Move to FPSCR Fields + UNSPECV_MFFSL ; Move from FPSCR light instruction version + UNSPECV_MFFSCRN ; Move from FPSCR float rounding mode + UNSPECV_MFFSCDRN ; Move from FPSCR decimal float rounding mode + UNSPECV_MTFSF ; Move to FPSCR Fields 8 to 15 + UNSPECV_MTFSF_HI ; Move to FPSCR Fields 0 to 7 + UNSPECV_MTFSB0 ; Set FPSCR Field bit to 0 + UNSPECV_MTFSB1 ; Set FPSCR Field bit to 1 UNSPECV_SPLIT_STACK_RETURN ; A camouflaged return UNSPECV_SPEC_BARRIER ; Speculation barrier ]) @@ -5824,6 +5830,115 @@ xscvdpuxds %x0,%x1" [(set_attr "type" "fp")]) +(define_insn "rs6000_mtfsb0" + [(unspec_volatile [(match_operand:SI 0 "u5bit_cint_operand" "n")] + UNSPECV_MTFSB0)] + "TARGET_HARD_FLOAT" + "mtfsb0 %0" + [(set_attr "type" "fp")]) + +(define_insn "rs6000_mtfsb1" + [(unspec_volatile [(match_operand:SI 0 "u5bit_cint_operand" "n")] + UNSPECV_MTFSB1)] + "TARGET_HARD_FLOAT" + "mtfsb1 %0" + [(set_attr "type" "fp")]) + +(define_insn "rs6000_mffscrn" + [(set (match_operand:DF 0 "gpc_reg_operand" "=d") + (unspec_volatile:DF [(match_operand:DF 1 "gpc_reg_operand" "d")] + UNSPECV_MFFSCRN))] + "TARGET_P9_MISC" + "mffscrn %0,%1" + [(set_attr "type" "fp")]) + +(define_insn "rs6000_mffscdrn" + [(set (match_operand:DF 0 "gpc_reg_operand" "=d") + (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSCDRN)) + (use (match_operand:DF 1 
"gpc_reg_operand" "d"))] + "TARGET_P9_MISC" + "mffscdrn %0,%1" + [(set_attr "type" "fp")]) + +(define_expand "rs6000_set_fpscr_rn" + [(match_operand:DI 0 "gpc_reg_operand")] + "TARGET_HARD_FLOAT" +{ + rtx tmp_df = gen_reg_rtx (DFmode); + + /* The floating point rounding control bits are FPSCR[62:63]. Put the + new rounding mode bits from operands[0][62:63] into FPSCR[62:63]. */ + if (TARGET_P9_MISC) + { + rtx src_df = gen_reg_rtx (DImode); + + src_df = simplify_gen_subreg (DFmode, operands[0], DImode, 0); + emit_insn (gen_rs6000_mffscrn (tmp_df, src_df)); + } + else + { + rtx tmp_rn = gen_reg_rtx (DImode); + rtx tmp_di = gen_reg_rtx (DImode); + + /* Extract new RN mode from operand. */ + emit_insn (gen_anddi3 (tmp_rn, operands[0], GEN_INT (0x3))); + + /* Insert new RN mode into FSCPR. */ + emit_insn (gen_rs6000_mffs (tmp_df)); + tmp_di = simplify_gen_subreg (DImode, tmp_df, DFmode, 0); + emit_insn (gen_anddi3 (tmp_di, tmp_di, GEN_INT (0xFFFFFFFC))); + emit_insn (gen_iordi3 (tmp_di, tmp_di, tmp_rn)); + + /* Need to write to field k=15. The fields are [0:15]. Hence with L=0, + W=0, FLM_i must be equal to 8, 16 = i + 8*(1-W). FLM is an 8 bit + field[0:7]. Need to set the bit that corresponds to the value of i + that you want [0:7]. */ + tmp_df = simplify_gen_subreg (DFmode, tmp_di, DImode, 0); + emit_insn (gen_rs6000_mtfsf (GEN_INT (0x01), tmp_df)); + } + DONE; +}) + +(define_expand "rs6000_set_fpscr_drn" + [(match_operand:DI 0 "gpc_reg_operand")] + "TARGET_HARD_FLOAT" +{ + rtx tmp_df = gen_reg_rtx (DFmode); + + /* The decimal floating point rounding control bits are FPSCR[29:31]. Put the + new rounding mode bits from operands[0][61:63] into FPSCR[29:31]. 
*/ + if (TARGET_P9_MISC) + { + rtx src_df = gen_reg_rtx (DFmode); + + emit_insn (gen_ashldi3 (operands[0], operands[0], GEN_INT (32))); + src_df = simplify_gen_subreg (DFmode, operands[0], DImode, 0); + emit_insn (gen_rs6000_mffscdrn (tmp_df, src_df)); + } + else + { + rtx tmp_rn = gen_reg_rtx (DImode); + rtx tmp_di = gen_reg_rtx (DImode); + + /* Extract new DRN mode from operand. */ + emit_insn (gen_anddi3 (tmp_rn, operands[0], GEN_INT (0x7))); + emit_insn (gen_ashldi3 (tmp_rn, tmp_rn, GEN_INT (32))); + + /* Insert new RN mode into FSCPR. */ + emit_insn (gen_rs6000_mffs (tmp_df)); + tmp_di = simplify_gen_subreg (DImode, tmp_df, DFmode, 0); + emit_insn (gen_anddi3 (tmp_di, tmp_di, GEN_INT (0xFFFFFFF8FFFFFFFF))); + emit_insn (gen_iordi3 (tmp_di, tmp_di, tmp_rn)); + + /* Need to write to field 7. The fields are [0:15]. The equation to + select the field is i + 8*(1-W). Hence with L=0 and W=1, need to set + i to 0x1 to get field 7 where i selects the field. */ + tmp_df = simplify_gen_subreg (DFmode, tmp_di, DImode, 0); + emit_insn (gen_rs6000_mtfsf_hi (GEN_INT (0x01), tmp_df)); + } + DONE; +}) + ;; Here, we use (set (reg) (unspec:DI [(fix:SI ...)] UNSPEC_FCTIWZ)) ;; rather than (set (subreg:SI (reg)) (fix:SI ...)) ;; because the first makes it clear that operand 0 is not live @@ -13603,6 +13718,42 @@ }) +;; The ISA 3.0 mffsl instruction is a lower latency instruction +;; for reading bits [29:31], [45:51] and [56:63] of the FPSCR. +(define_insn "rs6000_mffsl_hw" + [(set (match_operand:DF 0 "gpc_reg_operand" "=d") + (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSL))] + "TARGET_HARD_FLOAT" + "mffsl %0") + +(define_expand "rs6000_mffsl" + [(set (match_operand:DF 0 "gpc_reg_operand") + (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSL))] + "TARGET_HARD_FLOAT" +{ + /* If the low latency mffsl instruction (ISA 3.0) is available use it, + otherwise fall back to the older mffs instruction to emulate the mffsl + instruction. 
*/ + + if (TARGET_P9_MISC) + emit_insn (gen_rs6000_mffsl_hw (operands[0])); + else + { + rtx tmp_di = gen_reg_rtx (DImode); + rtx tmp_df = gen_reg_rtx (DFmode); + + /* The mffs instruction reads the entire FPSCR. Emulate the mffsl + instruction using the mffs instruction and masking off the the bits + the mmsl instruciton actually reads, . */ + emit_insn (gen_rs6000_mffs (tmp_df)); + tmp_di = simplify_gen_subreg (DImode, tmp_df, DFmode, 0); + emit_insn (gen_anddi3 (tmp_di, tmp_di, GEN_INT (0x70007F0FFLL))); + + operands[0] = simplify_gen_subreg (DFmode, tmp_di, DImode, 0); + } + DONE; +}) + (define_insn "rs6000_mffs" [(set (match_operand:DF 0 "gpc_reg_operand" "=d") (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFS))] @@ -13616,6 +13767,13 @@ "TARGET_HARD_FLOAT" "mtfsf %0,%1") +(define_insn "rs6000_mtfsf_hi" + [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "i") + (match_operand:DF 1 "gpc_reg_operand" "d")] + UNSPECV_MTFSF_HI)] + "TARGET_HARD_FLOAT" + "mtfsf %0,%1,0,1") + ;; Power8 fusion support for fusing an addis instruction with a D-form load of ;; a GPR. The addis instruction must be adjacent to the load, and use the same diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 7b471ec..a6ff134 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -15632,6 +15632,10 @@ uint64_t __builtin_ppc_get_timebase (); unsigned long __builtin_ppc_mftb (); __ibm128 __builtin_unpack_ibm128 (__ibm128, int); __ibm128 __builtin_pack_ibm128 (double, double); +double __builtin_mffs (void); +void __builtin_mtfsb0 (const int); +void __builtin_mtfsb1 (const int); +void __builtin_set_fpscr_rn (int); @end smallexample The @code{__builtin_ppc_get_timebase} and @code{__builtin_ppc_mftb} @@ -15640,7 +15644,19 @@ functions generate instructions to read the Time Base Register. The instructions and always returns the 64 bits of the Time Base Register. 
The @code{__builtin_ppc_mftb} function always generates one instruction and returns the Time Base Register value as an unsigned long, throwing away -the most significant word on 32-bit environments. +the most significant word on 32-bit environments. The @code{__builtin_mffs} +return the value of the FPSCR register. Note, ISA 3.0 supports the +@code{__builtin_mffsl()} which is a lower latency version of this builtin. The +@code{__builtin_mtfsb0} and @code{__builtin_mtfsb1} take the bit to change +as an argument. The valid bit range is between 0 and 31. The builtins map to +the @code{mtfsb0} and @code{mtfsb1} instructions which take the argument and +add 32. Hence these instructions only modify the FPSCR[32:63] bits by +changing the specified bit to a zero or one respectively. The +@code{__builtin_set_fpscr_rn} builtin allows changing both of the floating +point rounding mode bits. The argument is a 2-bit value. The argument can +either be a const int or stored in a variable. The builtin uses the ISA 3.0 +instruction @code{mffscrn} if available, otherwise it reads the FPSCR, masks +the current rounding mode bits out and OR's in the new value. @node Basic PowerPC Built-in Functions Available on ISA 2.05 @subsubsection Basic PowerPC Built-in Functions Available on ISA 2.05 @@ -15676,6 +15692,7 @@ The following built-in functions are available when hardware decimal floating point (@option{-mhard-dfp}) is available: @smallexample +void __builtin_set_fpscr_drn(int); _Decimal64 __builtin_ddedpd (int, _Decimal64); _Decimal128 __builtin_ddedpdq (int, _Decimal128); _Decimal64 __builtin_denbcd (int, _Decimal64); @@ -15690,6 +15707,14 @@ long long __builtin_dxex (_Decimal64); long long __builtin_dxexq (_Decimal128); _Decimal128 __builtin_pack_dec128 (unsigned long long, unsigned long long); unsigned long long __builtin_unpack_dec128 (_Decimal128, int); + +The @code{__builtin_set_fpscr_drn} builtin allows changing the three decimal +floating point rounding mode bits. 
The argument is a 3-bit value. The +argument can either be a const int or the value can be stored in a variable. +The builtin uses the ISA 3.0 instruction @code{mffscdrn} if available. +Otherwise the builtin reads the FPSCR, masks the current decimal rounding +mode bits out and OR's in the new value. + @end smallexample The following functions require @option{-mhard-float}, @@ -15891,6 +15916,9 @@ int __builtin_dfp_dtstsfi_ov (unsigned int comparison, _Decimal64 value); int __builtin_dfp_dtstsfi_ov (unsigned int comparison, _Decimal128 value); int __builtin_dfp_dtstsfi_ov_dd (unsigned int comparison, _Decimal64 value); int __builtin_dfp_dtstsfi_ov_td (unsigned int comparison, _Decimal128 value); + +double __builtin_mffsl(void); + @end smallexample The @code{__builtin_byte_in_set} function requires a 64-bit environment supporting ISA 3.0 or later. This function returns @@ -15942,6 +15970,12 @@ The @code{__builtin_dfp_dtstsfi_ov_dd} and require that the type of the @code{value} argument be @code{__Decimal64} and @code{__Decimal128} respectively. +The @code{__builtin_mffsl} uses the ISA 3.0 @code{mffsl} instruction to read +the FPSCR. The instruction is a lower latency version of the @code{mffs} +instruction. If the @code{mffsl} instruction is not available, then the +builtin uses the older @code{mffs} instruction to read the FPSCR. 
+ + @node PowerPC AltiVec/VSX Built-in Functions @subsection PowerPC AltiVec/VSX Built-in Functions diff --git a/gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin.c b/gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin.c new file mode 100644 index 0000000..13933c3 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin.c @@ -0,0 +1,116 @@ +/* { dg-do run { target { powerpc*-*-linux* && lp64 } } } */ +/* { dg-options "-std=c99" } */ + +#include <altivec.h> + +#ifdef DEBUG +#include <stdio.h> +#endif + +#define DRN_MASK 0x700000000LL /* DRN field mask */ + +void abort (void); + +int main () +{ + int i; + int val, bit; + double fpscr_val; + union blah { + double d; + unsigned long long ll; + } conv_val; + + unsigned long long ll_value; + register double f14; + + /* __builtin_set_fpscr_drn() builtin can take a const or a variable + value between 0 and 7 as the argument. + */ + + /* Test builtin decimal float rounding mode with const argument. */ + __builtin_set_fpscr_drn(7); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & DRN_MASK; + + if (ll_value != 0x700000000) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_drn(7) did not set rounding mode to 7.\n"); +#else + abort(); +#endif + } + + __builtin_set_fpscr_drn(2); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & DRN_MASK; + + if (ll_value != 0x200000000) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_drn(2) did not set rounding mode to 2.\n"); +#else + abort(); +#endif + } + + __builtin_set_fpscr_drn(5); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & DRN_MASK; + + if (ll_value != 0x500000000) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_drn(5) did not set rounding mode to 5.\n"); +#else + abort(); +#endif + } + + /* Test builtin decimal float rounding mode with variable as argument. 
*/ + val = 7; + __builtin_set_fpscr_drn(val); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & DRN_MASK; + + if (ll_value != ((unsigned long long)val << 32)) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_drn(val=%d) did not set rounding mode to %d.\n", + val, val); +#else + abort(); +#endif + } + + val = 0; + __builtin_set_fpscr_drn(val); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & DRN_MASK; + + if (ll_value != ((unsigned long long)val << 32)) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_drn(val=%d) did not set rounding mode to %d.\n", + val, val); +#else + abort(); +#endif + } + + val = 2; + __builtin_set_fpscr_drn(val); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & DRN_MASK; + + if (ll_value != ((unsigned long long)val << 32)) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_drn(val=%d) did not set rounding mode to %d.\n", + val, val); +#else + abort(); +#endif + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin_error.c b/gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin_error.c new file mode 100644 index 0000000..04e9f03 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin_error.c @@ -0,0 +1,17 @@ +/* { dg-do compile { target powerpc*-*-* } } */ +/* { dg-options "-std=c99" } */ + +#include <altivec.h> + +int main () +{ + + /* Test builin with out of range arguments. The builtin + __builtin_set_fpscr_drn() also support a variable as an argument but + can't test variable value at compile time. 
*/ + + __builtin_set_fpscr_drn(-1); /* { dg-error "Argument must be a value between 0 and 7" } */ + __builtin_set_fpscr_drn(8); /* { dg-error "Argument must be a value between 0 and 7" } */ + +} + diff --git a/gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin.c b/gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin.c new file mode 100644 index 0000000..2a15585 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin.c @@ -0,0 +1,190 @@ +/* { dg-do run { target { powerpc*-*-* } } } */ +/* { dg-options "-std=c99" } */ + +#include <altivec.h> + +#ifdef DEBUG +#include <stdio.h> +#endif + +#define RN_MASK 0x3LL /* RN field mask */ + +void abort (void); + +int main () +{ + int i; + int val, bit; + double fpscr_val; + union blah { + double d; + unsigned long long ll; + } conv_val; + + unsigned long long ll_value; + register double f14; + + /* __builtin_set_fpscr_rn() builtin can take a const or a variable + value between 0 and 3 as the argument. + __builtin_mtfsb0 and __builtin_mtfsb1 argument must be a constant + 30 or 31. + */ + + /* Test reading the FPSCR register */ + __asm __volatile ("mffs %0" : "=f"(f14)); + conv_val.d = f14; + + if (conv_val.d != __builtin_mffs()) + { +#ifdef DEBUG + printf("ERROR, __builtin_mffs() returned 0x%llx, not the expecected value 0x%llx\n", + __builtin_mffs(), conv_val.d); +#else + abort(); +#endif + } + + /* Test float rounding mode builtin with const value argument. 
*/ + __builtin_set_fpscr_rn(3); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & RN_MASK; + + if (ll_value != 3) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_rn(3) returned 0x%llx, not the expecected value 0x%x\n", + ll_value, 3); +#else + abort(); +#endif + } + + val = 2; + __builtin_set_fpscr_rn(val); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & RN_MASK; + + if (ll_value != val) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_rn(val=%d) returned 0x%llx, not the expecected value 0x%x\n", + val, ll_value, val); +#else + abort(); +#endif + } + + /* Reset to 0 for testing */ + val = 0; + __builtin_set_fpscr_rn(val); + + __builtin_mtfsb1(31); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & 0x1LL; + + if (ll_value != 1) + { +#ifdef DEBUG + printf("ERROR, __builtin_mtfsb1(31) did not set the bit to a 1.\n"); +#else + abort(); +#endif + } + + __builtin_mtfsb0(31); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & 0x1LL; + + if (ll_value != 0) + { +#ifdef DEBUG + printf("ERROR, __builtin_mtfsb0(31) did not set the bit to a 0.\n"); +#else + abort(); +#endif + } + + __builtin_mtfsb1(30); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & 0x2LL; + + if (ll_value != 2) + { +#ifdef DEBUG + printf("ERROR, __builtin_mtfsb1(31) did not set the bit to a 1.\n"); +#else + abort(); +#endif + } + + __builtin_mtfsb0(30); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & 0x2LL; + + if (ll_value != 0) + { +#ifdef DEBUG + printf("ERROR, __builtin_mtfsb1(31) did not set the bit to a 0.\n"); +#else + abort(); +#endif + } + + __builtin_mtfsb1(0); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & (0x1LL << (31-0)); + + if (ll_value != (0x1LL << (31-0))) + { +#ifdef DEBUG + printf("ERROR, __builtin_mtfsb1(0) did not set the bit to a 1.\n"); +#else + abort(); +#endif + } + + __builtin_mtfsb0(0); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & (0x1LL << (31-0)); + + if (ll_value != 0) + { 
+#ifdef DEBUG + printf("ERROR, __builtin_mtfsb0(0) did not set the bit to a 0.\n"); +#else + abort(); +#endif + } + + + /* Test builtin float rounding mode with variable as argument. */ + val = 0; + __builtin_set_fpscr_rn(val); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & RN_MASK; + + if (ll_value != val) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_rn(val=%d) did not set rounding mode to %x.\n", + val, val); +#else + abort(); +#endif + } + + val = 3; + __builtin_set_fpscr_rn(val); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & RN_MASK; + + if (ll_value != val) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_rn(val=%d) did not set rounding mode to %x.\n", + val, val); +#else + abort(); +#endif + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin_error.c b/gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin_error.c new file mode 100644 index 0000000..4835dce --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin_error.c @@ -0,0 +1,22 @@ +/* { dg-do compile { target powerpc*-*-* } } */ +/* { dg-options "-std=c99" } */ + +#include <altivec.h> + +int main () +{ + + /* Test builin with out of range arguments. Can only test for constant + int arguments. The builtins __builtin_set_fpscr_rn() also supports a + variable as an argument but can't test variable value at compile time. 
*/ + + __builtin_mtfsb0(-1); /* { dg-error "Argument must be a constant between 0 and 31" } */ + __builtin_mtfsb0(32); /* { dg-error "Argument must be a constant between 0 and 31" } */ + + __builtin_mtfsb1(-1); /* { dg-error "Argument must be a constant between 0 and 31" } */ + __builtin_mtfsb1(32); /* { dg-error "Argument must be a constant between 0 and 31" } */ + + __builtin_set_fpscr_rn(-1); /* { dg-error "Argument must be a value between 0 and 3" } */ + __builtin_set_fpscr_rn(4); /* { dg-error "Argument must be a value between 0 and 3" } */ +} + diff --git a/gcc/testsuite/gcc.target/powerpc/test_mffsl.c b/gcc/testsuite/gcc.target/powerpc/test_mffsl.c new file mode 100644 index 0000000..9a4d86b --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/test_mffsl.c @@ -0,0 +1,34 @@ +/* { dg-do run { target { powerpc*-*-* } } } */ +/* { dg-options "-std=c99" } */ + +#include <altivec.h> + +#ifdef DEBUG +#include <stdio.h> +#endif + +void abort (void); + +int main () +{ + + register double f14; + union blah { + double d; + unsigned long long ll; + } conv_val; + + /* Test reading the FPSCR register. */ + __asm __volatile ("mffs %0" : "=f"(f14)); + conv_val.d = f14; + + if (conv_val.d != __builtin_mffsl()) + { +#ifdef DEBUG + printf("ERROR, __builtin_mffsl() returned 0x%llx, not the expecected value 0x%llx\n", + __builtin_mffsl(), conv_val.d); +#else + abort(); +#endif + } +}
Hi Carl, Sorry for the late review. On Mon, Sep 17, 2018 at 03:03:28PM -0700, Carl Love wrote: > * config/rs6000.c (rs6000_expand_mtfsb0_mtfsb1_builtin): Add. config/rs6000/rs6000.c > +RS6000_BUILTIN_X (RS6000_BUILTIN_MTFSB0, "__builtin_mtfsb0", > + RS6000_BTM_ALWAYS, > + RS6000_BTC_MISC | RS6000_BTC_UNARY, > + CODE_FOR_rs6000_mtfsb0) I think you need RS6000_BTC_VOID on most of these calls? > static rtx > +rs6000_expand_mtfsb_builtin (enum insn_code icode, tree exp) The changelog has the function name wrong? > + pat = GEN_FCN (icode) (op0); > + if (! pat) No space after ! (or any other prefix operator that doesn't have letters in its name, i.e. casts, sizeof, etc.) > +static rtx > +rs6000_expand_set_fpscr_rn_builtin (enum insn_code icode, tree exp) > + /* If the argument is a constant, check the range. Argument can only be a > + 2-bit value. Unfortunately, can't check the range of the value at > + compile time if the argument is a variable. */ So what do we do for variable args? Mask off the bits? > +static rtx > +rs6000_expand_set_fpscr_drn_builtin (enum insn_code icode, tree exp) > + /* If the argument is a constant, check the range. Agrument can only be a > + 3-bit value. Unfortunately, can't check the range of the value at > + compile time if the argument is a variable. > + */ Don't put */ on a separate line please. > + if (GET_CODE (op0) == CONST_INT && !const_0_to_7_operand(op0, VOIDmode )) Stray space after VOIDmode. > +(define_insn "rs6000_mtfsb0" > + [(unspec_volatile [(match_operand:SI 0 "u5bit_cint_operand" "n")] > + UNSPECV_MTFSB0)] > + "TARGET_HARD_FLOAT" > + "mtfsb0 %0" > + [(set_attr "type" "fp")]) Hrm... Does all of this work with -msoft-float? Does it not ICE at least? > +(define_expand "rs6000_set_fpscr_rn" > + [(match_operand:DI 0 "gpc_reg_operand")] (Two spaces.) You could handle immediate operands separately: you need only two mtfsbN instructions for that, which is smaller and faster than the "variable" sequence. Well, for non-P9 anyway. 
> +(define_expand "rs6000_mffsl" > + [(set (match_operand:DF 0 "gpc_reg_operand") > + (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSL))] > + "TARGET_HARD_FLOAT" > +{ > + /* If the low latency mffsl instruction (ISA 3.0) is available use it, > + otherwise fall back to the older mffs instruction to emulate the mffsl > + instruction. */ > + > + if (TARGET_P9_MISC) > + emit_insn (gen_rs6000_mffsl_hw (operands[0])); The indent is incorrect. But you could just not do anything if TARGET_P9_MISC: the RTL pattern above already is exactly what you need. So "if (!TARGET_P9_MISC)" and then that block with the DONE moved in there, and the TARGET_P9_MISC case can just fall through. > + emit_insn (gen_anddi3 (tmp_di, tmp_di, GEN_INT (0x70007F0FFLL))); Write numbers in lower case please (for readability). 0x70007f0ffLL > +(define_insn "rs6000_mtfsf_hi" > + [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "i") const_int_operand is "n": an actual number, not e.g. a constant address (like "i" allows). It doesn't make a real difference in some cases, but it's best to get it right. > +the most significant word on 32-bit environments. The @code{__builtin_mffs} > +return the value of the FPSCR register. Note, ISA 3.0 supports the > +@code{__builtin_mffsl()} which is a lower latency version of this builtin. The mffsl does not return the whole fpscr, just the more useful (and cheaper to access!) fields: rn and drn, the exception enables, the non-sticky exception flags. > +++ b/gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin.c > @@ -0,0 +1,116 @@ > +/* { dg-do run { target { powerpc*-*-linux* && lp64 } } } */ Why only linux? Okay for trunk with the nits fixed; the things that are more like extensions can happen later (if at all). Thanks! Segher
Segher: I have addressed the various formatting and other minor issues. I checked to see if the builtins worked with -msoft-float, which they didn't. I added checks for -msoft-float to the functions: rs6000_expand_set_fpscr_rn_builtin(), rs6000_expand_set_fpscr_drn_builtin(), rs6000_expand_mtfsb_builtin() in file gcc/config/rs6000/rs6000.c to exit if the option was set on the command line. Per your suggestion, I added code to use the mtfsb[0|1] instructions in define_expand rs6000_set_fpscr_rn if the argument is a constant. I verified that mtfsb[0|1] instructions are generated for constant arguments via objdump. I have rerun the regression tests on powerpc64le-unknown-linux-gnu (Power 8 LE 64-bit mode only) powerpc64-unknown-linux-gnu (Power 8 BE 32-bit and 64-bit modes) powerpc64le-unknown-linux-gnu (Power 9 LE 64-bit mode only) with no regressions. Please let me know if the patch looks OK for trunk. Thanks for your help on this patch. Carl Love --------------------------------------------------------------- gcc/ChangeLog: 2018-09-27 Carl Love <cel@us.ibm.com> * config/rs6000/rs6000-builtin.def (__builtin_mffsl): New. (__builtin_mtfsb0): New. (__builtin_mtfsb1): New. (__builtin_set_fpscr_rn): New. (__builtin_set_fpscr_drn): New. * config/rs6000/rs6000.c (rs6000_expand_mtfsb_builtin): Add. (rs6000_expand_set_fpscr_rn_builtin): Add. (rs6000_expand_set_fpscr_drn_builtin): Add. (rs6000_expand_builtin): Add case statement entries for RS6000_BUILTIN_MTFSB0, RS6000_BUILTIN_MTFSB1, RS6000_BUILTIN_SET_FPSCR_RN, RS6000_BUILTIN_SET_FPSCR_DRN, RS6000_BUILTIN_MFFSL. (rs6000_init_builtins): Add ftype initialization and def_builtin calls for __builtin_mffsl, __builtin_mtfsb0, __builtin_mtfsb1, __builtin_set_fpscr_rn, __builtin_set_fpscr_drn. * config/rs6000/rs6000.md (rs6000_mtfsb0, rs6000_mtfsb1, rs6000_mffscrn, rs6000_mffscdrn): Add define_insn. (rs6000_set_fpscr_rn, rs6000_set_fpscr_drn): Add define_expand. * doc/extend.texi: Add documentation for the builtins. 
gcc/testsuite/ChangeLog: 2018-09-27 Carl Love <cel@us.ibm.com> * gcc.target/powerpc/test_mffsl.c: New file. * gcc.target/powerpc/test_fpscr_rn_builtin.c: New file. * gcc.target/powerpc/test_fpscr_drn_builtin.c: New file. * gcc.target/powerpc/test_fpscr_rn_builtin_error.c: New file. * gcc.target/powerpc/test_fpscr_drn_builtin_error.c: New file. --- gcc/config/rs6000/rs6000-builtin.def | 24 +++ gcc/config/rs6000/rs6000.c | 168 ++++++++++++++++++ gcc/config/rs6000/rs6000.md | 176 ++++++++++++++++++- gcc/doc/extend.texi | 38 ++++- .../gcc.target/powerpc/test_fpscr_drn_builtin.c | 116 +++++++++++++ .../powerpc/test_fpscr_drn_builtin_error.c | 17 ++ .../gcc.target/powerpc/test_fpscr_rn_builtin.c | 190 +++++++++++++++++++++ .../powerpc/test_fpscr_rn_builtin_error.c | 22 +++ gcc/testsuite/gcc.target/powerpc/test_mffsl.c | 34 ++++ 9 files changed, 783 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin.c create mode 100644 gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin_error.c create mode 100644 gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin.c create mode 100644 gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin_error.c create mode 100644 gcc/testsuite/gcc.target/powerpc/test_mffsl.c diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index f799681..976c36b 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -2486,11 +2486,35 @@ BU_SPECIAL_X (RS6000_BUILTIN_MFTB, "__builtin_ppc_mftb", BU_SPECIAL_X (RS6000_BUILTIN_MFFS, "__builtin_mffs", RS6000_BTM_ALWAYS, RS6000_BTC_MISC) +BU_SPECIAL_X (RS6000_BUILTIN_MFFSL, "__builtin_mffsl", + RS6000_BTM_ALWAYS, RS6000_BTC_MISC) + RS6000_BUILTIN_X (RS6000_BUILTIN_MTFSF, "__builtin_mtfsf", RS6000_BTM_ALWAYS, RS6000_BTC_MISC | RS6000_BTC_UNARY | RS6000_BTC_VOID, CODE_FOR_rs6000_mtfsf) +RS6000_BUILTIN_X (RS6000_BUILTIN_MTFSB0, "__builtin_mtfsb0", + RS6000_BTM_ALWAYS, + RS6000_BTC_MISC | 
RS6000_BTC_UNARY | RS6000_BTC_VOID, + CODE_FOR_rs6000_mtfsb0) + +RS6000_BUILTIN_X (RS6000_BUILTIN_MTFSB1, "__builtin_mtfsb1", + RS6000_BTM_ALWAYS, + RS6000_BTC_MISC | RS6000_BTC_UNARY | RS6000_BTC_VOID, + CODE_FOR_rs6000_mtfsb1) + +RS6000_BUILTIN_X (RS6000_BUILTIN_SET_FPSCR_RN, "__builtin_set_fpscr_rn", + RS6000_BTM_ALWAYS, + RS6000_BTC_MISC | RS6000_BTC_UNARY| RS6000_BTC_VOID, + CODE_FOR_rs6000_set_fpscr_rn) + +RS6000_BUILTIN_X (RS6000_BUILTIN_SET_FPSCR_DRN, "__builtin_set_fpscr_drn", + RS6000_BTM_ALWAYS, + RS6000_BTC_MISC | RS6000_BTM_64BIT | RS6000_BTC_UNARY + | RS6000_BTC_VOID, + CODE_FOR_rs6000_set_fpscr_drn) + BU_SPECIAL_X (RS6000_BUILTIN_CPU_INIT, "__builtin_cpu_init", RS6000_BTM_ALWAYS, RS6000_BTC_MISC) diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 2b736d7..3ab8920 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -13544,6 +13544,11 @@ rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target) /* Builtin not supported on this processor. */ return 0; + if (icode == CODE_FOR_rs6000_mffsl + && rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT) + fatal_error (input_location, + "__builtin_mffsl() not supported with -msoft-float"); + if (target == 0 || GET_MODE (target) != tmode || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) @@ -13592,6 +13597,128 @@ rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp) op1 = copy_to_mode_reg (mode1, op1); pat = GEN_FCN (icode) (op0, op1); + if (!pat) + return const0_rtx; + emit_insn (pat); + + return NULL_RTX; +} + +static rtx +rs6000_expand_mtfsb_builtin (enum insn_code icode, tree exp) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + rtx op0 = expand_normal (arg0); + + if (icode == CODE_FOR_nothing) + /* Builtin not supported on this processor. 
*/ + return 0; + + if (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT) + fatal_error (input_location, + "__builtin_mtfsb0 and __builtin_mtfsb1 not supported with -msoft-float"); + + /* If we got invalid arguments bail out before generating bad rtl. */ + if (arg0 == error_mark_node) + return const0_rtx; + + /* Only allow bit numbers 0 to 31. */ + if (!u5bit_cint_operand (op0, VOIDmode)) + { + error ("Argument must be a constant between 0 and 31."); + return const0_rtx; + } + + pat = GEN_FCN (icode) (op0); + if (!pat) + return const0_rtx; + emit_insn (pat); + + return NULL_RTX; +} + +static rtx +rs6000_expand_set_fpscr_rn_builtin (enum insn_code icode, tree exp) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + rtx op0 = expand_normal (arg0); + machine_mode mode0 = insn_data[icode].operand[0].mode; + + if (icode == CODE_FOR_nothing) + /* Builtin not supported on this processor. */ + return 0; + + if (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT) + fatal_error (input_location, + "__builtin_set_fpscr_rn not supported with -msoft-float"); + + /* If we got invalid arguments bail out before generating bad rtl. */ + if (arg0 == error_mark_node) + return const0_rtx; + + /* If the argument is a constant, check the range. Argument can only be a + 2-bit value. Unfortunately, can't check the range of the value at + compile time if the argument is a variable. The least significant two + bits of the argument, regardless of type, are used to set the rounding + mode. All other bits are ignored. */ + if (GET_CODE (op0) == CONST_INT && !const_0_to_3_operand(op0, VOIDmode)) + { + error ("Argument must be a value between 0 and 3."); + return const0_rtx; + } + + if (! 
(*insn_data[icode].operand[0].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + pat = GEN_FCN (icode) (op0); + if (!pat) + return const0_rtx; + emit_insn (pat); + + return NULL_RTX; +} +static rtx +rs6000_expand_set_fpscr_drn_builtin (enum insn_code icode, tree exp) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + rtx op0 = expand_normal (arg0); + machine_mode mode0 = insn_data[icode].operand[0].mode; + + if (TARGET_32BIT) + /* Builtin not supported in 32-bit mode. */ + fatal_error (input_location, + "__builtin_set_fpscr_drn is not supported in 32-bit mode."); + + if (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT) + fatal_error (input_location, + "__builtin_set_fpscr_drn not supported with -msoft-float"); + + if (icode == CODE_FOR_nothing) + /* Builtin not supported on this processor. */ + return 0; + + /* If we got invalid arguments bail out before generating bad rtl. */ + if (arg0 == error_mark_node) + return const0_rtx; + + /* If the argument is a constant, check the range. Agrument can only be a + 3-bit value. Unfortunately, can't check the range of the value at + compile time if the argument is a variable. The least significant two + bits of the argument, regardless of type, are used to set the rounding + mode. All other bits are ignored. */ + if (GET_CODE (op0) == CONST_INT && !const_0_to_7_operand(op0, VOIDmode)) + { + error ("Argument must be a value between 0 and 7."); + return const0_rtx; + } + + if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + pat = GEN_FCN (icode) (op0); if (! 
pat) return const0_rtx; emit_insn (pat); @@ -16069,6 +16196,24 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, case RS6000_BUILTIN_MFFS: return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target); + case RS6000_BUILTIN_MTFSB0: + return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb0, exp); + + case RS6000_BUILTIN_MTFSB1: + return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb1, exp); + + case RS6000_BUILTIN_SET_FPSCR_RN: + return rs6000_expand_set_fpscr_rn_builtin (CODE_FOR_rs6000_set_fpscr_rn, + exp); + + case RS6000_BUILTIN_SET_FPSCR_DRN: + return + rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn, + exp); + + case RS6000_BUILTIN_MFFSL: + return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target); + case RS6000_BUILTIN_MTFSF: return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp); @@ -16452,6 +16597,29 @@ rs6000_init_builtins (void) ftype = build_function_type_list (double_type_node, NULL_TREE); def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS); + ftype = build_function_type_list (double_type_node, NULL_TREE); + def_builtin ("__builtin_mffsl", ftype, RS6000_BUILTIN_MFFSL); + + ftype = build_function_type_list (void_type_node, + intSI_type_node, + NULL_TREE); + def_builtin ("__builtin_mtfsb0", ftype, RS6000_BUILTIN_MTFSB0); + + ftype = build_function_type_list (void_type_node, + intSI_type_node, + NULL_TREE); + def_builtin ("__builtin_mtfsb1", ftype, RS6000_BUILTIN_MTFSB1); + + ftype = build_function_type_list (void_type_node, + intDI_type_node, + NULL_TREE); + def_builtin ("__builtin_set_fpscr_rn", ftype, RS6000_BUILTIN_SET_FPSCR_RN); + + ftype = build_function_type_list (void_type_node, + intDI_type_node, + NULL_TREE); + def_builtin ("__builtin_set_fpscr_drn", ftype, RS6000_BUILTIN_SET_FPSCR_DRN); + ftype = build_function_type_list (void_type_node, intSI_type_node, double_type_node, NULL_TREE); diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 
68ba5fd..0535075 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -164,7 +164,13 @@ UNSPECV_MFTB ; move from time base UNSPECV_NLGR ; non-local goto receiver UNSPECV_MFFS ; Move from FPSCR - UNSPECV_MTFSF ; Move to FPSCR Fields + UNSPECV_MFFSL ; Move from FPSCR light instruction version + UNSPECV_MFFSCRN ; Move from FPSCR float rounding mode + UNSPECV_MFFSCDRN ; Move from FPSCR decimal float rounding mode + UNSPECV_MTFSF ; Move to FPSCR Fields 8 to 15 + UNSPECV_MTFSF_HI ; Move to FPSCR Fields 0 to 7 + UNSPECV_MTFSB0 ; Set FPSCR Field bit to 0 + UNSPECV_MTFSB1 ; Set FPSCR Field bit to 1 UNSPECV_SPLIT_STACK_RETURN ; A camouflaged return UNSPECV_SPEC_BARRIER ; Speculation barrier ]) @@ -5824,6 +5830,130 @@ xscvdpuxds %x0,%x1" [(set_attr "type" "fp")]) +(define_insn "rs6000_mtfsb0" + [(unspec_volatile [(match_operand:SI 0 "u5bit_cint_operand" "n")] + UNSPECV_MTFSB0)] + "TARGET_HARD_FLOAT" + "mtfsb0 %0" + [(set_attr "type" "fp")]) + +(define_insn "rs6000_mtfsb1" + [(unspec_volatile [(match_operand:SI 0 "u5bit_cint_operand" "n")] + UNSPECV_MTFSB1)] + "TARGET_HARD_FLOAT" + "mtfsb1 %0" + [(set_attr "type" "fp")]) + +(define_insn "rs6000_mffscrn" + [(set (match_operand:DF 0 "gpc_reg_operand" "=d") + (unspec_volatile:DF [(match_operand:DF 1 "gpc_reg_operand" "d")] + UNSPECV_MFFSCRN))] + "TARGET_P9_MISC" + "mffscrn %0,%1" + [(set_attr "type" "fp")]) + +(define_insn "rs6000_mffscdrn" + [(set (match_operand:DF 0 "gpc_reg_operand" "=d") + (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSCDRN)) + (use (match_operand:DF 1 "gpc_reg_operand" "d"))] + "TARGET_P9_MISC" + "mffscdrn %0,%1" + [(set_attr "type" "fp")]) + +(define_expand "rs6000_set_fpscr_rn" + [(match_operand 0 "reg_or_cint_operand")] + "TARGET_HARD_FLOAT" +{ + rtx tmp_df = gen_reg_rtx (DFmode); + + /* The floating point rounding control bits are FPSCR[62:63]. Put the + new rounding mode bits from operands[0][62:63] into FPSCR[62:63]. 
*/ + if (TARGET_P9_MISC) + { + rtx src_df = gen_reg_rtx (DImode); + + src_df = simplify_gen_subreg (DFmode, operands[0], DImode, 0); + emit_insn (gen_rs6000_mffscrn (tmp_df, src_df)); + } + else + { + if (CONST_INT_P (operands[0])) + { + if ((INTVAL (operands[0]) & 0x1) == 0x1) + emit_insn (gen_rs6000_mtfsb1 (GEN_INT (31))); + else + emit_insn (gen_rs6000_mtfsb0 (GEN_INT (31))); + + if ((INTVAL (operands[0]) & 0x2) == 0x2) + emit_insn (gen_rs6000_mtfsb1 (GEN_INT (30))); + else + emit_insn (gen_rs6000_mtfsb0 (GEN_INT (30))); + } + else + { + rtx tmp_rn = gen_reg_rtx (DImode); + rtx tmp_di = gen_reg_rtx (DImode); + + /* Extract new RN mode from operand. */ + emit_insn (gen_anddi3 (tmp_rn, operands[0], GEN_INT (0x3))); + + /* Insert new RN mode into FSCPR. */ + emit_insn (gen_rs6000_mffs (tmp_df)); + tmp_di = simplify_gen_subreg (DImode, tmp_df, DFmode, 0); + emit_insn (gen_anddi3 (tmp_di, tmp_di, GEN_INT (0xFFFFFFFC))); + emit_insn (gen_iordi3 (tmp_di, tmp_di, tmp_rn)); + + /* Need to write to field k=15. The fields are [0:15]. Hence with + L=0, W=0, FLM_i must be equal to 8, 16 = i + 8*(1-W). FLM is an + 8-bit field[0:7]. Need to set the bit that corresponds to the + value of i that you want [0:7]. */ + tmp_df = simplify_gen_subreg (DFmode, tmp_di, DImode, 0); + emit_insn (gen_rs6000_mtfsf (GEN_INT (0x01), tmp_df)); + } + } + DONE; +}) + +(define_expand "rs6000_set_fpscr_drn" + [(match_operand:DI 0 "gpc_reg_operand")] + "TARGET_HARD_FLOAT" +{ + rtx tmp_df = gen_reg_rtx (DFmode); + + /* The decimal floating point rounding control bits are FPSCR[29:31]. Put the + new rounding mode bits from operands[0][61:63] into FPSCR[29:31]. 
*/ + if (TARGET_P9_MISC) + { + rtx src_df = gen_reg_rtx (DFmode); + + emit_insn (gen_ashldi3 (operands[0], operands[0], GEN_INT (32))); + src_df = simplify_gen_subreg (DFmode, operands[0], DImode, 0); + emit_insn (gen_rs6000_mffscdrn (tmp_df, src_df)); + } + else + { + rtx tmp_rn = gen_reg_rtx (DImode); + rtx tmp_di = gen_reg_rtx (DImode); + + /* Extract new DRN mode from operand. */ + emit_insn (gen_anddi3 (tmp_rn, operands[0], GEN_INT (0x7))); + emit_insn (gen_ashldi3 (tmp_rn, tmp_rn, GEN_INT (32))); + + /* Insert new RN mode into FSCPR. */ + emit_insn (gen_rs6000_mffs (tmp_df)); + tmp_di = simplify_gen_subreg (DImode, tmp_df, DFmode, 0); + emit_insn (gen_anddi3 (tmp_di, tmp_di, GEN_INT (0xFFFFFFF8FFFFFFFF))); + emit_insn (gen_iordi3 (tmp_di, tmp_di, tmp_rn)); + + /* Need to write to field 7. The fields are [0:15]. The equation to + select the field is i + 8*(1-W). Hence with L=0 and W=1, need to set + i to 0x1 to get field 7 where i selects the field. */ + tmp_df = simplify_gen_subreg (DFmode, tmp_di, DImode, 0); + emit_insn (gen_rs6000_mtfsf_hi (GEN_INT (0x01), tmp_df)); + } + DONE; +}) + ;; Here, we use (set (reg) (unspec:DI [(fix:SI ...)] UNSPEC_FCTIWZ)) ;; rather than (set (subreg:SI (reg)) (fix:SI ...)) ;; because the first makes it clear that operand 0 is not live @@ -13603,6 +13733,43 @@ }) +;; The ISA 3.0 mffsl instruction is a lower latency instruction +;; for reading bits [29:31], [45:51] and [56:63] of the FPSCR. +(define_insn "rs6000_mffsl_hw" + [(set (match_operand:DF 0 "gpc_reg_operand" "=d") + (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSL))] + "TARGET_HARD_FLOAT" + "mffsl %0") + +(define_expand "rs6000_mffsl" + [(set (match_operand:DF 0 "gpc_reg_operand") + (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSL))] + "TARGET_HARD_FLOAT" +{ + /* If the low latency mffsl instruction (ISA 3.0) is available use it, + otherwise fall back to the older mffs instruction to emulate the mffsl + instruction. 
*/ + + if (!TARGET_P9_MISC) + { + rtx tmp_di = gen_reg_rtx (DImode); + rtx tmp_df = gen_reg_rtx (DFmode); + + /* The mffs instruction reads the entire FPSCR. Emulate the mffsl + instruction using the mffs instruction and masking off the bits + the mmsl instruciton actually reads. */ + emit_insn (gen_rs6000_mffs (tmp_df)); + tmp_di = simplify_gen_subreg (DImode, tmp_df, DFmode, 0); + emit_insn (gen_anddi3 (tmp_di, tmp_di, GEN_INT (0x70007f0ffLL))); + + operands[0] = simplify_gen_subreg (DFmode, tmp_di, DImode, 0); + DONE; + } + + emit_insn (gen_rs6000_mffsl_hw (operands[0])); + DONE; +}) + (define_insn "rs6000_mffs" [(set (match_operand:DF 0 "gpc_reg_operand" "=d") (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFS))] @@ -13616,6 +13783,13 @@ "TARGET_HARD_FLOAT" "mtfsf %0,%1") +(define_insn "rs6000_mtfsf_hi" + [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "n") + (match_operand:DF 1 "gpc_reg_operand" "d")] + UNSPECV_MTFSF_HI)] + "TARGET_HARD_FLOAT" + "mtfsf %0,%1,0,1") + ;; Power8 fusion support for fusing an addis instruction with a D-form load of ;; a GPR. The addis instruction must be adjacent to the load, and use the same diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 7b471ec..817c899 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -15632,6 +15632,10 @@ uint64_t __builtin_ppc_get_timebase (); unsigned long __builtin_ppc_mftb (); __ibm128 __builtin_unpack_ibm128 (__ibm128, int); __ibm128 __builtin_pack_ibm128 (double, double); +double __builtin_mffs (void); +void __builtin_mtfsb0 (const int); +void __builtin_mtfsb1 (const int); +void __builtin_set_fpscr_rn (int); @end smallexample The @code{__builtin_ppc_get_timebase} and @code{__builtin_ppc_mftb} @@ -15640,7 +15644,21 @@ functions generate instructions to read the Time Base Register. The instructions and always returns the 64 bits of the Time Base Register. 
The @code{__builtin_ppc_mftb} function always generates one instruction and returns the Time Base Register value as an unsigned long, throwing away -the most significant word on 32-bit environments. +the most significant word on 32-bit environments. The @code{__builtin_mffs} +returns the value of the FPSCR register. Note, ISA 3.0 supports the +@code{__builtin_mffsl()} which permits software to read the control and +non-sticky status bits in the FPSCR without the higher latency associated with +accessing the sticky status bits. The +@code{__builtin_mtfsb0} and @code{__builtin_mtfsb1} take the bit to change +as an argument. The valid bit range is between 0 and 31. The builtins map to +the @code{mtfsb0} and @code{mtfsb1} instructions which take the argument and +add 32. Hence these instructions only modify the FPSCR[32:63] bits by +changing the specified bit to a zero or one respectively. The +@code{__builtin_set_fpscr_rn} builtin allows changing both of the floating +point rounding mode bits. The argument is a 2-bit value. The argument can +either be a const int or stored in a variable. The builtin uses the ISA 3.0 +instruction @code{mffscrn} if available, otherwise it reads the FPSCR, masks +the current rounding mode bits out and OR's in the new value. 
@node Basic PowerPC Built-in Functions Available on ISA 2.05 @subsubsection Basic PowerPC Built-in Functions Available on ISA 2.05 @@ -15676,6 +15694,7 @@ The following built-in functions are available when hardware decimal floating point (@option{-mhard-dfp}) is available: @smallexample +void __builtin_set_fpscr_drn(int); _Decimal64 __builtin_ddedpd (int, _Decimal64); _Decimal128 __builtin_ddedpdq (int, _Decimal128); _Decimal64 __builtin_denbcd (int, _Decimal64); @@ -15690,6 +15709,14 @@ long long __builtin_dxex (_Decimal64); long long __builtin_dxexq (_Decimal128); _Decimal128 __builtin_pack_dec128 (unsigned long long, unsigned long long); unsigned long long __builtin_unpack_dec128 (_Decimal128, int); + +The @code{__builtin_set_fpscr_drn} builtin allows changing the three decimal +floating point rounding mode bits. The argument is a 3-bit value. The +argument can either be a const int or the value can be stored in a variable. +The builtin uses the ISA 3.0 instruction @code{mffscdrn} if available. +Otherwise the builtin reads the FPSCR, masks the current decimal rounding +mode bits out and OR's in the new value. + @end smallexample The following functions require @option{-mhard-float}, @@ -15891,6 +15918,9 @@ int __builtin_dfp_dtstsfi_ov (unsigned int comparison, _Decimal64 value); int __builtin_dfp_dtstsfi_ov (unsigned int comparison, _Decimal128 value); int __builtin_dfp_dtstsfi_ov_dd (unsigned int comparison, _Decimal64 value); int __builtin_dfp_dtstsfi_ov_td (unsigned int comparison, _Decimal128 value); + +double __builtin_mffsl(void); + @end smallexample The @code{__builtin_byte_in_set} function requires a 64-bit environment supporting ISA 3.0 or later. This function returns @@ -15942,6 +15972,12 @@ The @code{__builtin_dfp_dtstsfi_ov_dd} and require that the type of the @code{value} argument be @code{__Decimal64} and @code{__Decimal128} respectively. +The @code{__builtin_mffsl} uses the ISA 3.0 @code{mffsl} instruction to read +the FPSCR. 
The instruction is a lower latency version of the @code{mffs} +instruction. If the @code{mffsl} instruction is not available, then the +builtin uses the older @code{mffs} instruction to read the FPSCR. + + @node PowerPC AltiVec/VSX Built-in Functions @subsection PowerPC AltiVec/VSX Built-in Functions diff --git a/gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin.c b/gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin.c new file mode 100644 index 0000000..0fb554a --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin.c @@ -0,0 +1,116 @@ +/* { dg-do run { target { powerpc*-*-* && lp64 } } } */ +/* { dg-options "-std=c99" } */ + +#include <altivec.h> + +#ifdef DEBUG +#include <stdio.h> +#endif + +#define DRN_MASK 0x700000000LL /* DRN field mask */ + +void abort (void); + +int main () +{ + int i; + int val, bit; + double fpscr_val; + union blah { + double d; + unsigned long long ll; + } conv_val; + + unsigned long long ll_value; + register double f14; + + /* __builtin_set_fpscr_drn() builtin can take a const or a variable + value between 0 and 7 as the argument. + */ + + /* Test builtin decimal float rounding mode with const argument. 
*/ + __builtin_set_fpscr_drn(7); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & DRN_MASK; + + if (ll_value != 0x700000000) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_drn(7) did not set rounding mode to 7.\n"); +#else + abort(); +#endif + } + + __builtin_set_fpscr_drn(2); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & DRN_MASK; + + if (ll_value != 0x200000000) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_drn(2) did not set rounding mode to 2.\n"); +#else + abort(); +#endif + } + + __builtin_set_fpscr_drn(5); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & DRN_MASK; + + if (ll_value != 0x500000000) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_drn(5) did not set rounding mode to 5.\n"); +#else + abort(); +#endif + } + + /* Test builtin decimal float rounding mode with variable as argument. */ + val = 7; + __builtin_set_fpscr_drn(val); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & DRN_MASK; + + if (ll_value != ((unsigned long long)val << 32)) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_drn(val=%d) did not set rounding mode to %d.\n", + val, val); +#else + abort(); +#endif + } + + val = 0; + __builtin_set_fpscr_drn(val); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & DRN_MASK; + + if (ll_value != ((unsigned long long)val << 32)) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_drn(val=%d) did not set rounding mode to %d.\n", + val, val); +#else + abort(); +#endif + } + + val = 2; + __builtin_set_fpscr_drn(val); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & DRN_MASK; + + if (ll_value != ((unsigned long long)val << 32)) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_drn(val=%d) did not set rounding mode to %d.\n", + val, val); +#else + abort(); +#endif + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin_error.c b/gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin_error.c new file mode 100644 index 
0000000..04e9f03 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin_error.c @@ -0,0 +1,17 @@ +/* { dg-do compile { target powerpc*-*-* } } */ +/* { dg-options "-std=c99" } */ + +#include <altivec.h> + +int main () +{ + + /* Test builin with out of range arguments. The builtin + __builtin_set_fpscr_drn() also support a variable as an argument but + can't test variable value at compile time. */ + + __builtin_set_fpscr_drn(-1); /* { dg-error "Argument must be a value between 0 and 7" } */ + __builtin_set_fpscr_drn(8); /* { dg-error "Argument must be a value between 0 and 7" } */ + +} + diff --git a/gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin.c b/gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin.c new file mode 100644 index 0000000..2a15585 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin.c @@ -0,0 +1,190 @@ +/* { dg-do run { target { powerpc*-*-* } } } */ +/* { dg-options "-std=c99" } */ + +#include <altivec.h> + +#ifdef DEBUG +#include <stdio.h> +#endif + +#define RN_MASK 0x3LL /* RN field mask */ + +void abort (void); + +int main () +{ + int i; + int val, bit; + double fpscr_val; + union blah { + double d; + unsigned long long ll; + } conv_val; + + unsigned long long ll_value; + register double f14; + + /* __builtin_set_fpscr_rn() builtin can take a const or a variable + value between 0 and 3 as the argument. + __builtin_mtfsb0 and __builtin_mtfsb1 argument must be a constant + 30 or 31. + */ + + /* Test reading the FPSCR register */ + __asm __volatile ("mffs %0" : "=f"(f14)); + conv_val.d = f14; + + if (conv_val.d != __builtin_mffs()) + { +#ifdef DEBUG + printf("ERROR, __builtin_mffs() returned 0x%llx, not the expecected value 0x%llx\n", + __builtin_mffs(), conv_val.d); +#else + abort(); +#endif + } + + /* Test float rounding mode builtin with const value argument. 
*/ + __builtin_set_fpscr_rn(3); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & RN_MASK; + + if (ll_value != 3) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_rn(3) returned 0x%llx, not the expecected value 0x%x\n", + ll_value, 3); +#else + abort(); +#endif + } + + val = 2; + __builtin_set_fpscr_rn(val); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & RN_MASK; + + if (ll_value != val) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_rn(val=%d) returned 0x%llx, not the expecected value 0x%x\n", + val, ll_value, val); +#else + abort(); +#endif + } + + /* Reset to 0 for testing */ + val = 0; + __builtin_set_fpscr_rn(val); + + __builtin_mtfsb1(31); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & 0x1LL; + + if (ll_value != 1) + { +#ifdef DEBUG + printf("ERROR, __builtin_mtfsb1(31) did not set the bit to a 1.\n"); +#else + abort(); +#endif + } + + __builtin_mtfsb0(31); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & 0x1LL; + + if (ll_value != 0) + { +#ifdef DEBUG + printf("ERROR, __builtin_mtfsb0(31) did not set the bit to a 0.\n"); +#else + abort(); +#endif + } + + __builtin_mtfsb1(30); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & 0x2LL; + + if (ll_value != 2) + { +#ifdef DEBUG + printf("ERROR, __builtin_mtfsb1(31) did not set the bit to a 1.\n"); +#else + abort(); +#endif + } + + __builtin_mtfsb0(30); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & 0x2LL; + + if (ll_value != 0) + { +#ifdef DEBUG + printf("ERROR, __builtin_mtfsb1(31) did not set the bit to a 0.\n"); +#else + abort(); +#endif + } + + __builtin_mtfsb1(0); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & (0x1LL << (31-0)); + + if (ll_value != (0x1LL << (31-0))) + { +#ifdef DEBUG + printf("ERROR, __builtin_mtfsb1(0) did not set the bit to a 1.\n"); +#else + abort(); +#endif + } + + __builtin_mtfsb0(0); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & (0x1LL << (31-0)); + + if (ll_value != 0) + { 
+#ifdef DEBUG + printf("ERROR, __builtin_mtfsb0(0) did not set the bit to a 0.\n"); +#else + abort(); +#endif + } + + + /* Test builtin float rounding mode with variable as argument. */ + val = 0; + __builtin_set_fpscr_rn(val); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & RN_MASK; + + if (ll_value != val) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_rn(val=%d) did not set rounding mode to %x.\n", + val, val); +#else + abort(); +#endif + } + + val = 3; + __builtin_set_fpscr_rn(val); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & RN_MASK; + + if (ll_value != val) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_rn(val=%d) did not set rounding mode to %x.\n", + val, val); +#else + abort(); +#endif + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin_error.c b/gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin_error.c new file mode 100644 index 0000000..4835dce --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/test_fpscr_rn_builtin_error.c @@ -0,0 +1,22 @@ +/* { dg-do compile { target powerpc*-*-* } } */ +/* { dg-options "-std=c99" } */ + +#include <altivec.h> + +int main () +{ + + /* Test builtin with out of range arguments. Can only test for constant + int arguments. The builtin __builtin_set_fpscr_rn() also supports a + variable as an argument but can't test variable value at compile time. 
*/ + + __builtin_mtfsb0(-1); /* { dg-error "Argument must be a constant between 0 and 31" } */ + __builtin_mtfsb0(32); /* { dg-error "Argument must be a constant between 0 and 31" } */ + + __builtin_mtfsb1(-1); /* { dg-error "Argument must be a constant between 0 and 31" } */ + __builtin_mtfsb1(32); /* { dg-error "Argument must be a constant between 0 and 31" } */ + + __builtin_set_fpscr_rn(-1); /* { dg-error "Argument must be a value between 0 and 3" } */ + __builtin_set_fpscr_rn(4); /* { dg-error "Argument must be a value between 0 and 3" } */ +} + diff --git a/gcc/testsuite/gcc.target/powerpc/test_mffsl.c b/gcc/testsuite/gcc.target/powerpc/test_mffsl.c new file mode 100644 index 0000000..9a4d86b --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/test_mffsl.c @@ -0,0 +1,34 @@ +/* { dg-do run { target { powerpc*-*-* } } } */ +/* { dg-options "-std=c99" } */ + +#include <altivec.h> + +#ifdef DEBUG +#include <stdio.h> +#endif + +void abort (void); + +int main () +{ + + register double f14; + union blah { + double d; + unsigned long long ll; + } conv_val; + + /* Test reading the FPSCR register. */ + __asm __volatile ("mffs %0" : "=f"(f14)); + conv_val.d = f14; + + if (conv_val.d != __builtin_mffsl()) + { +#ifdef DEBUG + printf("ERROR, __builtin_mffsl() returned 0x%llx, not the expecected value 0x%llx\n", + __builtin_mffsl(), conv_val.d); +#else + abort(); +#endif + } +}
Hi! On Thu, Sep 27, 2018 at 04:17:57PM -0700, Carl Love wrote: > + if (icode == CODE_FOR_rs6000_mffsl > + && rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT) > + fatal_error (input_location, > + "__builtin_mffsl() not supported with -msoft-float"); Please use plain "error ()" instead. To keep whatever else here from wreaking havoc, also immediately after the error() do "return const0_rtx"? (Same for all other fatal_error, of course. fatal_error is for when the compiler needs to go down ungracefully, _now_. It is nicer to still try to continue for a little while). > + /* If the argument is a constant, check the range. Argument can only be a > + 2-bit value. Unfortunately, can't check the range of the value at > + compile time if the argument is a variable. The least significant two > + bits of the argument, regardless of type, are used to set the rounding > + mode. All other bits are ignored. */ > + if (GET_CODE (op0) == CONST_INT && !const_0_to_3_operand(op0, VOIDmode)) > + { > + error ("Argument must be a value between 0 and 3."); > + return const0_rtx; > + } These are indented a char too many. > + if (TARGET_P9_MISC) > + { > + rtx src_df = gen_reg_rtx (DImode); > + > + src_df = simplify_gen_subreg (DFmode, operands[0], DImode, 0); > + emit_insn (gen_rs6000_mffscrn (tmp_df, src_df)); > + } > + else This is easier if you write it like: if (...) { emit this; emit that; DONE; } if (...) { emit this; emit that; DONE; } etc. With that style, code that is semantically at the same level has the same indent, instead of wandering further and further to the right. > + { > + rtx tmp_rn = gen_reg_rtx (DImode); > + rtx tmp_di = gen_reg_rtx (DImode); > + > + /* Extract new RN mode from operand. */ > + emit_insn (gen_anddi3 (tmp_rn, operands[0], GEN_INT (0x3))); > + > + /* Insert new RN mode into FSCPR. 
*/ > + emit_insn (gen_rs6000_mffs (tmp_df)); > + tmp_di = simplify_gen_subreg (DImode, tmp_df, DFmode, 0); > + emit_insn (gen_anddi3 (tmp_di, tmp_di, GEN_INT (0xFFFFFFFC))); This loses bits 0..31 (the top half of the register). Maybe use GEN_INT (-4) ? > + emit_insn (gen_iordi3 (tmp_di, tmp_di, tmp_rn)); > + > + /* Need to write to field k=15. The fields are [0:15]. Hence with > + L=0, W=0, FLM_i must be equal to 8, 16 = i + 8*(1-W). FLM is an > + 8-bit field[0:7]. Need to set the bit that corresponds to the > + value of i that you want [0:7]. */ > + tmp_df = simplify_gen_subreg (DFmode, tmp_di, DImode, 0); > + emit_insn (gen_rs6000_mtfsf (GEN_INT (0x01), tmp_df)); > + } :-) > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/test_fpscr_drn_builtin.c > @@ -0,0 +1,116 @@ > +/* { dg-do run { target { powerpc*-*-* && lp64 } } } */ > +/* { dg-options "-std=c99" } */ You need to require a system that implements the DRN bits... I think you'll need the "dfp_hw" selector. (That's power6 and later, may not be so easy to test this ;-) ) > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/test_mffsl.c > @@ -0,0 +1,34 @@ > +/* { dg-do run { target { powerpc*-*-* } } } */ > +/* { dg-options "-std=c99" } */ Maybe you should do the run tests with -O2? Maybe compile tests, too, come to think of it. With those details fixed, okay for trunk. Thanks! Segher
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index f79968154..a50236e77 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -2486,11 +2486,34 @@ BU_SPECIAL_X (RS6000_BUILTIN_MFTB, "__builtin_ppc_mftb", BU_SPECIAL_X (RS6000_BUILTIN_MFFS, "__builtin_mffs", RS6000_BTM_ALWAYS, RS6000_BTC_MISC) +BU_SPECIAL_X (RS6000_BUILTIN_MFFSL, "__builtin_mffsl", + RS6000_BTM_ALWAYS, RS6000_BTC_MISC) + RS6000_BUILTIN_X (RS6000_BUILTIN_MTFSF, "__builtin_mtfsf", RS6000_BTM_ALWAYS, RS6000_BTC_MISC | RS6000_BTC_UNARY | RS6000_BTC_VOID, CODE_FOR_rs6000_mtfsf) +RS6000_BUILTIN_X (RS6000_BUILTIN_MTFSB0_SI, "__builtin_mtfsb0", + RS6000_BTM_ALWAYS, + RS6000_BTC_MISC | RS6000_BTC_UNARY, + CODE_FOR_rs6000_mtfsb0_si) + +RS6000_BUILTIN_X (RS6000_BUILTIN_MTFSB1_SI, "__builtin_mtfsb1", + RS6000_BTM_ALWAYS, + RS6000_BTC_MISC | RS6000_BTC_UNARY, + CODE_FOR_rs6000_mtfsb1_si) + +RS6000_BUILTIN_X (RS6000_BUILTIN_SET_FPSCR_RN, "__builtin_set_fpscr_rn", + RS6000_BTM_ALWAYS, + RS6000_BTC_MISC | RS6000_BTC_UNARY, + CODE_FOR_rs6000_set_fpscr_rn) + +RS6000_BUILTIN_X (RS6000_BUILTIN_SET_FPSCR_DRN, "__builtin_set_fpscr_drn", + RS6000_BTM_ALWAYS, + RS6000_BTC_MISC | RS6000_BTC_UNARY, + CODE_FOR_rs6000_set_fpscr_drn) + BU_SPECIAL_X (RS6000_BUILTIN_CPU_INIT, "__builtin_cpu_init", RS6000_BTM_ALWAYS, RS6000_BTC_MISC) diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index aa707b255..7db9c10a9 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -13356,6 +13356,113 @@ rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp) return NULL_RTX; } +static rtx +rs6000_expand_mtfsb0_mtfsb1_builtin (enum insn_code icode, tree exp) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + rtx op0 = expand_normal (arg0); + machine_mode mode0 = insn_data[icode].operand[0].mode; + + if (icode == CODE_FOR_nothing) + /* Builtin not supported on this processor. 
*/ + return 0; + + /* If we got invalid arguments bail out before generating bad rtl. */ + if (arg0 == error_mark_node) + return const0_rtx; + + /* Only allow bit numbers 0 to 31. */ + if (GET_CODE (op0) != CONST_INT || INTVAL (op0) < 0 || INTVAL (op0) > 31) + { + error ("Argument must be a constant between 0 and 31."); + return const0_rtx; + } + + if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + pat = GEN_FCN (icode) (op0); + if (! pat) + return const0_rtx; + emit_insn (pat); + + return NULL_RTX; +} + +static rtx +rs6000_expand_set_fpscr_rn_builtin (enum insn_code icode, tree exp) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + rtx op0 = expand_normal (arg0); + machine_mode mode0 = insn_data[icode].operand[0].mode; + + if (icode == CODE_FOR_nothing) + /* Builtin not supported on this processor. */ + return 0; + + /* If we got invalid arguments bail out before generating bad rtl. */ + if (arg0 == error_mark_node) + return const0_rtx; + + /* If the argument is a constant, check the range. Argument can only be a + 2-bit value. Unfortunately, can't check the range of the value at + compile time if the argument is a variable. + */ + if (GET_CODE (op0) == CONST_INT && (INTVAL (op0) < 0 || INTVAL (op0) > 3)) + { + error ("Argument must be a value between 0 and 3."); + return const0_rtx; + } + + if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + pat = GEN_FCN (icode) (op0); + if (! pat) + return const0_rtx; + emit_insn (pat); + + return NULL_RTX; +} +static rtx +rs6000_expand_set_fpscr_drn_builtin (enum insn_code icode, tree exp) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + rtx op0 = expand_normal (arg0); + machine_mode mode0 = insn_data[icode].operand[0].mode; + + if (icode == CODE_FOR_nothing) + /* Builtin not supported on this processor. */ + return 0; + + /* If we got invalid arguments bail out before generating bad rtl. 
*/ + if (arg0 == error_mark_node) + return const0_rtx; + + /* If the argument is a constant, check the range. Argument can only be a + 3-bit value. Unfortunately, can't check the range of the value at + compile time if the argument is a variable. + */ + if (GET_CODE (op0) == CONST_INT && (INTVAL (op0) < 0 || INTVAL (op0) > 7)) + { + error ("Argument must be a value between 0 and 7."); + return const0_rtx; + } + + if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + pat = GEN_FCN (icode) (op0); + if (! pat) + return const0_rtx; + emit_insn (pat); + + return NULL_RTX; +} + static rtx rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target) { @@ -15987,6 +16094,26 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, case RS6000_BUILTIN_MFFS: return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target); + case RS6000_BUILTIN_MTFSB0_SI: + return rs6000_expand_mtfsb0_mtfsb1_builtin (CODE_FOR_rs6000_mtfsb0_si, + exp); + + case RS6000_BUILTIN_MTFSB1_SI: + return rs6000_expand_mtfsb0_mtfsb1_builtin (CODE_FOR_rs6000_mtfsb1_si, + exp); + + case RS6000_BUILTIN_SET_FPSCR_RN: + return rs6000_expand_set_fpscr_rn_builtin (CODE_FOR_rs6000_set_fpscr_rn, + exp); + + case RS6000_BUILTIN_SET_FPSCR_DRN: + return + rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn, + exp); + + case RS6000_BUILTIN_MFFSL: + return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target); + case RS6000_BUILTIN_MTFSF: return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp); @@ -16370,6 +16497,30 @@ rs6000_init_builtins (void) ftype = build_function_type_list (double_type_node, NULL_TREE); def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS); + ftype = build_function_type_list (double_type_node, NULL_TREE); + def_builtin ("__builtin_mffsl", ftype, RS6000_BUILTIN_MFFSL); + + ftype = build_function_type_list (void_type_node, + intSI_type_node, + NULL_TREE); + + def_builtin 
("__builtin_mtfsb0", ftype, RS6000_BUILTIN_MTFSB0_SI); + + ftype = build_function_type_list (void_type_node, + intSI_type_node, + NULL_TREE); + def_builtin ("__builtin_mtfsb1", ftype, RS6000_BUILTIN_MTFSB1_SI); + + ftype = build_function_type_list (void_type_node, + intDI_type_node, + NULL_TREE); + def_builtin ("__builtin_set_fpscr_rn", ftype, RS6000_BUILTIN_SET_FPSCR_RN); + + ftype = build_function_type_list (void_type_node, + intDI_type_node, + NULL_TREE); + def_builtin ("__builtin_set_fpscr_drn", ftype, RS6000_BUILTIN_SET_FPSCR_DRN); + ftype = build_function_type_list (void_type_node, intSI_type_node, double_type_node, NULL_TREE); diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index d70b01b8c..7714aacd8 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -163,7 +163,13 @@ UNSPECV_MFTB ; move from time base UNSPECV_NLGR ; non-local goto receiver UNSPECV_MFFS ; Move from FPSCR - UNSPECV_MTFSF ; Move to FPSCR Fields + UNSPECV_MFFSL ; Move from FPSCR light instruction version + UNSPECV_MFFSCRN ; Move from FPSCR float rounding mode + UNSPECV_MFFSCDRN ; Move from FPSCR decimal float rounding mode + UNSPECV_MTFSF ; Move to FPSCR Fields 8 to 15 + UNSPECV_MTFSF_L0W1 ; Move to FPSCR Fields 0 to 7 + UNSPECV_MTFSFB0 ; Set FPSCR Field bit to 0 + UNSPECV_MTFSFB1 ; Set FPSCR Field bit to 1 UNSPECV_SPLIT_STACK_RETURN ; A camouflaged return UNSPECV_SPEC_BARRIER ; Speculation barrier ]) @@ -5823,6 +5829,115 @@ xscvdpuxds %x0,%x1" [(set_attr "type" "fp")]) +(define_insn "rs6000_mtfsb0_si" + [(use (match_operand:SI 0 "short_cint_operand" "n")) + (unspec_volatile:SI [(const_int 0)] UNSPECV_MTFSFB0)] + "TARGET_HARD_FLOAT" + "mtfsb0 %0") + +(define_insn "rs6000_mtfsb1_si" + [(use (match_operand:SI 0 "short_cint_operand" "n")) + (unspec_volatile:SI [(const_int 0)] UNSPECV_MTFSFB1)] + "TARGET_HARD_FLOAT" + "mtfsb1 %0") + +(define_insn "rs6000_mffscrn" + [(set (match_operand:DF 0 "gpc_reg_operand" "=d") + (unspec_volatile:DF [(const_int 
0)] UNSPECV_MFFSCRN)) + (use (match_operand:DF 1 "gpc_reg_operand" "d"))] + "TARGET_HARD_FLOAT" + "mffscrn %0,%1") + +(define_insn "rs6000_mffscdrn" + [(set (match_operand:DF 0 "gpc_reg_operand" "=d") + (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSCDRN)) + (use (match_operand:DF 1 "gpc_reg_operand" "d"))] + "TARGET_HARD_FLOAT" + "mffscdrn %0,%1") + +(define_expand "rs6000_set_fpscr_rn" + [(match_operand:DI 0 "gpc_reg_operand")] + "TARGET_HARD_FLOAT" +{ + rtx tmp_df = gen_reg_rtx (DFmode); + + /* The floating point rounding control bits are FPSCR[62:63]. Put the + new rounding mode bits from operands[0][62:63] into FPSCR[62:63]. */ + if (TARGET_P9_VECTOR) + { + rtx src_df = gen_reg_rtx (DImode); + + src_df = simplify_gen_subreg (DFmode, operands[0], DImode, 0); + emit_insn (gen_rs6000_mffscrn (tmp_df, src_df)); + } + else + { + rtx tmp_rn = gen_reg_rtx (DImode); + rtx tmp_di = gen_reg_rtx (DImode); + + /* Extract new RN mode from operand. */ + emit_insn (gen_anddi3_mask (tmp_rn, operands[0], GEN_INT (0x3))); + + /* Insert new RN mode into FPSCR. */ + emit_insn (gen_rs6000_mffs (tmp_df)); + tmp_di = simplify_gen_subreg (DImode, tmp_df, DFmode, 0); + emit_insn (gen_anddi3_mask (tmp_di, tmp_di, GEN_INT (0xFFFFFFFC))); + emit_insn (gen_iordi3 (tmp_di, tmp_di, tmp_rn)); + + /* Need to write to field k=15. The fields are [0:15]. Hence with L=0, + W=0, FLM_i must be equal to 8, 16 = i + 8*(1-W). FLM is an 8 bit + field[0:7]. Need to set the bit that corresponds to the value of i + that you want [0:7]. + */ + tmp_df = simplify_gen_subreg (DFmode, tmp_di, DImode, 0); + emit_insn (gen_rs6000_mtfsf (GEN_INT (0x01), tmp_df)); + } + DONE; +}) + +(define_expand "rs6000_set_fpscr_drn" + [(match_operand:DI 0 "gpc_reg_operand")] + "TARGET_HARD_FLOAT" +{ + rtx tmp_df = gen_reg_rtx (DFmode); + + /* The decimal floating point rounding control bits are FPSCR[29:31]. Put the + new rounding mode bits from operands[0][61:63] into FPSCR[29:31]. 
*/ + + if (TARGET_P9_VECTOR) + { + rtx src_df = gen_reg_rtx (DFmode); + + emit_insn (gen_ashldi3 (operands[0], operands[0], GEN_INT (32))); + src_df = simplify_gen_subreg (DFmode, operands[0], DImode, 0); + emit_insn (gen_rs6000_mffscdrn (tmp_df, src_df)); + } + else + { + rtx tmp_rn = gen_reg_rtx (DImode); + rtx tmp_di = gen_reg_rtx (DImode); + + /* Extract new DRN mode from operand. */ + emit_insn (gen_anddi3_mask (tmp_rn, operands[0], GEN_INT (0x7))); + emit_insn (gen_ashldi3 (tmp_rn, tmp_rn, GEN_INT (32))); + + /* Insert new DRN mode into FPSCR. */ + emit_insn (gen_rs6000_mffs (tmp_df)); + tmp_di = simplify_gen_subreg (DImode, tmp_df, DFmode, 0); + emit_insn (gen_anddi3_mask (tmp_di, tmp_di, GEN_INT (0xFFF8FFFFFFFF))); + emit_insn (gen_iordi3 (tmp_di, tmp_di, tmp_rn)); + + /* Need to write to field k=7. The fields are [0:15]. Hence with L=0, + W=1, FLM_i must be equal to 7, 16 = i + 8*(1-W). FLM is an 8 bit + field[0:7]. Need to set the bit that corresponds to the value of i + that you want [0:7]. 
+ */ + tmp_df = simplify_gen_subreg (DFmode, tmp_di, DImode, 0); + emit_insn (gen_rs6000_mtfsf_L0W1 (GEN_INT (0x01), tmp_df)); + } + DONE; +}) + ;; Here, we use (set (reg) (unspec:DI [(fix:SI ...)] UNSPEC_FCTIWZ)) ;; rather than (set (subreg:SI (reg)) (fix:SI ...)) ;; because the first makes it clear that operand 0 is not live @@ -13602,6 +13717,31 @@ }) +;; The ISA 3.0 mffsl instruction is a lower latency instruction +;; for reading the FPSCR +(define_insn "rs6000_mffsl0" + [(set (match_operand:DF 0 "gpc_reg_operand" "=d") + (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSL))] + "TARGET_HARD_FLOAT && TARGET_P9_MISC" + "mffsl %0") + +(define_expand "rs6000_mffsl" + [(set (match_operand:DF 0 "gpc_reg_operand") + (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSL))] + "TARGET_HARD_FLOAT && TARGET_P9_MISC" +{ + /* If the low latency mffsl instruction (ISA 3.0) is available use it, + otherwise fall back to the older mffs instruction which does the same + thing but with a little more latency. */ + + if (TARGET_P9_VECTOR) + emit_insn (gen_rs6000_mffsl0 (operands[0])); + else + emit_insn (gen_rs6000_mffs (operands[0])); + + DONE; +}) + (define_insn "rs6000_mffs" [(set (match_operand:DF 0 "gpc_reg_operand" "=d") (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFS))] @@ -13615,6 +13755,13 @@ "TARGET_HARD_FLOAT" "mtfsf %0,%1") +(define_insn "rs6000_mtfsf_L0W1" + [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "i") + (match_operand:DF 1 "gpc_reg_operand" "d")] + UNSPECV_MTFSF_L0W1)] + "TARGET_HARD_FLOAT" + "mtfsf %0,%1,0,1") + ;; Power8 fusion support for fusing an addis instruction with a D-form load of ;; a GPR. 
The addis instruction must be adjacent to the load, and use the same diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 130f6a671..7c6279ec2 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -15745,6 +15745,16 @@ uint64_t __builtin_ppc_get_timebase (); unsigned long __builtin_ppc_mftb (); __ibm128 __builtin_unpack_ibm128 (__ibm128, int); __ibm128 __builtin_pack_ibm128 (double, double); +double __builtin_mffs(void); /* Return value of the FPSCR register. + Note, ISA 3.0 supports __builtin_mffsl() + which is a lower latency version of this + builtin. */ +void __builtin_mtfsb0(const int); /* Argument can be 0 to 31. Sets + the specified bit in the FPSCR to 0. */ +void __builtin_mtfsb1(const int); /* Argument can be 0 to 31. Sets + the specified bit in the FPSCR to 1. */ +void __builtin_set_fpscr_rn(int); /* The argument is the 2-bit value to set + the Floating point rounding mode to. */ @end smallexample The @code{__builtin_ppc_get_timebase} and @code{__builtin_ppc_mftb} @@ -15803,6 +15813,10 @@ long long __builtin_dxex (_Decimal64); long long __builtin_dxexq (_Decimal128); _Decimal128 __builtin_pack_dec128 (unsigned long long, unsigned long long); unsigned long long __builtin_unpack_dec128 (_Decimal128, int); +void __builtin_set_fpscr_drn(int); /* The argument is the 3-bit value to + set the Decimal Floating point rounding + mode to. */ + @end smallexample The following functions require @option{-mhard-float}, @@ -16004,6 +16018,12 @@ int __builtin_dfp_dtstsfi_ov (unsigned int comparison, _Decimal64 value); int __builtin_dfp_dtstsfi_ov (unsigned int comparison, _Decimal128 value); int __builtin_dfp_dtstsfi_ov_dd (unsigned int comparison, _Decimal64 value); int __builtin_dfp_dtstsfi_ov_td (unsigned int comparison, _Decimal128 value); + +double __builtin_mffsl(void); /* Return value of the FPSCR register. Uses + lower latency version of the mffs + instruction. Defaults to mffs for pre + ISA 3.0. 
*/ + @end smallexample The @code{__builtin_byte_in_set} function requires a 64-bit environment supporting ISA 3.0 or later. This function returns diff --git a/gcc/testsuite/gcc.target/powerpc/test_fpscr_builtins.c b/gcc/testsuite/gcc.target/powerpc/test_fpscr_builtins.c new file mode 100644 index 000000000..4f77078c6 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/test_fpscr_builtins.c @@ -0,0 +1,282 @@ +/* { dg-do run { target { powerpc64*-*-* && lp64 } } } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-pedantic" } */ + +#include <altivec.h> + +#define DEBUG 1 + +#ifdef DEBUG +#include <stdio.h> +#endif + +#define RN_MASK 0x3LL /* RN field mask */ +#define DRN_MASK 0x700000000LL /* DRN field mask */ + +void abort (void); + +int main () +{ + int i; + int val, bit; + double fpscr_val; + union blah { + double d; + unsigned long long ll; + } conv_val; + + unsigned long long ll_value; + register double f14; + + /* __builtin_set_fpscr_rn() builtin can take a const or a variable + value between 0 and 3 as the argument. + __builtin_set_fpscr_drn() builtin can take a const or a variable + value between 0 and 7 as the argument. + __builtin_mtfsb0 and __builtin_mtfsb1 argument must be a constant + 30 or 31. + */ + + /* Test reading the FPSCR register */ + asm volatile ("mffs %0" : "=f"(f14)); + conv_val.d = f14; + + if (conv_val.d != __builtin_mffs()) + { +#ifdef DEBUG + printf("ERROR, __builtin_mffs() returned 0x%llx, not the expecected value 0x%llx\n", + __builtin_mffs(), conv_val.d); +#else + abort(); +#endif + } + + /* Test float rounding mode builtin with const value argument. 
*/ + __builtin_set_fpscr_rn(3); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & RN_MASK; + + if (ll_value != 3) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_rn(3) returned 0x%llx, not the expecected value 0x%x\n", + ll_value, 3); +#else + abort(); +#endif + } + + val = 2; + __builtin_set_fpscr_rn(val); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & RN_MASK; + + if (ll_value != val) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_rn(val=%d) returned 0x%llx, not the expecected value 0x%x\n", + val, ll_value, val); +#else + abort(); +#endif + } + + /* Reset to 0 for testing */ + val = 0; + __builtin_set_fpscr_rn(val); + + __builtin_mtfsb1(31); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & 0x1LL; + + if (ll_value != 1) + { +#ifdef DEBUG + printf("ERROR, __builtin_mtfsb1(31) did not set the bit to a 1.\n"); +#else + abort(); +#endif + } + + __builtin_mtfsb0(31); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & 0x1LL; + + if (ll_value != 0) + { +#ifdef DEBUG + printf("ERROR, __builtin_mtfsb0(31) did not set the bit to a 0.\n"); +#else + abort(); +#endif + } + + __builtin_mtfsb1(30); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & 0x2LL; + + if (ll_value != 2) + { +#ifdef DEBUG + printf("ERROR, __builtin_mtfsb1(31) did not set the bit to a 1.\n"); +#else + abort(); +#endif + } + + __builtin_mtfsb0(30); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & 0x2LL; + + if (ll_value != 0) + { +#ifdef DEBUG + printf("ERROR, __builtin_mtfsb1(31) did not set the bit to a 0.\n"); +#else + abort(); +#endif + } + + __builtin_mtfsb1(0); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & (0x1LL << (31-0)); + + if (ll_value != (0x1LL << (31-0))) + { +#ifdef DEBUG + printf("ERROR, __builtin_mtfsb1(0) did not set the bit to a 1.\n"); +#else + abort(); +#endif + } + + __builtin_mtfsb0(0); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & (0x1LL << (31-0)); + + if (ll_value != 0) + { 
+#ifdef DEBUG + printf("ERROR, __builtin_mtfsb0(0) did not set the bit to a 0.\n"); +#else + abort(); +#endif + } + + + /* Test builtin float rounding mode with variable as argument. */ + val = 0; + __builtin_set_fpscr_rn(val); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & RN_MASK; + + if (ll_value != val) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_rn(val=%d) did not set rounding mode to %x.\n", + val, val); +#else + abort(); +#endif + } + + val = 3; + __builtin_set_fpscr_rn(val); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & RN_MASK; + + if (ll_value != val) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_rn(val=%d) did not set rounding mode to %x.\n", + val, val); +#else + abort(); +#endif + } + + /* Test builtin decimal float rounding mode with const argument. */ + __builtin_set_fpscr_drn(7); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & DRN_MASK; + + if (ll_value != 0x700000000) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_drn(7) did not set rounding mode to 7.\n"); +#else + abort(); +#endif + } + + __builtin_set_fpscr_drn(2); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & DRN_MASK; + + if (ll_value != 0x200000000) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_drn(2) did not set rounding mode to 2.\n"); +#else + abort(); +#endif + } + + __builtin_set_fpscr_drn(5); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & DRN_MASK; + + if (ll_value != 0x500000000) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_drn(5) did not set rounding mode to 5.\n"); +#else + abort(); +#endif + } + + /* Test builtin decimal float rounding mode with variable as argument. 
*/ + val = 7; + __builtin_set_fpscr_drn(val); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & DRN_MASK; + + if (ll_value != ((unsigned long long)val << 32)) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_drn(val=%d) did not set rounding mode to %d.\n", + val, val); +#else + abort(); +#endif + } + + val = 0; + __builtin_set_fpscr_drn(val); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & DRN_MASK; + + if (ll_value != ((unsigned long long)val << 32)) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_drn(val=%d) did not set rounding mode to %d.\n", + val, val); +#else + abort(); +#endif + } + + val = 2; + __builtin_set_fpscr_drn(val); + conv_val.d = __builtin_mffs(); + ll_value = conv_val.ll & DRN_MASK; + + if (ll_value != ((unsigned long long)val << 32)) + { +#ifdef DEBUG + printf("ERROR, __builtin_set_fpscr_drn(val=%d) did not set rounding mode to %d.\n", + val, val); +#else + abort(); +#endif + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/test_fpscr_builtins_error.c b/gcc/testsuite/gcc.target/powerpc/test_fpscr_builtins_error.c new file mode 100644 index 000000000..10de0be44 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/test_fpscr_builtins_error.c @@ -0,0 +1,26 @@ +/* { dg-do compile { target powerpc*-*-* } } */ + +#include <altivec.h> + +int main () +{ + + /* Test builin with out of range arguments. Can only test for constant + int arguments. The builtins __builtin_set_fpscr_rn(), + __builtin_set_fpscr_drn() also support a variable as an argument but + can't test variable value at compile time. */ + + __builtin_mtfsb0(-1); /* { dg-error "Argument must be a constant between 0 and 31." } */ + __builtin_mtfsb0(32); /* { dg-error "Argument must be a constant between 0 and 31." } */ + + __builtin_mtfsb1(-1); /* { dg-error "Argument must be a constant between 0 and 31." } */ + __builtin_mtfsb1(32); /* { dg-error "Argument must be a constant between 0 and 31." 
} */ + + __builtin_set_fpscr_rn(-1); /* { dg-error "Argument must be a value between 0 and 3." } */ + __builtin_set_fpscr_rn(4); /* { dg-error "Argument must be a value between 0 and 3." } */ + + __builtin_set_fpscr_drn(-1); /* { dg-error "Argument must be a value between 0 and 7." } */ + __builtin_set_fpscr_drn(8); /* { dg-error "Argument must be a value between 0 and 7." } */ + +} + diff --git a/gcc/testsuite/gcc.target/powerpc/test_mffsl-p9.c b/gcc/testsuite/gcc.target/powerpc/test_mffsl-p9.c new file mode 100644 index 000000000..dc4f863ca --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/test_mffsl-p9.c @@ -0,0 +1,36 @@ +/* { dg-do run { target { powerpc*-*-linux* && { lp64 && p9vector_hw } } } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-options "-pedantic -mcpu=power9" } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ + +#include <altivec.h> + +#ifdef DEBUG +#include <stdio.h> +#endif + +void abort (void); + +int main () +{ + + register double f14; + union blah { + double d; + unsigned long long ll; + } conv_val; + + /* Test reading the FPSCR register. */ + asm volatile ("mffs %0" : "=f"(f14)); + conv_val.d = f14; + + if (conv_val.d != __builtin_mffsl()) + { +#ifdef DEBUG + printf("ERROR, __builtin_mffsl() returned 0x%llx, not the expecected value 0x%llx\n", + __builtin_mffsl(), conv_val.d); +#else + abort(); +#endif + } +}