| Message ID | mptfsj61qnl.fsf@arm.com |
|---|---|
| State | New |
| Headers | show |
| Series | Add internal functions for iround etc. [PR106253] |
On Tue, Jul 12, 2022 at 2:07 PM Richard Sandiford via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > The PR is about the aarch64 port using an ACLE built-in function > to vectorise a scalar function call, even though the ECF_* flags for > the ACLE function didn't match the ECF_* flags for the scalar call. > > To some extent that kind of difference is inevitable, since the > ACLE intrinsics are supposed to follow the behaviour of the > underlying instruction as closely as possible. Also, using > target-specific builtins has the drawback of limiting further > gimple optimisation, since the gimple optimisers won't know what > the function does. > > We handle several other maths functions, including round, floor > and ceil, by defining directly-mapped internal functions that > are linked to the associated built-in functions. This has two > main advantages: > > - it means that, internally, we are not restricted to the set of > scalar types that happen to have associated C/C++ functions > > - the functions (and thus the underlying optabs) extend naturally > to vectors > > This patch takes the same approach for the remaining functions > handled by aarch64_builtin_vectorized_function. > > Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install? OK. Thanks, Richard. > Richard > > > gcc/ > PR target/106253 > * predict.h (insn_optimization_type): Declare. > * predict.cc (insn_optimization_type): New function. > * internal-fn.def (IFN_ICEIL, IFN_IFLOOR, IFN_IRINT, IFN_IROUND) > (IFN_LCEIL, IFN_LFLOOR, IFN_LRINT, IFN_LROUND, IFN_LLCEIL) > (IFN_LLFLOOR, IFN_LLRINT, IFN_LLROUND): New internal functions. > * internal-fn.cc (unary_convert_direct): New macro. > (expand_convert_optab_fn): New function. > (expand_unary_convert_optab_fn): New macro. > (direct_unary_convert_optab_supported_p): Likewise. > * optabs.cc (expand_sfix_optab): Pass insn_optimization_type to > convert_optab_handler. > * config/aarch64/aarch64-protos.h > (aarch64_builtin_vectorized_function): Delete. 
> * config/aarch64/aarch64-builtins.cc > (aarch64_builtin_vectorized_function): Delete. > * config/aarch64/aarch64.cc > (TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION): Delete. > * config/i386/i386.cc (ix86_optab_supported_p): Handle lround_optab. > * config/i386/i386.md (lround<X87MODEF:mode><SWI248x:mode>2): Remove > optimize_insn_for_size_p test. > > gcc/testsuite/ > PR target/106253 > * gcc.target/aarch64/vect_unary_1.c: Add tests for iroundf, > llround, iceilf, llceil, ifloorf, llfloor, irintf and llrint. > * gfortran.dg/vect/pr106253.f: New test. > --- > gcc/config/aarch64/aarch64-builtins.cc | 83 ------------------- > gcc/config/aarch64/aarch64-protos.h | 1 - > gcc/config/aarch64/aarch64.cc | 4 - > gcc/config/i386/i386.cc | 1 + > gcc/config/i386/i386.md | 3 - > gcc/internal-fn.cc | 20 +++++ > gcc/internal-fn.def | 23 +++++ > gcc/optabs.cc | 3 +- > gcc/predict.cc | 11 +++ > gcc/predict.h | 1 + > .../gcc.target/aarch64/vect_unary_1.c | 65 ++++++++++++++- > gcc/testsuite/gfortran.dg/vect/pr106253.f | 35 ++++++++ > 12 files changed, 157 insertions(+), 93 deletions(-) > create mode 100644 gcc/testsuite/gfortran.dg/vect/pr106253.f > > diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc > index a486321e10f..adfddb8b215 100644 > --- a/gcc/config/aarch64/aarch64-builtins.cc > +++ b/gcc/config/aarch64/aarch64-builtins.cc > @@ -2555,89 +2555,6 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target, > gcc_unreachable (); > } > > -tree > -aarch64_builtin_vectorized_function (unsigned int fn, tree type_out, > - tree type_in) > -{ > - machine_mode in_mode, out_mode; > - > - if (TREE_CODE (type_out) != VECTOR_TYPE > - || TREE_CODE (type_in) != VECTOR_TYPE) > - return NULL_TREE; > - > - out_mode = TYPE_MODE (type_out); > - in_mode = TYPE_MODE (type_in); > - > -#undef AARCH64_CHECK_BUILTIN_MODE > -#define AARCH64_CHECK_BUILTIN_MODE(C, N) 1 > -#define AARCH64_FIND_FRINT_VARIANT(N) \ > - (AARCH64_CHECK_BUILTIN_MODE 
(2, D) \ > - ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2df] \ > - : (AARCH64_CHECK_BUILTIN_MODE (4, S) \ > - ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v4sf] \ > - : (AARCH64_CHECK_BUILTIN_MODE (2, S) \ > - ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2sf] \ > - : NULL_TREE))) > - switch (fn) > - { > -#undef AARCH64_CHECK_BUILTIN_MODE > -#define AARCH64_CHECK_BUILTIN_MODE(C, N) \ > - (out_mode == V##C##N##Imode && in_mode == V##C##N##Fmode) > - CASE_CFN_IFLOOR: > - CASE_CFN_LFLOOR: > - CASE_CFN_LLFLOOR: > - { > - enum aarch64_builtins builtin; > - if (AARCH64_CHECK_BUILTIN_MODE (2, D)) > - builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2dfv2di; > - else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) > - builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv4sfv4si; > - else if (AARCH64_CHECK_BUILTIN_MODE (2, S)) > - builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2sfv2si; > - else > - return NULL_TREE; > - > - return aarch64_builtin_decls[builtin]; > - } > - CASE_CFN_ICEIL: > - CASE_CFN_LCEIL: > - CASE_CFN_LLCEIL: > - { > - enum aarch64_builtins builtin; > - if (AARCH64_CHECK_BUILTIN_MODE (2, D)) > - builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2dfv2di; > - else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) > - builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv4sfv4si; > - else if (AARCH64_CHECK_BUILTIN_MODE (2, S)) > - builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2sfv2si; > - else > - return NULL_TREE; > - > - return aarch64_builtin_decls[builtin]; > - } > - CASE_CFN_IROUND: > - CASE_CFN_LROUND: > - CASE_CFN_LLROUND: > - { > - enum aarch64_builtins builtin; > - if (AARCH64_CHECK_BUILTIN_MODE (2, D)) > - builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv2dfv2di; > - else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) > - builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv4sfv4si; > - else if (AARCH64_CHECK_BUILTIN_MODE (2, S)) > - builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv2sfv2si; > - else > - return NULL_TREE; > - > - return aarch64_builtin_decls[builtin]; > - } > - default: > - return NULL_TREE; 
> - } > - > - return NULL_TREE; > -} > - > /* Return builtin for reciprocal square root. */ > > tree > diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h > index dabd047d7ba..19c9d3cb179 100644 > --- a/gcc/config/aarch64/aarch64-protos.h > +++ b/gcc/config/aarch64/aarch64-protos.h > @@ -986,7 +986,6 @@ gimple *aarch64_general_gimple_fold_builtin (unsigned int, gcall *, > rtx aarch64_general_expand_builtin (unsigned int, tree, rtx, int); > tree aarch64_general_builtin_decl (unsigned, bool); > tree aarch64_general_builtin_rsqrt (unsigned int); > -tree aarch64_builtin_vectorized_function (unsigned int, tree, tree); > void handle_arm_acle_h (void); > void handle_arm_neon_h (void); > > diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc > index d049f9a9819..25f4cbb466d 100644 > --- a/gcc/config/aarch64/aarch64.cc > +++ b/gcc/config/aarch64/aarch64.cc > @@ -27584,10 +27584,6 @@ aarch64_libgcc_floating_mode_supported_p > #undef TARGET_VECTORIZE_BUILTINS > #define TARGET_VECTORIZE_BUILTINS > > -#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION > -#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \ > - aarch64_builtin_vectorized_function > - > #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES > #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \ > aarch64_autovectorize_vector_modes > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc > index 95cb1e2ce70..3a3c7299eb4 100644 > --- a/gcc/config/i386/i386.cc > +++ b/gcc/config/i386/i386.cc > @@ -24004,6 +24004,7 @@ ix86_optab_supported_p (int op, machine_mode mode1, machine_mode, > case ldexp_optab: > case scalb_optab: > case round_optab: > + case lround_optab: > return opt_type == OPTIMIZE_FOR_SPEED; > > case rint_optab: > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index 3b02d0cd567..bf29f444382 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -19926,9 +19926,6 @@ (define_expand 
"lround<X87MODEF:mode><SWI248x:mode>2" > && ((<SWI248x:MODE>mode != DImode) || TARGET_64BIT) > && !flag_trapping_math && !flag_rounding_math)" > { > - if (optimize_insn_for_size_p ()) > - FAIL; > - > if (SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH > && <SWI248x:MODE>mode != HImode > && ((<SWI248x:MODE>mode != DImode) || TARGET_64BIT) > diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc > index d666ccccf67..28973d957fb 100644 > --- a/gcc/internal-fn.cc > +++ b/gcc/internal-fn.cc > @@ -120,6 +120,7 @@ init_internal_fns () > #define len_store_direct { 3, 3, false } > #define vec_set_direct { 3, 3, false } > #define unary_direct { 0, 0, true } > +#define unary_convert_direct { -1, 0, true } > #define binary_direct { 0, 0, true } > #define ternary_direct { 0, 0, true } > #define cond_unary_direct { 1, 1, true } > @@ -3679,6 +3680,19 @@ expand_while_optab_fn (internal_fn, gcall *stmt, convert_optab optab) > emit_move_insn (lhs_rtx, ops[0].value); > } > > +/* Expand a call to a convert-like optab using the operands in STMT. > + FN has a single output operand and NARGS input operands. */ > + > +static void > +expand_convert_optab_fn (internal_fn fn, gcall *stmt, convert_optab optab, > + unsigned int nargs) > +{ > + tree_pair types = direct_internal_fn_types (fn, stmt); > + insn_code icode = convert_optab_handler (optab, TYPE_MODE (types.first), > + TYPE_MODE (types.second)); > + expand_fn_using_insn (stmt, icode, 1, nargs); > +} > + > /* Expanders for optabs that can use expand_direct_optab_fn. */ > > #define expand_unary_optab_fn(FN, STMT, OPTAB) \ > @@ -3711,6 +3725,11 @@ expand_while_optab_fn (internal_fn, gcall *stmt, convert_optab optab) > #define expand_check_ptrs_optab_fn(FN, STMT, OPTAB) \ > expand_direct_optab_fn (FN, STMT, OPTAB, 4) > > +/* Expanders for optabs that can use expand_convert_optab_fn. 
*/ > + > +#define expand_unary_convert_optab_fn(FN, STMT, OPTAB) \ > + expand_convert_optab_fn (FN, STMT, OPTAB, 1) > + > /* RETURN_TYPE and ARGS are a return type and argument list that are > in principle compatible with FN (which satisfies direct_internal_fn_p). > Return the types that should be used to determine whether the > @@ -3783,6 +3802,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types, > } > > #define direct_unary_optab_supported_p direct_optab_supported_p > +#define direct_unary_convert_optab_supported_p convert_optab_supported_p > #define direct_binary_optab_supported_p direct_optab_supported_p > #define direct_ternary_optab_supported_p direct_optab_supported_p > #define direct_cond_unary_optab_supported_p direct_optab_supported_p > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def > index d2d550d3586..7c398baadc8 100644 > --- a/gcc/internal-fn.def > +++ b/gcc/internal-fn.def > @@ -61,6 +61,9 @@ along with GCC; see the file COPYING3. If not see > - binary: a normal binary optab, such as vec_interleave_lo_<mode> > - ternary: a normal ternary optab, such as fma<mode>4 > > + - unary_convert: a single-input conversion optab, such as > + lround<srcmode><dstmode>2. > + > - cond_binary: a conditional binary optab, such as cond_add<mode> > - cond_ternary: a conditional ternary optab, such as cond_fma_rev<mode> > > @@ -267,6 +270,26 @@ DEF_INTERNAL_FLT_FLOATN_FN (SQRT, ECF_CONST, sqrt, unary) > DEF_INTERNAL_FLT_FN (TAN, ECF_CONST, tan, unary) > DEF_INTERNAL_FLT_FN (TANH, ECF_CONST, tanh, unary) > > +/* Floating-point to integer conversions. > + > + ??? Here we preserve the I/L/LL prefix convention from the > + corresponding built-in functions, rather than make the internal > + functions polymorphic in both the argument and the return types. > + Perhaps an alternative would be to pass a zero of the required > + return type as a second parameter. 
*/ > +DEF_INTERNAL_FLT_FN (ICEIL, ECF_CONST, lceil, unary_convert) > +DEF_INTERNAL_FLT_FN (IFLOOR, ECF_CONST, lfloor, unary_convert) > +DEF_INTERNAL_FLT_FN (IRINT, ECF_CONST, lrint, unary_convert) > +DEF_INTERNAL_FLT_FN (IROUND, ECF_CONST, lround, unary_convert) > +DEF_INTERNAL_FLT_FN (LCEIL, ECF_CONST, lceil, unary_convert) > +DEF_INTERNAL_FLT_FN (LFLOOR, ECF_CONST, lfloor, unary_convert) > +DEF_INTERNAL_FLT_FN (LRINT, ECF_CONST, lrint, unary_convert) > +DEF_INTERNAL_FLT_FN (LROUND, ECF_CONST, lround, unary_convert) > +DEF_INTERNAL_FLT_FN (LLCEIL, ECF_CONST, lceil, unary_convert) > +DEF_INTERNAL_FLT_FN (LLFLOOR, ECF_CONST, lfloor, unary_convert) > +DEF_INTERNAL_FLT_FN (LLRINT, ECF_CONST, lrint, unary_convert) > +DEF_INTERNAL_FLT_FN (LLROUND, ECF_CONST, lround, unary_convert) > + > /* FP rounding. */ > DEF_INTERNAL_FLT_FLOATN_FN (CEIL, ECF_CONST, ceil, unary) > DEF_INTERNAL_FLT_FLOATN_FN (FLOOR, ECF_CONST, floor, unary) > diff --git a/gcc/optabs.cc b/gcc/optabs.cc > index a50dd798f2a..165f8d1fa22 100644 > --- a/gcc/optabs.cc > +++ b/gcc/optabs.cc > @@ -5828,7 +5828,8 @@ expand_sfix_optab (rtx to, rtx from, convert_optab tab) > FOR_EACH_MODE_FROM (fmode, GET_MODE (from)) > FOR_EACH_MODE_FROM (imode, GET_MODE (to)) > { > - icode = convert_optab_handler (tab, imode, fmode); > + icode = convert_optab_handler (tab, imode, fmode, > + insn_optimization_type ()); > if (icode != CODE_FOR_nothing) > { > rtx_insn *last = get_last_insn (); > diff --git a/gcc/predict.cc b/gcc/predict.cc > index b36caa3ae82..1bc7ab94454 100644 > --- a/gcc/predict.cc > +++ b/gcc/predict.cc > @@ -362,6 +362,17 @@ optimize_insn_for_speed_p (void) > return !optimize_insn_for_size_p (); > } > > +/* Return the optimization type that should be used for the current > + instruction. */ > + > +optimization_type > +insn_optimization_type () > +{ > + return (optimize_insn_for_speed_p () > + ? OPTIMIZE_FOR_SPEED > + : OPTIMIZE_FOR_SIZE); > +} > + > /* Return TRUE if LOOP should be optimized for size. 
*/ > > optimize_size_level > diff --git a/gcc/predict.h b/gcc/predict.h > index 864997498ec..25484373769 100644 > --- a/gcc/predict.h > +++ b/gcc/predict.h > @@ -68,6 +68,7 @@ extern enum optimize_size_level optimize_edge_for_size_p (edge); > extern bool optimize_edge_for_speed_p (edge); > extern enum optimize_size_level optimize_insn_for_size_p (void); > extern bool optimize_insn_for_speed_p (void); > +extern optimization_type insn_optimization_type (); > extern enum optimize_size_level optimize_loop_for_size_p (class loop *); > extern bool optimize_loop_for_speed_p (class loop *); > extern bool optimize_loop_nest_for_speed_p (class loop *); > diff --git a/gcc/testsuite/gcc.target/aarch64/vect_unary_1.c b/gcc/testsuite/gcc.target/aarch64/vect_unary_1.c > index 8516808becf..94d9af1a55d 100644 > --- a/gcc/testsuite/gcc.target/aarch64/vect_unary_1.c > +++ b/gcc/testsuite/gcc.target/aarch64/vect_unary_1.c > @@ -1,4 +1,4 @@ > -/* { dg-options "-O3 --save-temps" } */ > +/* { dg-options "-O3 -fno-math-errno --save-temps" } */ > /* { dg-final { check-function-bodies "**" "" "" } } */ > > #include <stdint.h> > @@ -184,3 +184,66 @@ TEST2 (int, ctz, int) > ** ret > */ > TEST4 (int, ctz, int) > + > +/* > +** test2_int_iroundf_float: > +** fcvtas v0.2s, v1.2s > +** ret > +*/ > +TEST2 (int, iroundf, float) > + > +/* > +** test2_int64_t_llround_double: > +** fcvtas v0.2d, v1.2d > +** ret > +*/ > +TEST2 (int64_t, llround, double) > + > +/* > +** test4_int_iroundf_float: > +** fcvtas v0.4s, v1.4s > +** ret > +*/ > +TEST4 (int, iroundf, float) > + > +/* > +** test2_int_ifloorf_float: > +** fcvtms v0.2s, v1.2s > +** ret > +*/ > +TEST2 (int, ifloorf, float) > + > +/* > +** test2_int64_t_llfloor_double: > +** fcvtms v0.2d, v1.2d > +** ret > +*/ > +TEST2 (int64_t, llfloor, double) > + > +/* > +** test4_int_ifloorf_float: > +** fcvtms v0.4s, v1.4s > +** ret > +*/ > +TEST4 (int, ifloorf, float) > + > +/* > +** test2_int_iceilf_float: > +** fcvtps v0.2s, v1.2s > +** ret > +*/ > +TEST2 
(int, iceilf, float) > + > +/* > +** test2_int64_t_llceil_double: > +** fcvtps v0.2d, v1.2d > +** ret > +*/ > +TEST2 (int64_t, llceil, double) > + > +/* > +** test4_int_iceilf_float: > +** fcvtps v0.4s, v1.4s > +** ret > +*/ > +TEST4 (int, iceilf, float) > diff --git a/gcc/testsuite/gfortran.dg/vect/pr106253.f b/gcc/testsuite/gfortran.dg/vect/pr106253.f > new file mode 100644 > index 00000000000..1b6b7e892f2 > --- /dev/null > +++ b/gcc/testsuite/gfortran.dg/vect/pr106253.f > @@ -0,0 +1,35 @@ > +! { dg-do compile } > + > + SUBROUTINE DGEMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX, & > + & BETA, Y, INCY ) > + LOGICAL LSAME > + IF ( .NOT.LSAME( TRANS, 'N' ).AND. & > + & .NOT.LSAME( TRANS, 'C' ) )THEN > + END IF > + END > + subroutine evlrnf (ptrs0t, nclsm, prnf0t) > + real, dimension (1:nclsm,1:nclsm), intent (in) :: ptrs0t > + real, dimension (1:nclsm,1:nclsm), intent (out):: prnf0t > + real, allocatable, dimension (:,:) :: utrsft ! probas up > + real, allocatable, dimension (:,:) :: dtrsft ! probas down > + real, allocatable, dimension (:,:) :: xwrkt ! matrice > + do icls = 1, nclsm > + do ival = ipic - 1, 1, -1 > + xwrkt = trs2a2 (ival, ipic, utrsft, dtrsft, ncls) > + enddo > + enddo > + contains > + function trs2a2 (j, k, u, d, m) > + real, dimension (1:m,1:m) :: trs2a2 ! resultat > + real, dimension (1:m,1:m) :: u, d ! matrices utrsft, dtrsft > + end function trs2a2 > + end > + program rnflow > + integer, parameter :: ncls = 256 ! nombre de classes > + integer, dimension (1:ncls,1:ncls) :: mrnftt ! matrice theorique > + real, dimension (1:ncls,1:ncls) :: ptrst ! matrice Markov > + real, dimension (1:ncls,1:ncls) :: prnft ! matrice Rainflow > + call evlrnf (ptrst, ncls, prnft) > + mrnftt = nint (real (nsim) * real (npic) * prnft) > + call cmpmat (mrnftt, mrnfst) > + end program rnflow > -- > 2.25.1 >
diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc index a486321e10f..adfddb8b215 100644 --- a/gcc/config/aarch64/aarch64-builtins.cc +++ b/gcc/config/aarch64/aarch64-builtins.cc @@ -2555,89 +2555,6 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target, gcc_unreachable (); } -tree -aarch64_builtin_vectorized_function (unsigned int fn, tree type_out, - tree type_in) -{ - machine_mode in_mode, out_mode; - - if (TREE_CODE (type_out) != VECTOR_TYPE - || TREE_CODE (type_in) != VECTOR_TYPE) - return NULL_TREE; - - out_mode = TYPE_MODE (type_out); - in_mode = TYPE_MODE (type_in); - -#undef AARCH64_CHECK_BUILTIN_MODE -#define AARCH64_CHECK_BUILTIN_MODE(C, N) 1 -#define AARCH64_FIND_FRINT_VARIANT(N) \ - (AARCH64_CHECK_BUILTIN_MODE (2, D) \ - ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2df] \ - : (AARCH64_CHECK_BUILTIN_MODE (4, S) \ - ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v4sf] \ - : (AARCH64_CHECK_BUILTIN_MODE (2, S) \ - ? 
aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2sf] \ - : NULL_TREE))) - switch (fn) - { -#undef AARCH64_CHECK_BUILTIN_MODE -#define AARCH64_CHECK_BUILTIN_MODE(C, N) \ - (out_mode == V##C##N##Imode && in_mode == V##C##N##Fmode) - CASE_CFN_IFLOOR: - CASE_CFN_LFLOOR: - CASE_CFN_LLFLOOR: - { - enum aarch64_builtins builtin; - if (AARCH64_CHECK_BUILTIN_MODE (2, D)) - builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2dfv2di; - else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) - builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv4sfv4si; - else if (AARCH64_CHECK_BUILTIN_MODE (2, S)) - builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2sfv2si; - else - return NULL_TREE; - - return aarch64_builtin_decls[builtin]; - } - CASE_CFN_ICEIL: - CASE_CFN_LCEIL: - CASE_CFN_LLCEIL: - { - enum aarch64_builtins builtin; - if (AARCH64_CHECK_BUILTIN_MODE (2, D)) - builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2dfv2di; - else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) - builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv4sfv4si; - else if (AARCH64_CHECK_BUILTIN_MODE (2, S)) - builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2sfv2si; - else - return NULL_TREE; - - return aarch64_builtin_decls[builtin]; - } - CASE_CFN_IROUND: - CASE_CFN_LROUND: - CASE_CFN_LLROUND: - { - enum aarch64_builtins builtin; - if (AARCH64_CHECK_BUILTIN_MODE (2, D)) - builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv2dfv2di; - else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) - builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv4sfv4si; - else if (AARCH64_CHECK_BUILTIN_MODE (2, S)) - builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv2sfv2si; - else - return NULL_TREE; - - return aarch64_builtin_decls[builtin]; - } - default: - return NULL_TREE; - } - - return NULL_TREE; -} - /* Return builtin for reciprocal square root. 
*/ tree diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index dabd047d7ba..19c9d3cb179 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -986,7 +986,6 @@ gimple *aarch64_general_gimple_fold_builtin (unsigned int, gcall *, rtx aarch64_general_expand_builtin (unsigned int, tree, rtx, int); tree aarch64_general_builtin_decl (unsigned, bool); tree aarch64_general_builtin_rsqrt (unsigned int); -tree aarch64_builtin_vectorized_function (unsigned int, tree, tree); void handle_arm_acle_h (void); void handle_arm_neon_h (void); diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index d049f9a9819..25f4cbb466d 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -27584,10 +27584,6 @@ aarch64_libgcc_floating_mode_supported_p #undef TARGET_VECTORIZE_BUILTINS #define TARGET_VECTORIZE_BUILTINS -#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION -#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \ - aarch64_builtin_vectorized_function - #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \ aarch64_autovectorize_vector_modes diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 95cb1e2ce70..3a3c7299eb4 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -24004,6 +24004,7 @@ ix86_optab_supported_p (int op, machine_mode mode1, machine_mode, case ldexp_optab: case scalb_optab: case round_optab: + case lround_optab: return opt_type == OPTIMIZE_FOR_SPEED; case rint_optab: diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 3b02d0cd567..bf29f444382 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -19926,9 +19926,6 @@ (define_expand "lround<X87MODEF:mode><SWI248x:mode>2" && ((<SWI248x:MODE>mode != DImode) || TARGET_64BIT) && !flag_trapping_math && !flag_rounding_math)" { - if (optimize_insn_for_size_p ()) - FAIL; - if 
(SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH && <SWI248x:MODE>mode != HImode && ((<SWI248x:MODE>mode != DImode) || TARGET_64BIT) diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index d666ccccf67..28973d957fb 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -120,6 +120,7 @@ init_internal_fns () #define len_store_direct { 3, 3, false } #define vec_set_direct { 3, 3, false } #define unary_direct { 0, 0, true } +#define unary_convert_direct { -1, 0, true } #define binary_direct { 0, 0, true } #define ternary_direct { 0, 0, true } #define cond_unary_direct { 1, 1, true } @@ -3679,6 +3680,19 @@ expand_while_optab_fn (internal_fn, gcall *stmt, convert_optab optab) emit_move_insn (lhs_rtx, ops[0].value); } +/* Expand a call to a convert-like optab using the operands in STMT. + FN has a single output operand and NARGS input operands. */ + +static void +expand_convert_optab_fn (internal_fn fn, gcall *stmt, convert_optab optab, + unsigned int nargs) +{ + tree_pair types = direct_internal_fn_types (fn, stmt); + insn_code icode = convert_optab_handler (optab, TYPE_MODE (types.first), + TYPE_MODE (types.second)); + expand_fn_using_insn (stmt, icode, 1, nargs); +} + /* Expanders for optabs that can use expand_direct_optab_fn. */ #define expand_unary_optab_fn(FN, STMT, OPTAB) \ @@ -3711,6 +3725,11 @@ expand_while_optab_fn (internal_fn, gcall *stmt, convert_optab optab) #define expand_check_ptrs_optab_fn(FN, STMT, OPTAB) \ expand_direct_optab_fn (FN, STMT, OPTAB, 4) +/* Expanders for optabs that can use expand_convert_optab_fn. */ + +#define expand_unary_convert_optab_fn(FN, STMT, OPTAB) \ + expand_convert_optab_fn (FN, STMT, OPTAB, 1) + /* RETURN_TYPE and ARGS are a return type and argument list that are in principle compatible with FN (which satisfies direct_internal_fn_p). 
Return the types that should be used to determine whether the @@ -3783,6 +3802,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types, } #define direct_unary_optab_supported_p direct_optab_supported_p +#define direct_unary_convert_optab_supported_p convert_optab_supported_p #define direct_binary_optab_supported_p direct_optab_supported_p #define direct_ternary_optab_supported_p direct_optab_supported_p #define direct_cond_unary_optab_supported_p direct_optab_supported_p diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index d2d550d3586..7c398baadc8 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -61,6 +61,9 @@ along with GCC; see the file COPYING3. If not see - binary: a normal binary optab, such as vec_interleave_lo_<mode> - ternary: a normal ternary optab, such as fma<mode>4 + - unary_convert: a single-input conversion optab, such as + lround<srcmode><dstmode>2. + - cond_binary: a conditional binary optab, such as cond_add<mode> - cond_ternary: a conditional ternary optab, such as cond_fma_rev<mode> @@ -267,6 +270,26 @@ DEF_INTERNAL_FLT_FLOATN_FN (SQRT, ECF_CONST, sqrt, unary) DEF_INTERNAL_FLT_FN (TAN, ECF_CONST, tan, unary) DEF_INTERNAL_FLT_FN (TANH, ECF_CONST, tanh, unary) +/* Floating-point to integer conversions. + + ??? Here we preserve the I/L/LL prefix convention from the + corresponding built-in functions, rather than make the internal + functions polymorphic in both the argument and the return types. + Perhaps an alternative would be to pass a zero of the required + return type as a second parameter. 
*/ +DEF_INTERNAL_FLT_FN (ICEIL, ECF_CONST, lceil, unary_convert) +DEF_INTERNAL_FLT_FN (IFLOOR, ECF_CONST, lfloor, unary_convert) +DEF_INTERNAL_FLT_FN (IRINT, ECF_CONST, lrint, unary_convert) +DEF_INTERNAL_FLT_FN (IROUND, ECF_CONST, lround, unary_convert) +DEF_INTERNAL_FLT_FN (LCEIL, ECF_CONST, lceil, unary_convert) +DEF_INTERNAL_FLT_FN (LFLOOR, ECF_CONST, lfloor, unary_convert) +DEF_INTERNAL_FLT_FN (LRINT, ECF_CONST, lrint, unary_convert) +DEF_INTERNAL_FLT_FN (LROUND, ECF_CONST, lround, unary_convert) +DEF_INTERNAL_FLT_FN (LLCEIL, ECF_CONST, lceil, unary_convert) +DEF_INTERNAL_FLT_FN (LLFLOOR, ECF_CONST, lfloor, unary_convert) +DEF_INTERNAL_FLT_FN (LLRINT, ECF_CONST, lrint, unary_convert) +DEF_INTERNAL_FLT_FN (LLROUND, ECF_CONST, lround, unary_convert) + /* FP rounding. */ DEF_INTERNAL_FLT_FLOATN_FN (CEIL, ECF_CONST, ceil, unary) DEF_INTERNAL_FLT_FLOATN_FN (FLOOR, ECF_CONST, floor, unary) diff --git a/gcc/optabs.cc b/gcc/optabs.cc index a50dd798f2a..165f8d1fa22 100644 --- a/gcc/optabs.cc +++ b/gcc/optabs.cc @@ -5828,7 +5828,8 @@ expand_sfix_optab (rtx to, rtx from, convert_optab tab) FOR_EACH_MODE_FROM (fmode, GET_MODE (from)) FOR_EACH_MODE_FROM (imode, GET_MODE (to)) { - icode = convert_optab_handler (tab, imode, fmode); + icode = convert_optab_handler (tab, imode, fmode, + insn_optimization_type ()); if (icode != CODE_FOR_nothing) { rtx_insn *last = get_last_insn (); diff --git a/gcc/predict.cc b/gcc/predict.cc index b36caa3ae82..1bc7ab94454 100644 --- a/gcc/predict.cc +++ b/gcc/predict.cc @@ -362,6 +362,17 @@ optimize_insn_for_speed_p (void) return !optimize_insn_for_size_p (); } +/* Return the optimization type that should be used for the current + instruction. */ + +optimization_type +insn_optimization_type () +{ + return (optimize_insn_for_speed_p () + ? OPTIMIZE_FOR_SPEED + : OPTIMIZE_FOR_SIZE); +} + /* Return TRUE if LOOP should be optimized for size. 
*/ optimize_size_level diff --git a/gcc/predict.h b/gcc/predict.h index 864997498ec..25484373769 100644 --- a/gcc/predict.h +++ b/gcc/predict.h @@ -68,6 +68,7 @@ extern enum optimize_size_level optimize_edge_for_size_p (edge); extern bool optimize_edge_for_speed_p (edge); extern enum optimize_size_level optimize_insn_for_size_p (void); extern bool optimize_insn_for_speed_p (void); +extern optimization_type insn_optimization_type (); extern enum optimize_size_level optimize_loop_for_size_p (class loop *); extern bool optimize_loop_for_speed_p (class loop *); extern bool optimize_loop_nest_for_speed_p (class loop *); diff --git a/gcc/testsuite/gcc.target/aarch64/vect_unary_1.c b/gcc/testsuite/gcc.target/aarch64/vect_unary_1.c index 8516808becf..94d9af1a55d 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect_unary_1.c +++ b/gcc/testsuite/gcc.target/aarch64/vect_unary_1.c @@ -1,4 +1,4 @@ -/* { dg-options "-O3 --save-temps" } */ +/* { dg-options "-O3 -fno-math-errno --save-temps" } */ /* { dg-final { check-function-bodies "**" "" "" } } */ #include <stdint.h> @@ -184,3 +184,66 @@ TEST2 (int, ctz, int) ** ret */ TEST4 (int, ctz, int) + +/* +** test2_int_iroundf_float: +** fcvtas v0.2s, v1.2s +** ret +*/ +TEST2 (int, iroundf, float) + +/* +** test2_int64_t_llround_double: +** fcvtas v0.2d, v1.2d +** ret +*/ +TEST2 (int64_t, llround, double) + +/* +** test4_int_iroundf_float: +** fcvtas v0.4s, v1.4s +** ret +*/ +TEST4 (int, iroundf, float) + +/* +** test2_int_ifloorf_float: +** fcvtms v0.2s, v1.2s +** ret +*/ +TEST2 (int, ifloorf, float) + +/* +** test2_int64_t_llfloor_double: +** fcvtms v0.2d, v1.2d +** ret +*/ +TEST2 (int64_t, llfloor, double) + +/* +** test4_int_ifloorf_float: +** fcvtms v0.4s, v1.4s +** ret +*/ +TEST4 (int, ifloorf, float) + +/* +** test2_int_iceilf_float: +** fcvtps v0.2s, v1.2s +** ret +*/ +TEST2 (int, iceilf, float) + +/* +** test2_int64_t_llceil_double: +** fcvtps v0.2d, v1.2d +** ret +*/ +TEST2 (int64_t, llceil, double) + +/* +** 
test4_int_iceilf_float: +** fcvtps v0.4s, v1.4s +** ret +*/ +TEST4 (int, iceilf, float) diff --git a/gcc/testsuite/gfortran.dg/vect/pr106253.f b/gcc/testsuite/gfortran.dg/vect/pr106253.f new file mode 100644 index 00000000000..1b6b7e892f2 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/vect/pr106253.f @@ -0,0 +1,35 @@ +! { dg-do compile } + + SUBROUTINE DGEMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX, & + & BETA, Y, INCY ) + LOGICAL LSAME + IF ( .NOT.LSAME( TRANS, 'N' ).AND. & + & .NOT.LSAME( TRANS, 'C' ) )THEN + END IF + END + subroutine evlrnf (ptrs0t, nclsm, prnf0t) + real, dimension (1:nclsm,1:nclsm), intent (in) :: ptrs0t + real, dimension (1:nclsm,1:nclsm), intent (out):: prnf0t + real, allocatable, dimension (:,:) :: utrsft ! probas up + real, allocatable, dimension (:,:) :: dtrsft ! probas down + real, allocatable, dimension (:,:) :: xwrkt ! matrice + do icls = 1, nclsm + do ival = ipic - 1, 1, -1 + xwrkt = trs2a2 (ival, ipic, utrsft, dtrsft, ncls) + enddo + enddo + contains + function trs2a2 (j, k, u, d, m) + real, dimension (1:m,1:m) :: trs2a2 ! resultat + real, dimension (1:m,1:m) :: u, d ! matrices utrsft, dtrsft + end function trs2a2 + end + program rnflow + integer, parameter :: ncls = 256 ! nombre de classes + integer, dimension (1:ncls,1:ncls) :: mrnftt ! matrice theorique + real, dimension (1:ncls,1:ncls) :: ptrst ! matrice Markov + real, dimension (1:ncls,1:ncls) :: prnft ! matrice Rainflow + call evlrnf (ptrst, ncls, prnft) + mrnftt = nint (real (nsim) * real (npic) * prnft) + call cmpmat (mrnftt, mrnfst) + end program rnflow