Message ID | nycvar.YFH.7.76.2005081457460.4397@zhemvz.fhfr.qr |
---|---|
State | New |
Headers | show |
Series | make minmax detection work with FMIN/FMAX IFNs | expand |
On Fri, May 8, 2020 at 3:02 PM Richard Biener <rguenther@suse.de> wrote: > > > Currently we fail to optimize those which are used when MIN/MAX_EXPR > cannot be used for FP values but the target has IEEE conforming > implementations. > > This patch adds support for fmin/fmax detection to phiopt and > makes the named patterns available in the x86 backend. It also > removes some superfluous restrictions from the SLP vectorizer > to make SLP vectorizing IFN_FMIN/FMAX work. > > This also unblocks the fix for PR94865, restoring > gcc.target/i386/pr54855-[89].c operation. > > Bootstrap & regtest running on x86_64-unknown-linux-gnu. > > The x86 portion did simplify a bit, is it still OK? I verified > vectorization works for SSE, AVX and AVX512 and 3dnow (it doesn't > work for v2sf w/o -m3dnow even though "3dnow-in-sse" would work) Ugh, please don't use named patterns for MMX. The mmx_ prefix is there to *prevent* autovectorization. %mm registers are aliased inito x87 register set and EMMS insn is needed to switch back to x87 mode. "3dnow-in-sse" would not work because in SSE mode, we leave two lanes in XMM register unused. They should be zero, until they are not. There can be many side effects from vector operations with effectively undefined lanes (div-by-zero, exceptions, etc...), so we decided to skip V2SF for mmx-with-sse. Uros. > Thanks, > Richard. > > 2020-05-08 Richard Biener <rguenther@suse.de> > > PR tree-optimization/88540 > * tree-ssa-phiopt.c: Include internal-fn.h and gimple-match.h. > (minmax_replacement): Parse IFN_FMIN/FMAX as MIN/MAX_EXPR. > Code-generate IFN_FMIN/FMAX if honoring NaNs or signed zeros. > * tree-vect-slp.c (vect_build_slp_tree_1): Remove superfluous > tests on calls. > > * config/i386/i386.md (*ieee_s<ieee_maxmin><mode>3): Rename as > (f<ieee_maxmin><mode>3): this. > * config/i386/mmx.md (mmx_<code>v2sf3): Adjust. > (mmx_ieee_<ieee_maxmin>v2sf3): Rename as > (f<ieee_maxmin>v2sf3): this. > * config/i386/sse.md > (<code><mode>3<mask_name><round_saeonly_name>): Adjust. > (ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>): > Rename as > (f<ieee_maxmin><mode>3<mask_name><round_saeonly_name>): this. > > * gcc.target/i386/pr88540-1.c: New testcase. OK with the renamed pattern, as described inline. Thanks, Uros. > --- > gcc/config/i386/i386.md | 2 +- > gcc/config/i386/mmx.md | 4 +- > gcc/config/i386/sse.md | 4 +- > gcc/testsuite/gcc.target/i386/pr88540-1.c | 21 ++++++++++ > gcc/tree-ssa-phiopt.c | 66 +++++++++++++++++++++++++------ > gcc/tree-vect-slp.c | 3 -- > 6 files changed, 80 insertions(+), 20 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr88540-1.c > > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index 8bfc9cb0b71..f1496a98456 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -18443,7 +18443,7 @@ > ;; Their operands are not commutative, and thus they may be used in the > ;; presence of -0.0 and NaN. > > -(define_insn "*ieee_s<ieee_maxmin><mode>3" > +(define_insn "f<ieee_maxmin><mode>3" > [(set (match_operand:MODEF 0 "register_operand" "=x,v") > (unspec:MODEF > [(match_operand:MODEF 1 "register_operand" "0,v") > diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md > index 472f90f9bc1..422d858f25d 100644 > --- a/gcc/config/i386/mmx.md > +++ b/gcc/config/i386/mmx.md > @@ -301,7 +301,7 @@ > if (!flag_finite_math_only || flag_signed_zeros) > { > operands[1] = force_reg (V2SFmode, operands[1]); > - emit_insn (gen_mmx_ieee_<maxmin_float>v2sf3 > + emit_insn (gen_f<maxmin_float>v2sf3 > (operands[0], operands[1], operands[2])); > DONE; > } > @@ -331,7 +331,7 @@ > ;; Their operands are not commutative, and thus they may be used in the > ;; presence of -0.0 and NaN. > > -(define_insn "mmx_ieee_<ieee_maxmin>v2sf3" > +(define_insn "f<ieee_maxmin>v2sf3" Please name this pattern "mmx_f<ieee_maxmin>v2sf" and update the caller above. > [(set (match_operand:V2SF 0 "register_operand" "=y") > (unspec:V2SF > [(match_operand:V2SF 1 "register_operand" "0") > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > index 7a7ecd4be87..ac350711527 100644 > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -2191,7 +2191,7 @@ > if (!flag_finite_math_only || flag_signed_zeros) > { > operands[1] = force_reg (<MODE>mode, operands[1]); > - emit_insn (gen_ieee_<maxmin_float><mode>3<mask_name><round_saeonly_name> > + emit_insn (gen_f<maxmin_float><mode>3<mask_name><round_saeonly_name> > (operands[0], operands[1], operands[2] > <mask_operand_arg34> > <round_saeonly_mask_arg3>)); > @@ -2229,7 +2229,7 @@ > ;; Their operands are not commutative, and thus they may be used in the > ;; presence of -0.0 and NaN. > > -(define_insn "ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>" > +(define_insn "f<ieee_maxmin><mode>3<mask_name><round_saeonly_name>" > [(set (match_operand:VF 0 "register_operand" "=x,v") > (unspec:VF > [(match_operand:VF 1 "register_operand" "0,v") > diff --git a/gcc/testsuite/gcc.target/i386/pr88540-1.c b/gcc/testsuite/gcc.target/i386/pr88540-1.c > new file mode 100644 > index 00000000000..a66b89ef66c > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr88540-1.c > @@ -0,0 +1,21 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -ftree-vectorize -msse2" } */ > + > +void test1(double* __restrict d1, double* __restrict d2, double* __restrict d3) > +{ > + for (int n = 0; n < 2; ++n) > + { > + d3[n] = d1[n] < d2[n] ? d1[n] : d2[n]; > + } > +} > + > +void test2(double* __restrict d1, double* __restrict d2, double* __restrict d3) > +{ > + for (int n = 0; n < 2; ++n) > + { > + d3[n] = d1[n] > d2[n] ? d1[n] : d2[n]; > + } > +} > + > +/* { dg-final { scan-assembler "minpd" } } */ > +/* { dg-final { scan-assembler "maxpd" } } */ > diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c > index b1e0dce93d8..7ceda4a37ab 100644 > --- a/gcc/tree-ssa-phiopt.c > +++ b/gcc/tree-ssa-phiopt.c > @@ -46,6 +46,8 @@ along with GCC; see the file COPYING3. If not see > #include "tree-inline.h" > #include "case-cfn-macros.h" > #include "tree-eh.h" > +#include "internal-fn.h" > +#include "gimple-match.h" > > static unsigned int tree_ssa_phiopt_worker (bool, bool, bool); > static bool two_value_replacement (basic_block, basic_block, edge, gphi *, > @@ -1364,7 +1366,6 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, > { > tree result, type, rhs; > gcond *cond; > - gassign *new_stmt; > edge true_edge, false_edge; > enum tree_code cmp, minmax, ass_code; > tree smaller, alt_smaller, larger, alt_larger, arg_true, arg_false; > @@ -1373,7 +1374,10 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, > type = TREE_TYPE (PHI_RESULT (phi)); > > /* The optimization may be unsafe due to NaNs. */ > - if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type)) > + if ((HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type)) > + && (!direct_internal_fn_supported_p (IFN_FMIN, type, OPTIMIZE_FOR_BOTH) > + || !direct_internal_fn_supported_p (IFN_FMAX, type, > + OPTIMIZE_FOR_BOTH))) > return false; > > cond = as_a <gcond *> (last_stmt (cond_bb)); > @@ -1530,16 +1534,44 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, > gimple *assign = last_and_only_stmt (middle_bb); > tree lhs, op0, op1, bound; > > - if (!assign > - || gimple_code (assign) != GIMPLE_ASSIGN) > + if (!assign) > return false; > > - lhs = gimple_assign_lhs (assign); > - ass_code = gimple_assign_rhs_code (assign); > - if (ass_code != MAX_EXPR && ass_code != MIN_EXPR) > + if (is_gimple_assign (assign)) > + { > + lhs = gimple_assign_lhs (assign); > + ass_code = gimple_assign_rhs_code (assign); > + if (ass_code != MAX_EXPR && ass_code != MIN_EXPR) > + return false; > + op0 = gimple_assign_rhs1 (assign); > + op1 = gimple_assign_rhs2 (assign); > + } > + else if (is_gimple_call (assign)) > + { > + lhs = gimple_call_lhs (assign); > + if (!lhs) > + return false; > + combined_fn cfn = gimple_call_combined_fn (assign); > + switch (cfn) > + { > + CASE_CFN_FMIN: > + CASE_CFN_FMIN_FN: > + ass_code = MIN_EXPR; > + op0 = gimple_call_arg (assign, 0); > + op1 = gimple_call_arg (assign, 1); > + break; > + CASE_CFN_FMAX: > + CASE_CFN_FMAX_FN: > + ass_code = MAX_EXPR; > + op0 = gimple_call_arg (assign, 0); > + op1 = gimple_call_arg (assign, 1); > + break; > + default: > + return false; > + } > + } > + else > return false; > - op0 = gimple_assign_rhs1 (assign); > - op1 = gimple_assign_rhs2 (assign); > > if (true_edge->src == middle_bb) > { > @@ -1574,8 +1606,9 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, > return false; > > /* We need BOUND <= LARGER. */ > - if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node, > - bound, larger))) > + if (bound != larger > + && !integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node, > + bound, larger))) > return false; > } > else if (operand_equal_for_phi_arg_p (arg_false, smaller) > @@ -1698,7 +1731,16 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, > result = make_ssa_name (TREE_TYPE (result)); > > /* Emit the statement to compute min/max. */ > - new_stmt = gimple_build_assign (result, minmax, arg0, arg1); > + gimple *new_stmt; > + if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type)) > + { > + new_stmt = gimple_build_call_internal (minmax == MAX_EXPR > + ? IFN_FMAX : IFN_FMIN, > + 2, arg0, arg1); > + gimple_call_set_lhs (new_stmt, result); > + } > + else > + new_stmt = gimple_build_assign (result, minmax, arg0, arg1); > gsi = gsi_last_bb (cond_bb); > gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT); > > diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c > index c097840b09a..b1dcaec3d9f 100644 > --- a/gcc/tree-vect-slp.c > +++ b/gcc/tree-vect-slp.c > @@ -849,9 +849,6 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, > else if ((gimple_call_internal_p (call_stmt) > && (!vectorizable_internal_fn_p > (gimple_call_internal_fn (call_stmt)))) > - || gimple_call_tail_p (call_stmt) > - || gimple_call_noreturn_p (call_stmt) > - || !gimple_call_nothrow_p (call_stmt) > || gimple_call_chain (call_stmt)) > { > if (dump_enabled_p ()) > -- > 2.13.7
On Fri, 8 May 2020, Richard Biener wrote: > > Currently we fail to optimize those which are used when MIN/MAX_EXPR > cannot be used for FP values but the target has IEEE conforming > implementations. i386 ieee_s{min,max} patterns are definitely not IEEE-compliant, their comment alludes to that: ;; These versions of the min/max patterns implement exactly the operations ;; min = (op1 < op2 ? op1 : op2) ;; max = (!(op1 < op2) ? op1 : op2) ;; Their operands are not commutative, and thus they may be used in the ;; presence of -0.0 and NaN. I don't understand why the patch is correct if the IFNs refer to fully IEEE-compliant operations (which is in itself a bit ambiguous w.r.t behavior when exactly one operand is a NaN). Am I missing something? Alexander
On Fri, May 8, 2020 at 3:46 PM Alexander Monakov <amonakov@ispras.ru> wrote: > > > > On Fri, 8 May 2020, Richard Biener wrote: > > > > > Currently we fail to optimize those which are used when MIN/MAX_EXPR > > cannot be used for FP values but the target has IEEE conforming > > implementations. > > i386 ieee_s{min,max} patterns are definitely not IEEE-compliant, > their comment alludes to that: > > ;; These versions of the min/max patterns implement exactly the operations > ;; min = (op1 < op2 ? op1 : op2) > ;; max = (!(op1 < op2) ? op1 : op2) > ;; Their operands are not commutative, and thus they may be used in the > ;; presence of -0.0 and NaN. > > I don't understand why the patch is correct if the IFNs refer to fully > IEEE-compliant operations (which is in itself a bit ambiguous w.r.t > behavior when exactly one operand is a NaN). The comment says that they may be used in the presence of -0.0 and NaN. Further, ISA says for e.g. maxpd: Performs a SIMD compare of the packed double-precision floating-point values in the first source operand and the second source operand and returns the maximum value for each pair of values to the destination operand. If the values being compared are both 0.0s (of either sign), the value in the second operand (source operand) is returned. If a value in the second operand is an SNaN, then SNaN is forwarded unchanged to the destination (that is, a QNaN version of the SNaN is not returned). If only one value is a NaN (SNaN or QNaN) for this instruction, the second operand (source operand), either a NaN or a valid floating-point value, is written to the result. If instead of this behavior, it is required that the NaN source operand (from either the first or second operand) be returned, the action of MAXPD can be emulated using a sequence of instructions, such as a comparison followed by AND, ANDN and OR. > Am I missing something? Is the above enough to declare min/max as IEEE compliant? Uros.
On Fri, 8 May 2020, Uros Bizjak wrote: > > Am I missing something? > > Is the above enough to declare min/max as IEEE compliant? No. SSE min/max instructions semantics match C expression x < y ? x : y. IEEE min/max operations are commutative when exactly one operand is a NaN, and so are C fmin/fmax functions: fmin(x, NaN) == fmin(NaN, x) == x // x is not a NaN In contrast, (x < y ? x : y) always returns y when x or y is a NaN, and likewise the corresponding SSE instructions are not commutative. Therefore they are explicitly non-compliant in presence of NaNs. I don't know how GCC defines the semantics of GIMPLE min/max IFNs. Alexander
On May 8, 2020 4:28:24 PM GMT+02:00, Alexander Monakov <amonakov@ispras.ru> wrote: >On Fri, 8 May 2020, Uros Bizjak wrote: > >> > Am I missing something? >> >> Is the above enough to declare min/max as IEEE compliant? > >No. SSE min/max instructions semantics match C expression x < y ? x : >y. >IEEE min/max operations are commutative when exactly one operand is a >NaN, >and so are C fmin/fmax functions: > > fmin(x, NaN) == fmin(NaN, x) == x // x is not a NaN > >In contrast, (x < y ? x : y) always returns y when x or y is a NaN, and >likewise the corresponding SSE instructions are not commutative. > >Therefore they are explicitly non-compliant in presence of NaNs. > >I don't know how GCC defines the semantics of GIMPLE min/max IFNs. The IFNs are supposed to match fmin and fmax from the C standard which IIRC have IEEE semantics. Note the ISA likely behaves this way because it matches open coded C semantics. Arm folks added the IFNs so I have to dig up what exactly they were after... I'd hate to add a third variant here... Richard. >Alexander
On Fri, May 8, 2020 at 4:38 PM Richard Biener <rguenther@suse.de> wrote: > > On May 8, 2020 4:28:24 PM GMT+02:00, Alexander Monakov <amonakov@ispras.ru> wrote: > >On Fri, 8 May 2020, Uros Bizjak wrote: > > > >> > Am I missing something? > >> > >> Is the above enough to declare min/max as IEEE compliant? > > > >No. SSE min/max instructions semantics match C expression x < y ? x : > >y. > >IEEE min/max operations are commutative when exactly one operand is a > >NaN, > >and so are C fmin/fmax functions: > > > > fmin(x, NaN) == fmin(NaN, x) == x // x is not a NaN > > > >In contrast, (x < y ? x : y) always returns y when x or y is a NaN, and > >likewise the corresponding SSE instructions are not commutative. > > > >Therefore they are explicitly non-compliant in presence of NaNs. > > > >I don't know how GCC defines the semantics of GIMPLE min/max IFNs. > > The IFNs are supposed to match fmin and fmax from the C standard which IIRC have IEEE semantics. > > Note the ISA likely behaves this way because it matches open coded C semantics. > > Arm folks added the IFNs so I have to dig up what exactly they were after... > > I'd hate to add a third variant here... So, I found [1], that tries to explain this issue. [1] https://2pi.dk/2016/05/ieee-min-max Uros.
On Sun, 10 May 2020, Uros Bizjak wrote: > So, I found [1], that tries to explain this issue. > > [1] https://2pi.dk/2016/05/ieee-min-max I would also recommend reading this report that covers a few more architectures and issues with IEEE754 definitions: http://grouper.ieee.org/groups/msc/ANSI_IEEE-Std-754-2019/background/minNum_maxNum_Removal_Demotion_v3.pdf Alexander
Richard Biener <rguenther@suse.de> writes: > On May 8, 2020 4:28:24 PM GMT+02:00, Alexander Monakov <amonakov@ispras.ru> wrote: >>On Fri, 8 May 2020, Uros Bizjak wrote: >> >>> > Am I missing something? >>> >>> Is the above enough to declare min/max as IEEE compliant? >> >>No. SSE min/max instructions semantics match C expression x < y ? x : >>y. >>IEEE min/max operations are commutative when exactly one operand is a >>NaN, >>and so are C fmin/fmax functions: >> >> fmin(x, NaN) == fmin(NaN, x) == x // x is not a NaN >> >>In contrast, (x < y ? x : y) always returns y when x or y is a NaN, and >>likewise the corresponding SSE instructions are not commutative. >> >>Therefore they are explicitly non-compliant in presence of NaNs. >> >>I don't know how GCC defines the semantics of GIMPLE min/max IFNs. > > The IFNs are supposed to match fmin and fmax from the C standard which IIRC have IEEE semantics. Yeah, that was my understanding too (specifically the 2008 maxNum & minNum rules, since new variants were added in 2019). > Note the ISA likely behaves this way because it matches open coded C semantics. > > Arm folks added the IFNs so I have to dig up what exactly they were after... We wanted it for pretty much exactly the kind of thing you're doing here: having a vectorisable version of the C fmin and fmax functions. FWIW, Alejandro posted a patch for reductions a while back: https://gcc.gnu.org/pipermail/gcc-patches/2018-December/513678.html but it was posted during stage 3 and so kind-of stalled. Like you say, the idea is that since the operation is commutative and is the same in both vector and scalar form, there's no reason to require any -ffast-math flags. Thanks, Richard
On Mon, 11 May 2020, Richard Sandiford wrote: > Like you say, the idea is that since the operation is commutative and > is the same in both vector and scalar form, there's no reason to require > any -ffast-math flags. Note that PR88540 that Richard is referencing uses open-coded x < y ? x : y (non-commutative) and we want to use SSE minpd even without -ffast-math, as SSE min/max insns match semantics of open-coded ternary operators. (unlike Arm SIMD, SSE does not have a way to compute fmin/fmax with a single instruction in presence of NaNs) Alexander
On Mon, 11 May 2020, Alexander Monakov wrote: > On Mon, 11 May 2020, Richard Sandiford wrote: > > > Like you say, the idea is that since the operation is commutative and > > is the same in both vector and scalar form, there's no reason to require > > any -ffast-math flags. > > Note that PR88540 that Richard is referencing uses open-coded x < y ? x : y > (non-commutative) and we want to use SSE minpd even without -ffast-math, as > SSE min/max insns match semantics of open-coded ternary operators. > > (unlike Arm SIMD, SSE does not have a way to compute fmin/fmax with a > single instruction in presence of NaNs) Indeed. So it looks like for SSE we eventually want phiopt to generate a COND_EXPR here and a new optabs cond_fmin cond_fmax that could be used to vectorize this? cond_fmin and cond_fmax can neither be treated as MIN_EXPR or MAX_EXPR nor fmin/fmax since it is not commutative. The reason why I came back to this is that the x86 backend has define_insns that match the conditional form so RTL if-conversions knows how to generate this but after my patch to add some bit-insert/bit-field-ref combining patterns to GIMPLE RTL if-conversion cannot recoginze them anymore. I'm also pretty sure we do not want cond_fmin/max IFNs on GIMPLE early. That said, I can recover the x86 testcases by just adjusting phiopt here to generate a COND_EXPR (vectorizer ifcvt doesn't handle this case either - which is why it is not vectorized I guess). Not sure if it really fits minmax detection since for sure the "combinations" it does do not apply to the conditional form without extra careful checking (is there any difference between x >= y ? x : y and x > y ? x : y?) Anyway, I need to sit down and more closely look at this. So the x86 part of the patch is clearly bogus and the phiopt part to match the conditional form as IFN_FMIN/MAX as well(?) Richard.
Richard Biener <rguenther@suse.de> writes: > On Mon, 11 May 2020, Alexander Monakov wrote: > >> On Mon, 11 May 2020, Richard Sandiford wrote: >> >> > Like you say, the idea is that since the operation is commutative and >> > is the same in both vector and scalar form, there's no reason to require >> > any -ffast-math flags. >> >> Note that PR88540 that Richard is referencing uses open-coded x < y ? x : y >> (non-commutative) and we want to use SSE minpd even without -ffast-math, as >> SSE min/max insns match semantics of open-coded ternary operators. >> >> (unlike Arm SIMD, SSE does not have a way to compute fmin/fmax with a >> single instruction in presence of NaNs) > > Indeed. So it looks like for SSE we eventually want phiopt to generate > a COND_EXPR here and a new optabs cond_fmin cond_fmax that could be > used to vectorize this? cond_fmin and cond_fmax can neither be > treated as MIN_EXPR or MAX_EXPR nor fmin/fmax since it is not > commutative. I know I'm taking the name too literally, but: "cond_foo" in the sense of "IFN_COND_FOO (cond, ..., fallback)" is always supposed to be the equivalent of: cond ? IFN_FOO (...) : fallback So cond_fmin would be the conditional form of fmin. Thanks, Richard
On Fri, 8 May 2020, Richard Biener wrote: > The IFNs are supposed to match fmin and fmax from the C standard which > IIRC have IEEE semantics. fmin and fmax have IEEE (2008) semantics (where an sNaN operand results in a qNaN result with "invalid" raised", but a quiet NaN results in the other operand, if not sNaN, being returned). Not to be confused with any of the new minimum/maximum operations in IEEE (2019) (both variants that treat all NaNs like other arithmetic operations, and variants that can raise "invalid" for sNaN without returning a NaN), for which C bindings under different names are proposed.
On Mon, 11 May 2020, Joseph Myers wrote: > On Fri, 8 May 2020, Richard Biener wrote: > > > The IFNs are supposed to match fmin and fmax from the C standard which > > IIRC have IEEE semantics. > > fmin and fmax have IEEE (2008) semantics (where an sNaN operand results in > a qNaN result with "invalid" raised", but a quiet NaN results in the other > operand, if not sNaN, being returned). Not to be confused with any of the > new minimum/maximum operations in IEEE (2019) (both variants that treat > all NaNs like other arithmetic operations, and variants that can raise > "invalid" for sNaN without returning a NaN), for which C bindings under > different names are proposed. Uh, ok - that makes the situation even more messy in the future (exact rules how to combine maximumX (fmin (X, Y), X) or so...). In the mean time I'm considering the following for my immediate needs which are PR88540 not basic-block vectorizing FP min/max w/o -ffast-math and testcase breakage in gcc.target/i386/pr54855-[89].c where conditional move handling on RTL no longer happens when GIMPLE starts to be more clever. The patch below makes phiopt if-convert min/max-like operations for floating-point types regardless of HONOR_NANS/HONOR_SIGNED_ZEROS because targets likely implement instructions for C conditional operator-style min/max-like operations. Now x86 does so for d1[n] < d2[n] ? d1[n] : d2[n] but not d1[n] <= d2[n] ? d1[n] : d2[n] which is a subtle difference but the patch below still if-converts those. I've chosen not to try to combine with existing min/max operations (nor try detecting the COND_EXPR form as such) but instead give up. I could have given up for <= and >= operations as well (and not if-convert), if there's reasons to believe no targets implement single instructions for those. Note I specifically do not want general if-conversion to COND_EXPRs applied here (so that's a bit of conflicting interest). But there's no standard pattern names for those conditional min/max-like operations (and I'm not sure we need them). Thanks, Richard. [PATCH] tree-optimization/88540 - apply phiopt to min/max-like operation This applies if conversion in phiopt to a < b ? a : b like computations which are not MIN or MAX_EXPR or fmin or fmax due to NaNs and signed zeros. Instead of giving up this makes phiopt emit a COND_EXPR. This helps vectorization of straight-line code and avoids messing up the pattern by PRE. 2020-05-12 Richard Biener <rguenther@suse.de> PR tree-optimization/88540 * tree-ssa-phiopt.c (minmax_replacement): When honoring NaNs or signed zeros instead emit a COND_EXPR. * gcc.target/i386/pr88540-1.c: New testcase. --- gcc/testsuite/gcc.target/i386/pr88540-1.c | 21 +++++++++++++++++++++ gcc/tree-ssa-phiopt.c | 25 ++++++++++++++++++------- 2 files changed, 39 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr88540-1.c diff --git a/gcc/testsuite/gcc.target/i386/pr88540-1.c b/gcc/testsuite/gcc.target/i386/pr88540-1.c new file mode 100644 index 00000000000..a66b89ef66c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88540-1.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -msse2" } */ + +void test1(double* __restrict d1, double* __restrict d2, double* __restrict d3) +{ + for (int n = 0; n < 2; ++n) + { + d3[n] = d1[n] < d2[n] ? d1[n] : d2[n]; + } +} + +void test2(double* __restrict d1, double* __restrict d2, double* __restrict d3) +{ + for (int n = 0; n < 2; ++n) + { + d3[n] = d1[n] > d2[n] ? d1[n] : d2[n]; + } +} + +/* { dg-final { scan-assembler "minpd" } } */ +/* { dg-final { scan-assembler "maxpd" } } */ diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c index b1e0dce93d8..eac8dcdd696 100644 --- a/gcc/tree-ssa-phiopt.c +++ b/gcc/tree-ssa-phiopt.c @@ -1369,13 +1369,10 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, enum tree_code cmp, minmax, ass_code; tree smaller, alt_smaller, larger, alt_larger, arg_true, arg_false; gimple_stmt_iterator gsi, gsi_from; + bool ieee_fp; type = TREE_TYPE (PHI_RESULT (phi)); - - /* The optimization may be unsafe due to NaNs. */ - if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type)) - return false; - + ieee_fp = (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type)); cond = as_a <gcond *> (last_stmt (cond_bb)); cmp = gimple_cond_code (cond); rhs = gimple_cond_rhs (cond); @@ -1527,6 +1524,10 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, b = MAX (a, d); x = MIN (b, u); */ + /* But not when we have to care bout NaNs or singed zeros. */ + if (ieee_fp) + return false; + gimple *assign = last_and_only_stmt (middle_bb); tree lhs, op0, op1, bound; @@ -1697,8 +1698,18 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, else result = make_ssa_name (TREE_TYPE (result)); - /* Emit the statement to compute min/max. */ - new_stmt = gimple_build_assign (result, minmax, arg0, arg1); + /* Emit the statement to compute min/max. If the optimization is + unsafe due to NaNs or signed zeros preserve the not commutative + operation as a COND_EXPR. */ + if (ieee_fp) + new_stmt = gimple_build_assign (result, COND_EXPR, + build2 (gimple_cond_code (cond), + boolean_type_node, + gimple_cond_lhs (cond), + gimple_cond_rhs (cond)), + arg_true, arg_false); + else + new_stmt = gimple_build_assign (result, minmax, arg0, arg1); gsi = gsi_last_bb (cond_bb); gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 8bfc9cb0b71..f1496a98456 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -18443,7 +18443,7 @@ ;; Their operands are not commutative, and thus they may be used in the ;; presence of -0.0 and NaN. -(define_insn "*ieee_s<ieee_maxmin><mode>3" +(define_insn "f<ieee_maxmin><mode>3" [(set (match_operand:MODEF 0 "register_operand" "=x,v") (unspec:MODEF [(match_operand:MODEF 1 "register_operand" "0,v") diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 472f90f9bc1..422d858f25d 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -301,7 +301,7 @@ if (!flag_finite_math_only || flag_signed_zeros) { operands[1] = force_reg (V2SFmode, operands[1]); - emit_insn (gen_mmx_ieee_<maxmin_float>v2sf3 + emit_insn (gen_f<maxmin_float>v2sf3 (operands[0], operands[1], operands[2])); DONE; } @@ -331,7 +331,7 @@ ;; Their operands are not commutative, and thus they may be used in the ;; presence of -0.0 and NaN. -(define_insn "mmx_ieee_<ieee_maxmin>v2sf3" +(define_insn "f<ieee_maxmin>v2sf3" [(set (match_operand:V2SF 0 "register_operand" "=y") (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 7a7ecd4be87..ac350711527 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -2191,7 +2191,7 @@ if (!flag_finite_math_only || flag_signed_zeros) { operands[1] = force_reg (<MODE>mode, operands[1]); - emit_insn (gen_ieee_<maxmin_float><mode>3<mask_name><round_saeonly_name> + emit_insn (gen_f<maxmin_float><mode>3<mask_name><round_saeonly_name> (operands[0], operands[1], operands[2] <mask_operand_arg34> <round_saeonly_mask_arg3>)); @@ -2229,7 +2229,7 @@ ;; Their operands are not commutative, and thus they may be used in the ;; presence of -0.0 and NaN. -(define_insn "ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>" +(define_insn "f<ieee_maxmin><mode>3<mask_name><round_saeonly_name>" [(set (match_operand:VF 0 "register_operand" "=x,v") (unspec:VF [(match_operand:VF 1 "register_operand" "0,v") diff --git a/gcc/testsuite/gcc.target/i386/pr88540-1.c b/gcc/testsuite/gcc.target/i386/pr88540-1.c new file mode 100644 index 00000000000..a66b89ef66c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr88540-1.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -msse2" } */ + +void test1(double* __restrict d1, double* __restrict d2, double* __restrict d3) +{ + for (int n = 0; n < 2; ++n) + { + d3[n] = d1[n] < d2[n] ? d1[n] : d2[n]; + } +} + +void test2(double* __restrict d1, double* __restrict d2, double* __restrict d3) +{ + for (int n = 0; n < 2; ++n) + { + d3[n] = d1[n] > d2[n] ? d1[n] : d2[n]; + } +} + +/* { dg-final { scan-assembler "minpd" } } */ +/* { dg-final { scan-assembler "maxpd" } } */ diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c index b1e0dce93d8..7ceda4a37ab 100644 --- a/gcc/tree-ssa-phiopt.c +++ b/gcc/tree-ssa-phiopt.c @@ -46,6 +46,8 @@ along with GCC; see the file COPYING3. If not see #include "tree-inline.h" #include "case-cfn-macros.h" #include "tree-eh.h" +#include "internal-fn.h" +#include "gimple-match.h" static unsigned int tree_ssa_phiopt_worker (bool, bool, bool); static bool two_value_replacement (basic_block, basic_block, edge, gphi *, @@ -1364,7 +1366,6 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, { tree result, type, rhs; gcond *cond; - gassign *new_stmt; edge true_edge, false_edge; enum tree_code cmp, minmax, ass_code; tree smaller, alt_smaller, larger, alt_larger, arg_true, arg_false; @@ -1373,7 +1374,10 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, type = TREE_TYPE (PHI_RESULT (phi)); /* The optimization may be unsafe due to NaNs. */ - if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type)) + if ((HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type)) + && (!direct_internal_fn_supported_p (IFN_FMIN, type, OPTIMIZE_FOR_BOTH) + || !direct_internal_fn_supported_p (IFN_FMAX, type, + OPTIMIZE_FOR_BOTH))) return false; cond = as_a <gcond *> (last_stmt (cond_bb)); @@ -1530,16 +1534,44 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, gimple *assign = last_and_only_stmt (middle_bb); tree lhs, op0, op1, bound; - if (!assign - || gimple_code (assign) != GIMPLE_ASSIGN) + if (!assign) return false; - lhs = gimple_assign_lhs (assign); - ass_code = gimple_assign_rhs_code (assign); - if (ass_code != MAX_EXPR && ass_code != MIN_EXPR) + if (is_gimple_assign (assign)) + { + lhs = gimple_assign_lhs (assign); + ass_code = gimple_assign_rhs_code (assign); + if (ass_code != MAX_EXPR && ass_code != MIN_EXPR) + return false; + op0 = gimple_assign_rhs1 (assign); + op1 = gimple_assign_rhs2 (assign); + } + else if (is_gimple_call (assign)) + { + lhs = gimple_call_lhs (assign); + if (!lhs) + return false; + combined_fn cfn = gimple_call_combined_fn (assign); + switch (cfn) + { + CASE_CFN_FMIN: + CASE_CFN_FMIN_FN: + ass_code = MIN_EXPR; + op0 = gimple_call_arg (assign, 0); + op1 = gimple_call_arg (assign, 1); + break; + CASE_CFN_FMAX: + CASE_CFN_FMAX_FN: + ass_code = MAX_EXPR; + op0 = gimple_call_arg (assign, 0); + op1 = gimple_call_arg (assign, 1); + break; + default: + return false; + } + } + else return false; - op0 = gimple_assign_rhs1 (assign); - op1 = gimple_assign_rhs2 (assign); if (true_edge->src == middle_bb) { @@ -1574,8 +1606,9 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, return false; /* We need BOUND <= LARGER. */ - if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node, - bound, larger))) + if (bound != larger + && !integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node, + bound, larger))) return false; } else if (operand_equal_for_phi_arg_p (arg_false, smaller) @@ -1698,7 +1731,16 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, result = make_ssa_name (TREE_TYPE (result)); /* Emit the statement to compute min/max. */ - new_stmt = gimple_build_assign (result, minmax, arg0, arg1); + gimple *new_stmt; + if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type)) + { + new_stmt = gimple_build_call_internal (minmax == MAX_EXPR + ? IFN_FMAX : IFN_FMIN, + 2, arg0, arg1); + gimple_call_set_lhs (new_stmt, result); + } + else + new_stmt = gimple_build_assign (result, minmax, arg0, arg1); gsi = gsi_last_bb (cond_bb); gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT); diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index c097840b09a..b1dcaec3d9f 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -849,9 +849,6 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, else if ((gimple_call_internal_p (call_stmt) && (!vectorizable_internal_fn_p (gimple_call_internal_fn (call_stmt)))) - || gimple_call_tail_p (call_stmt) - || gimple_call_noreturn_p (call_stmt) - || !gimple_call_nothrow_p (call_stmt) || gimple_call_chain (call_stmt)) { if (dump_enabled_p ())