From patchwork Fri Oct 22 21:52:25 2010
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Subject: Add FMA_EXPR, un-cse multiplications before expansion
From: Richard Henderson
X-Patchwork-Id: 68978
Message-Id: <4CC20799.9070300@redhat.com>
To: Richard Guenther
Cc: gcc-patches@gcc.gnu.org
Date: Fri, 22 Oct 2010 14:52:25 -0700

On 10/22/2010 02:12 PM, Richard Henderson wrote:
> On 10/21/2010 02:41 AM, Richard Guenther wrote:
>> +  /* Leave single uses to the RTL combiner, we otherwise regress
>> +     in some circumstances. */
>> +  if (single_imm_use (mul_result, &use_p, &use_stmt))
>> +    return false;
>
> Like what? We can't honor those PAREN_EXPR rules in rtl, and
> we can't leave this to combine without that.
>
> Ignoring that, consider the following addition to your patch,
> which handles the negate during expansion via 4 optabs.

Consider the following patch as well, to make sure that FMA = FMA
during other CSE.

r~

diff --git a/gcc/builtins.c b/gcc/builtins.c
index ca69efa..8ee3ba3 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -9265,6 +9265,27 @@ fold_builtin_abs (location_t loc, tree arg, tree type)
   return fold_build1_loc (loc, ABS_EXPR, type, arg);
 }
 
+/* Fold a call to fma, fmaf, or fmal with arguments ARG[012]. */
+
+static tree
+fold_builtin_fma (location_t loc, tree arg0, tree arg1, tree arg2, tree type)
+{
+  if (validate_arg (arg0, REAL_TYPE)
+      && validate_arg(arg1, REAL_TYPE)
+      && validate_arg(arg2, REAL_TYPE))
+    {
+      if (TREE_CODE (arg0) == REAL_CST
+          && TREE_CODE (arg1) == REAL_CST
+          && TREE_CODE (arg2) == REAL_CST)
+        return do_mpfr_arg3 (arg0, arg1, arg2, type, mpfr_fma);
+
+      /* ??? Only expand to FMA_EXPR if it's directly supported. */
+      if (optab_handler (fma_optab, TYPE_MODE (type)) != CODE_FOR_nothing)
+        return fold_build3_loc (loc, FMA_EXPR, type, arg0, arg1, arg2);
+    }
+  return NULL_TREE;
+}
+
 /* Fold a call to builtin fmin or fmax. */
 
 static tree
@@ -10539,10 +10560,7 @@ fold_builtin_3 (location_t loc, tree fndecl,
       return fold_builtin_sincos (loc, arg0, arg1, arg2);
 
     CASE_FLT_FN (BUILT_IN_FMA):
-      if (validate_arg (arg0, REAL_TYPE)
-          && validate_arg(arg1, REAL_TYPE)
-          && validate_arg(arg2, REAL_TYPE))
-        return do_mpfr_arg3 (arg0, arg1, arg2, type, mpfr_fma);
+      return fold_builtin_fma (loc, arg0, arg1, arg2, type);
     break;
 
     CASE_FLT_FN (BUILT_IN_REMQUO):
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 4753c4b..bb99a8f 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -7174,6 +7174,16 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
           ret = gimplify_omp_atomic (expr_p, pre_p);
           break;
 
+        case TRUTH_AND_EXPR:
+        case TRUTH_OR_EXPR:
+        case TRUTH_XOR_EXPR:
+          /* Classified as tcc_expression. */
+          goto expr_2;
+
+        case FMA_EXPR:
+          /* Classified as tcc_expression. */
+          goto expr_3;
+
         case POINTER_PLUS_EXPR:
           /* Convert ((type *)A)+offset into &A->field_of_type_and_offset.
              The second is gimple immediate saving a need for extra statement.
@@ -7253,16 +7263,28 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
               break;
             }
 
+          expr_3:
+            {
+              enum gimplify_status r0, r1, r2;
+
+              r0 = gimplify_expr (&TREE_OPERAND (*expr_p, 0), pre_p,
+                                  post_p, is_gimple_val, fb_rvalue);
+              r1 = gimplify_expr (&TREE_OPERAND (*expr_p, 1), pre_p,
+                                  post_p, is_gimple_val, fb_rvalue);
+              r2 = gimplify_expr (&TREE_OPERAND (*expr_p, 2), pre_p,
+                                  post_p, is_gimple_val, fb_rvalue);
+
+              ret = MIN (MIN (r0, r1), r2);
+              break;
+            }
+
         case tcc_declaration:
         case tcc_constant:
           ret = GS_ALL_DONE;
           goto dont_recalculate;
 
         default:
-          gcc_assert (TREE_CODE (*expr_p) == TRUTH_AND_EXPR
-                      || TREE_CODE (*expr_p) == TRUTH_OR_EXPR
-                      || TREE_CODE (*expr_p) == TRUTH_XOR_EXPR);
-          goto expr_2;
+          gcc_unreachable ();
         }
 
       recalculate_side_effects (*expr_p);