diff mbox

Add FMA_EXPR, un-cse multiplications before expansion

Message ID 4CC20799.9070300@redhat.com
State New
Headers show

Commit Message

Richard Henderson Oct. 22, 2010, 9:52 p.m. UTC
On 10/22/2010 02:12 PM, Richard Henderson wrote:
> On 10/21/2010 02:41 AM, Richard Guenther wrote:
>> +   /* Leave single uses to the RTL combiner, we otherwise regress
>> +      in some circumstances.  */
>> +   if (single_imm_use (mul_result, &use_p, &use_stmt))
>> +     return false;
> 
> Like what?  We can't honor those PAREN_EXPR rules in rtl, and
> we can't leave this to combine without that.
> 
> Ignoring that, consider the following addition to your patch,
> which handles the negate during expansion via 4 optabs.

Consider the following patch as well, to make sure that FMA = FMA
during other CSE.


r~

Comments

H.J. Lu Oct. 22, 2010, 9:53 p.m. UTC | #1
On Fri, Oct 22, 2010 at 2:52 PM, Richard Henderson <rth@redhat.com> wrote:
> On 10/22/2010 02:12 PM, Richard Henderson wrote:
>> On 10/21/2010 02:41 AM, Richard Guenther wrote:
>>> +   /* Leave single uses to the RTL combiner, we otherwise regress
>>> +      in some circumstances.  */
>>> +   if (single_imm_use (mul_result, &use_p, &use_stmt))
>>> +     return false;
>>
>> Like what?  We can't honor those PAREN_EXPR rules in rtl, and
>> we can't leave this to combine without that.
>>
>> Ignoring that, consider the following addition to your patch,
>> which handles the negate during expansion via 4 optabs.
>
> Consider the following patch as well, to make sure that FMA = FMA
> during other CSE.
>

One of the FMA changes caused:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=46142

Richard Biener Oct. 23, 2010, 6:57 a.m. UTC | #2
On Fri, Oct 22, 2010 at 11:52 PM, Richard Henderson <rth@redhat.com> wrote:
> On 10/22/2010 02:12 PM, Richard Henderson wrote:
>> On 10/21/2010 02:41 AM, Richard Guenther wrote:
>>> +   /* Leave single uses to the RTL combiner, we otherwise regress
>>> +      in some circumstances.  */
>>> +   if (single_imm_use (mul_result, &use_p, &use_stmt))
>>> +     return false;
>>
>> Like what?  We can't honor those PAREN_EXPR rules in rtl, and
>> we can't leave this to combine without that.
>>
>> Ignoring that, consider the following addition to your patch,
>> which handles the negate during expansion via 4 optabs.
>
> Consider the following patch as well, to make sure that FMA = FMA
> during other CSE.

Looks ok independent of the other patches.  I guess we then have to fix up
the vectorizer to handle FMA_EXPR as well, though.

Richard.

>
> r~
>
diff mbox

Patch

diff --git a/gcc/builtins.c b/gcc/builtins.c
index ca69efa..8ee3ba3 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -9265,6 +9265,27 @@  fold_builtin_abs (location_t loc, tree arg, tree type)
   return fold_build1_loc (loc, ABS_EXPR, type, arg);
 }
 
+/* Fold a call to fma, fmaf, or fmal with arguments ARG[012].  */
+
+static tree
+fold_builtin_fma (location_t loc, tree arg0, tree arg1, tree arg2, tree type)
+{
+  if (validate_arg (arg0, REAL_TYPE)
+      && validate_arg(arg1, REAL_TYPE)
+      && validate_arg(arg2, REAL_TYPE))
+    {
+      if (TREE_CODE (arg0) == REAL_CST
+	  && TREE_CODE (arg1) == REAL_CST
+	  && TREE_CODE (arg2) == REAL_CST)
+	return do_mpfr_arg3 (arg0, arg1, arg2, type, mpfr_fma);
+
+      /* ??? Only expand to FMA_EXPR if it's directly supported.  */
+      if (optab_handler (fma_optab, TYPE_MODE (type)) != CODE_FOR_nothing)
+        return fold_build3_loc (loc, FMA_EXPR, type, arg0, arg1, arg2);
+    }
+  return NULL_TREE;
+}
+
 /* Fold a call to builtin fmin or fmax.  */
 
 static tree
@@ -10539,10 +10560,7 @@  fold_builtin_3 (location_t loc, tree fndecl,
       return fold_builtin_sincos (loc, arg0, arg1, arg2);
 
     CASE_FLT_FN (BUILT_IN_FMA):
-      if (validate_arg (arg0, REAL_TYPE)
-	  && validate_arg(arg1, REAL_TYPE)
-	  && validate_arg(arg2, REAL_TYPE))
-	return do_mpfr_arg3 (arg0, arg1, arg2, type, mpfr_fma);
+      return fold_builtin_fma (loc, arg0, arg1, arg2, type);
     break;
 
     CASE_FLT_FN (BUILT_IN_REMQUO):
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 4753c4b..bb99a8f 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -7174,6 +7174,16 @@  gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
 	  ret = gimplify_omp_atomic (expr_p, pre_p);
 	  break;
 
+	case TRUTH_AND_EXPR:
+	case TRUTH_OR_EXPR:
+	case TRUTH_XOR_EXPR:
+	  /* Classified as tcc_expression.  */
+	  goto expr_2;
+
+	case FMA_EXPR:
+	  /* Classified as tcc_expression.  */
+	  goto expr_3;
+
 	case POINTER_PLUS_EXPR:
           /* Convert ((type *)A)+offset into &A->field_of_type_and_offset.
 	     The second is gimple immediate saving a need for extra statement.
@@ -7253,16 +7263,28 @@  gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
 		break;
 	      }
 
+	    expr_3:
+	      {
+		enum gimplify_status r0, r1, r2;
+
+		r0 = gimplify_expr (&TREE_OPERAND (*expr_p, 0), pre_p,
+		                    post_p, is_gimple_val, fb_rvalue);
+		r1 = gimplify_expr (&TREE_OPERAND (*expr_p, 1), pre_p,
+				    post_p, is_gimple_val, fb_rvalue);
+		r2 = gimplify_expr (&TREE_OPERAND (*expr_p, 2), pre_p,
+				    post_p, is_gimple_val, fb_rvalue);
+
+		ret = MIN (MIN (r0, r1), r2);
+		break;
+	      }
+
 	    case tcc_declaration:
 	    case tcc_constant:
 	      ret = GS_ALL_DONE;
 	      goto dont_recalculate;
 
 	    default:
-	      gcc_assert (TREE_CODE (*expr_p) == TRUTH_AND_EXPR
-			  || TREE_CODE (*expr_p) == TRUTH_OR_EXPR
-			  || TREE_CODE (*expr_p) == TRUTH_XOR_EXPR);
-	      goto expr_2;
+	      gcc_unreachable ();
 	    }
 
 	  recalculate_side_effects (*expr_p);