Patchwork [loop] handle fma in loop unroller

login
register
mail settings
Submitter Richard Henderson
Date Nov. 12, 2010, 12:50 a.m.
Message ID <4CDC8F42.7020504@redhat.com>
Download mbox | patch
Permalink /patch/70897/
State New
Headers show

Comments

Richard Henderson - Nov. 12, 2010, 12:50 a.m.
The FMA conversion patch I posted for ppc has a regression in

  gcc.dg/var-expand3.c

(which ought to be under gcc.target/powerpc but isn't).  

When fma is represented as (plus (mult)), the loop unroller would
perform the accumulation transform.  With the new FMA rtx code,
this tranformation no longer happened.

I only transform FMA if the accumulator is in the ADD portion.
Thus the initialization and finalization of the loop is as for PLUS.

Tested on x86_64-linux; full test on ppc64 is still going.

Ok?


r~
* optabs.c (init_optabs): Init {fma,fms,fnma,fnms}_optab properly.
	* loop-unroll.c (analyze_insn_to_expand_var): Accept accumulation
	via FMA if unsafe math.
	(insert_var_expansion_initialization): Handle FMA.
	(combine_var_copies_in_loop_exit): Likewise.
Zdenek Dvorak - Nov. 15, 2010, 2:21 p.m.
Hi,

> The FMA conversion patch I posted for ppc has a regression in
> 
>   gcc.dg/var-expand3.c
> 
> (which ought to be under gcc.target/powerpc but isn't).  
> 
> When fma is represented as (plus (mult)), the loop unroller would
> perform the accumulation transform.  With the new FMA rtx code,
> this tranformation no longer happened.
> 
> I only transform FMA if the accumulator is in the ADD portion.
> Thus the initialization and finalization of the loop is as for PLUS.
> 
> Tested on x86_64-linux; full test on ppc64 is still going.
> 
> Ok?

OK,

Zdenek

Patch

diff --git a/gcc/loop-unroll.c b/gcc/loop-unroll.c
index 47bfa05..67d6ea0 100644
--- a/gcc/loop-unroll.c
+++ b/gcc/loop-unroll.c
@@ -1616,10 +1616,10 @@  reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses)
 static struct var_to_expand *
 analyze_insn_to_expand_var (struct loop *loop, rtx insn)
 {
-  rtx set, dest, src, op1, op2, something;
+  rtx set, dest, src;
   struct var_to_expand *ves;
-  enum machine_mode mode1, mode2;
   unsigned accum_pos;
+  enum rtx_code code;
   int debug_uses = 0;
 
   set = single_set (insn);
@@ -1628,12 +1628,20 @@  analyze_insn_to_expand_var (struct loop *loop, rtx insn)
 
   dest = SET_DEST (set);
   src = SET_SRC (set);
+  code = GET_CODE (src);
 
-  if (GET_CODE (src) != PLUS
-      && GET_CODE (src) != MINUS
-      && GET_CODE (src) != MULT)
+  if (code != PLUS && code != MINUS && code != MULT && code != FMA)
     return NULL;
 
+  if (FLOAT_MODE_P (GET_MODE (dest)))
+    {
+      if (!flag_associative_math)
+        return NULL;
+      /* In the case of FMA, we're also changing the rounding.  */
+      if (code == FMA && !flag_unsafe_math_optimizations)
+	return NULL;
+    }
+
   /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
      in MD.  But if there is no optab to generate the insn, we can not
      perform the variable expansion.  This can happen if an MD provides
@@ -1643,54 +1651,57 @@  analyze_insn_to_expand_var (struct loop *loop, rtx insn)
      So we check have_insn_for which looks for an optab for the operation
      in SRC.  If it doesn't exist, we can't perform the expansion even
      though INSN is valid.  */
-  if (!have_insn_for (GET_CODE (src), GET_MODE (src)))
+  if (!have_insn_for (code, GET_MODE (src)))
     return NULL;
 
-  op1 = XEXP (src, 0);
-  op2 = XEXP (src, 1);
-
   if (!REG_P (dest)
       && !(GET_CODE (dest) == SUBREG
            && REG_P (SUBREG_REG (dest))))
     return NULL;
 
-  if (rtx_equal_p (dest, op1))
+  /* Find the accumulator use within the operation.  */
+  if (code == FMA)
+    {
+      /* We only support accumulation via FMA in the ADD position.  */
+      if (!rtx_equal_p  (dest, XEXP (src, 2)))
+	return NULL;
+      accum_pos = 2;
+    }
+  else if (rtx_equal_p (dest, XEXP (src, 0)))
     accum_pos = 0;
-  else if (rtx_equal_p (dest, op2))
-    accum_pos = 1;
+  else if (rtx_equal_p (dest, XEXP (src, 1)))
+    {
+      /* The method of expansion that we are using; which includes the
+	 initialization of the expansions with zero and the summation of
+         the expansions at the end of the computation will yield wrong
+	 results for (x = something - x) thus avoid using it in that case.  */
+      if (code == MINUS)
+	return NULL;
+      accum_pos = 1;
+    }
   else
     return NULL;
 
-  /* The method of expansion that we are using; which includes
-     the initialization of the expansions with zero and the summation of
-     the expansions at the end of the computation will yield wrong results
-     for (x = something - x) thus avoid using it in that case.  */
-  if (accum_pos == 1
-      && GET_CODE (src) == MINUS)
-   return NULL;
-
-  something = (accum_pos == 0) ? op2 : op1;
-
-  if (rtx_referenced_p (dest, something))
+  /* It must not otherwise be used.  */
+  if (code == FMA)
+    {
+      if (rtx_referenced_p (dest, XEXP (src, 0))
+	  || rtx_referenced_p (dest, XEXP (src, 1)))
+	return NULL;
+    }
+  else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
     return NULL;
 
+  /* It must be used in exactly one insn.  */
   if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
     return NULL;
 
-  mode1 = GET_MODE (dest);
-  mode2 = GET_MODE (something);
-  if ((FLOAT_MODE_P (mode1)
-       || FLOAT_MODE_P (mode2))
-      && !flag_associative_math)
-    return NULL;
-
   if (dump_file)
-  {
-    fprintf (dump_file,
-    "\n;; Expanding Accumulator ");
-    print_rtl (dump_file, dest);
-    fprintf (dump_file, "\n");
-  }
+    {
+      fprintf (dump_file, "\n;; Expanding Accumulator ");
+      print_rtl (dump_file, dest);
+      fprintf (dump_file, "\n");
+    }
 
   if (debug_uses)
     /* Instead of resetting the debug insns, we could replace each
@@ -2123,22 +2134,33 @@  insert_var_expansion_initialization (struct var_to_expand *ve,
     return;
 
   start_sequence ();
-  if (ve->op == PLUS || ve->op == MINUS)
-    FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var)
-      {
-	if (honor_signed_zero_p)
-	  zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
-	else
-	  zero_init = CONST0_RTX (mode);
+  switch (ve->op)
+    {
+    case FMA:
+      /* Note that we only accumulate FMA via the ADD operand.  */
+    case PLUS:
+    case MINUS:
+      FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var)
+        {
+	  if (honor_signed_zero_p)
+	    zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
+	  else
+	    zero_init = CONST0_RTX (mode);
+          emit_move_insn (var, zero_init);
+        }
+      break;
 
-        emit_move_insn (var, zero_init);
-      }
-  else if (ve->op == MULT)
-    FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var)
-      {
-        zero_init =  CONST1_RTX (GET_MODE (var));
-        emit_move_insn (var, zero_init);
-      }
+    case MULT:
+      FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var)
+        {
+          zero_init = CONST1_RTX (GET_MODE (var));
+          emit_move_insn (var, zero_init);
+        }
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
 
   seq = get_insns ();
   end_sequence ();
@@ -2165,18 +2187,24 @@  combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
     return;
 
   start_sequence ();
-  if (ve->op == PLUS || ve->op == MINUS)
-    FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var)
-      {
-        sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg),
-                                   var, sum);
-      }
-  else if (ve->op == MULT)
-    FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var)
-      {
-        sum = simplify_gen_binary (MULT, GET_MODE (ve->reg),
-                                   var, sum);
-      }
+  switch (ve->op)
+    {
+    case FMA:
+      /* Note that we only accumulate FMA via the ADD operand.  */
+    case PLUS:
+    case MINUS:
+      FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var)
+	sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
+      break;
+
+    case MULT:
+      FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var)
+	sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
 
   expr = force_operand (sum, ve->reg);
   if (expr != ve->reg)
diff --git a/gcc/optabs.c b/gcc/optabs.c
index a96eea1..28c433b 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -6180,6 +6180,10 @@  init_optabs (void)
   init_optab (umax_optab, UMAX);
   init_optab (pow_optab, UNKNOWN);
   init_optab (atan2_optab, UNKNOWN);
+  init_optab (fma_optab, FMA);
+  init_optab (fms_optab, UNKNOWN);
+  init_optab (fnma_optab, UNKNOWN);
+  init_optab (fnms_optab, UNKNOWN);
 
   /* These three have codes assigned exclusively for the sake of
      have_insn_for.  */