[(3/7)] Widening multiply-and-accumulate pattern matching

Submitted by Andrew Stubbs on July 14, 2011, 2:16 p.m.

Details

Message ID 4E1EFA53.3040104@codesourcery.com
State New
Headers show

Commit Message

Andrew Stubbs July 14, 2011, 2:16 p.m.
This update changes only the context modified by changes to patch 2. The 
patch has already been approved. I'm just posting it for completeness.

Andrew

Comments

Janis Johnson July 18, 2011, 11:33 p.m.
On 07/14/2011 07:16 AM, Andrew Stubbs wrote:
> { dg-options "-O2 -march=armv7-a" }

The tests use "{ dg-options "-O2 -march=armv7-a" }" but -march will be
overridden for multilibs that specify -march, and might conflict with
other multilib options.  If you really need that particular -march value
then use dg-skip-if to skip multilibs with conflicting or overriding
options, or else "dg-require-effective-target arm_dsp" to only run the
tests when the multilib already supports it; that has the advantage of
testing a wider range of arch values.

Janis
Andrew Stubbs July 19, 2011, 8:28 a.m.
On 19/07/11 00:33, Janis Johnson wrote:
> On 07/14/2011 07:16 AM, Andrew Stubbs wrote:
>> { dg-options "-O2 -march=armv7-a" }
>
> The tests use "{ dg-options "-O2 -march=armv7-a" }" but -march will be
> overridden for multilibs that specify -march, and might conflict with
> other multilib options.  If you really need that particular -march value
> then use dg-skip-if to skip multilibs with conflicting or overriding
> options, or else "dg-require-effective-target arm_dsp" to only run the
> tests when the multilib already supports it; that has the advantage of
> testing a wider range of arch values.

Yes, I know about this one. You committed that feature since I first 
posted this, I think? I plan to make that change when I do the final commit.

Andrew

Patch hide | download patch | download mbox

2011-07-14  Andrew Stubbs  <ams@codesourcery.com>

	gcc/
	* tree-ssa-math-opts.c (convert_plusminus_to_widen): Permit a single
	conversion statement separating multiply-and-accumulate.

	gcc/testsuite/
	* gcc.target/arm/wmul-5.c: New file.
	* gcc.target/arm/no-wmla-1.c: New file.

--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/no-wmla-1.c
@@ -0,0 +1,11 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+int
+foo (int a, short b, short c)
+{
+     int bc = b * c;
+        return a + (short)bc;
+}
+
+/* { dg-final { scan-assembler "mul" } } */
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/wmul-5.c
@@ -0,0 +1,10 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+long long
+foo (long long a, char *b, char *c)
+{
+  return a + *b * *c;
+}
+
+/* { dg-final { scan-assembler "umlal" } } */
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -2135,6 +2135,7 @@  convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
 			    enum tree_code code)
 {
   gimple rhs1_stmt = NULL, rhs2_stmt = NULL;
+  gimple conv1_stmt = NULL, conv2_stmt = NULL, conv_stmt;
   tree type, type1, type2, tmp;
   tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
   enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
@@ -2177,6 +2178,38 @@  convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
   else
     return false;
 
+  /* Allow for one conversion statement between the multiply
+     and addition/subtraction statement.  If there are more than
+     one conversions then we assume they would invalidate this
+     transformation.  If that's not the case then they should have
+     been folded before now.  */
+  if (CONVERT_EXPR_CODE_P (rhs1_code))
+    {
+      conv1_stmt = rhs1_stmt;
+      rhs1 = gimple_assign_rhs1 (rhs1_stmt);
+      if (TREE_CODE (rhs1) == SSA_NAME)
+	{
+	  rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
+	  if (is_gimple_assign (rhs1_stmt))
+	    rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
+	}
+      else
+	return false;
+    }
+  if (CONVERT_EXPR_CODE_P (rhs2_code))
+    {
+      conv2_stmt = rhs2_stmt;
+      rhs2 = gimple_assign_rhs1 (rhs2_stmt);
+      if (TREE_CODE (rhs2) == SSA_NAME)
+	{
+	  rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
+	  if (is_gimple_assign (rhs2_stmt))
+	    rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
+	}
+      else
+	return false;
+    }
+
   /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
      is_widening_mult_p, but we still need the rhs returns.
 
@@ -2190,6 +2223,7 @@  convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
 			       &type2, &mult_rhs2))
 	return false;
       add_rhs = rhs2;
+      conv_stmt = conv1_stmt;
     }
   else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
     {
@@ -2197,6 +2231,7 @@  convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
 			       &type2, &mult_rhs2))
 	return false;
       add_rhs = rhs1;
+      conv_stmt = conv2_stmt;
     }
   else
     return false;
@@ -2207,6 +2242,33 @@  convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
   if (TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
     return false;
 
+  /* If there was a conversion between the multiply and addition
+     then we need to make sure it fits a multiply-and-accumulate.
+     The should be a single mode change which does not change the
+     value.  */
+  if (conv_stmt)
+    {
+      tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
+      tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
+      int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
+      bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
+
+      if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
+	{
+	  /* Conversion is a truncate.  */
+	  if (TYPE_PRECISION (to_type) < data_size)
+	    return false;
+	}
+      else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
+	{
+	  /* Conversion is an extend.  Check it's the right sort.  */
+	  if (TYPE_UNSIGNED (from_type) != is_unsigned
+	      && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
+	    return false;
+	}
+      /* else convert is a no-op for our purposes.  */
+    }
+
   /* Verify that the machine can perform a widening multiply
      accumulate in this mode/signedness combination, otherwise
      this transformation is likely to pessimize code.  */