Patchwork [(2/7)] Widening multiplies by more than one mode

login
register
mail settings
Submitter Andrew Stubbs
Date July 14, 2011, 2:10 p.m.
Message ID <4E1EF8D5.1050901@codesourcery.com>
Download mbox | patch
Permalink /patch/104682/
State New
Headers show

Comments

Andrew Stubbs - July 14, 2011, 2:10 p.m.
On 12/07/11 12:04, Richard Guenther wrote:
> I wonder if we want to restrict the WIDEN_* operations to operate
> on types that have matching type/mode precision(**).

I've now modified the patch to allow bitfields, or other case where the 
precision is smaller than the mode-size. I've also addressed the 
formatting issues you pointed out (and in fact reorganised the code 
slightly to make the rest of the series a bit cleaner).

As in the previous version of this patch, it's necessary to convert the 
input values to the proper mode for the machine instruction, so the 
basic tools for supporting the bitfields were already there - I just had 
to tweak the conditionals to take bitfields into account.

The only this I haven't done is modified tree.def. Looking at it though, 
I don't thing any needs changing? The code is still valid, and the 
comments are correct (in fact, they may have been wrong before).

Is this version OK?

Andrew
Richard Guenther - July 14, 2011, 2:15 p.m.
On Thu, Jul 14, 2011 at 4:10 PM, Andrew Stubbs <ams@codesourcery.com> wrote:
> On 12/07/11 12:04, Richard Guenther wrote:
>>
>> I wonder if we want to restrict the WIDEN_* operations to operate
>> on types that have matching type/mode precision(**).
>
> I've now modified the patch to allow bitfields, or other case where the
> precision is smaller than the mode-size. I've also addressed the formatting
> issues you pointed out (and in fact reorganised the code slightly to make
> the rest of the series a bit cleaner).
>
> As in the previous version of this patch, it's necessary to convert the
> input values to the proper mode for the machine instruction, so the basic
> tools for supporting the bitfields were already there - I just had to tweak
> the conditionals to take bitfields into account.
>
> The only this I haven't done is modified tree.def. Looking at it though, I
> don't thing any needs changing? The code is still valid, and the comments
> are correct (in fact, they may have been wrong before).

Ah, it indeed talks about at least twice the precision already.

> Is this version OK?

Ok.

Thanks,
Richard.

> Andrew
>

Patch

2011-07-14  Andrew Stubbs  <ams@codesourcery.com>

	gcc/
	* config/arm/arm.md (maddhidi4): Remove '*' from name.
	* expr.c (expand_expr_real_2): Use find_widening_optab_handler.
	* optabs.c (find_widening_optab_handler_and_mode): New function.
	(expand_widen_pattern_expr): Use find_widening_optab_handler.
	(expand_binop_directly): Likewise.
	(expand_binop): Likewise.
	* optabs.h (find_widening_optab_handler): New macro define.
	(find_widening_optab_handler_and_mode): New prototype.
	* tree-cfg.c (verify_gimple_assign_binary): Adjust WIDEN_MULT_EXPR
	type precision rules.
	(verify_gimple_assign_ternary): Likewise for WIDEN_MULT_PLUS_EXPR.
	* tree-ssa-math-opts.c (build_and_insert_cast): New function.
	(is_widening_mult_rhs_p): Allow widening by more than one mode.
	Explicitly disallow mis-matched input types.
	(convert_mult_to_widen): Use find_widening_optab_handler, and cast
	input types to fit the new handler.
	(convert_plusminus_to_widen): Likewise.

	gcc/testsuite/
	* gcc.target/arm/wmul-bitfield-1.c: New file.

--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -1857,7 +1857,7 @@ 
    (set_attr "predicable" "yes")]
 )
 
-(define_insn "*maddhidi4"
+(define_insn "maddhidi4"
   [(set (match_operand:DI 0 "s_register_operand" "=r")
 	(plus:DI
 	  (mult:DI (sign_extend:DI
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -7638,19 +7638,16 @@  expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
 	{
 	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (treeop0));
 	  this_optab = usmul_widen_optab;
-	  if (mode == GET_MODE_2XWIDER_MODE (innermode))
+	  if (find_widening_optab_handler (this_optab, mode, innermode, 0)
+		!= CODE_FOR_nothing)
 	    {
-	      if (widening_optab_handler (this_optab, mode, innermode)
-		    != CODE_FOR_nothing)
-		{
-		  if (TYPE_UNSIGNED (TREE_TYPE (treeop0)))
-		    expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1,
-				     EXPAND_NORMAL);
-		  else
-		    expand_operands (treeop0, treeop1, NULL_RTX, &op1, &op0,
-				     EXPAND_NORMAL);
-		  goto binop3;
-		}
+	      if (TYPE_UNSIGNED (TREE_TYPE (treeop0)))
+		expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1,
+				 EXPAND_NORMAL);
+	      else
+		expand_operands (treeop0, treeop1, NULL_RTX, &op1, &op0,
+				 EXPAND_NORMAL);
+	      goto binop3;
 	    }
 	}
       /* Check for a multiplication with matching signedness.  */
@@ -7665,10 +7662,9 @@  expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
 	  optab other_optab = zextend_p ? smul_widen_optab : umul_widen_optab;
 	  this_optab = zextend_p ? umul_widen_optab : smul_widen_optab;
 
-	  if (mode == GET_MODE_2XWIDER_MODE (innermode)
-	      && TREE_CODE (treeop0) != INTEGER_CST)
+	  if (TREE_CODE (treeop0) != INTEGER_CST)
 	    {
-	      if (widening_optab_handler (this_optab, mode, innermode)
+	      if (find_widening_optab_handler (this_optab, mode, innermode, 0)
 		    != CODE_FOR_nothing)
 		{
 		  expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1,
@@ -7677,7 +7673,7 @@  expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
 					       unsignedp, this_optab);
 		  return REDUCE_BIT_FIELD (temp);
 		}
-	      if (widening_optab_handler (other_optab, mode, innermode)
+	      if (find_widening_optab_handler (other_optab, mode, innermode, 0)
 		    != CODE_FOR_nothing
 		  && innermode == word_mode)
 		{
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -225,6 +225,37 @@  add_equal_note (rtx insns, rtx target, enum rtx_code code, rtx op0, rtx op1)
   return 1;
 }
 
+/* Find a widening optab even if it doesn't widen as much as we want.
+   E.g. if from_mode is HImode, and to_mode is DImode, and there is no
+   direct HI->SI insn, then return SI->DI, if that exists.
+   If PERMIT_NON_WIDENING is non-zero then this can be used with
+   non-widening optabs also.  */
+
+enum insn_code
+find_widening_optab_handler_and_mode (optab op, enum machine_mode to_mode,
+				      enum machine_mode from_mode,
+				      int permit_non_widening,
+				      enum machine_mode *found_mode)
+{
+  for (; (permit_non_widening || from_mode != to_mode)
+	 && GET_MODE_SIZE (from_mode) <= GET_MODE_SIZE (to_mode)
+	 && from_mode != VOIDmode;
+       from_mode = GET_MODE_WIDER_MODE (from_mode))
+    {
+      enum insn_code handler = widening_optab_handler (op, to_mode,
+						       from_mode);
+
+      if (handler != CODE_FOR_nothing)
+	{
+	  if (found_mode)
+	    *found_mode = from_mode;
+	  return handler;
+	}
+    }
+
+  return CODE_FOR_nothing;
+}
+
 /* Widen OP to MODE and return the rtx for the widened operand.  UNSIGNEDP
    says whether OP is signed or unsigned.  NO_EXTEND is nonzero if we need
    not actually do a sign-extend or zero-extend, but can leave the
@@ -515,8 +546,9 @@  expand_widen_pattern_expr (sepops ops, rtx op0, rtx op1, rtx wide_op,
     optab_for_tree_code (ops->code, TREE_TYPE (oprnd0), optab_default);
   if (ops->code == WIDEN_MULT_PLUS_EXPR
       || ops->code == WIDEN_MULT_MINUS_EXPR)
-    icode = widening_optab_handler (widen_pattern_optab,
-				    TYPE_MODE (TREE_TYPE (ops->op2)), tmode0);
+    icode = find_widening_optab_handler (widen_pattern_optab,
+					 TYPE_MODE (TREE_TYPE (ops->op2)),
+					 tmode0, 0);
   else
     icode = optab_handler (widen_pattern_optab, tmode0);
   gcc_assert (icode != CODE_FOR_nothing);
@@ -1243,7 +1275,8 @@  expand_binop_directly (enum machine_mode mode, optab binoptab,
 		       rtx last)
 {
   enum machine_mode from_mode = GET_MODE (op0);
-  enum insn_code icode = widening_optab_handler (binoptab, mode, from_mode);
+  enum insn_code icode = find_widening_optab_handler (binoptab, mode,
+						      from_mode, 1);
   enum machine_mode xmode0 = insn_data[(int) icode].operand[1].mode;
   enum machine_mode xmode1 = insn_data[(int) icode].operand[2].mode;
   enum machine_mode mode0, mode1, tmp_mode;
@@ -1390,7 +1423,7 @@  expand_binop (enum machine_mode mode, optab binoptab, rtx op0, rtx op1,
   /* If we can do it with a three-operand insn, do so.  */
 
   if (methods != OPTAB_MUST_WIDEN
-      && widening_optab_handler (binoptab, mode, GET_MODE (op0))
+      && find_widening_optab_handler (binoptab, mode, GET_MODE (op0), 1)
 	    != CODE_FOR_nothing)
     {
       temp = expand_binop_directly (mode, binoptab, op0, op1, target,
@@ -1464,10 +1497,11 @@  expand_binop (enum machine_mode mode, optab binoptab, rtx op0, rtx op1,
 		!= CODE_FOR_nothing
 	    || (binoptab == smul_optab
 		&& GET_MODE_WIDER_MODE (wider_mode) != VOIDmode
-		&& (widening_optab_handler ((unsignedp ? umul_widen_optab
-						       : smul_widen_optab),
-					    GET_MODE_WIDER_MODE (wider_mode),
-					    mode)
+		&& (find_widening_optab_handler ((unsignedp
+						  ? umul_widen_optab
+						  : smul_widen_optab),
+						 GET_MODE_WIDER_MODE (wider_mode),
+						 mode, 0)
 		    != CODE_FOR_nothing)))
 	  {
 	    rtx xop0 = op0, xop1 = op1;
@@ -2002,7 +2036,7 @@  expand_binop (enum machine_mode mode, optab binoptab, rtx op0, rtx op1,
 	   wider_mode != VOIDmode;
 	   wider_mode = GET_MODE_WIDER_MODE (wider_mode))
 	{
-	  if (widening_optab_handler (binoptab, wider_mode, mode)
+	  if (find_widening_optab_handler (binoptab, wider_mode, mode, 1)
 		  != CODE_FOR_nothing
 	      || (methods == OPTAB_LIB
 		  && optab_libfunc (binoptab, wider_mode)))
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -807,6 +807,15 @@  extern rtx expand_copysign (rtx, rtx, rtx);
 extern void emit_unop_insn (enum insn_code, rtx, rtx, enum rtx_code);
 extern bool maybe_emit_unop_insn (enum insn_code, rtx, rtx, enum rtx_code);
 
+/* Find a widening optab even if it doesn't widen as much as we want.  */
+#define find_widening_optab_handler(A,B,C,D) \
+  find_widening_optab_handler_and_mode (A, B, C, D, NULL)
+extern enum insn_code find_widening_optab_handler_and_mode (optab,
+							    enum machine_mode,
+							    enum machine_mode,
+							    int,
+							    enum machine_mode *);
+
 /* An extra flag to control optab_for_tree_code's behavior.  This is needed to
    distinguish between machines with a vector shift that takes a scalar for the
    shift amount vs. machines that take a vector for the shift amount.  */
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/wmul-bitfield-1.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+struct bf
+{
+  int a : 3;
+  int b : 15;
+  int c : 3;
+};
+
+long long
+foo (long long a, struct bf b, struct bf c)
+{
+  return a + b.b * c.b;
+}
+
+/* { dg-final { scan-assembler "smlalbb" } } */
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -3577,7 +3577,7 @@  do_pointer_plus_expr_check:
     case WIDEN_MULT_EXPR:
       if (TREE_CODE (lhs_type) != INTEGER_TYPE)
 	return true;
-      return ((2 * TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (lhs_type))
+      return ((2 * TYPE_PRECISION (rhs1_type) > TYPE_PRECISION (lhs_type))
 	      || (TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (rhs2_type)));
 
     case WIDEN_SUM_EXPR:
@@ -3668,7 +3668,7 @@  verify_gimple_assign_ternary (gimple stmt)
 	   && !FIXED_POINT_TYPE_P (rhs1_type))
 	  || !useless_type_conversion_p (rhs1_type, rhs2_type)
 	  || !useless_type_conversion_p (lhs_type, rhs3_type)
-	  || 2 * TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (lhs_type)
+	  || 2 * TYPE_PRECISION (rhs1_type) > TYPE_PRECISION (lhs_type)
 	  || TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (rhs2_type))
 	{
 	  error ("type mismatch in widening multiply-accumulate expression");
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -1086,6 +1086,16 @@  build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
   return result;
 }
 
+/* Build a gimple assignment to cast VAL to TARGET.  Insert the statement
+   prior to GSI's current position, and return the fresh SSA name.  */
+
+static tree
+build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
+		       tree target, tree val)
+{
+  return build_and_insert_binop (gsi, loc, target, CONVERT_EXPR, val, NULL);
+}
+
 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
    with location info LOC.  If possible, create an equivalent and
    less expensive sequence of statements prior to GSI, and return an
@@ -1958,8 +1968,8 @@  struct gimple_opt_pass pass_optimize_bswap =
 /* Return true if RHS is a suitable operand for a widening multiplication.
    There are two cases:
 
-     - RHS makes some value twice as wide.  Store that value in *NEW_RHS_OUT
-       if so, and store its type in *TYPE_OUT.
+     - RHS makes some value at least twice as wide.  Store that value
+       in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
 
      - RHS is an integer constant.  Store that value in *NEW_RHS_OUT if so,
        but leave *TYPE_OUT untouched.  */
@@ -1987,7 +1997,7 @@  is_widening_mult_rhs_p (tree rhs, tree *type_out, tree *new_rhs_out)
       rhs1 = gimple_assign_rhs1 (stmt);
       type1 = TREE_TYPE (rhs1);
       if (TREE_CODE (type1) != TREE_CODE (type)
-	  || TYPE_PRECISION (type1) * 2 != TYPE_PRECISION (type))
+	  || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
 	return false;
 
       *new_rhs_out = rhs1;
@@ -2043,6 +2053,10 @@  is_widening_mult_p (gimple stmt,
       *type2_out = *type1_out;
     }
 
+  /* FIXME: remove this restriction.  */
+  if (TYPE_PRECISION (*type1_out) != TYPE_PRECISION (*type2_out))
+    return false;
+
   return true;
 }
 
@@ -2051,12 +2065,14 @@  is_widening_mult_p (gimple stmt,
    value is true iff we converted the statement.  */
 
 static bool
-convert_mult_to_widen (gimple stmt)
+convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi)
 {
-  tree lhs, rhs1, rhs2, type, type1, type2;
+  tree lhs, rhs1, rhs2, type, type1, type2, tmp;
   enum insn_code handler;
-  enum machine_mode to_mode, from_mode;
+  enum machine_mode to_mode, from_mode, actual_mode;
   optab op;
+  int actual_precision;
+  location_t loc = gimple_location (stmt);
 
   lhs = gimple_assign_lhs (stmt);
   type = TREE_TYPE (lhs);
@@ -2076,13 +2092,32 @@  convert_mult_to_widen (gimple stmt)
   else
     op = usmul_widen_optab;
 
-  handler = widening_optab_handler (op, to_mode, from_mode);
+  handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
+						  0, &actual_mode);
 
   if (handler == CODE_FOR_nothing)
     return false;
 
-  gimple_assign_set_rhs1 (stmt, fold_convert (type1, rhs1));
-  gimple_assign_set_rhs2 (stmt, fold_convert (type2, rhs2));
+  /* Ensure that the inputs to the handler are in the correct precison
+     for the opcode.  This will be the full mode size.  */
+  actual_precision = GET_MODE_PRECISION (actual_mode);
+  if (actual_precision != TYPE_PRECISION (type1))
+    {
+      tmp = create_tmp_var (build_nonstandard_integer_type
+				(actual_precision, TYPE_UNSIGNED (type1)),
+			    NULL);
+      rhs1 = build_and_insert_cast (gsi, loc, tmp, rhs1);
+
+      /* Reuse the same type info, if possible.  */
+      if (TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
+	tmp = create_tmp_var (build_nonstandard_integer_type
+				(actual_precision, TYPE_UNSIGNED (type2)),
+			      NULL);
+      rhs2 = build_and_insert_cast (gsi, loc, tmp, rhs2);
+    }
+
+  gimple_assign_set_rhs1 (stmt, rhs1);
+  gimple_assign_set_rhs2 (stmt, rhs2);
   gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
   update_stmt (stmt);
   widen_mul_stats.widen_mults_inserted++;
@@ -2100,11 +2135,15 @@  convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
 			    enum tree_code code)
 {
   gimple rhs1_stmt = NULL, rhs2_stmt = NULL;
-  tree type, type1, type2;
+  tree type, type1, type2, tmp;
   tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
   enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
   optab this_optab;
   enum tree_code wmult_code;
+  enum insn_code handler;
+  enum machine_mode to_mode, from_mode, actual_mode;
+  location_t loc = gimple_location (stmt);
+  int actual_precision;
 
   lhs = gimple_assign_lhs (stmt);
   type = TREE_TYPE (lhs);
@@ -2138,39 +2177,33 @@  convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
   else
     return false;
 
-  if (code == PLUS_EXPR && rhs1_code == MULT_EXPR)
+  /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
+     is_widening_mult_p, but we still need the rhs returns.
+
+     It might also appear that it would be sufficient to use the existing
+     operands of the widening multiply, but that would limit the choice of
+     multiply-and-accumulate instructions.  */
+  if (code == PLUS_EXPR
+      && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
     {
       if (!is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
 			       &type2, &mult_rhs2))
 	return false;
       add_rhs = rhs2;
     }
-  else if (rhs2_code == MULT_EXPR)
+  else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
     {
       if (!is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
 			       &type2, &mult_rhs2))
 	return false;
       add_rhs = rhs1;
     }
-  else if (code == PLUS_EXPR && rhs1_code == WIDEN_MULT_EXPR)
-    {
-      mult_rhs1 = gimple_assign_rhs1 (rhs1_stmt);
-      mult_rhs2 = gimple_assign_rhs2 (rhs1_stmt);
-      type1 = TREE_TYPE (mult_rhs1);
-      type2 = TREE_TYPE (mult_rhs2);
-      add_rhs = rhs2;
-    }
-  else if (rhs2_code == WIDEN_MULT_EXPR)
-    {
-      mult_rhs1 = gimple_assign_rhs1 (rhs2_stmt);
-      mult_rhs2 = gimple_assign_rhs2 (rhs2_stmt);
-      type1 = TREE_TYPE (mult_rhs1);
-      type2 = TREE_TYPE (mult_rhs2);
-      add_rhs = rhs1;
-    }
   else
     return false;
 
+  to_mode = TYPE_MODE (type);
+  from_mode = TYPE_MODE (type1);
+
   if (TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
     return false;
 
@@ -2178,15 +2211,26 @@  convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
      accumulate in this mode/signedness combination, otherwise
      this transformation is likely to pessimize code.  */
   this_optab = optab_for_tree_code (wmult_code, type1, optab_default);
-  if (widening_optab_handler (this_optab, TYPE_MODE (type), TYPE_MODE (type1))
-	== CODE_FOR_nothing)
+  handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
+						  from_mode, 0, &actual_mode);
+
+  if (handler == CODE_FOR_nothing)
     return false;
 
-  /* ??? May need some type verification here?  */
+  /* Ensure that the inputs to the handler are in the correct precison
+     for the opcode.  This will be the full mode size.  */
+  actual_precision = GET_MODE_PRECISION (actual_mode);
+  if (actual_precision != TYPE_PRECISION (type1))
+    {
+      tmp = create_tmp_var (build_nonstandard_integer_type
+				(actual_precision, TYPE_UNSIGNED (type1)),
+			    NULL);
+
+      mult_rhs1 = build_and_insert_cast (gsi, loc, tmp, mult_rhs1);
+      mult_rhs2 = build_and_insert_cast (gsi, loc, tmp, mult_rhs2);
+    }
 
-  gimple_assign_set_rhs_with_ops_1 (gsi, wmult_code,
-				    fold_convert (type1, mult_rhs1),
-				    fold_convert (type2, mult_rhs2),
+  gimple_assign_set_rhs_with_ops_1 (gsi, wmult_code, mult_rhs1, mult_rhs2,
 				    add_rhs);
   update_stmt (gsi_stmt (*gsi));
   widen_mul_stats.maccs_inserted++;
@@ -2398,7 +2442,7 @@  execute_optimize_widening_mul (void)
 	      switch (code)
 		{
 		case MULT_EXPR:
-		  if (!convert_mult_to_widen (stmt)
+		  if (!convert_mult_to_widen (stmt, &gsi)
 		      && convert_mult_to_fma (stmt,
 					      gimple_assign_rhs1 (stmt),
 					      gimple_assign_rhs2 (stmt)))