Patchwork [(2/7)] Widening multiplies by more than one mode

login
register
mail settings
Submitter Andrew Stubbs
Date Aug. 19, 2011, 2:25 p.m.
Message ID <4E4E7242.8010601@codesourcery.com>
Download mbox | patch
Permalink /patch/110679/
State New
Headers show

Comments

Andrew Stubbs - Aug. 19, 2011, 2:25 p.m.
On 14/07/11 15:15, Richard Guenther wrote:
>> Is this version OK?
> Ok.

I've just committed this slightly updated patch.

I found some bugs while testing, now fixed. Most of the changes in this 
patch are context changes, and using widened_mode to handle VOIDmode 
constants.

Andrew

Patch

2011-08-19  Andrew Stubbs  <ams@codesourcery.com>

	gcc/
	* config/arm/arm.md (maddhidi4): Remove '*' from name.
	* expr.c (expand_expr_real_2): Use find_widening_optab_handler.
	* optabs.c (find_widening_optab_handler_and_mode): New function.
	(expand_widen_pattern_expr): Use find_widening_optab_handler.
	(expand_binop_directly): Likewise.
	(expand_binop): Likewise.
	* optabs.h (find_widening_optab_handler): New macro define.
	(find_widening_optab_handler_and_mode): New prototype.
	* tree-cfg.c (verify_gimple_assign_binary): Adjust WIDEN_MULT_EXPR
	type precision rules.
	(verify_gimple_assign_ternary): Likewise for WIDEN_MULT_PLUS_EXPR.
	* tree-ssa-math-opts.c (build_and_insert_cast): New function.
	(is_widening_mult_rhs_p): Allow widening by more than one mode.
	Explicitly disallow mis-matched input types.
	(convert_mult_to_widen): Use find_widening_optab_handler, and cast
	input types to fit the new handler.
	(convert_plusminus_to_widen): Likewise.

	gcc/testsuite/
	* gcc.target/arm/wmul-bitfield-1.c: New file.

--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -1857,7 +1857,7 @@ 
    (set_attr "predicable" "yes")]
 )
 
-(define_insn "*maddhidi4"
+(define_insn "maddhidi4"
   [(set (match_operand:DI 0 "s_register_operand" "=r")
 	(plus:DI
 	  (mult:DI (sign_extend:DI
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -8003,19 +8003,16 @@  expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
 	{
 	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (treeop0));
 	  this_optab = usmul_widen_optab;
-	  if (mode == GET_MODE_2XWIDER_MODE (innermode))
+	  if (find_widening_optab_handler (this_optab, mode, innermode, 0)
+		!= CODE_FOR_nothing)
 	    {
-	      if (widening_optab_handler (this_optab, mode, innermode)
-		    != CODE_FOR_nothing)
-		{
-		  if (TYPE_UNSIGNED (TREE_TYPE (treeop0)))
-		    expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1,
-				     EXPAND_NORMAL);
-		  else
-		    expand_operands (treeop0, treeop1, NULL_RTX, &op1, &op0,
-				     EXPAND_NORMAL);
-		  goto binop3;
-		}
+	      if (TYPE_UNSIGNED (TREE_TYPE (treeop0)))
+		expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1,
+				 EXPAND_NORMAL);
+	      else
+		expand_operands (treeop0, treeop1, NULL_RTX, &op1, &op0,
+				 EXPAND_NORMAL);
+	      goto binop3;
 	    }
 	}
       /* Check for a multiplication with matching signedness.  */
@@ -8030,10 +8027,9 @@  expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
 	  optab other_optab = zextend_p ? smul_widen_optab : umul_widen_optab;
 	  this_optab = zextend_p ? umul_widen_optab : smul_widen_optab;
 
-	  if (mode == GET_MODE_2XWIDER_MODE (innermode)
-	      && TREE_CODE (treeop0) != INTEGER_CST)
+	  if (TREE_CODE (treeop0) != INTEGER_CST)
 	    {
-	      if (widening_optab_handler (this_optab, mode, innermode)
+	      if (find_widening_optab_handler (this_optab, mode, innermode, 0)
 		    != CODE_FOR_nothing)
 		{
 		  expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1,
@@ -8042,7 +8038,7 @@  expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
 					       unsignedp, this_optab);
 		  return REDUCE_BIT_FIELD (temp);
 		}
-	      if (widening_optab_handler (other_optab, mode, innermode)
+	      if (find_widening_optab_handler (other_optab, mode, innermode, 0)
 		    != CODE_FOR_nothing
 		  && innermode == word_mode)
 		{
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -249,6 +249,37 @@  widened_mode (enum machine_mode to_mode, rtx op0, rtx op1)
   return result;
 }
 
+/* Find a widening optab even if it doesn't widen as much as we want.
+   E.g. if from_mode is HImode, and to_mode is DImode, and there is no
+   direct HI->SI insn, then return SI->DI, if that exists.
+   If PERMIT_NON_WIDENING is non-zero then this can be used with
+   non-widening optabs also.  */
+
+enum insn_code
+find_widening_optab_handler_and_mode (optab op, enum machine_mode to_mode,
+				      enum machine_mode from_mode,
+				      int permit_non_widening,
+				      enum machine_mode *found_mode)
+{
+  for (; (permit_non_widening || from_mode != to_mode)
+	 && GET_MODE_SIZE (from_mode) <= GET_MODE_SIZE (to_mode)
+	 && from_mode != VOIDmode;
+       from_mode = GET_MODE_WIDER_MODE (from_mode))
+    {
+      enum insn_code handler = widening_optab_handler (op, to_mode,
+						       from_mode);
+
+      if (handler != CODE_FOR_nothing)
+	{
+	  if (found_mode)
+	    *found_mode = from_mode;
+	  return handler;
+	}
+    }
+
+  return CODE_FOR_nothing;
+}
+
 /* Widen OP to MODE and return the rtx for the widened operand.  UNSIGNEDP
    says whether OP is signed or unsigned.  NO_EXTEND is nonzero if we need
    not actually do a sign-extend or zero-extend, but can leave the
@@ -539,8 +570,9 @@  expand_widen_pattern_expr (sepops ops, rtx op0, rtx op1, rtx wide_op,
     optab_for_tree_code (ops->code, TREE_TYPE (oprnd0), optab_default);
   if (ops->code == WIDEN_MULT_PLUS_EXPR
       || ops->code == WIDEN_MULT_MINUS_EXPR)
-    icode = widening_optab_handler (widen_pattern_optab,
-				    TYPE_MODE (TREE_TYPE (ops->op2)), tmode0);
+    icode = find_widening_optab_handler (widen_pattern_optab,
+					 TYPE_MODE (TREE_TYPE (ops->op2)),
+					 tmode0, 0);
   else
     icode = optab_handler (widen_pattern_optab, tmode0);
   gcc_assert (icode != CODE_FOR_nothing);
@@ -1267,7 +1299,8 @@  expand_binop_directly (enum machine_mode mode, optab binoptab,
 		       rtx last)
 {
   enum machine_mode from_mode = widened_mode (mode, op0, op1);
-  enum insn_code icode = widening_optab_handler (binoptab, mode, from_mode);
+  enum insn_code icode = find_widening_optab_handler (binoptab, mode,
+						      from_mode, 1);
   enum machine_mode xmode0 = insn_data[(int) icode].operand[1].mode;
   enum machine_mode xmode1 = insn_data[(int) icode].operand[2].mode;
   enum machine_mode mode0, mode1, tmp_mode;
@@ -1414,8 +1447,8 @@  expand_binop (enum machine_mode mode, optab binoptab, rtx op0, rtx op1,
   /* If we can do it with a three-operand insn, do so.  */
 
   if (methods != OPTAB_MUST_WIDEN
-      && widening_optab_handler (binoptab, mode,
-				 widened_mode (mode, op0, op1))
+      && find_widening_optab_handler (binoptab, mode,
+				      widened_mode (mode, op0, op1), 1)
 	    != CODE_FOR_nothing)
     {
       temp = expand_binop_directly (mode, binoptab, op0, op1, target,
@@ -1488,10 +1521,11 @@  expand_binop (enum machine_mode mode, optab binoptab, rtx op0, rtx op1,
 	if (optab_handler (binoptab, wider_mode) != CODE_FOR_nothing
 	    || (binoptab == smul_optab
 		&& GET_MODE_WIDER_MODE (wider_mode) != VOIDmode
-		&& (widening_optab_handler ((unsignedp ? umul_widen_optab
-						       : smul_widen_optab),
-					    GET_MODE_WIDER_MODE (wider_mode),
-					    mode)
+		&& (find_widening_optab_handler ((unsignedp
+						  ? umul_widen_optab
+						  : smul_widen_optab),
+						 GET_MODE_WIDER_MODE (wider_mode),
+						 mode, 0)
 		    != CODE_FOR_nothing)))
 	  {
 	    rtx xop0 = op0, xop1 = op1;
@@ -2026,8 +2060,7 @@  expand_binop (enum machine_mode mode, optab binoptab, rtx op0, rtx op1,
 	   wider_mode != VOIDmode;
 	   wider_mode = GET_MODE_WIDER_MODE (wider_mode))
 	{
-	  if (optab_handler (binoptab, wider_mode) != CODE_FOR_nothing
-	      || widening_optab_handler (binoptab, wider_mode, mode)
+	  if (find_widening_optab_handler (binoptab, wider_mode, mode, 1)
 		  != CODE_FOR_nothing
 	      || (methods == OPTAB_LIB
 		  && optab_libfunc (binoptab, wider_mode)))
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -807,6 +807,15 @@  extern rtx expand_copysign (rtx, rtx, rtx);
 extern void emit_unop_insn (enum insn_code, rtx, rtx, enum rtx_code);
 extern bool maybe_emit_unop_insn (enum insn_code, rtx, rtx, enum rtx_code);
 
+/* Find a widening optab even if it doesn't widen as much as we want.  */
+#define find_widening_optab_handler(A,B,C,D) \
+  find_widening_optab_handler_and_mode (A, B, C, D, NULL)
+extern enum insn_code find_widening_optab_handler_and_mode (optab,
+							    enum machine_mode,
+							    enum machine_mode,
+							    int,
+							    enum machine_mode *);
+
 /* An extra flag to control optab_for_tree_code's behavior.  This is needed to
    distinguish between machines with a vector shift that takes a scalar for the
    shift amount vs. machines that take a vector for the shift amount.  */
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/wmul-bitfield-1.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-require-effective-target arm_dsp } */
+
+struct bf
+{
+  int a : 3;
+  int b : 15;
+  int c : 3;
+};
+
+long long
+foo (long long a, struct bf b, struct bf c)
+{
+  return a + b.b * c.b;
+}
+
+/* { dg-final { scan-assembler "smlalbb" } } */
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -3564,7 +3564,7 @@  do_pointer_plus_expr_check:
     case WIDEN_MULT_EXPR:
       if (TREE_CODE (lhs_type) != INTEGER_TYPE)
 	return true;
-      return ((2 * TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (lhs_type))
+      return ((2 * TYPE_PRECISION (rhs1_type) > TYPE_PRECISION (lhs_type))
 	      || (TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (rhs2_type)));
 
     case WIDEN_SUM_EXPR:
@@ -3655,7 +3655,7 @@  verify_gimple_assign_ternary (gimple stmt)
 	   && !FIXED_POINT_TYPE_P (rhs1_type))
 	  || !useless_type_conversion_p (rhs1_type, rhs2_type)
 	  || !useless_type_conversion_p (lhs_type, rhs3_type)
-	  || 2 * TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (lhs_type)
+	  || 2 * TYPE_PRECISION (rhs1_type) > TYPE_PRECISION (lhs_type)
 	  || TYPE_PRECISION (rhs1_type) != TYPE_PRECISION (rhs2_type))
 	{
 	  error ("type mismatch in widening multiply-accumulate expression");
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -1086,6 +1086,16 @@  build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
   return result;
 }
 
+/* Build a gimple assignment to cast VAL to TARGET.  Insert the statement
+   prior to GSI's current position, and return the fresh SSA name.  */
+
+static tree
+build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
+		       tree target, tree val)
+{
+  return build_and_insert_binop (gsi, loc, target, CONVERT_EXPR, val, NULL);
+}
+
 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
    with location info LOC.  If possible, create an equivalent and
    less expensive sequence of statements prior to GSI, and return an
@@ -1959,8 +1969,8 @@  struct gimple_opt_pass pass_optimize_bswap =
 /* Return true if RHS is a suitable operand for a widening multiplication.
    There are two cases:
 
-     - RHS makes some value twice as wide.  Store that value in *NEW_RHS_OUT
-       if so, and store its type in *TYPE_OUT.
+     - RHS makes some value at least twice as wide.  Store that value
+       in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
 
      - RHS is an integer constant.  Store that value in *NEW_RHS_OUT if so,
        but leave *TYPE_OUT untouched.  */
@@ -1988,7 +1998,7 @@  is_widening_mult_rhs_p (tree rhs, tree *type_out, tree *new_rhs_out)
       rhs1 = gimple_assign_rhs1 (stmt);
       type1 = TREE_TYPE (rhs1);
       if (TREE_CODE (type1) != TREE_CODE (type)
-	  || TYPE_PRECISION (type1) * 2 != TYPE_PRECISION (type))
+	  || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
 	return false;
 
       *new_rhs_out = rhs1;
@@ -2044,6 +2054,10 @@  is_widening_mult_p (gimple stmt,
       *type2_out = *type1_out;
     }
 
+  /* FIXME: remove this restriction.  */
+  if (TYPE_PRECISION (*type1_out) != TYPE_PRECISION (*type2_out))
+    return false;
+
   return true;
 }
 
@@ -2052,12 +2066,14 @@  is_widening_mult_p (gimple stmt,
    value is true iff we converted the statement.  */
 
 static bool
-convert_mult_to_widen (gimple stmt)
+convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi)
 {
-  tree lhs, rhs1, rhs2, type, type1, type2;
+  tree lhs, rhs1, rhs2, type, type1, type2, tmp;
   enum insn_code handler;
-  enum machine_mode to_mode, from_mode;
+  enum machine_mode to_mode, from_mode, actual_mode;
   optab op;
+  int actual_precision;
+  location_t loc = gimple_location (stmt);
 
   lhs = gimple_assign_lhs (stmt);
   type = TREE_TYPE (lhs);
@@ -2077,13 +2093,32 @@  convert_mult_to_widen (gimple stmt)
   else
     op = usmul_widen_optab;
 
-  handler = widening_optab_handler (op, to_mode, from_mode);
+  handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
+						  0, &actual_mode);
 
   if (handler == CODE_FOR_nothing)
     return false;
 
-  gimple_assign_set_rhs1 (stmt, fold_convert (type1, rhs1));
-  gimple_assign_set_rhs2 (stmt, fold_convert (type2, rhs2));
+  /* Ensure that the inputs to the handler are in the correct precison
+     for the opcode.  This will be the full mode size.  */
+  actual_precision = GET_MODE_PRECISION (actual_mode);
+  if (actual_precision != TYPE_PRECISION (type1))
+    {
+      tmp = create_tmp_var (build_nonstandard_integer_type
+				(actual_precision, TYPE_UNSIGNED (type1)),
+			    NULL);
+      rhs1 = build_and_insert_cast (gsi, loc, tmp, rhs1);
+
+      /* Reuse the same type info, if possible.  */
+      if (TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
+	tmp = create_tmp_var (build_nonstandard_integer_type
+				(actual_precision, TYPE_UNSIGNED (type2)),
+			      NULL);
+      rhs2 = build_and_insert_cast (gsi, loc, tmp, rhs2);
+    }
+
+  gimple_assign_set_rhs1 (stmt, rhs1);
+  gimple_assign_set_rhs2 (stmt, rhs2);
   gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
   update_stmt (stmt);
   widen_mul_stats.widen_mults_inserted++;
@@ -2101,11 +2136,15 @@  convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
 			    enum tree_code code)
 {
   gimple rhs1_stmt = NULL, rhs2_stmt = NULL;
-  tree type, type1, type2;
+  tree type, type1, type2, tmp;
   tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
   enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
   optab this_optab;
   enum tree_code wmult_code;
+  enum insn_code handler;
+  enum machine_mode to_mode, from_mode, actual_mode;
+  location_t loc = gimple_location (stmt);
+  int actual_precision;
 
   lhs = gimple_assign_lhs (stmt);
   type = TREE_TYPE (lhs);
@@ -2139,39 +2178,33 @@  convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
   else
     return false;
 
-  if (code == PLUS_EXPR && rhs1_code == MULT_EXPR)
+  /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
+     is_widening_mult_p, but we still need the rhs returns.
+
+     It might also appear that it would be sufficient to use the existing
+     operands of the widening multiply, but that would limit the choice of
+     multiply-and-accumulate instructions.  */
+  if (code == PLUS_EXPR
+      && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
     {
       if (!is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
 			       &type2, &mult_rhs2))
 	return false;
       add_rhs = rhs2;
     }
-  else if (rhs2_code == MULT_EXPR)
+  else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
     {
       if (!is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
 			       &type2, &mult_rhs2))
 	return false;
       add_rhs = rhs1;
     }
-  else if (code == PLUS_EXPR && rhs1_code == WIDEN_MULT_EXPR)
-    {
-      mult_rhs1 = gimple_assign_rhs1 (rhs1_stmt);
-      mult_rhs2 = gimple_assign_rhs2 (rhs1_stmt);
-      type1 = TREE_TYPE (mult_rhs1);
-      type2 = TREE_TYPE (mult_rhs2);
-      add_rhs = rhs2;
-    }
-  else if (rhs2_code == WIDEN_MULT_EXPR)
-    {
-      mult_rhs1 = gimple_assign_rhs1 (rhs2_stmt);
-      mult_rhs2 = gimple_assign_rhs2 (rhs2_stmt);
-      type1 = TREE_TYPE (mult_rhs1);
-      type2 = TREE_TYPE (mult_rhs2);
-      add_rhs = rhs1;
-    }
   else
     return false;
 
+  to_mode = TYPE_MODE (type);
+  from_mode = TYPE_MODE (type1);
+
   if (TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
     return false;
 
@@ -2179,15 +2212,26 @@  convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
      accumulate in this mode/signedness combination, otherwise
      this transformation is likely to pessimize code.  */
   this_optab = optab_for_tree_code (wmult_code, type1, optab_default);
-  if (widening_optab_handler (this_optab, TYPE_MODE (type), TYPE_MODE (type1))
-	== CODE_FOR_nothing)
+  handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
+						  from_mode, 0, &actual_mode);
+
+  if (handler == CODE_FOR_nothing)
     return false;
 
-  /* ??? May need some type verification here?  */
+  /* Ensure that the inputs to the handler are in the correct precison
+     for the opcode.  This will be the full mode size.  */
+  actual_precision = GET_MODE_PRECISION (actual_mode);
+  if (actual_precision != TYPE_PRECISION (type1))
+    {
+      tmp = create_tmp_var (build_nonstandard_integer_type
+				(actual_precision, TYPE_UNSIGNED (type1)),
+			    NULL);
+
+      mult_rhs1 = build_and_insert_cast (gsi, loc, tmp, mult_rhs1);
+      mult_rhs2 = build_and_insert_cast (gsi, loc, tmp, mult_rhs2);
+    }
 
-  gimple_assign_set_rhs_with_ops_1 (gsi, wmult_code,
-				    fold_convert (type1, mult_rhs1),
-				    fold_convert (type2, mult_rhs2),
+  gimple_assign_set_rhs_with_ops_1 (gsi, wmult_code, mult_rhs1, mult_rhs2,
 				    add_rhs);
   update_stmt (gsi_stmt (*gsi));
   widen_mul_stats.maccs_inserted++;
@@ -2399,7 +2443,7 @@  execute_optimize_widening_mul (void)
 	      switch (code)
 		{
 		case MULT_EXPR:
-		  if (!convert_mult_to_widen (stmt)
+		  if (!convert_mult_to_widen (stmt, &gsi)
 		      && convert_mult_to_fma (stmt,
 					      gimple_assign_rhs1 (stmt),
 					      gimple_assign_rhs2 (stmt)))