diff mbox

[RFC] : Try and vectorize with shift for mult expr with power 2 integer constant.

Message ID 7794A52CE4D579448B959EED7DD0A4723DD1F787@satlexdag06.amd.com
State New
Headers show

Commit Message

Kumar, Venkataramanan July 28, 2015, 3:12 p.m. UTC
Hi Richard,

For the AArch64 target, I was trying to vectorize the expression "arr[i]=arr[i]*4;" via vector shift instructions, since the target does not have vector mults.

unsigned  long int __attribute__ ((aligned (64)))arr[100];
int i;
#if 1
/* Reference case: shifts each of arr[0] .. arr[99] left by 2 bits in place,
   using the file-scope counter i as the loop index.  */
void test_vector_shifts()
{
        for(i=0; i<=99;i++)
        arr[i]=arr[i]<<2;
}
#endif

/* Case under discussion: multiplies each of arr[0] .. arr[99] by the constant
   4 in place, using the file-scope counter i as the loop index.  This is the
   multiplication the email wants vectorized as a shift.  */
void test_vectorshift_via_mul()
{
        for(i=0; i<=99;i++)
        arr[i]=arr[i]*4;

}

I found a similar PR and your comments on it: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65952#c6.
Based on that and the IRC discussion I had with you, I added a vectorizer pattern recognizer that transforms mults to shifts.  The vectorizer is now able to generate vector shifts for the above test case.
The PR case also gets vectorized: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65952#c10.

This is just an initial patch and tries to optimize integer type power 2 constants.  I wanted to get feedback on this .  I bootstrapped and reg tested on aarch64-none-linux-gnu .

Regards,
Venkat.

Comments

Jakub Jelinek July 28, 2015, 7:53 p.m. UTC | #1
Hi!

> This is just an initial patch and tries to optimize integer type power 2
> constants.  I wanted to get feedback on this .  I bootstrapped and reg
> tested on aarch64-none-linux-gnu .

Thanks for working on it.
ChangeLog entry for the patch is missing, probably also some testcases.

> @@ -90,6 +94,7 @@ static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
>  	vect_recog_rotate_pattern,
>  	vect_recog_vector_vector_shift_pattern,
>  	vect_recog_divmod_pattern,
> +        vect_recog_multconst_pattern,
>  	vect_recog_mixed_size_cond_pattern,
>  	vect_recog_bool_pattern};

Please watch formatting, the other lines are tab indented, so please use a
tab rather than 8 spaces.

> @@ -2147,6 +2152,87 @@ vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
>    return pattern_stmt;
>  }
>  

Function comment is missing here.

> +static gimple
> +vect_recog_multconst_pattern (vec<gimple> *stmts,
> +                           tree *type_in, tree *type_out)

About the function name, wonder if just vect_recog_mult_pattern wouldn't be
enough.

> +  rhs_code = gimple_assign_rhs_code (last_stmt);
> +  switch (rhs_code)
> +    {
> +    case MULT_EXPR:
> +      break;
> +    default:
> +      return NULL;
> +    }

This looks too weird, I'd just do
  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;
(you handle just one pattern).

> +  /* If the target can handle vectorized multiplication natively,
> +     don't attempt to optimize this.  */
> +  optab = optab_for_tree_code (rhs_code, vectype, optab_default);

Supposedly you can use MULT_EXPR directly here.

> +  /* If target cannot handle vector left shift then we cannot 
> +     optimize and bail out.  */ 
> +  optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
> +  if (!optab
> +      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
> +        return NULL;
> +
> +  if (integer_pow2p (oprnd1))
> +    {
> +      /* Pattern detected.  */
> +      if (dump_enabled_p ())
> +	dump_printf_loc (MSG_NOTE, vect_location,
> +			 "vect_recog_multconst_pattern: detected:\n");
> +
> +      tree shift;
> +      shift = build_int_cst (itype, tree_log2 (oprnd1));
> +      pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
> +					  LSHIFT_EXPR, oprnd0, shift);
> +      if (dump_enabled_p ())
> +	dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
> +                              0);
> +      stmts->safe_push (last_stmt);
> +      *type_in = vectype;
> +      *type_out = vectype;
> +      return pattern_stmt;
> +    } 

Trailing whitespace.
The integer_pow2p case (have you checked signed multiply by INT_MIN?)
is only one of the cases you can actually handle, you can look at
expand_mult for many other cases - e.g. multiplication by negated powers of
2, or call choose_mult_variant and handle whatever it returns.

	Jakub
diff mbox

Patch

diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index f034635..948203d 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -76,6 +76,10 @@  static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
 						      tree *, tree *);
 static gimple vect_recog_divmod_pattern (vec<gimple> *,
 					 tree *, tree *);
+
+static gimple vect_recog_multconst_pattern (vec<gimple> *,
+                                         tree *, tree *);
+
 static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
 						  tree *, tree *);
 static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
@@ -90,6 +94,7 @@  static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
 	vect_recog_rotate_pattern,
 	vect_recog_vector_vector_shift_pattern,
 	vect_recog_divmod_pattern,
+        vect_recog_multconst_pattern,
 	vect_recog_mixed_size_cond_pattern,
 	vect_recog_bool_pattern};
 
@@ -2147,6 +2152,87 @@  vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
   return pattern_stmt;
 }
 
+static gimple
+vect_recog_multconst_pattern (vec<gimple> *stmts,
+                           tree *type_in, tree *type_out)
+{
+  gimple last_stmt = stmts->pop ();
+  tree oprnd0, oprnd1, vectype, itype;
+  gimple pattern_stmt;
+  enum tree_code rhs_code;
+  optab optab;
+  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
+
+  if (!is_gimple_assign (last_stmt))
+    return NULL;
+
+  rhs_code = gimple_assign_rhs_code (last_stmt);
+  switch (rhs_code)
+    {
+    case MULT_EXPR:
+      break;
+    default:
+      return NULL;
+    }
+
+  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
+    return NULL;
+
+  oprnd0 = gimple_assign_rhs1 (last_stmt);
+  oprnd1 = gimple_assign_rhs2 (last_stmt);
+  itype = TREE_TYPE (oprnd0);
+
+  if (TREE_CODE (oprnd0) != SSA_NAME
+      || TREE_CODE (oprnd1) != INTEGER_CST
+      || TREE_CODE (itype) != INTEGER_TYPE
+      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
+    return NULL;
+
+  vectype = get_vectype_for_scalar_type (itype);
+  if (vectype == NULL_TREE)
+    return NULL;
+
+  /* If the target can handle vectorized multiplication natively,
+     don't attempt to optimize this.  */
+  optab = optab_for_tree_code (rhs_code, vectype, optab_default);
+  if (optab != unknown_optab)
+    {
+      machine_mode vec_mode = TYPE_MODE (vectype);
+      int icode = (int) optab_handler (optab, vec_mode);
+      if (icode != CODE_FOR_nothing)
+        return NULL;
+    }
+
+  /* If target cannot handle vector left shift then we cannot 
+     optimize and bail out.  */ 
+  optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
+  if (!optab
+      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
+        return NULL;
+
+  if (integer_pow2p (oprnd1))
+    {
+      /* Pattern detected.  */
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE, vect_location,
+			 "vect_recog_multconst_pattern: detected:\n");
+
+      tree shift;
+      shift = build_int_cst (itype, tree_log2 (oprnd1));
+      pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
+					  LSHIFT_EXPR, oprnd0, shift);
+      if (dump_enabled_p ())
+	dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
+                              0);
+      stmts->safe_push (last_stmt);
+      *type_in = vectype;
+      *type_out = vectype;
+      return pattern_stmt;
+    } 
+
+  return NULL;
+}
+
 /* Detect a signed division by a constant that wouldn't be
    otherwise vectorized:
 
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 48c1f8d..833fe4b 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1131,7 +1131,7 @@  extern void vect_slp_transform_bb (basic_block);
    Additional pattern recognition functions can (and will) be added
    in the future.  */
 typedef gimple (* vect_recog_func_ptr) (vec<gimple> *, tree *, tree *);
-#define NUM_PATTERNS 12
+#define NUM_PATTERNS 13
 void vect_pattern_recog (loop_vec_info, bb_vec_info);
 
 /* In tree-vectorizer.c.  */