Patchwork Use builtin_mul_widen_even/odd in expand_vector_divmod

login
register
mail settings
Submitter Richard Henderson
Date June 29, 2012, 4:50 p.m.
Message ID <4FEDDCE7.1000708@redhat.com>
Download mbox | patch
Permalink /patch/168209/
State New
Headers show

Comments

Richard Henderson - June 29, 2012, 4:50 p.m.
We use it everywhere else, but it got forgotten here.  Saves two shuffles on Altivec.

Although with all of this duplication it makes me wonder if we shouldn't just give up
on the idea of auto-generating MULT_HIGHPART from other operations (particularly given
the extra type frobbing involved).  In some sense it'd be easier to just add a couple
of lines to the backends to implement the operation and be done with it.  Thoughts?

That said, tested on ppc64-linux.  Committed.


r~

	* tree-vect-generic.c: Include target.h.
	(expand_vector_divmod): Use builtin_mul_widen_even/odd if supported.
	* Makefile.in (tree-vect-generic.o): Update.
H.J. Lu - June 29, 2012, 6:33 p.m.
On Fri, Jun 29, 2012 at 9:50 AM, Richard Henderson <rth@redhat.com> wrote:
> We use it everywhere else, but it got forgotten here.  Saves two shuffles on Altivec.
>
> Although with all of this duplication it makes me wonder if we shouldn't just give up
> on the idea of auto-generating MULT_HIGHPART from other operations (particularly given
> the extra type frobbing involved).  In some sense it'd be easier to just add a couple
> of lines to the backends to implement the operation and be done with it.  Thoughts?
>
> That said, tested on ppc64-linux.  Committed.

It caused:


FAIL: gcc.c-torture/execute/pr53645.c execution,  -O1
FAIL: gcc.c-torture/execute/pr53645.c execution,  -O2
FAIL: gcc.c-torture/execute/pr53645.c execution,  -O2 -flto -fno-use-linker-plugin -flto-partition=none
FAIL: gcc.c-torture/execute/pr53645.c execution,  -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects
FAIL: gcc.c-torture/execute/pr53645.c execution,  -O3 -fomit-frame-pointer
FAIL: gcc.c-torture/execute/pr53645.c execution,  -O3 -fomit-frame-pointer -funroll-all-loops -finline-functions
FAIL: gcc.c-torture/execute/pr53645.c execution,  -O3 -fomit-frame-pointer -funroll-loops
FAIL: gcc.c-torture/execute/pr53645.c execution,  -O3 -g
FAIL: gcc.c-torture/execute/pr53645.c execution,  -Os

on Linux/ia32 configured with

--with-arch=corei7 --with-cpu=corei7 --with-fpmath=sse

Patch

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index afea4f3..9955fd7 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -3036,7 +3036,7 @@  tree-vect-generic.o : tree-vect-generic.c $(CONFIG_H) $(SYSTEM_H) $(TREE_H) \
     $(TM_H) $(TREE_FLOW_H) $(GIMPLE_H) tree-iterator.h $(TREE_PASS_H) \
     $(FLAGS_H) $(OPTABS_H) $(MACHMODE_H) $(EXPR_H) \
     langhooks.h $(FLAGS_H) $(DIAGNOSTIC_H) gt-tree-vect-generic.h $(GGC_H) \
-    coretypes.h insn-codes.h $(DIAGNOSTIC_H)
+    coretypes.h insn-codes.h $(DIAGNOSTIC_H) $(TARGET_H)
 df-core.o : df-core.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
    insn-config.h $(RECOG_H) $(FUNCTION_H) $(REGS_H) alloc-pool.h \
    hard-reg-set.h $(BASIC_BLOCK_H) $(DF_H) $(BITMAP_H) sbitmap.h $(TIMEVAR_H) \
diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c
index c83db5e..89d8bae 100644
--- a/gcc/tree-vect-generic.c
+++ b/gcc/tree-vect-generic.c
@@ -31,6 +31,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "flags.h"
 #include "ggc.h"
 #include "diagnostic.h"
+#include "target.h"
 
 /* Need to include rtl.h, expr.h, etc. for optabs.  */
 #include "expr.h"
@@ -456,7 +457,7 @@  expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
   optab op;
   tree *vec;
   unsigned char *sel = NULL;
-  tree cur_op, mhi, mlo, mulcst, perm_mask, wider_type, tem;
+  tree cur_op, mhi, mlo, mulcst, perm_mask, wider_type, tem, decl_e, decl_o;
 
   if (prec > HOST_BITS_PER_WIDE_INT)
     return NULL_TREE;
@@ -745,32 +746,52 @@  expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
     return NULL_TREE;
 
   op = optab_for_tree_code (MULT_HIGHPART_EXPR, type, optab_default);
-  if (op != NULL
-      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
-    wider_type = NULL_TREE;
+  if (op != NULL && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
+    wider_type = decl_e = decl_o = NULL_TREE;
   else
     {
-      op = optab_for_tree_code (VEC_WIDEN_MULT_LO_EXPR, type, optab_default);
-      if (op == NULL
-	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
-	return NULL_TREE;
-      op = optab_for_tree_code (VEC_WIDEN_MULT_HI_EXPR, type, optab_default);
-      if (op == NULL
-	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
-	return NULL_TREE;
-      sel = XALLOCAVEC (unsigned char, nunits);
-      for (i = 0; i < nunits; i++)
-	sel[i] = 2 * i + (BYTES_BIG_ENDIAN ? 0 : 1);
-      if (!can_vec_perm_p (TYPE_MODE (type), false, sel))
-	return NULL_TREE;
-      wider_type
-	= build_vector_type (build_nonstandard_integer_type (prec * 2,
-							     unsignedp),
-			     nunits / 2);
+      wider_type = build_nonstandard_integer_type (prec * 2, unsignedp),
+      wider_type = build_vector_type (wider_type, nunits / 2);
       if (GET_MODE_CLASS (TYPE_MODE (wider_type)) != MODE_VECTOR_INT
 	  || GET_MODE_BITSIZE (TYPE_MODE (wider_type))
 	     != GET_MODE_BITSIZE (TYPE_MODE (type)))
 	return NULL_TREE;
+
+      sel = XALLOCAVEC (unsigned char, nunits);
+
+      if (targetm.vectorize.builtin_mul_widen_even
+	  && targetm.vectorize.builtin_mul_widen_odd
+	  && (decl_e = targetm.vectorize.builtin_mul_widen_even (type))
+	  && (decl_o = targetm.vectorize.builtin_mul_widen_odd (type))
+	  && (TYPE_MODE (TREE_TYPE (TREE_TYPE (decl_e)))
+	      == TYPE_MODE (wider_type)))
+	{
+	  for (i = 0; i < nunits; i++)
+	    sel[i] = !BYTES_BIG_ENDIAN + (i & ~1) + ((i & 1) ? nunits : 0);
+	  if (!can_vec_perm_p (TYPE_MODE (wider_type), false, sel))
+	    decl_e = decl_o = NULL_TREE;
+	}
+      else
+	decl_e = decl_o = NULL_TREE;
+
+      if (decl_e == NULL_TREE)
+	{
+	  op = optab_for_tree_code (VEC_WIDEN_MULT_LO_EXPR,
+				    type, optab_default);
+	  if (op == NULL
+	      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
+	    return NULL_TREE;
+	  op = optab_for_tree_code (VEC_WIDEN_MULT_HI_EXPR,
+				    type, optab_default);
+	  if (op == NULL
+	      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
+	    return NULL_TREE;
+
+	  for (i = 0; i < nunits; i++)
+	    sel[i] = 2 * i + (BYTES_BIG_ENDIAN ? 0 : 1);
+	  if (!can_vec_perm_p (TYPE_MODE (type), false, sel))
+	    return NULL_TREE;
+	}
     }
 
   cur_op = op0;
@@ -816,11 +837,34 @@  expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
       for (i = 0; i < nunits; i++)
 	vec[i] = build_int_cst (TREE_TYPE (type), sel[i]);
       perm_mask = build_vector (type, vec);
-      mhi = gimplify_build2 (gsi, VEC_WIDEN_MULT_HI_EXPR, wider_type,
-			     cur_op, mulcst);
+
+      if (decl_e != NULL_TREE)
+	{
+	  gimple call;
+
+	  call = gimple_build_call (decl_e, 2, cur_op, mulcst);
+	  mhi = create_tmp_reg (wider_type, NULL);
+	  add_referenced_var (mhi);
+	  mhi = make_ssa_name (mhi, call);
+	  gimple_call_set_lhs (call, mhi);
+	  gsi_insert_seq_before (gsi, call, GSI_SAME_STMT);
+
+	  call = gimple_build_call (decl_o, 2, cur_op, mulcst);
+	  mlo = create_tmp_reg (wider_type, NULL);
+	  add_referenced_var (mlo);
+	  mlo = make_ssa_name (mlo, call);
+	  gimple_call_set_lhs (call, mlo);
+	  gsi_insert_seq_before (gsi, call, GSI_SAME_STMT);
+	}
+      else
+	{
+	  mhi = gimplify_build2 (gsi, VEC_WIDEN_MULT_HI_EXPR, wider_type,
+				 cur_op, mulcst);
+	  mlo = gimplify_build2 (gsi, VEC_WIDEN_MULT_LO_EXPR, wider_type,
+				 cur_op, mulcst);
+	}
+
       mhi = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, mhi);
-      mlo = gimplify_build2 (gsi, VEC_WIDEN_MULT_LO_EXPR, wider_type,
-			     cur_op, mulcst);
       mlo = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, mlo);
       if (BYTES_BIG_ENDIAN)
 	cur_op = gimplify_build3 (gsi, VEC_PERM_EXPR, type, mhi, mlo,