diff mbox

[3/6] Implement interleave via permutation.

Message ID 1319501849-12807-4-git-send-email-rth@redhat.com
State New
Headers show

Commit Message

Richard Henderson Oct. 25, 2011, 12:17 a.m. UTC
From: Richard Henderson <rth@twiddle.net>

---
 gcc/expr.c                |   20 +-------
 gcc/optabs.c              |  116 +++++++++++++++++++++++++++++++++++++++++++++
 gcc/optabs.h              |    3 +
 gcc/tree-vect-data-refs.c |   80 ++++++++++++-------------------
 gcc/tree-vect-generic.c   |    9 ++++
 5 files changed, 159 insertions(+), 69 deletions(-)

Comments

Hans-Peter Nilsson Oct. 28, 2011, 9:54 p.m. UTC | #1
On Mon, 24 Oct 2011, Richard Henderson wrote:

> From: Richard Henderson <rth@twiddle.net>

> +  /* Certain vector operations can be implemented with vector permutation.  */
> +  if (VECTOR_MODE_P (mode))
> +    {
> +      enum tree_code tcode = ERROR_MARK;
> +      rtx sel;
> +
> +      if (binoptab == vec_interleave_high_optab)
> +	tcode = VEC_INTERLEAVE_HIGH_EXPR;
> +      else if (binoptab == vec_interleave_low_optab)
> +	tcode = VEC_INTERLEAVE_LOW_EXPR;
> +      else if (binoptab == vec_extract_even_optab)
> +	tcode = VEC_EXTRACT_EVEN_EXPR;
> +      else if (binoptab == vec_extract_odd_optab)
> +	tcode = VEC_EXTRACT_ODD_EXPR;

Also VEC_UNPACK_HI_EXPR, VEC_UNPACK_LO_EXPR, and
VEC_PACK_TRUNC_EXPR to mention some.

brgds, H-P
Richard Henderson Oct. 28, 2011, 10:18 p.m. UTC | #2
On 10/28/2011 02:54 PM, Hans-Peter Nilsson wrote:
> Also VEC_UNPACK_HI_EXPR, VEC_UNPACK_LO_EXPR, and
> VEC_PACK_TRUNC_EXPR to mention some.

VEC_PACK_TRUNC_EXPR requires extra mode changes.  I was intending to get to this one too, but have gotten side-tracked with other work this week. 

VEC_UNPACK_HI_EXPR and VEC_UNPACK_LO_EXPR need even more -- at least for signed -- a source of sign-extended data.  Either a LT comparison that produces -1 (common, but not universal) or an ASHIFTRT with a constant.  We're definitely in the land of diminishing returns here.


r~
Hans-Peter Nilsson Oct. 28, 2011, 10:39 p.m. UTC | #3
On Fri, 28 Oct 2011, Richard Henderson wrote:
> VEC_UNPACK_HI_EXPR and VEC_UNPACK_LO_EXPR need even more -- at
> least for signed -- a source of sign-extended data.  Either a LT
> comparison that produces -1 (common, but not universal) or an
> ASHIFTRT with a constant.  We're definitely in the land of
> diminishing returns here.

Right, I only meant to refer to the zero-extending variant.

brgds, H-P
diff mbox

Patch

diff --git a/gcc/expr.c b/gcc/expr.c
index 121db5e..84cfe5c 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -8667,27 +8667,9 @@  expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
 
     case VEC_EXTRACT_EVEN_EXPR:
     case VEC_EXTRACT_ODD_EXPR:
-      {
-        expand_operands (treeop0,  treeop1,
-                         NULL_RTX, &op0, &op1, EXPAND_NORMAL);
-        this_optab = optab_for_tree_code (code, type, optab_default);
-        temp = expand_binop (mode, this_optab, op0, op1, target, unsignedp,
-                             OPTAB_WIDEN);
-        gcc_assert (temp);
-        return temp;
-      }
-
     case VEC_INTERLEAVE_HIGH_EXPR:
     case VEC_INTERLEAVE_LOW_EXPR:
-      {
-        expand_operands (treeop0,  treeop1,
-                         NULL_RTX, &op0, &op1, EXPAND_NORMAL);
-        this_optab = optab_for_tree_code (code, type, optab_default);
-        temp = expand_binop (mode, this_optab, op0, op1, target, unsignedp,
-                             OPTAB_WIDEN);
-        gcc_assert (temp);
-        return temp;
-      }
+      goto binop;
 
     case VEC_LSHIFT_EXPR:
     case VEC_RSHIFT_EXPR:
diff --git a/gcc/optabs.c b/gcc/optabs.c
index 26669f4..9afc911 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -1606,6 +1606,30 @@  expand_binop (enum machine_mode mode, optab binoptab, rtx op0, rtx op1,
 	}
     }
 
+  /* Certain vector operations can be implemented with vector permutation.  */
+  if (VECTOR_MODE_P (mode))
+    {
+      enum tree_code tcode = ERROR_MARK;
+      rtx sel;
+
+      if (binoptab == vec_interleave_high_optab)
+	tcode = VEC_INTERLEAVE_HIGH_EXPR;
+      else if (binoptab == vec_interleave_low_optab)
+	tcode = VEC_INTERLEAVE_LOW_EXPR;
+      else if (binoptab == vec_extract_even_optab)
+	tcode = VEC_EXTRACT_EVEN_EXPR;
+      else if (binoptab == vec_extract_odd_optab)
+	tcode = VEC_EXTRACT_ODD_EXPR;
+
+      if (tcode != ERROR_MARK
+	  && can_vec_perm_for_code_p (tcode, mode, &sel))
+	{
+	  temp = expand_vec_perm (mode, op0, op1, sel, target);
+	  gcc_assert (temp != NULL);
+	  return temp;
+	}
+    }
+
   /* Look for a wider mode of the same class for which we think we
      can open-code the operation.  Check for a widening multiply at the
      wider mode as well.  */
@@ -6751,6 +6775,98 @@  can_vec_perm_p (enum machine_mode mode, bool variable,
   return true;
 }
 
+/* Return true if we can implement VEC_INTERLEAVE_{HIGH,LOW}_EXPR or
+   VEC_EXTRACT_{EVEN,ODD}_EXPR with VEC_PERM_EXPR for this target.
+   If PSEL is non-null, return the selector for the permutation.  */
+
+bool
+can_vec_perm_for_code_p (enum tree_code code, enum machine_mode mode,
+			 rtx *psel)
+{
+  bool need_sel_test = false;
+  enum insn_code icode;
+
+  /* If the target doesn't implement a vector mode for the vector type,
+     then no operations are supported.  */
+  if (!VECTOR_MODE_P (mode))
+    return false;
+
+  /* Do as many tests as possible without requiring the selector.  */
+  icode = direct_optab_handler (vec_perm_optab, mode);
+  if (icode == CODE_FOR_nothing && GET_MODE_INNER (mode) != QImode)
+    {
+      enum machine_mode qimode
+	= mode_for_vector (QImode, GET_MODE_SIZE (mode));
+      if (VECTOR_MODE_P (qimode))
+	icode = direct_optab_handler (vec_perm_optab, qimode);
+    }
+  if (icode == CODE_FOR_nothing)
+    {
+      icode = direct_optab_handler (vec_perm_const_optab, mode);
+      if (icode != CODE_FOR_nothing
+	  && targetm.vectorize.vec_perm_const_ok != NULL)
+	need_sel_test = true;
+    }
+  if (icode == CODE_FOR_nothing)
+    return false;
+
+  /* If the selector is required, or if we need to test it, build it.  */
+  if (psel || need_sel_test)
+    {
+      int i, nelt = GET_MODE_NUNITS (mode), alt = 0;
+      unsigned char *data = XALLOCAVEC (unsigned char, nelt);
+
+      switch (code)
+	{
+	case VEC_EXTRACT_ODD_EXPR:
+	  alt = 1;
+	  /* FALLTHRU */
+	case VEC_EXTRACT_EVEN_EXPR:
+	  for (i = 0; i < nelt; ++i)
+	    data[i] = i * 2 + alt;
+	  break;
+
+	case VEC_INTERLEAVE_HIGH_EXPR:
+	  alt = nelt / 2;
+	  /* FALLTHRU */
+	case VEC_INTERLEAVE_LOW_EXPR:
+	  for (i = 0; i < nelt / 2; ++i)
+	    {
+	      data[i * 2] = i + alt;
+	      data[i * 2 + 1] = i + nelt + alt;
+	    }
+	  break;
+
+	default:
+	  gcc_unreachable ();
+	}
+
+      if (need_sel_test
+	  && !targetm.vectorize.vec_perm_const_ok (mode, data))
+	return false;
+
+      if (psel)
+	{
+	  rtvec vec = rtvec_alloc (nelt);
+	  enum machine_mode imode = mode;
+
+	  for (i = 0; i < nelt; ++i)
+	    RTVEC_ELT (vec, i) = GEN_INT (data[i]);
+
+	  if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
+	    {
+	      imode = int_mode_for_mode (GET_MODE_INNER (mode));
+	      imode = mode_for_vector (imode, nelt);
+	      gcc_assert (GET_MODE_CLASS (imode) == MODE_VECTOR_INT);
+	    }
+
+	  *psel = gen_rtx_CONST_VECTOR (imode, vec);
+	}
+    }
+
+  return true;
+}
+
 /* A subroutine of expand_vec_perm for expanding one vec_perm insn.  */
 
 static rtx
diff --git a/gcc/optabs.h b/gcc/optabs.h
index 9e3c5b0..057f653 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -903,6 +903,9 @@  extern rtx expand_vec_shift_expr (sepops, rtx);
 /* Return tree if target supports vector operations for VEC_PERM_EXPR.  */
 extern bool can_vec_perm_p (enum machine_mode, bool, const unsigned char *);
 
+/* Return true if target can implement the tree code via VEC_PERM_EXPR.  */
+extern bool can_vec_perm_for_code_p (enum tree_code, enum machine_mode, rtx *);
+
 /* Generate code for VEC_PERM_EXPR.  */
 extern rtx expand_vec_perm (enum machine_mode, rtx, rtx, rtx, rtx);
 
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 4b6164a..a239216 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -3467,7 +3467,7 @@  vect_create_destination_var (tree scalar_dest, tree vectype)
 bool
 vect_strided_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
 {
-  optab interleave_high_optab, interleave_low_optab;
+  optab ih_optab, il_optab;
   enum machine_mode mode;
 
   mode = TYPE_MODE (vectype);
@@ -3482,26 +3482,22 @@  vect_strided_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
     }
 
   /* Check that the operation is supported.  */
-  interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
-					       vectype, optab_default);
-  interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
-					      vectype, optab_default);
-  if (!interleave_high_optab || !interleave_low_optab)
-    {
-      if (vect_print_dump_info (REPORT_DETAILS))
-	fprintf (vect_dump, "no optab for interleave.");
-      return false;
-    }
+  ih_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
+				  vectype, optab_default);
+  il_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
+				  vectype, optab_default);
+  if (il_optab && ih_optab
+      && optab_handler (ih_optab, mode) != CODE_FOR_nothing
+      && optab_handler (il_optab, mode) != CODE_FOR_nothing)
+    return true;
 
-  if (optab_handler (interleave_high_optab, mode) == CODE_FOR_nothing
-      || optab_handler (interleave_low_optab, mode) == CODE_FOR_nothing)
-    {
-      if (vect_print_dump_info (REPORT_DETAILS))
-	fprintf (vect_dump, "interleave op not supported by target.");
-      return false;
-    }
+  if (can_vec_perm_for_code_p (VEC_INTERLEAVE_HIGH_EXPR, mode, NULL)
+      && can_vec_perm_for_code_p (VEC_INTERLEAVE_LOW_EXPR, mode, NULL))
+    return true;
 
-  return true;
+  if (vect_print_dump_info (REPORT_DETAILS))
+    fprintf (vect_dump, "interleave op not supported by target.");
+  return false;
 }
 
 
@@ -3923,7 +3919,7 @@  vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
 bool
 vect_strided_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
 {
-  optab perm_even_optab, perm_odd_optab;
+  optab ee_optab, eo_optab;
   enum machine_mode mode;
 
   mode = TYPE_MODE (vectype);
@@ -3937,38 +3933,22 @@  vect_strided_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
       return false;
     }
 
-  perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
-					 optab_default);
-  if (!perm_even_optab)
-    {
-      if (vect_print_dump_info (REPORT_DETAILS))
-	fprintf (vect_dump, "no optab for perm_even.");
-      return false;
-    }
-
-  if (optab_handler (perm_even_optab, mode) == CODE_FOR_nothing)
-    {
-      if (vect_print_dump_info (REPORT_DETAILS))
-	fprintf (vect_dump, "perm_even op not supported by target.");
-      return false;
-    }
+  ee_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR,
+				  vectype, optab_default);
+  eo_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR,
+				  vectype, optab_default);
+  if (ee_optab && eo_optab
+      && optab_handler (ee_optab, mode) != CODE_FOR_nothing
+      && optab_handler (eo_optab, mode) != CODE_FOR_nothing)
+    return true;
 
-  perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
-					optab_default);
-  if (!perm_odd_optab)
-    {
-      if (vect_print_dump_info (REPORT_DETAILS))
-	fprintf (vect_dump, "no optab for perm_odd.");
-      return false;
-    }
+  if (can_vec_perm_for_code_p (VEC_EXTRACT_EVEN_EXPR, mode, NULL)
+      && can_vec_perm_for_code_p (VEC_EXTRACT_ODD_EXPR, mode, NULL))
+    return true;
 
-  if (optab_handler (perm_odd_optab, mode) == CODE_FOR_nothing)
-    {
-      if (vect_print_dump_info (REPORT_DETAILS))
-	fprintf (vect_dump, "perm_odd op not supported by target.");
-      return false;
-    }
-  return true;
+  if (vect_print_dump_info (REPORT_DETAILS))
+    fprintf (vect_dump, "extract even/odd not supported by target");
+  return false;
 }
 
 /* Return TRUE if vec_load_lanes is available for COUNT vectors of
diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c
index 42ce2e3..ccbec6e 100644
--- a/gcc/tree-vect-generic.c
+++ b/gcc/tree-vect-generic.c
@@ -771,6 +771,15 @@  expand_vector_operations_1 (gimple_stmt_iterator *gsi)
       || code == VIEW_CONVERT_EXPR)
     return;
 
+  /* These are only created by the vectorizer, after having queried
+     the target support.  It's more than just looking at the optab,
+     and there's no need to do it again.  */
+  if (code == VEC_INTERLEAVE_HIGH_EXPR
+      || code == VEC_INTERLEAVE_LOW_EXPR
+      || code == VEC_EXTRACT_EVEN_EXPR
+      || code == VEC_EXTRACT_ODD_EXPR)
+    return;
+
   gcc_assert (code != CONVERT_EXPR);
 
   /* The signedness is determined from input argument.  */