diff mbox

Fold VEC_PERM_EXPR/VEC_INTERLEAVE*EXPR/VEC_EXTRACT*EXPR with VECTOR_CST/CONSTRUCTOR arguments (PR tree-optimization/51074, take 2)

Message ID 20111111152454.GT27242@tyan-ft48-01.lab.bos.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek Nov. 11, 2011, 3:24 p.m. UTC
Hi!

On Thu, Nov 10, 2011 at 12:00:53PM -0800, Richard Henderson wrote:
> VEC_PERM_EXPR is explicitly modulo.  Don't fail, mask.

Here is an updated patch.

In addition to the creation of subroutines this performs the permutation
folding using an unsigned char array for selector and folds VEC_PERM_EXPR
with out of bounds constant indices to masked ones if the first two
arguments aren't constants.

The valid_gimple_rhs_p change was needed because otherwise the VEC_PERM_EXPR
to VEC_PERM_EXPR with masked indices folding otherwise wasn't applied, and
the tree-vect-generic change was needed because if for whatever reason
the VEC_PERM_EXPR folding doesn't happen, lower_vec_perm wasn't masking it
and i386 target hook on it would ICE.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2011-11-11  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/51074
	* fold-const.c (vec_cst_ctor_to_array, fold_vec_perm): New functions.
	(fold_binary_loc): Handle VEC_EXTRACT_EVEN_EXPR,
	VEC_EXTRACT_ODD_EXPR, VEC_INTERLEAVE_HIGH_EXPR and
	VEC_INTERLEAVE_LOW_EXPR with VECTOR_CST or CONSTRUCTOR operands.
	(fold_ternary_loc): Handle VEC_PERM_EXPR with VECTOR_CST or
	CONSTRUCTOR operands.
	* tree-ssa-propagate.c (valid_gimple_rhs_p): Handle ternary
	expressions.
	* tree-vect-generic.c (lower_vec_perm): Mask sel_int elements
	to 0 .. 2 * elements - 1.



	Jakub

Comments

Richard Henderson Nov. 11, 2011, 4:36 p.m. UTC | #1
On 11/11/2011 07:24 AM, Jakub Jelinek wrote:
> 	PR tree-optimization/51074
> 	* fold-const.c (vec_cst_ctor_to_array, fold_vec_perm): New functions.
> 	(fold_binary_loc): Handle VEC_EXTRACT_EVEN_EXPR,
> 	VEC_EXTRACT_ODD_EXPR, VEC_INTERLEAVE_HIGH_EXPR and
> 	VEC_INTERLEAVE_LOW_EXPR with VECTOR_CST or CONSTRUCTOR operands.
> 	(fold_ternary_loc): Handle VEC_PERM_EXPR with VECTOR_CST or
> 	CONSTRUCTOR operands.
> 	* tree-ssa-propagate.c (valid_gimple_rhs_p): Handle ternary
> 	expressions.
> 	* tree-vect-generic.c (lower_vec_perm): Mask sel_int elements
> 	to 0 .. 2 * elements - 1.

Ok, except

> +    elts[i]
> +      = fold_convert (TREE_TYPE (TREE_TYPE (arg)), integer_zero_node);

build_int_cst.


r~
Jakub Jelinek Nov. 11, 2011, 4:41 p.m. UTC | #2
On Fri, Nov 11, 2011 at 08:36:36AM -0800, Richard Henderson wrote:
> Ok, except
> 
> > +    elts[i]
> > +      = fold_convert (TREE_TYPE (TREE_TYPE (arg)), integer_zero_node);
> 
> build_int_cst.

That would work for integer modes only, but here the type can be REAL_TYPE
too.  I think fold_convert from integer_zero_mode to any time is what is
the most common idiom to create zero
INTEGER_CSTs/COMPLEX_CSTs/REAL_CSTs/FIXED_CSTs of any type.

	Jakub
Richard Henderson Nov. 11, 2011, 4:50 p.m. UTC | #3
On 11/11/2011 08:41 AM, Jakub Jelinek wrote:
> On Fri, Nov 11, 2011 at 08:36:36AM -0800, Richard Henderson wrote:
>> Ok, except
>>
>>> +    elts[i]
>>> +      = fold_convert (TREE_TYPE (TREE_TYPE (arg)), integer_zero_node);
>>
>> build_int_cst.
> 
> That would work for integer modes only, but here the type can be REAL_TYPE
> too.  I think fold_convert from integer_zero_mode to any time is what is
> the most common idiom to create zero
> INTEGER_CSTs/COMPLEX_CSTs/REAL_CSTs/FIXED_CSTs of any type.

Ah, right.  Patch is ok as-is.


r~
Richard Biener Nov. 16, 2011, 10:41 a.m. UTC | #4
On Fri, Nov 11, 2011 at 5:41 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Fri, Nov 11, 2011 at 08:36:36AM -0800, Richard Henderson wrote:
>> Ok, except
>>
>> > +    elts[i]
>> > +      = fold_convert (TREE_TYPE (TREE_TYPE (arg)), integer_zero_node);
>>
>> build_int_cst.
>
> That would work for integer modes only, but here the type can be REAL_TYPE
> too.  I think fold_convert from integer_zero_mode to any time is what is
> the most common idiom to create zero
> INTEGER_CSTs/COMPLEX_CSTs/REAL_CSTs/FIXED_CSTs of any type.

build_zero_cst

Richard.

>        Jakub
>
diff mbox

Patch

--- gcc/fold-const.c.jj	2011-11-10 18:08:54.328438535 +0100
+++ gcc/fold-const.c	2011-11-11 11:23:09.393186463 +0100
@@ -9528,6 +9528,86 @@  get_pointer_modulus_and_residue (tree ex
   return 1;
 }
 
+/* Helper function for fold_vec_perm.  Store elements of VECTOR_CST or
+   CONSTRUCTOR ARG into array ELTS and return true if successful.  */
+
+static bool
+vec_cst_ctor_to_array (tree arg, tree *elts)
+{
+  unsigned int nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg)), i;
+
+  if (TREE_CODE (arg) == VECTOR_CST)
+    {
+      tree t;
+
+      for (i = 0, t = TREE_VECTOR_CST_ELTS (arg);
+	   i < nelts && t; i++, t = TREE_CHAIN (t))
+	elts[i] = TREE_VALUE (t);
+      if (t)
+	return false;
+    }
+  else if (TREE_CODE (arg) == CONSTRUCTOR)
+    {
+      constructor_elt *elt;
+
+      FOR_EACH_VEC_ELT (constructor_elt, CONSTRUCTOR_ELTS (arg), i, elt)
+	if (i >= nelts)
+	  return false;
+	else
+	  elts[i] = elt->value;
+    }
+  else
+    return false;
+  for (; i < nelts; i++)
+    elts[i]
+      = fold_convert (TREE_TYPE (TREE_TYPE (arg)), integer_zero_node);
+  return true;
+}
+
+/* Attempt to fold vector permutation of ARG0 and ARG1 vectors using SEL
+   selector.  Return the folded VECTOR_CST or CONSTRUCTOR if successful,
+   NULL_TREE otherwise.  */
+
+static tree
+fold_vec_perm (tree type, tree arg0, tree arg1, const unsigned char *sel)
+{
+  unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i;
+  tree *elts;
+  bool need_ctor = false;
+
+  gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)) == nelts
+	      && TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)) == nelts);
+  if (TREE_TYPE (TREE_TYPE (arg0)) != TREE_TYPE (type)
+      || TREE_TYPE (TREE_TYPE (arg1)) != TREE_TYPE (type))
+    return NULL_TREE;
+
+  elts = XALLOCAVEC (tree, nelts * 3);
+  if (!vec_cst_ctor_to_array (arg0, elts)
+      || !vec_cst_ctor_to_array (arg1, elts + nelts))
+    return NULL_TREE;
+
+  for (i = 0; i < nelts; i++)
+    {
+      if (!CONSTANT_CLASS_P (elts[sel[i]]))
+	need_ctor = true;
+      elts[i + 2 * nelts] = unshare_expr (elts[sel[i]]);
+    }
+
+  if (need_ctor)
+    {
+      VEC(constructor_elt,gc) *v = VEC_alloc (constructor_elt, gc, nelts);
+      for (i = 0; i < nelts; i++)
+	CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[2 * nelts + i]);
+      return build_constructor (type, v);
+    }
+  else
+    {
+      tree vals = NULL_TREE;
+      for (i = 0; i < nelts; i++)
+	vals = tree_cons (NULL_TREE, elts[3 * nelts - i - 1], vals);
+      return build_vector (type, vals);
+    }
+}
 
 /* Fold a binary expression of code CODE and type TYPE with operands
    OP0 and OP1.  LOC is the location of the resulting expression.
@@ -13381,6 +13461,41 @@  fold_binary_loc (location_t loc,
       /* An ASSERT_EXPR should never be passed to fold_binary.  */
       gcc_unreachable ();
 
+    case VEC_EXTRACT_EVEN_EXPR:
+    case VEC_EXTRACT_ODD_EXPR:
+    case VEC_INTERLEAVE_HIGH_EXPR:
+    case VEC_INTERLEAVE_LOW_EXPR:
+      if ((TREE_CODE (arg0) == VECTOR_CST
+	   || TREE_CODE (arg0) == CONSTRUCTOR)
+	  && (TREE_CODE (arg1) == VECTOR_CST
+	      || TREE_CODE (arg1) == CONSTRUCTOR))
+	{
+	  unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i;
+	  unsigned char *sel = XALLOCAVEC (unsigned char, nelts);
+
+	  for (i = 0; i < nelts; i++)
+	    switch (code)
+	      {
+	      case VEC_EXTRACT_EVEN_EXPR:
+		sel[i] = i * 2;
+		break;
+	      case VEC_EXTRACT_ODD_EXPR:
+		sel[i] = i * 2 + 1;
+		break;
+	      case VEC_INTERLEAVE_HIGH_EXPR:
+		sel[i] = (i + nelts) / 2 + ((i & 1) ? nelts : 0);
+		break;
+	      case VEC_INTERLEAVE_LOW_EXPR:
+		sel[i] = i / 2 + ((i & 1) ? nelts : 0);
+		break;
+	      default:
+		gcc_unreachable ();
+	      }
+
+	  return fold_vec_perm (type, arg0, arg1, sel);
+	}
+      return NULL_TREE;
+
     default:
       return NULL_TREE;
     } /* switch (code) */
@@ -13767,6 +13882,55 @@  fold_ternary_loc (location_t loc, enum t
 
       return fold_fma (loc, type, arg0, arg1, arg2);
 
+    case VEC_PERM_EXPR:
+      if (TREE_CODE (arg2) == VECTOR_CST)
+	{
+	  unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i;
+	  unsigned char *sel = XALLOCAVEC (unsigned char, nelts);
+	  tree t;
+	  bool need_mask_canon = false;
+
+	  gcc_assert (nelts == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg2)));
+	  for (i = 0, t = TREE_VECTOR_CST_ELTS (arg2);
+	       i < nelts && t; i++, t = TREE_CHAIN (t))
+	    {
+	      if (TREE_CODE (TREE_VALUE (t)) != INTEGER_CST)
+		return NULL_TREE;
+
+	      sel[i] = TREE_INT_CST_LOW (TREE_VALUE (t)) & (2 * nelts - 1);
+	      if (TREE_INT_CST_HIGH (TREE_VALUE (t))
+		  || ((unsigned HOST_WIDE_INT)
+		      TREE_INT_CST_LOW (TREE_VALUE (t)) != sel[i]))
+		need_mask_canon = true;
+	    }
+	  if (t)
+	    return NULL_TREE;
+	  for (; i < nelts; i++)
+	    sel[i] = 0;
+
+	  if ((TREE_CODE (arg0) == VECTOR_CST
+	       || TREE_CODE (arg0) == CONSTRUCTOR)
+	      && (TREE_CODE (arg1) == VECTOR_CST
+		  || TREE_CODE (arg1) == CONSTRUCTOR))
+	    {
+	      t = fold_vec_perm (type, arg0, arg1, sel);
+	      if (t != NULL_TREE)
+		return t;
+	    }
+
+	  if (need_mask_canon && arg2 == op2)
+	    {
+	      tree list = NULL_TREE, eltype = TREE_TYPE (TREE_TYPE (arg2));
+	      for (i = 0; i < nelts; i++)
+		list = tree_cons (NULL_TREE,
+				  build_int_cst (eltype, sel[nelts - i - 1]),
+				  list);
+	      t = build_vector (TREE_TYPE (arg2), list);
+	      return build3_loc (loc, VEC_PERM_EXPR, type, op0, op1, t);
+	    }
+	}
+      return NULL_TREE;
+
     default:
       return NULL_TREE;
     } /* switch (code) */
--- gcc/tree-ssa-propagate.c.jj	2011-11-10 14:57:09.000000000 +0100
+++ gcc/tree-ssa-propagate.c	2011-11-11 12:08:56.949178176 +0100
@@ -602,6 +602,16 @@  valid_gimple_rhs_p (tree expr)
           break;
 
 	default:
+	  if (get_gimple_rhs_class (code) == GIMPLE_TERNARY_RHS)
+	    {
+	      if (((code == VEC_COND_EXPR || code == COND_EXPR)
+		   ? !is_gimple_condexpr (TREE_OPERAND (expr, 0))
+		   : !is_gimple_val (TREE_OPERAND (expr, 0)))
+		  || !is_gimple_val (TREE_OPERAND (expr, 1))
+		  || !is_gimple_val (TREE_OPERAND (expr, 2)))
+		return false;
+	      break;
+	    }
 	  return false;
 	}
       break;
--- gcc/tree-vect-generic.c.jj	2011-10-27 18:39:43.000000000 +0200
+++ gcc/tree-vect-generic.c	2011-11-11 11:41:39.775591369 +0100
@@ -649,7 +649,7 @@  lower_vec_perm (gimple_stmt_iterator *gs
       tree vals = TREE_VECTOR_CST_ELTS (mask);
 
       for (i = 0; i < elements; ++i, vals = TREE_CHAIN (vals))
-	sel_int[i] = TREE_INT_CST_LOW (TREE_VALUE (vals));
+	sel_int[i] = TREE_INT_CST_LOW (TREE_VALUE (vals)) & (2 * elements - 1);
 
       if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
 	return;