diff mbox series

Move VEC_PERM_EXPR folding to match.pd

Message ID alpine.LSU.2.20.1905171530070.10704@zhemvz.fhfr.qr
State New
Headers show
Series Move VEC_PERM_EXPR folding to match.pd | expand

Commit Message

Richard Biener May 17, 2019, 1:31 p.m. UTC
This moves things from fold-const.c to match.pd.

Bootstrap / regtest running on x86_64-unknown-linux-gnu.

Richard.

2019-05-17  Richard Biener  <rguenther@suse.de>

	* gimple-match-head.c: Include vec-perm-indices.h.
	* generic-match-head.c: Likewise.
	* fold-const.h (fold_vec_perm): Declare when vec-perm-indices.h
	is included.
	* fold-const.c (fold_vec_perm): Export.
	(fold_ternary_loc): Move non-constant folding of VEC_PERM_EXPR...
	(match.pd): ...here.

Comments

Richard Biener May 20, 2019, 8:52 a.m. UTC | #1
On Fri, 17 May 2019, Richard Biener wrote:

> 
> This moves things from fold-const.c to match.pd.

So there's an issue that was appearantly side-stepped in the GENERIC
folding by not re-folding the result.

        /* Generate a canonical form of the selector.  */
        if (sel.encoding () != builder)

"spuriously" returns true for some cases where we end up with
the same tree representation of the selector (I see
m_npatterns == 4 vs. 1 and m_nelts_per_pattern 1 vs. 3).

So I either have to double-check whether the re-created
op2 is really different from the original one or remove
those "premature" checks from vector_builder::operator==
where we then no longer can compare encoded elts...

Testing the variant with changed = operand_equal_p (...).

Richard.

> Bootstrap / regtest running on x86_64-unknown-linux-gnu.
> 
> Richard.
> 
> 2019-05-17  Richard Biener  <rguenther@suse.de>
> 
> 	* gimple-match-head.c: Include vec-perm-indices.h.
> 	* generic-match-head.c: Likewise.
> 	* fold-const.h (fold_vec_perm): Declare when vec-perm-indices.h
> 	is included.
> 	* fold-const.c (fold_vec_perm): Export.
> 	(fold_ternary_loc): Move non-constant folding of VEC_PERM_EXPR...
> 	(match.pd): ...here.
> 
> Index: gcc/gimple-match-head.c
> ===================================================================
> --- gcc/gimple-match-head.c	(revision 271320)
> +++ gcc/gimple-match-head.c	(working copy)
> @@ -27,6 +27,7 @@ along with GCC; see the file COPYING3.
>  #include "gimple.h"
>  #include "ssa.h"
>  #include "cgraph.h"
> +#include "vec-perm-indices.h"
>  #include "fold-const.h"
>  #include "fold-const-call.h"
>  #include "stor-layout.h"
> Index: gcc/generic-match-head.c
> ===================================================================
> --- gcc/generic-match-head.c	(revision 271320)
> +++ gcc/generic-match-head.c	(working copy)
> @@ -27,6 +27,7 @@ along with GCC; see the file COPYING3.
>  #include "gimple.h"
>  #include "ssa.h"
>  #include "cgraph.h"
> +#include "vec-perm-indices.h"
>  #include "fold-const.h"
>  #include "stor-layout.h"
>  #include "tree-dfa.h"
> Index: gcc/fold-const.c
> ===================================================================
> --- gcc/fold-const.c	(revision 271320)
> +++ gcc/fold-const.c	(working copy)
> @@ -9015,7 +9015,7 @@ vec_cst_ctor_to_array (tree arg, unsigne
>     selector.  Return the folded VECTOR_CST or CONSTRUCTOR if successful,
>     NULL_TREE otherwise.  */
>  
> -static tree
> +tree
>  fold_vec_perm (tree type, tree arg0, tree arg1, const vec_perm_indices &sel)
>  {
>    unsigned int i;
> @@ -11763,7 +11763,10 @@ fold_ternary_loc (location_t loc, enum t
>        return NULL_TREE;
>  
>      case VEC_PERM_EXPR:
> -      if (TREE_CODE (arg2) == VECTOR_CST)
> +      /* Perform constant folding of BIT_INSERT_EXPR.  */
> +      if (TREE_CODE (arg2) == VECTOR_CST
> +	  && TREE_CODE (op0) == VECTOR_CST
> +	  && TREE_CODE (op1) == VECTOR_CST)
>  	{
>  	  /* Build a vector of integers from the tree mask.  */
>  	  vec_perm_builder builder;
> @@ -11774,61 +11777,7 @@ fold_ternary_loc (location_t loc, enum t
>  	  poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type);
>  	  bool single_arg = (op0 == op1);
>  	  vec_perm_indices sel (builder, single_arg ? 1 : 2, nelts);
> -
> -	  /* Check for cases that fold to OP0 or OP1 in their original
> -	     element order.  */
> -	  if (sel.series_p (0, 1, 0, 1))
> -	    return op0;
> -	  if (sel.series_p (0, 1, nelts, 1))
> -	    return op1;
> -
> -	  if (!single_arg)
> -	    {
> -	      if (sel.all_from_input_p (0))
> -		op1 = op0;
> -	      else if (sel.all_from_input_p (1))
> -		{
> -		  op0 = op1;
> -		  sel.rotate_inputs (1);
> -		}
> -	    }
> -
> -	  if ((TREE_CODE (op0) == VECTOR_CST
> -	       || TREE_CODE (op0) == CONSTRUCTOR)
> -	      && (TREE_CODE (op1) == VECTOR_CST
> -		  || TREE_CODE (op1) == CONSTRUCTOR))
> -	    {
> -	      tree t = fold_vec_perm (type, op0, op1, sel);
> -	      if (t != NULL_TREE)
> -		return t;
> -	    }
> -
> -	  bool changed = (op0 == op1 && !single_arg);
> -
> -	  /* Generate a canonical form of the selector.  */
> -	  if (arg2 == op2 && sel.encoding () != builder)
> -	    {
> -	      /* Some targets are deficient and fail to expand a single
> -		 argument permutation while still allowing an equivalent
> -		 2-argument version.  */
> -	      if (sel.ninputs () == 2
> -		  || can_vec_perm_const_p (TYPE_MODE (type), sel, false))
> -		op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel);
> -	      else
> -		{
> -		  vec_perm_indices sel2 (builder, 2, nelts);
> -		  if (can_vec_perm_const_p (TYPE_MODE (type), sel2, false))
> -		    op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel2);
> -		  else
> -		    /* Not directly supported with either encoding,
> -		       so use the preferred form.  */
> -		    op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel);
> -		}
> -	      changed = true;
> -	    }
> -
> -	  if (changed)
> -	    return build3_loc (loc, VEC_PERM_EXPR, type, op0, op1, op2);
> +	  return fold_vec_perm (type, op0, op1, sel);
>  	}
>        return NULL_TREE;
>  
> Index: gcc/fold-const.h
> ===================================================================
> --- gcc/fold-const.h	(revision 271320)
> +++ gcc/fold-const.h	(working copy)
> @@ -100,6 +100,9 @@ extern tree fold_bit_and_mask (tree, tre
>  			       tree, enum tree_code, tree, tree,
>  			       tree, enum tree_code, tree, tree, tree *);
>  extern tree fold_read_from_constant_string (tree);
> +#if GCC_VEC_PERN_INDICES_H
> +extern tree fold_vec_perm (tree, tree, tree, const vec_perm_indices &);
> +#endif
>  extern bool wide_int_binop (wide_int &res, enum tree_code,
>  			    const wide_int &arg1, const wide_int &arg2,
>  			    signop, wi::overflow_type *);
> Index: gcc/match.pd
> ===================================================================
> --- gcc/match.pd	(revision 271320)
> +++ gcc/match.pd	(working copy)
> @@ -5374,3 +5374,83 @@ (define_operator_list COND_TERNARY
>        (bit_and:elt_type
>         (BIT_FIELD_REF:elt_type @0 { size; } { pos; })
>         { elt; })))))))
> +
> +(simplify
> + (vec_perm @0 @1 VECTOR_CST@2)
> + (with
> +  {
> +    tree op0 = @0, op1 = @1, op2 = @2;
> +
> +    /* Build a vector of integers from the tree mask.  */
> +    vec_perm_builder builder;
> +    if (!tree_to_vec_perm_builder (&builder, op2))
> +      return NULL_TREE;
> +
> +    /* Create a vec_perm_indices for the integer vector.  */
> +    poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type);
> +    bool single_arg = (op0 == op1);
> +    vec_perm_indices sel (builder, single_arg ? 1 : 2, nelts);
> +  }
> +  (if (sel.series_p (0, 1, 0, 1))
> +   { op0; }
> +   (if (sel.series_p (0, 1, nelts, 1))
> +    { op1; }
> +    (with
> +     {
> +       if (!single_arg)
> +         {
> +	   if (sel.all_from_input_p (0))
> +	     op1 = op0;
> +	   else if (sel.all_from_input_p (1))
> +	     {
> +	       op0 = op1;
> +	       sel.rotate_inputs (1);
> +	     }
> +         }
> +       gassign *def;
> +       tree cop0 = op0, cop1 = op1;
> +       if (TREE_CODE (op0) == SSA_NAME
> +           && (def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op0)))
> +	   && gimple_assign_rhs_code (def) == CONSTRUCTOR)
> +	 cop0 = gimple_assign_rhs1 (def);
> +       if (TREE_CODE (op1) == SSA_NAME
> +           && (def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op1)))
> +	   && gimple_assign_rhs_code (def) == CONSTRUCTOR)
> +	 cop1 = gimple_assign_rhs1 (def);
> +
> +       tree t;
> +    }
> +    (if ((TREE_CODE (cop0) == VECTOR_CST
> +	  || TREE_CODE (cop0) == CONSTRUCTOR)
> +	 && (TREE_CODE (cop1) == VECTOR_CST
> +	     || TREE_CODE (cop1) == CONSTRUCTOR)
> +	 && (t = fold_vec_perm (type, cop0, cop1, sel)))
> +     { t; }
> +     (with
> +      {
> +	bool changed = (op0 == op1 && !single_arg);
> +
> +	/* Generate a canonical form of the selector.  */
> +	if (sel.encoding () != builder)
> +	  {
> +	    /* Some targets are deficient and fail to expand a single
> +	       argument permutation while still allowing an equivalent
> +	       2-argument version.  */
> +	    if (sel.ninputs () == 2
> +	       || can_vec_perm_const_p (TYPE_MODE (type), sel, false))
> +	      op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel);
> +	    else
> +	      {
> +	        vec_perm_indices sel2 (builder, 2, nelts);
> +	        if (can_vec_perm_const_p (TYPE_MODE (type), sel2, false))
> +	          op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel2);
> +	        else
> +	          /* Not directly supported with either encoding,
> +		     so use the preferred form.  */
> +		  op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel);
> +	      }
> +	    changed = true;
> +	  }
> +      }
> +      (if (changed)
> +       (vec_perm { op0; } { op1; } { op2; })))))))))
>
Richard Sandiford May 20, 2019, 9:27 a.m. UTC | #2
Richard Biener <rguenther@suse.de> writes:
> On Fri, 17 May 2019, Richard Biener wrote:
>
>> 
>> This moves things from fold-const.c to match.pd.
>
> So there's an issue that was appearantly side-stepped in the GENERIC
> folding by not re-folding the result.
>
>         /* Generate a canonical form of the selector.  */
>         if (sel.encoding () != builder)
>
> "spuriously" returns true for some cases where we end up with
> the same tree representation of the selector (I see
> m_npatterns == 4 vs. 1 and m_nelts_per_pattern 1 vs. 3).
>
> So I either have to double-check whether the re-created
> op2 is really different from the original one or remove
> those "premature" checks from vector_builder::operator==
> where we then no longer can compare encoded elts...

But the point of the comparison is to test whether the canonical
form now in "sel" is different from the original form.  Which it
is if you see m_npatterns == 4 vs. 1 and m_nelts_per_pattern 1 vs. 3.
So I don't think there's anything premature in vector_bolder::operator==.

Isn't the problem coming from:

	    /* Some targets are deficient and fail to expand a single
	       argument permutation while still allowing an equivalent
	       2-argument version.  */

I.e. those targets force us to create a non-canonical VEC_PERM_EXPR
(possibly the same as the original VEC_PERM_EXPR) because otherwise the
targets won't recognise the result.  Then we'll recanonicalise the
selector internally next time through the code.  And so on and so on.

The folds always want to operate on the canonical form, so I think
the code up to and including the above comparison is correct.
It's just that, although we tried to replace a non-canonical
VEC_PERM_EXPR with a canonical VEC_PERM_EXPR, the target wouldn't
let us.

> Testing the variant with changed = operand_equal_p (...).

Yeah, agree that's the right fix FWIW.

Richard

>
> Richard.
>
>> Bootstrap / regtest running on x86_64-unknown-linux-gnu.
>> 
>> Richard.
>> 
>> 2019-05-17  Richard Biener  <rguenther@suse.de>
>> 
>> 	* gimple-match-head.c: Include vec-perm-indices.h.
>> 	* generic-match-head.c: Likewise.
>> 	* fold-const.h (fold_vec_perm): Declare when vec-perm-indices.h
>> 	is included.
>> 	* fold-const.c (fold_vec_perm): Export.
>> 	(fold_ternary_loc): Move non-constant folding of VEC_PERM_EXPR...
>> 	(match.pd): ...here.
>> 
>> Index: gcc/gimple-match-head.c
>> ===================================================================
>> --- gcc/gimple-match-head.c	(revision 271320)
>> +++ gcc/gimple-match-head.c	(working copy)
>> @@ -27,6 +27,7 @@ along with GCC; see the file COPYING3.
>>  #include "gimple.h"
>>  #include "ssa.h"
>>  #include "cgraph.h"
>> +#include "vec-perm-indices.h"
>>  #include "fold-const.h"
>>  #include "fold-const-call.h"
>>  #include "stor-layout.h"
>> Index: gcc/generic-match-head.c
>> ===================================================================
>> --- gcc/generic-match-head.c	(revision 271320)
>> +++ gcc/generic-match-head.c	(working copy)
>> @@ -27,6 +27,7 @@ along with GCC; see the file COPYING3.
>>  #include "gimple.h"
>>  #include "ssa.h"
>>  #include "cgraph.h"
>> +#include "vec-perm-indices.h"
>>  #include "fold-const.h"
>>  #include "stor-layout.h"
>>  #include "tree-dfa.h"
>> Index: gcc/fold-const.c
>> ===================================================================
>> --- gcc/fold-const.c	(revision 271320)
>> +++ gcc/fold-const.c	(working copy)
>> @@ -9015,7 +9015,7 @@ vec_cst_ctor_to_array (tree arg, unsigne
>>     selector.  Return the folded VECTOR_CST or CONSTRUCTOR if successful,
>>     NULL_TREE otherwise.  */
>>  
>> -static tree
>> +tree
>>  fold_vec_perm (tree type, tree arg0, tree arg1, const vec_perm_indices &sel)
>>  {
>>    unsigned int i;
>> @@ -11763,7 +11763,10 @@ fold_ternary_loc (location_t loc, enum t
>>        return NULL_TREE;
>>  
>>      case VEC_PERM_EXPR:
>> -      if (TREE_CODE (arg2) == VECTOR_CST)
>> +      /* Perform constant folding of BIT_INSERT_EXPR.  */
>> +      if (TREE_CODE (arg2) == VECTOR_CST
>> +	  && TREE_CODE (op0) == VECTOR_CST
>> +	  && TREE_CODE (op1) == VECTOR_CST)
>>  	{
>>  	  /* Build a vector of integers from the tree mask.  */
>>  	  vec_perm_builder builder;
>> @@ -11774,61 +11777,7 @@ fold_ternary_loc (location_t loc, enum t
>>  	  poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type);
>>  	  bool single_arg = (op0 == op1);
>>  	  vec_perm_indices sel (builder, single_arg ? 1 : 2, nelts);
>> -
>> -	  /* Check for cases that fold to OP0 or OP1 in their original
>> -	     element order.  */
>> -	  if (sel.series_p (0, 1, 0, 1))
>> -	    return op0;
>> -	  if (sel.series_p (0, 1, nelts, 1))
>> -	    return op1;
>> -
>> -	  if (!single_arg)
>> -	    {
>> -	      if (sel.all_from_input_p (0))
>> -		op1 = op0;
>> -	      else if (sel.all_from_input_p (1))
>> -		{
>> -		  op0 = op1;
>> -		  sel.rotate_inputs (1);
>> -		}
>> -	    }
>> -
>> -	  if ((TREE_CODE (op0) == VECTOR_CST
>> -	       || TREE_CODE (op0) == CONSTRUCTOR)
>> -	      && (TREE_CODE (op1) == VECTOR_CST
>> -		  || TREE_CODE (op1) == CONSTRUCTOR))
>> -	    {
>> -	      tree t = fold_vec_perm (type, op0, op1, sel);
>> -	      if (t != NULL_TREE)
>> -		return t;
>> -	    }
>> -
>> -	  bool changed = (op0 == op1 && !single_arg);
>> -
>> -	  /* Generate a canonical form of the selector.  */
>> -	  if (arg2 == op2 && sel.encoding () != builder)
>> -	    {
>> -	      /* Some targets are deficient and fail to expand a single
>> -		 argument permutation while still allowing an equivalent
>> -		 2-argument version.  */
>> -	      if (sel.ninputs () == 2
>> -		  || can_vec_perm_const_p (TYPE_MODE (type), sel, false))
>> -		op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel);
>> -	      else
>> -		{
>> -		  vec_perm_indices sel2 (builder, 2, nelts);
>> -		  if (can_vec_perm_const_p (TYPE_MODE (type), sel2, false))
>> -		    op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel2);
>> -		  else
>> -		    /* Not directly supported with either encoding,
>> -		       so use the preferred form.  */
>> -		    op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel);
>> -		}
>> -	      changed = true;
>> -	    }
>> -
>> -	  if (changed)
>> -	    return build3_loc (loc, VEC_PERM_EXPR, type, op0, op1, op2);
>> +	  return fold_vec_perm (type, op0, op1, sel);
>>  	}
>>        return NULL_TREE;
>>  
>> Index: gcc/fold-const.h
>> ===================================================================
>> --- gcc/fold-const.h	(revision 271320)
>> +++ gcc/fold-const.h	(working copy)
>> @@ -100,6 +100,9 @@ extern tree fold_bit_and_mask (tree, tre
>>  			       tree, enum tree_code, tree, tree,
>>  			       tree, enum tree_code, tree, tree, tree *);
>>  extern tree fold_read_from_constant_string (tree);
>> +#if GCC_VEC_PERN_INDICES_H
>> +extern tree fold_vec_perm (tree, tree, tree, const vec_perm_indices &);
>> +#endif
>>  extern bool wide_int_binop (wide_int &res, enum tree_code,
>>  			    const wide_int &arg1, const wide_int &arg2,
>>  			    signop, wi::overflow_type *);
>> Index: gcc/match.pd
>> ===================================================================
>> --- gcc/match.pd	(revision 271320)
>> +++ gcc/match.pd	(working copy)
>> @@ -5374,3 +5374,83 @@ (define_operator_list COND_TERNARY
>>        (bit_and:elt_type
>>         (BIT_FIELD_REF:elt_type @0 { size; } { pos; })
>>         { elt; })))))))
>> +
>> +(simplify
>> + (vec_perm @0 @1 VECTOR_CST@2)
>> + (with
>> +  {
>> +    tree op0 = @0, op1 = @1, op2 = @2;
>> +
>> +    /* Build a vector of integers from the tree mask.  */
>> +    vec_perm_builder builder;
>> +    if (!tree_to_vec_perm_builder (&builder, op2))
>> +      return NULL_TREE;
>> +
>> +    /* Create a vec_perm_indices for the integer vector.  */
>> +    poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type);
>> +    bool single_arg = (op0 == op1);
>> +    vec_perm_indices sel (builder, single_arg ? 1 : 2, nelts);
>> +  }
>> +  (if (sel.series_p (0, 1, 0, 1))
>> +   { op0; }
>> +   (if (sel.series_p (0, 1, nelts, 1))
>> +    { op1; }
>> +    (with
>> +     {
>> +       if (!single_arg)
>> +         {
>> +	   if (sel.all_from_input_p (0))
>> +	     op1 = op0;
>> +	   else if (sel.all_from_input_p (1))
>> +	     {
>> +	       op0 = op1;
>> +	       sel.rotate_inputs (1);
>> +	     }
>> +         }
>> +       gassign *def;
>> +       tree cop0 = op0, cop1 = op1;
>> +       if (TREE_CODE (op0) == SSA_NAME
>> +           && (def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op0)))
>> +	   && gimple_assign_rhs_code (def) == CONSTRUCTOR)
>> +	 cop0 = gimple_assign_rhs1 (def);
>> +       if (TREE_CODE (op1) == SSA_NAME
>> +           && (def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op1)))
>> +	   && gimple_assign_rhs_code (def) == CONSTRUCTOR)
>> +	 cop1 = gimple_assign_rhs1 (def);
>> +
>> +       tree t;
>> +    }
>> +    (if ((TREE_CODE (cop0) == VECTOR_CST
>> +	  || TREE_CODE (cop0) == CONSTRUCTOR)
>> +	 && (TREE_CODE (cop1) == VECTOR_CST
>> +	     || TREE_CODE (cop1) == CONSTRUCTOR)
>> +	 && (t = fold_vec_perm (type, cop0, cop1, sel)))
>> +     { t; }
>> +     (with
>> +      {
>> +	bool changed = (op0 == op1 && !single_arg);
>> +
>> +	/* Generate a canonical form of the selector.  */
>> +	if (sel.encoding () != builder)
>> +	  {
>> +	    /* Some targets are deficient and fail to expand a single
>> +	       argument permutation while still allowing an equivalent
>> +	       2-argument version.  */
>> +	    if (sel.ninputs () == 2
>> +	       || can_vec_perm_const_p (TYPE_MODE (type), sel, false))
>> +	      op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel);
>> +	    else
>> +	      {
>> +	        vec_perm_indices sel2 (builder, 2, nelts);
>> +	        if (can_vec_perm_const_p (TYPE_MODE (type), sel2, false))
>> +	          op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel2);
>> +	        else
>> +	          /* Not directly supported with either encoding,
>> +		     so use the preferred form.  */
>> +		  op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel);
>> +	      }
>> +	    changed = true;
>> +	  }
>> +      }
>> +      (if (changed)
>> +       (vec_perm { op0; } { op1; } { op2; })))))))))
>>
Richard Biener May 20, 2019, 10:31 a.m. UTC | #3
On Mon, 20 May 2019, Richard Sandiford wrote:

> Richard Biener <rguenther@suse.de> writes:
> > On Fri, 17 May 2019, Richard Biener wrote:
> >
> >> 
> >> This moves things from fold-const.c to match.pd.
> >
> > So there's an issue that was appearantly side-stepped in the GENERIC
> > folding by not re-folding the result.
> >
> >         /* Generate a canonical form of the selector.  */
> >         if (sel.encoding () != builder)
> >
> > "spuriously" returns true for some cases where we end up with
> > the same tree representation of the selector (I see
> > m_npatterns == 4 vs. 1 and m_nelts_per_pattern 1 vs. 3).
> >
> > So I either have to double-check whether the re-created
> > op2 is really different from the original one or remove
> > those "premature" checks from vector_builder::operator==
> > where we then no longer can compare encoded elts...
> 
> But the point of the comparison is to test whether the canonical
> form now in "sel" is different from the original form.  Which it
> is if you see m_npatterns == 4 vs. 1 and m_nelts_per_pattern 1 vs. 3.
> So I don't think there's anything premature in vector_bolder::operator==.
> 
> Isn't the problem coming from:
> 
> 	    /* Some targets are deficient and fail to expand a single
> 	       argument permutation while still allowing an equivalent
> 	       2-argument version.  */
> 
> I.e. those targets force us to create a non-canonical VEC_PERM_EXPR
> (possibly the same as the original VEC_PERM_EXPR) because otherwise the
> targets won't recognise the result.  Then we'll recanonicalise the
> selector internally next time through the code.  And so on and so on.

Ah, of course.

> The folds always want to operate on the canonical form, so I think
> the code up to and including the above comparison is correct.
> It's just that, although we tried to replace a non-canonical
> VEC_PERM_EXPR with a canonical VEC_PERM_EXPR, the target wouldn't
> let us.
> 
> > Testing the variant with changed = operand_equal_p (...).
> 
> Yeah, agree that's the right fix FWIW.

So the following is what I have applied.

Bootstrapped / tested on x86_64-unknown-linux-gnu.

Richard.

2019-05-20  Richard Biener  <rguenther@suse.de>

	* gimple-match-head.c: Include vec-perm-indices.h.
	* generic-match-head.c: Likewise.
	* fold-const.h (fold_vec_perm): Declare when vec-perm-indices.h
	is included.
	* fold-const.c (fold_vec_perm): Export.
	(fold_ternary_loc): Move non-constant folding of VEC_PERM_EXPR...
	(match.pd): ...here.

Index: gcc/gimple-match-head.c
===================================================================
--- gcc/gimple-match-head.c	(revision 271320)
+++ gcc/gimple-match-head.c	(working copy)
@@ -27,6 +27,7 @@ along with GCC; see the file COPYING3.
 #include "gimple.h"
 #include "ssa.h"
 #include "cgraph.h"
+#include "vec-perm-indices.h"
 #include "fold-const.h"
 #include "fold-const-call.h"
 #include "stor-layout.h"
Index: gcc/generic-match-head.c
===================================================================
--- gcc/generic-match-head.c	(revision 271320)
+++ gcc/generic-match-head.c	(working copy)
@@ -27,6 +27,7 @@ along with GCC; see the file COPYING3.
 #include "gimple.h"
 #include "ssa.h"
 #include "cgraph.h"
+#include "vec-perm-indices.h"
 #include "fold-const.h"
 #include "stor-layout.h"
 #include "tree-dfa.h"
Index: gcc/fold-const.c
===================================================================
--- gcc/fold-const.c	(revision 271320)
+++ gcc/fold-const.c	(working copy)
@@ -9015,7 +9015,7 @@ vec_cst_ctor_to_array (tree arg, unsigne
    selector.  Return the folded VECTOR_CST or CONSTRUCTOR if successful,
    NULL_TREE otherwise.  */
 
-static tree
+tree
 fold_vec_perm (tree type, tree arg0, tree arg1, const vec_perm_indices &sel)
 {
   unsigned int i;
@@ -11763,7 +11763,10 @@ fold_ternary_loc (location_t loc, enum t
       return NULL_TREE;
 
     case VEC_PERM_EXPR:
-      if (TREE_CODE (arg2) == VECTOR_CST)
+      /* Perform constant folding of BIT_INSERT_EXPR.  */
+      if (TREE_CODE (arg2) == VECTOR_CST
+	  && TREE_CODE (op0) == VECTOR_CST
+	  && TREE_CODE (op1) == VECTOR_CST)
 	{
 	  /* Build a vector of integers from the tree mask.  */
 	  vec_perm_builder builder;
@@ -11774,61 +11777,7 @@ fold_ternary_loc (location_t loc, enum t
 	  poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type);
 	  bool single_arg = (op0 == op1);
 	  vec_perm_indices sel (builder, single_arg ? 1 : 2, nelts);
-
-	  /* Check for cases that fold to OP0 or OP1 in their original
-	     element order.  */
-	  if (sel.series_p (0, 1, 0, 1))
-	    return op0;
-	  if (sel.series_p (0, 1, nelts, 1))
-	    return op1;
-
-	  if (!single_arg)
-	    {
-	      if (sel.all_from_input_p (0))
-		op1 = op0;
-	      else if (sel.all_from_input_p (1))
-		{
-		  op0 = op1;
-		  sel.rotate_inputs (1);
-		}
-	    }
-
-	  if ((TREE_CODE (op0) == VECTOR_CST
-	       || TREE_CODE (op0) == CONSTRUCTOR)
-	      && (TREE_CODE (op1) == VECTOR_CST
-		  || TREE_CODE (op1) == CONSTRUCTOR))
-	    {
-	      tree t = fold_vec_perm (type, op0, op1, sel);
-	      if (t != NULL_TREE)
-		return t;
-	    }
-
-	  bool changed = (op0 == op1 && !single_arg);
-
-	  /* Generate a canonical form of the selector.  */
-	  if (arg2 == op2 && sel.encoding () != builder)
-	    {
-	      /* Some targets are deficient and fail to expand a single
-		 argument permutation while still allowing an equivalent
-		 2-argument version.  */
-	      if (sel.ninputs () == 2
-		  || can_vec_perm_const_p (TYPE_MODE (type), sel, false))
-		op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel);
-	      else
-		{
-		  vec_perm_indices sel2 (builder, 2, nelts);
-		  if (can_vec_perm_const_p (TYPE_MODE (type), sel2, false))
-		    op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel2);
-		  else
-		    /* Not directly supported with either encoding,
-		       so use the preferred form.  */
-		    op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel);
-		}
-	      changed = true;
-	    }
-
-	  if (changed)
-	    return build3_loc (loc, VEC_PERM_EXPR, type, op0, op1, op2);
+	  return fold_vec_perm (type, op0, op1, sel);
 	}
       return NULL_TREE;
 
Index: gcc/fold-const.h
===================================================================
--- gcc/fold-const.h	(revision 271320)
+++ gcc/fold-const.h	(working copy)
@@ -100,6 +100,9 @@ extern tree fold_bit_and_mask (tree, tre
 			       tree, enum tree_code, tree, tree,
 			       tree, enum tree_code, tree, tree, tree *);
 extern tree fold_read_from_constant_string (tree);
+#if GCC_VEC_PERN_INDICES_H
+extern tree fold_vec_perm (tree, tree, tree, const vec_perm_indices &);
+#endif
 extern bool wide_int_binop (wide_int &res, enum tree_code,
 			    const wide_int &arg1, const wide_int &arg2,
 			    signop, wi::overflow_type *);
Index: gcc/match.pd
===================================================================
--- gcc/match.pd	(revision 271395)
+++ gcc/match.pd	(working copy)
@@ -5374,3 +5374,86 @@ (define_operator_list COND_TERNARY
       (bit_and:elt_type
        (BIT_FIELD_REF:elt_type @0 { size; } { pos; })
        { elt; })))))))
+
+(simplify
+ (vec_perm @0 @1 VECTOR_CST@2)
+ (with
+  {
+    tree op0 = @0, op1 = @1, op2 = @2;
+
+    /* Build a vector of integers from the tree mask.  */
+    vec_perm_builder builder;
+    if (!tree_to_vec_perm_builder (&builder, op2))
+      return NULL_TREE;
+
+    /* Create a vec_perm_indices for the integer vector.  */
+    poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type);
+    bool single_arg = (op0 == op1);
+    vec_perm_indices sel (builder, single_arg ? 1 : 2, nelts);
+  }
+  (if (sel.series_p (0, 1, 0, 1))
+   { op0; }
+   (if (sel.series_p (0, 1, nelts, 1))
+    { op1; }
+    (with
+     {
+       if (!single_arg)
+         {
+	   if (sel.all_from_input_p (0))
+	     op1 = op0;
+	   else if (sel.all_from_input_p (1))
+	     {
+	       op0 = op1;
+	       sel.rotate_inputs (1);
+	     }
+         }
+       gassign *def;
+       tree cop0 = op0, cop1 = op1;
+       if (TREE_CODE (op0) == SSA_NAME
+           && (def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op0)))
+	   && gimple_assign_rhs_code (def) == CONSTRUCTOR)
+	 cop0 = gimple_assign_rhs1 (def);
+       if (TREE_CODE (op1) == SSA_NAME
+           && (def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op1)))
+	   && gimple_assign_rhs_code (def) == CONSTRUCTOR)
+	 cop1 = gimple_assign_rhs1 (def);
+
+       tree t;
+    }
+    (if ((TREE_CODE (cop0) == VECTOR_CST
+	  || TREE_CODE (cop0) == CONSTRUCTOR)
+	 && (TREE_CODE (cop1) == VECTOR_CST
+	     || TREE_CODE (cop1) == CONSTRUCTOR)
+	 && (t = fold_vec_perm (type, cop0, cop1, sel)))
+     { t; }
+     (with
+      {
+	bool changed = (op0 == op1 && !single_arg);
+
+	/* Generate a canonical form of the selector.  */
+	if (sel.encoding () != builder)
+	  {
+	    /* Some targets are deficient and fail to expand a single
+	       argument permutation while still allowing an equivalent
+	       2-argument version.  */
+	    tree oldop2 = op2;
+	    if (sel.ninputs () == 2
+	       || can_vec_perm_const_p (TYPE_MODE (type), sel, false))
+	      op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel);
+	    else
+	      {
+	        vec_perm_indices sel2 (builder, 2, nelts);
+	        if (can_vec_perm_const_p (TYPE_MODE (type), sel2, false))
+	          op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel2);
+	        else
+	          /* Not directly supported with either encoding,
+		     so use the preferred form.  */
+		  op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel);
+	      }
+	    /* Differences in the encoder do not necessarily mean
+	       differences in the resulting vector.  */
+	    changed = !operand_equal_p (op2, oldop2, 0);
+	  }
+      }
+      (if (changed)
+       (vec_perm { op0; } { op1; } { op2; })))))))))
kk
Bernhard Reutner-Fischer May 23, 2019, 11:15 p.m. UTC | #4
On 20 May 2019 12:31:46 CEST, Richard Biener <rguenther@suse.de> wrote:

>
>So the following is what I have applied.

Typo in the guard?

s/GCC_VEC_PERN_INDICES_H/GCC_VEC_PERM_INDICES_H/

?
I.e. not pern but perm, with an 'M'
thanks,

>Bootstrapped / tested on x86_64-unknown-linux-gnu.
>
>Richard.
>
>2019-05-20  Richard Biener  <rguenther@suse.de>
>
>	* gimple-match-head.c: Include vec-perm-indices.h.
>	* generic-match-head.c: Likewise.
>	* fold-const.h (fold_vec_perm): Declare when vec-perm-indices.h
>	is included.


>Index: gcc/fold-const.h
>===================================================================
>--- gcc/fold-const.h	(revision 271320)
>+++ gcc/fold-const.h	(working copy)
>@@ -100,6 +100,9 @@ extern tree fold_bit_and_mask (tree, tre
> 			       tree, enum tree_code, tree, tree,
> 			       tree, enum tree_code, tree, tree, tree *);
> extern tree fold_read_from_constant_string (tree);
>+#if GCC_VEC_PERN_INDICES_H
>+extern tree fold_vec_perm (tree, tree, tree, const vec_perm_indices
>&);
>+#endif
Richard Biener May 24, 2019, 7:59 a.m. UTC | #5
On Fri, 24 May 2019, Bernhard Reutner-Fischer wrote:

> On 20 May 2019 12:31:46 CEST, Richard Biener <rguenther@suse.de> wrote:
> 
> >
> >So the following is what I have applied.
> 
> Typo in the guard?
> 
> s/GCC_VEC_PERN_INDICES_H/GCC_VEC_PERM_INDICES_H/
> 
> ?

Yeah, but copied that from vec-perm-indices.h ... (otherwise
it wouldn't work obviously)

Richard.

> I.e. not pern but perm, with an 'M'
> thanks,
> 
> >Bootstrapped / tested on x86_64-unknown-linux-gnu.
> >
> >Richard.
> >
> >2019-05-20  Richard Biener  <rguenther@suse.de>
> >
> >	* gimple-match-head.c: Include vec-perm-indices.h.
> >	* generic-match-head.c: Likewise.
> >	* fold-const.h (fold_vec_perm): Declare when vec-perm-indices.h
> >	is included.
> 
> 
> >Index: gcc/fold-const.h
> >===================================================================
> >--- gcc/fold-const.h	(revision 271320)
> >+++ gcc/fold-const.h	(working copy)
> >@@ -100,6 +100,9 @@ extern tree fold_bit_and_mask (tree, tre
> > 			       tree, enum tree_code, tree, tree,
> > 			       tree, enum tree_code, tree, tree, tree *);
> > extern tree fold_read_from_constant_string (tree);
> >+#if GCC_VEC_PERN_INDICES_H
> >+extern tree fold_vec_perm (tree, tree, tree, const vec_perm_indices
> >&);
> >+#endif
> 
>
diff mbox series

Patch

Index: gcc/gimple-match-head.c
===================================================================
--- gcc/gimple-match-head.c	(revision 271320)
+++ gcc/gimple-match-head.c	(working copy)
@@ -27,6 +27,7 @@  along with GCC; see the file COPYING3.
 #include "gimple.h"
 #include "ssa.h"
 #include "cgraph.h"
+#include "vec-perm-indices.h"
 #include "fold-const.h"
 #include "fold-const-call.h"
 #include "stor-layout.h"
Index: gcc/generic-match-head.c
===================================================================
--- gcc/generic-match-head.c	(revision 271320)
+++ gcc/generic-match-head.c	(working copy)
@@ -27,6 +27,7 @@  along with GCC; see the file COPYING3.
 #include "gimple.h"
 #include "ssa.h"
 #include "cgraph.h"
+#include "vec-perm-indices.h"
 #include "fold-const.h"
 #include "stor-layout.h"
 #include "tree-dfa.h"
Index: gcc/fold-const.c
===================================================================
--- gcc/fold-const.c	(revision 271320)
+++ gcc/fold-const.c	(working copy)
@@ -9015,7 +9015,7 @@  vec_cst_ctor_to_array (tree arg, unsigne
    selector.  Return the folded VECTOR_CST or CONSTRUCTOR if successful,
    NULL_TREE otherwise.  */
 
-static tree
+tree
 fold_vec_perm (tree type, tree arg0, tree arg1, const vec_perm_indices &sel)
 {
   unsigned int i;
@@ -11763,7 +11763,10 @@  fold_ternary_loc (location_t loc, enum t
       return NULL_TREE;
 
     case VEC_PERM_EXPR:
-      if (TREE_CODE (arg2) == VECTOR_CST)
+      /* Perform constant folding of BIT_INSERT_EXPR.  */
+      if (TREE_CODE (arg2) == VECTOR_CST
+	  && TREE_CODE (op0) == VECTOR_CST
+	  && TREE_CODE (op1) == VECTOR_CST)
 	{
 	  /* Build a vector of integers from the tree mask.  */
 	  vec_perm_builder builder;
@@ -11774,61 +11777,7 @@  fold_ternary_loc (location_t loc, enum t
 	  poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type);
 	  bool single_arg = (op0 == op1);
 	  vec_perm_indices sel (builder, single_arg ? 1 : 2, nelts);
-
-	  /* Check for cases that fold to OP0 or OP1 in their original
-	     element order.  */
-	  if (sel.series_p (0, 1, 0, 1))
-	    return op0;
-	  if (sel.series_p (0, 1, nelts, 1))
-	    return op1;
-
-	  if (!single_arg)
-	    {
-	      if (sel.all_from_input_p (0))
-		op1 = op0;
-	      else if (sel.all_from_input_p (1))
-		{
-		  op0 = op1;
-		  sel.rotate_inputs (1);
-		}
-	    }
-
-	  if ((TREE_CODE (op0) == VECTOR_CST
-	       || TREE_CODE (op0) == CONSTRUCTOR)
-	      && (TREE_CODE (op1) == VECTOR_CST
-		  || TREE_CODE (op1) == CONSTRUCTOR))
-	    {
-	      tree t = fold_vec_perm (type, op0, op1, sel);
-	      if (t != NULL_TREE)
-		return t;
-	    }
-
-	  bool changed = (op0 == op1 && !single_arg);
-
-	  /* Generate a canonical form of the selector.  */
-	  if (arg2 == op2 && sel.encoding () != builder)
-	    {
-	      /* Some targets are deficient and fail to expand a single
-		 argument permutation while still allowing an equivalent
-		 2-argument version.  */
-	      if (sel.ninputs () == 2
-		  || can_vec_perm_const_p (TYPE_MODE (type), sel, false))
-		op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel);
-	      else
-		{
-		  vec_perm_indices sel2 (builder, 2, nelts);
-		  if (can_vec_perm_const_p (TYPE_MODE (type), sel2, false))
-		    op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel2);
-		  else
-		    /* Not directly supported with either encoding,
-		       so use the preferred form.  */
-		    op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel);
-		}
-	      changed = true;
-	    }
-
-	  if (changed)
-	    return build3_loc (loc, VEC_PERM_EXPR, type, op0, op1, op2);
+	  return fold_vec_perm (type, op0, op1, sel);
 	}
       return NULL_TREE;
 
Index: gcc/fold-const.h
===================================================================
--- gcc/fold-const.h	(revision 271320)
+++ gcc/fold-const.h	(working copy)
@@ -100,6 +100,9 @@  extern tree fold_bit_and_mask (tree, tre
 			       tree, enum tree_code, tree, tree,
 			       tree, enum tree_code, tree, tree, tree *);
 extern tree fold_read_from_constant_string (tree);
+#if GCC_VEC_PERN_INDICES_H
+extern tree fold_vec_perm (tree, tree, tree, const vec_perm_indices &);
+#endif
 extern bool wide_int_binop (wide_int &res, enum tree_code,
 			    const wide_int &arg1, const wide_int &arg2,
 			    signop, wi::overflow_type *);
Index: gcc/match.pd
===================================================================
--- gcc/match.pd	(revision 271320)
+++ gcc/match.pd	(working copy)
@@ -5374,3 +5374,83 @@  (define_operator_list COND_TERNARY
       (bit_and:elt_type
        (BIT_FIELD_REF:elt_type @0 { size; } { pos; })
        { elt; })))))))
+
+(simplify
+ (vec_perm @0 @1 VECTOR_CST@2)
+ (with
+  {
+    tree op0 = @0, op1 = @1, op2 = @2;
+
+    /* Build a vector of integers from the tree mask.  */
+    vec_perm_builder builder;
+    if (!tree_to_vec_perm_builder (&builder, op2))
+      return NULL_TREE;
+
+    /* Create a vec_perm_indices for the integer vector.  */
+    poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type);
+    bool single_arg = (op0 == op1);
+    vec_perm_indices sel (builder, single_arg ? 1 : 2, nelts);
+  }
+  (if (sel.series_p (0, 1, 0, 1))
+   { op0; }
+   (if (sel.series_p (0, 1, nelts, 1))
+    { op1; }
+    (with
+     {
+       if (!single_arg)
+         {
+	   if (sel.all_from_input_p (0))
+	     op1 = op0;
+	   else if (sel.all_from_input_p (1))
+	     {
+	       op0 = op1;
+	       sel.rotate_inputs (1);
+	     }
+         }
+       gassign *def;
+       tree cop0 = op0, cop1 = op1;
+       if (TREE_CODE (op0) == SSA_NAME
+           && (def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op0)))
+	   && gimple_assign_rhs_code (def) == CONSTRUCTOR)
+	 cop0 = gimple_assign_rhs1 (def);
+       if (TREE_CODE (op1) == SSA_NAME
+           && (def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op1)))
+	   && gimple_assign_rhs_code (def) == CONSTRUCTOR)
+	 cop1 = gimple_assign_rhs1 (def);
+
+       tree t;
+    }
+    (if ((TREE_CODE (cop0) == VECTOR_CST
+	  || TREE_CODE (cop0) == CONSTRUCTOR)
+	 && (TREE_CODE (cop1) == VECTOR_CST
+	     || TREE_CODE (cop1) == CONSTRUCTOR)
+	 && (t = fold_vec_perm (type, cop0, cop1, sel)))
+     { t; }
+     (with
+      {
+	bool changed = (op0 == op1 && !single_arg);
+
+	/* Generate a canonical form of the selector.  */
+	if (sel.encoding () != builder)
+	  {
+	    /* Some targets are deficient and fail to expand a single
+	       argument permutation while still allowing an equivalent
+	       2-argument version.  */
+	    if (sel.ninputs () == 2
+	       || can_vec_perm_const_p (TYPE_MODE (type), sel, false))
+	      op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel);
+	    else
+	      {
+	        vec_perm_indices sel2 (builder, 2, nelts);
+	        if (can_vec_perm_const_p (TYPE_MODE (type), sel2, false))
+	          op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel2);
+	        else
+	          /* Not directly supported with either encoding,
+		     so use the preferred form.  */
+		  op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel);
+	      }
+	    changed = true;
+	  }
+      }
+      (if (changed)
+       (vec_perm { op0; } { op1; } { op2; })))))))))