Patchwork Fix vector permutation forwprop optimization (PR tree-optimization/54610)

login
register
mail settings
Submitter Jakub Jelinek
Date Sept. 18, 2012, 12:20 p.m.
Message ID <20120918122009.GY22619@tucnak.redhat.com>
Download mbox | patch
Permalink /patch/184698/
State New
Headers show

Comments

Jakub Jelinek - Sept. 18, 2012, 12:20 p.m.
Hi!

vect_gen_perm_mask is not suitable for use outside of the vectorizer:
it uses the current vector size to determine the number of units of a vector,
which isn't something that should be relied on outside of the vectorizer.

The following patch just constructs the mask inline instead; it is not that
much code.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2012-09-18  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/54610
	* tree-ssa-forwprop.c: Include optabs.h.
	(simplify_vector_constructor): Don't use vect_gen_perm_mask,
	instead create the mask constant here.
	* Makefile.in (tree-ssa-forwprop.o): Depend on $(OPTABS_H).

	* gcc.target/i386/pr54610.c: New test.


	Jakub
Richard Guenther - Sept. 18, 2012, 12:52 p.m.
On Tue, Sep 18, 2012 at 2:20 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> vect_gen_perm_mask is not suitable for use outside of the vectorizer,
> it uses current vector size to determine the number of units of a vector,
> which isn't something that should be used outside of the vectorizer.
>
> The following patch just does construct the mask inline, it is not that long
> code.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok.

Thanks,
Richard.

> 2012-09-18  Jakub Jelinek  <jakub@redhat.com>
>
>         PR tree-optimization/54610
>         * tree-ssa-forwprop.c: Include optabs.h.
>         (simplify_vector_constructor): Don't use vect_gen_perm_mask,
>         instead create the mask constant here.
>         * Makefile.in (tree-ssa-forwprop.o): Depend on $(OPTABS_H).
>
>         * gcc.target/i386/pr54610.c: New test.
>
> --- gcc/tree-ssa-forwprop.c.jj  2012-09-14 14:20:56.000000000 +0200
> +++ gcc/tree-ssa-forwprop.c     2012-09-18 10:17:40.627193548 +0200
> @@ -34,6 +34,7 @@ along with GCC; see the file COPYING3.
>  #include "expr.h"
>  #include "cfgloop.h"
>  #include "tree-vectorizer.h"
> +#include "optabs.h"
>
>  /* This pass propagates the RHS of assignment statements into use
>     sites of the LHS of the assignment.  It's basically a specialized
> @@ -2854,14 +2855,24 @@ simplify_vector_constructor (gimple_stmt
>      return false;
>
>    if (maybe_ident)
> -    {
> -      gimple_assign_set_rhs_from_tree (gsi, orig);
> -    }
> +    gimple_assign_set_rhs_from_tree (gsi, orig);
>    else
>      {
> -      op2 = vect_gen_perm_mask (type, sel);
> -      if (!op2)
> +      tree mask_type, *mask_elts;
> +
> +      if (!can_vec_perm_p (TYPE_MODE (type), false, sel))
> +       return false;
> +      mask_type
> +       = build_vector_type (build_nonstandard_integer_type (elem_size, 1),
> +                            nelts);
> +      if (GET_MODE_CLASS (TYPE_MODE (mask_type)) != MODE_VECTOR_INT
> +         || GET_MODE_SIZE (TYPE_MODE (mask_type))
> +            != GET_MODE_SIZE (TYPE_MODE (type)))
>         return false;
> +      mask_elts = XALLOCAVEC (tree, nelts);
> +      for (i = 0; i < nelts; i++)
> +       mask_elts[i] = build_int_cst (TREE_TYPE (mask_type), sel[i]);
> +      op2 = build_vector (mask_type, mask_elts);
>        gimple_assign_set_rhs_with_ops_1 (gsi, VEC_PERM_EXPR, orig, orig, op2);
>      }
>    update_stmt (gsi_stmt (*gsi));
> --- gcc/Makefile.in.jj  2012-09-13 07:54:44.000000000 +0200
> +++ gcc/Makefile.in     2012-09-18 10:18:05.717067056 +0200
> @@ -2245,7 +2245,7 @@ tree-ssa-forwprop.o : tree-ssa-forwprop.
>     $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) $(CFGLOOP_H) \
>     $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \
>     langhooks.h $(FLAGS_H) $(GIMPLE_H) $(GIMPLE_PRETTY_PRINT_H) $(EXPR_H) \
> -   $(TREE_VECTORIZER_H)
> +   $(TREE_VECTORIZER_H) $(OPTABS_H)
>  tree-ssa-phiprop.o : tree-ssa-phiprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
>     $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \
>     $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \
> --- gcc/testsuite/gcc.target/i386/pr54610.c.jj  2012-09-18 10:24:58.793981091 +0200
> +++ gcc/testsuite/gcc.target/i386/pr54610.c     2012-09-18 10:26:26.838535968 +0200
> @@ -0,0 +1,17 @@
> +/* PR tree-optimization/54610 */
> +/* { dg-do compile } */
> +/* { dg-options "-O -mavx -fdump-tree-optimized" } */
> +
> +typedef double vec __attribute__((vector_size (2 * sizeof (double))));
> +void f (vec *px, vec *y, vec *z)
> +{
> +  vec x = *px;
> +  vec t1 = { x[1], x[0] };
> +  vec t2 = { x[0], x[1] };
> +  *y = t1;
> +  *z = t2;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "optimized" } } */
> +/* { dg-final { scan-tree-dump-not "BIT_FIELD_REF" "optimized" } } */
> +/* { dg-final { cleanup-tree-dump "optimized" } } */
>
>         Jakub

Patch

--- gcc/tree-ssa-forwprop.c.jj	2012-09-14 14:20:56.000000000 +0200
+++ gcc/tree-ssa-forwprop.c	2012-09-18 10:17:40.627193548 +0200
@@ -34,6 +34,7 @@  along with GCC; see the file COPYING3.
 #include "expr.h"
 #include "cfgloop.h"
 #include "tree-vectorizer.h"
+#include "optabs.h"
 
 /* This pass propagates the RHS of assignment statements into use
    sites of the LHS of the assignment.  It's basically a specialized
@@ -2854,14 +2855,24 @@  simplify_vector_constructor (gimple_stmt
     return false;
 
   if (maybe_ident)
-    {
-      gimple_assign_set_rhs_from_tree (gsi, orig);
-    }
+    gimple_assign_set_rhs_from_tree (gsi, orig);
   else
     {
-      op2 = vect_gen_perm_mask (type, sel);
-      if (!op2)
+      tree mask_type, *mask_elts;
+
+      if (!can_vec_perm_p (TYPE_MODE (type), false, sel))
+	return false;
+      mask_type
+	= build_vector_type (build_nonstandard_integer_type (elem_size, 1),
+			     nelts);
+      if (GET_MODE_CLASS (TYPE_MODE (mask_type)) != MODE_VECTOR_INT
+	  || GET_MODE_SIZE (TYPE_MODE (mask_type))
+	     != GET_MODE_SIZE (TYPE_MODE (type)))
 	return false;
+      mask_elts = XALLOCAVEC (tree, nelts);
+      for (i = 0; i < nelts; i++)
+	mask_elts[i] = build_int_cst (TREE_TYPE (mask_type), sel[i]);
+      op2 = build_vector (mask_type, mask_elts);
       gimple_assign_set_rhs_with_ops_1 (gsi, VEC_PERM_EXPR, orig, orig, op2);
     }
   update_stmt (gsi_stmt (*gsi));
--- gcc/Makefile.in.jj	2012-09-13 07:54:44.000000000 +0200
+++ gcc/Makefile.in	2012-09-18 10:18:05.717067056 +0200
@@ -2245,7 +2245,7 @@  tree-ssa-forwprop.o : tree-ssa-forwprop.
    $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) $(CFGLOOP_H) \
    $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \
    langhooks.h $(FLAGS_H) $(GIMPLE_H) $(GIMPLE_PRETTY_PRINT_H) $(EXPR_H) \
-   $(TREE_VECTORIZER_H)
+   $(TREE_VECTORIZER_H) $(OPTABS_H)
 tree-ssa-phiprop.o : tree-ssa-phiprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) $(TREE_H) $(TM_P_H) $(BASIC_BLOCK_H) \
    $(TREE_FLOW_H) $(TREE_PASS_H) $(DIAGNOSTIC_H) \
--- gcc/testsuite/gcc.target/i386/pr54610.c.jj	2012-09-18 10:24:58.793981091 +0200
+++ gcc/testsuite/gcc.target/i386/pr54610.c	2012-09-18 10:26:26.838535968 +0200
@@ -0,0 +1,17 @@ 
+/* PR tree-optimization/54610 */
+/* { dg-do compile } */
+/* { dg-options "-O -mavx -fdump-tree-optimized" } */
+
+typedef double vec __attribute__((vector_size (2 * sizeof (double))));
+void f (vec *px, vec *y, vec *z)
+{
+  vec x = *px;
+  vec t1 = { x[1], x[0] };
+  vec t2 = { x[0], x[1] };
+  *y = t1;
+  *z = t2;
+}
+
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-not "BIT_FIELD_REF" "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */