Patchwork [vectorizer] Fix PR tree-optimization/48765

login
register
mail settings
Submitter Ira Rosen
Date April 28, 2011, 7:51 p.m.
Message ID <OFD28EBEA5.2BE93CC1-ONC2257880.00610D57-C2257880.006D111A@il.ibm.com>
Download mbox | patch
Permalink /patch/93283/
State New
Headers show

Comments

Ira Rosen - April 28, 2011, 7:51 p.m.
gcc-patches-owner@gcc.gnu.org wrote on 28/04/2011 03:42:01 PM:

>
>
> Hi,
>
> Sometimes the loop vectorization factor changes during the analysis, while
> statement analysis depends on it. This patch moves the update of the
> vectorization factor before the statement analysis, avoiding the current
> difference between the analysis and the transformation phases that caused
> the problem described in the PR.
>
> Bootstrapped and now testing on powerpc64-suse-linux.
> I'll commit the patch once the testing completes.
>

I ended up committing a slightly different version of the patch that scans
the loop statements only if we decided to use SLP.

Bootstrapped and tested on powerpc64-suse-linux.
Committed revision 173132.

Ira

ChangeLog:

        PR tree-optimization/48765
        * tree-vectorizer.h (vect_make_slp_decision): Return bool.
        * tree-vect-loop.c (vect_analyze_loop_operations): Add new argument
        to indicate if loop aware SLP is being used.  Scan the statements
        and update the vectorization factor according to the type of
        vectorization before statement analysis.
        (vect_analyze_loop_2): Get a return value from vect_make_slp_decision,
        pass it to vect_analyze_loop_operations.
        (vectorizable_reduction): Set number of copies to 1 in case of pure
        SLP statement.
        * tree-vect-stmts.c (vectorizable_conversion,
        vectorizable_assignment, vectorizable_shift,
        vectorizable_operation, vectorizable_type_demotion,
        vectorizable_type_promotion, vectorizable_store, vectorizable_load):
        Likewise.
        (vectorizable_condition): Move the check that it is not SLP
        vectorization before the number of copies check.
        * tree-vect-slp.c (vect_make_slp_decision): Return TRUE if decided
        to vectorize the loop using SLP.


testsuite/ChangeLog:

        PR tree-optimization/48765
        * gcc.dg/vect/pr48765.c: New.

(See attached file: pr48765.txt)

Patch

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 173127)
+++ ChangeLog	(working copy)
@@ -1,3 +1,25 @@ 
+2011-04-28  Ira Rosen  <ira.rosen@linaro.org>
+
+	PR tree-optimization/48765
+	* tree-vectorizer.h (vect_make_slp_decision): Return bool.
+	* tree-vect-loop.c (vect_analyze_loop_operations): Add new argument
+	to indicate if loop aware SLP is being used.  Scan the statements
+	and update the vectorization factor according to the type of
+	vectorization before statement analysis.
+	(vect_analyze_loop_2): Get a return value from vect_make_slp_decision,
+	pass it to vect_analyze_loop_operations.
+	(vectorizable_reduction): Set number of copies to 1 in case of pure
+	SLP statement.
+	* tree-vect-stmts.c (vectorizable_conversion,
+	vectorizable_assignment, vectorizable_shift,
+	vectorizable_operation, vectorizable_type_demotion,
+	vectorizable_type_promotion, vectorizable_store, vectorizable_load):
+	Likewise.
+	(vectorizable_condition): Move the check that it is not SLP
+	vectorization before the number of copies check.
+	* tree-vect-slp.c (vect_make_slp_decision): Return TRUE if decided
+	to vectorize the loop using SLP.
+
 2011-04-28  Jakub Jelinek  <jakub@redhat.com>
 
 	PR middle-end/48597
Index: testsuite/gcc.dg/vect/pr48765.c
===================================================================
--- testsuite/gcc.dg/vect/pr48765.c	(revision 0)
+++ testsuite/gcc.dg/vect/pr48765.c	(revision 0)
@@ -0,0 +1,82 @@ 
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-options "-m64 -O3 -mcpu=power6" } */
+
+enum reg_class
+{
+  NO_REGS, AP_REG, XRF_REGS, GENERAL_REGS, AGRF_REGS, XGRF_REGS, ALL_REGS,
+    LIM_REG_CLASSES
+};
+enum machine_mode
+{
+  VOIDmode, QImode, HImode, PSImode, SImode, PDImode, DImode, TImode, OImode,
+    QFmode, HFmode, TQFmode, SFmode, DFmode, XFmode, TFmode, SCmode, DCmode,
+    XCmode, TCmode, CQImode, CHImode, CSImode, CDImode, CTImode, COImode,
+    BLKmode, CCmode, CCEVENmode, MAX_MACHINE_MODE
+};
+typedef struct rtx_def
+{
+  int mode:8;
+}
+ *rtx;
+extern rtx *regno_reg_rtx;
+typedef unsigned int HARD_REG_ELT_TYPE;
+typedef HARD_REG_ELT_TYPE HARD_REG_SET[((64 + 32 - 1) / 32)];
+extern int reg_alloc_order[64];
+extern int max_regno;
+extern int *reg_n_calls_crossed;
+extern short *reg_renumber;
+static int *reg_where_dead;
+static int *reg_where_born;
+static int *reg_order;
+static char *regs_change_size;
+static HARD_REG_SET *after_insn_hard_regs;
+static int stupid_find_reg (int, enum reg_class, enum machine_mode, int, int,
+			    int);
+void
+stupid_life_analysis (f, nregs, file)
+     rtx f;
+{
+  register int i;
+  for (i = (((64)) + 3) + 1; i < max_regno; i++)
+    {
+      register int r = reg_order[i];
+      if ((int) LIM_REG_CLASSES > 1)
+	reg_renumber[r] =
+	  stupid_find_reg (reg_n_calls_crossed[r], reg_preferred_class (r),
+			   ((regno_reg_rtx[r])->mode), reg_where_born[r],
+			   reg_where_dead[r], regs_change_size[r]);
+    }
+}
+
+static int
+stupid_find_reg (call_preserved, class, mode, born_insn, dead_insn,
+		 changes_size)
+     int call_preserved;
+     enum reg_class class;
+     enum machine_mode mode;
+{
+  register int i, ins;
+  HARD_REG_SET used, this_reg;
+  for (ins = born_insn; ins < dead_insn; ins++)
+    do
+      {
+	register HARD_REG_ELT_TYPE *scan_tp_ = (used), *scan_fp_ =
+	  (after_insn_hard_regs[ins]);
+	for (i = 0; i < ((64 + 32 - 1) / 32); i++)
+	  *scan_tp_++ |= *scan_fp_++;
+      }
+    while (0);
+  for (i = 0; i < 64; i++)
+    {
+      int regno = reg_alloc_order[i];
+      if (((used)[(regno) / ((unsigned) 32)] &
+	   (((HARD_REG_ELT_TYPE) (1)) << ((regno) % ((unsigned) 32)))))
+	{
+	  register int j;
+	  if (j == regno)
+	    return regno;
+	}
+    }
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/ChangeLog
===================================================================
--- testsuite/ChangeLog	(revision 173127)
+++ testsuite/ChangeLog	(working copy)
@@ -1,3 +1,8 @@ 
+2011-04-28  Ira Rosen  <ira.rosen@linaro.org>
+
+	PR tree-optimization/48765
+	* gcc.dg/vect/pr48765.c: New.
+
 2011-04-28  Rainer Orth  <ro@CeBiTec.Uni-Bielefeld.DE>
 
 	PR tree-optimization/48775
Index: tree-vectorizer.h
===================================================================
--- tree-vectorizer.h	(revision 173127)
+++ tree-vectorizer.h	(working copy)
@@ -870,7 +870,7 @@  extern bool vect_transform_slp_perm_load (gimple,
 extern bool vect_schedule_slp (loop_vec_info, bb_vec_info);
 extern void vect_update_slp_costs_according_to_vf (loop_vec_info);
 extern bool vect_analyze_slp (loop_vec_info, bb_vec_info);
-extern void vect_make_slp_decision (loop_vec_info);
+extern bool vect_make_slp_decision (loop_vec_info);
 extern void vect_detect_hybrid_slp (loop_vec_info);
 extern void vect_get_slp_defs (tree, tree, slp_tree, VEC (tree,heap) **,
                                VEC (tree,heap) **, int);
Index: tree-vect-loop.c
===================================================================
--- tree-vect-loop.c	(revision 173127)
+++ tree-vect-loop.c	(working copy)
@@ -1146,7 +1146,7 @@  vect_get_cost (enum vect_cost_for_stmt type_of_cos
    Scan the loop stmts and make sure they are all vectorizable.  */
 
 static bool
-vect_analyze_loop_operations (loop_vec_info loop_vinfo)
+vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
 {
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
@@ -1167,7 +1167,41 @@  static bool
 
   gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
   vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+  if (slp)
+    {
+      /* If all the stmts in the loop can be SLPed, we perform only SLP, and
+	 vectorization factor of the loop is the unrolling factor required by
+	 the SLP instances.  If that unrolling factor is 1, we say, that we
+	 perform pure SLP on loop - cross iteration parallelism is not
+	 exploited.  */
+      for (i = 0; i < nbbs; i++)
+	{
+	  basic_block bb = bbs[i];
+	  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+	    {
+	      gimple stmt = gsi_stmt (si);
+	      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+	      gcc_assert (stmt_info);
+	      if ((STMT_VINFO_RELEVANT_P (stmt_info)
+		   || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
+		  && !PURE_SLP_STMT (stmt_info))
+		/* STMT needs both SLP and loop-based vectorization.  */
+		only_slp_in_loop = false;
+	    }
+	}
 
+      if (only_slp_in_loop)
+	vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
+      else
+	vectorization_factor = least_common_multiple (vectorization_factor,
+				LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
+
+      LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
+      if (vect_print_dump_info (REPORT_DETAILS))
+	fprintf (vect_dump, "Updating vectorization factor to %d ",
+	 		    vectorization_factor);
+    }
+
   for (i = 0; i < nbbs; i++)
     {
       basic_block bb = bbs[i];
@@ -1272,18 +1306,8 @@  static bool
       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
         {
           gimple stmt = gsi_stmt (si);
-          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-
-          gcc_assert (stmt_info);
-
 	  if (!vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
 	    return false;
-
-          if ((STMT_VINFO_RELEVANT_P (stmt_info)
-               || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
-              && !PURE_SLP_STMT (stmt_info))
-            /* STMT needs both SLP and loop-based vectorization.  */
-            only_slp_in_loop = false;
         }
     } /* bbs */
 
@@ -1303,18 +1327,6 @@  static bool
       return false;
     }
 
-  /* If all the stmts in the loop can be SLPed, we perform only SLP, and
-     vectorization factor of the loop is the unrolling factor required by the
-     SLP instances.  If that unrolling factor is 1, we say, that we perform
-     pure SLP on loop - cross iteration parallelism is not exploited.  */
-  if (only_slp_in_loop)
-    vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
-  else
-    vectorization_factor = least_common_multiple (vectorization_factor,
-                                LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
-
-  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
-
   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
       && vect_print_dump_info (REPORT_DETAILS))
     fprintf (vect_dump,
@@ -1410,7 +1422,7 @@  static bool
 static bool
 vect_analyze_loop_2 (loop_vec_info loop_vinfo)
 {
-  bool ok, dummy;
+  bool ok, dummy, slp = false;
   int max_vf = MAX_VECTORIZATION_FACTOR;
   int min_vf = 2;
 
@@ -1524,7 +1536,7 @@  vect_analyze_loop_2 (loop_vec_info loop_vinfo)
   if (ok)
     {
       /* Decide which possible SLP instances to SLP.  */
-      vect_make_slp_decision (loop_vinfo);
+      slp = vect_make_slp_decision (loop_vinfo);
 
       /* Find stmts that need to be both vectorized and SLPed.  */
       vect_detect_hybrid_slp (loop_vinfo);
@@ -1533,7 +1545,7 @@  vect_analyze_loop_2 (loop_vec_info loop_vinfo)
   /* Scan all the operations in the loop and make sure they are
      vectorizable.  */
 
-  ok = vect_analyze_loop_operations (loop_vinfo);
+  ok = vect_analyze_loop_operations (loop_vinfo, slp);
   if (!ok)
     {
       if (vect_print_dump_info (REPORT_DETAILS))
@@ -4136,7 +4148,7 @@  vectorizable_reduction (gimple stmt, gimple_stmt_i
   if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
     return false;
 
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
Index: tree-vect-stmts.c
===================================================================
--- tree-vect-stmts.c	(revision 173127)
+++ tree-vect-stmts.c	(working copy)
@@ -1747,7 +1747,7 @@  vectorizable_conversion (gimple stmt, gimple_stmt_
   /* Multiple types in SLP are handled by creating the appropriate number of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
 
   /* Sanity check: make sure that at least one copy of the vectorized stmt
@@ -1940,7 +1940,7 @@  vectorizable_assignment (gimple stmt, gimple_stmt_
   /* Multiple types in SLP are handled by creating the appropriate number of
      vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
@@ -2149,7 +2149,7 @@  vectorizable_shift (gimple stmt, gimple_stmt_itera
   /* Multiple types in SLP are handled by creating the appropriate number of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@@ -2497,7 +2497,7 @@  vectorizable_operation (gimple stmt, gimple_stmt_i
   /* Multiple types in SLP are handled by creating the appropriate number of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@@ -2895,7 +2895,7 @@  vectorizable_type_demotion (gimple stmt, gimple_st
   /* Multiple types in SLP are handled by creating the appropriate number of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
@@ -3175,7 +3175,7 @@  vectorizable_type_promotion (gimple stmt, gimple_s
   /* Multiple types in SLP are handled by creating the appropriate number of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@@ -3358,7 +3358,7 @@  vectorizable_store (gimple stmt, gimple_stmt_itera
   /* Multiple types in SLP are handled by creating the appropriate number of
      vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp)
+  if (slp || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
@@ -3851,7 +3851,7 @@  vectorizable_load (gimple stmt, gimple_stmt_iterat
   /* Multiple types in SLP are handled by creating the appropriate number of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
-  if (slp)
+  if (slp || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
@@ -4457,6 +4457,10 @@  vectorizable_condition (gimple stmt, gimple_stmt_i
   /* FORNOW: unsupported in basic block SLP.  */
   gcc_assert (loop_vinfo);
 
+  /* FORNOW: SLP not supported.  */
+  if (STMT_SLP_TYPE (stmt_info))
+    return false;
+
   gcc_assert (ncopies >= 1);
   if (reduc_index && ncopies > 1)
     return false; /* FORNOW */
@@ -4469,10 +4473,6 @@  vectorizable_condition (gimple stmt, gimple_stmt_i
            && reduc_def))
     return false;
 
-  /* FORNOW: SLP not supported.  */
-  if (STMT_SLP_TYPE (stmt_info))
-    return false;
-
   /* FORNOW: not yet supported.  */
   if (STMT_VINFO_LIVE_P (stmt_info))
     {
Index: tree-vect-slp.c
===================================================================
--- tree-vect-slp.c	(revision 173127)
+++ tree-vect-slp.c	(working copy)
@@ -1351,9 +1351,10 @@  vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec
 
 
 /* For each possible SLP instance decide whether to SLP it and calculate overall
-   unrolling factor needed to SLP the loop.  */
+   unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
+   least one instance.  */
 
-void
+bool
 vect_make_slp_decision (loop_vec_info loop_vinfo)
 {
   unsigned int i, unrolling_factor = 1;
@@ -1382,6 +1383,8 @@  vect_make_slp_decision (loop_vec_info loop_vinfo)
   if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
     fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
 	     decided_to_slp, unrolling_factor);
+
+  return (decided_to_slp > 0);
 }