diff mbox

Fix PR64844

Message ID alpine.LSU.2.11.1501291347460.12905@zhemvz.fhfr.qr
State New
Headers show

Commit Message

Richard Biener Jan. 29, 2015, 12:48 p.m. UTC
The following fixes double accounting of load/store costs in loops peeled
for alignment.  It also moves the cost model analysis dump ahead of the
profitability checks, so that it is printed in failing cases as well.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2015-01-29  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/64844
	* tree-vect-loop.c (vect_estimate_min_profitable_iters): Always
	dump cost model analysis.
	* tree-vect-data-refs.c (vect_enhance_data_refs_alignment):
	Do not register adjusted load/store costs here.

	* gcc.dg/vect/pr64844.c: New testcase.
diff mbox

Patch

Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c	(revision 220205)
+++ gcc/tree-vect-loop.c	(working copy)
@@ -2990,6 +2990,27 @@  vect_estimate_min_profitable_iters (loop
 
   vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
   
+  if (dump_enabled_p ())
+    {
+      dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
+      dump_printf (MSG_NOTE, "  Vector inside of loop cost: %d\n",
+                   vec_inside_cost);
+      dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n",
+                   vec_prologue_cost);
+      dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n",
+                   vec_epilogue_cost);
+      dump_printf (MSG_NOTE, "  Scalar iteration cost: %d\n",
+                   scalar_single_iter_cost);
+      dump_printf (MSG_NOTE, "  Scalar outside cost: %d\n",
+                   scalar_outside_cost);
+      dump_printf (MSG_NOTE, "  Vector outside cost: %d\n",
+                   vec_outside_cost);
+      dump_printf (MSG_NOTE, "  prologue iterations: %d\n",
+                   peel_iters_prologue);
+      dump_printf (MSG_NOTE, "  epilogue iterations: %d\n",
+                   peel_iters_epilogue);
+    }
+
   /* Calculate number of iterations required to make the vector version
      profitable, relative to the loop bodies only.  The following condition
      must hold true:
@@ -3037,30 +3058,9 @@  vect_estimate_min_profitable_iters (loop
       return;
     }
 
-  if (dump_enabled_p ())
-    {
-      dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
-      dump_printf (MSG_NOTE, "  Vector inside of loop cost: %d\n",
-                   vec_inside_cost);
-      dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n",
-                   vec_prologue_cost);
-      dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n",
-                   vec_epilogue_cost);
-      dump_printf (MSG_NOTE, "  Scalar iteration cost: %d\n",
-                   scalar_single_iter_cost);
-      dump_printf (MSG_NOTE, "  Scalar outside cost: %d\n",
-                   scalar_outside_cost);
-      dump_printf (MSG_NOTE, "  Vector outside cost: %d\n",
-                   vec_outside_cost);
-      dump_printf (MSG_NOTE, "  prologue iterations: %d\n",
-                   peel_iters_prologue);
-      dump_printf (MSG_NOTE, "  epilogue iterations: %d\n",
-                   peel_iters_epilogue);
-      dump_printf (MSG_NOTE,
-                   "  Calculated minimum iters for profitability: %d\n",
-                   min_profitable_iters);
-      dump_printf (MSG_NOTE, "\n");
-    }
+  dump_printf (MSG_NOTE,
+	       "  Calculated minimum iters for profitability: %d\n",
+	       min_profitable_iters);
 
   min_profitable_iters =
 	min_profitable_iters < vf ? vf : min_profitable_iters;
Index: gcc/tree-vect-data-refs.c
===================================================================
--- gcc/tree-vect-data-refs.c	(revision 220205)
+++ gcc/tree-vect-data-refs.c	(working copy)
@@ -1763,9 +1763,6 @@  vect_enhance_data_refs_alignment (loop_v
 
       if (do_peeling)
         {
-	  stmt_info_for_cost *si;
-	  void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
-
           /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
              If the misalignment of DR_i is identical to that of dr0 then set
              DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
@@ -1791,20 +1788,10 @@  vect_enhance_data_refs_alignment (loop_v
               dump_printf_loc (MSG_NOTE, vect_location,
                                "Peeling for alignment will be applied.\n");
             }
-	  /* We've delayed passing the inside-loop peeling costs to the
-	     target cost model until we were sure peeling would happen.
-	     Do so now.  */
-	  if (body_cost_vec.exists ())
-	    {
-	      FOR_EACH_VEC_ELT (body_cost_vec, i, si)
-		{
-		  struct _stmt_vec_info *stmt_info
-		    = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
-		  (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
-					si->misalign, vect_body);
-		}
-	      body_cost_vec.release ();
-	    }
+	  /* The inside-loop cost will be accounted for in vectorizable_load
+	     and vectorizable_store correctly with adjusted alignments.
+	     Drop the body_cost_vec on the floor here.  */
+	  body_cost_vec.release ();
 
 	  stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
 	  gcc_assert (stat);
Index: gcc/testsuite/gcc.dg/vect/pr64844.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/pr64844.c	(revision 0)
+++ gcc/testsuite/gcc.dg/vect/pr64844.c	(revision 0)
@@ -0,0 +1,52 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target vect_double } */
+/* { dg-additional-options "-ffast-math" } */
+
+#include "tree-vect.h"
+
+extern void abort (void);
+
+typedef __SIZE_TYPE__ size_t;
+
+static double
+compute(size_t n, double const * __restrict a, double const * __restrict b)
+{
+  double res = 0.0;
+  size_t i;
+  for (i = 0; i < n; ++i)
+    res += a[i] + b[i];
+  return res;
+}
+
+void init(double *, double *);
+
+int
+main()
+{
+  double ary1[1024];
+  double ary2[1024];
+  size_t i;
+
+  check_vect ();
+
+  // Initialize arrays
+  for (i = 0; i < 1024; ++i)
+    {
+      ary1[i] = 1 / (double)(i + 1);
+      ary2[i] = 1 + 1 / (double) (i + 1);
+      __asm__ volatile ("" : : : "memory");
+    }
+
+  // Compute two results using different starting elements
+  if ((int) compute (512, &ary1[0], &ary2[0]) != 525
+      || (int) compute(512, &ary1[1], &ary2[1]) != 523)
+    abort ();
+
+  return 0;
+}
+
+/* All targets should allow vectorizing this by some means of
+   dealing with the known misalignment in loop 2.  */
+
+/* { dg-final { scan-tree-dump-times "loop vectorized" 2 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */