Patchwork Fix epilogue for double reduction vectorization

login
register
mail settings
Submitter Ira Rosen
Date July 29, 2010, 6:03 a.m.
Message ID <OF4E6E8ABD.1D4F01EA-ONC225776E.00372366-C225776F.002149BB@il.ibm.com>
Download mbox | patch
Permalink /patch/60194/
State New
Headers show

Comments

Ira Rosen - July 29, 2010, 6:03 a.m.
Hi,

When vectorizing double reductions:

sum = 0
for i
  {
    for j
      sum +=...
  }
use sum

scalar extraction should be done on the exit from the outer loop:

vsum = 0
for vi
  {
    for vj
      vsum += ...
  }
scalar_sum = reduce_vector_to_scalar (vsum)
use scalar_sum

rather than inside the outer loop, as is currently done (the existing code is
correct, but not optimal). This patch fixes the problem.

Bootstrapped and tested on x86_64-suse-linux.
Committed.

Ira

ChangeLog:

	* tree-vect-loop.c (vect_create_epilog_for_reduction): Switch
	to outer loop when creating reduction epilogue for double reduction,
	and switch back to the inner loop when updating the phi nodes.
	Update uses of outer loop exit phi nodes in double reduction (instead
	of uses of reduction).

create:
@@ -3738,7 +3749,45 @@ vect_finalize_reduction:
                     }
                 }
             }
+        }

+      VEC_free (gimple, heap, phis);
+      if (nested_in_vect_loop)
+        {
+          if (double_reduc)
+            loop = outer_loop;
+          else
+            continue;
+        }
+
+      phis = VEC_alloc (gimple, heap, 3);
+      /* Find the loop-closed-use at the loop exit of the original scalar
+         result. (The reduction result is expected to have two immediate uses -
+         one at the latch block, and one at the loop exit). For double
+         reductions we are looking for exit phis of the outer loop.  */
+      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
+        {
+          if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
+            VEC_safe_push (gimple, heap, phis, USE_STMT (use_p));
+          else
+            {
+              if (double_reduc && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
+                {
+                  tree phi_res = PHI_RESULT (USE_STMT (use_p));
+
+                  FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res)
+                    {
+                      if (!flow_bb_inside_loop_p (loop,
+                                             gimple_bb (USE_STMT (phi_use_p))))
+                        VEC_safe_push (gimple, heap, phis,
+                                       USE_STMT (phi_use_p));
+                    }
+                }
+            }
+        }
+
+      for (i = 0; VEC_iterate (gimple, phis, i, exit_phi); i++)
+        {
           /* Replace the uses:  */
           orig_name = PHI_RESULT (exit_phi);
           scalar_result = VEC_index (tree, scalar_results, k);

Patch

Index: tree-vect-loop.c
===================================================================
--- tree-vect-loop.c    (revision 162620)
+++ tree-vect-loop.c    (working copy)
@@ -3101,8 +3101,8 @@  vect_create_epilog_for_reduction (VEC (t
   tree vec_initial_def = NULL;
   tree reduction_op, expr, def;
   tree orig_name, scalar_result;
-  imm_use_iterator imm_iter;
-  use_operand_p use_p;
+  imm_use_iterator imm_iter, phi_imm_iter;
+  use_operand_p use_p, phi_use_p;
   bool extract_scalar_result = false;
   gimple use_stmt, orig_stmt, reduction_phi = NULL;
   bool nested_in_vect_loop = false;
@@ -3264,6 +3264,14 @@  vect_create_epilog_for_reduction (VEC (t
         }
     }

+  /* The epilogue is created for the outer-loop, i.e., for the loop being
+     vectorized.  */
+  if (double_reduc)
+    {
+      loop = outer_loop;
+      exit_bb = single_exit (loop)->dest;
+    }
+
   exit_gsi = gsi_after_labels (exit_bb);

   /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
@@ -3519,6 +3527,9 @@  vect_create_epilog_for_reduction (VEC (t

 vect_finalize_reduction:

+  if (double_reduc)
+    loop = loop->inner;
+
   /* 2.5 Adjust the final result by the initial value of the reduction
         variable. (When such adjustment is not needed, then
         'adjustment_def' is zero).  For example, if code is PLUS we