Patchwork [gomp4,trunk] Fix up simd loop handling (PR tree-optimization/58392)

login
register
mail settings
Submitter Jakub Jelinek
Date Sept. 12, 2013, 2:36 p.m.
Message ID <20130912143611.GW1817@tucnak.redhat.com>
Download mbox | patch
Permalink /patch/274562/
State New
Headers show

Comments

Jakub Jelinek - Sept. 12, 2013, 2:36 p.m.
Hi!

This patch fixes PR58392 and some related issues I've discovered.
move_sese_region_to_fn wasn't remapping loop->simduid VAR_DECL,
and wasn't setting dest_cfun->has_{simduid,force_vect}_loops
when needed, the inliner wasn't copying/remapping simduid nor
force_vect (just conservatively setting cfun->has_{simduid,force_vect}_loops
if src_cfun had those set) and omp-low.c didn't handle outer var refs in
simd loops nested in non-parallel/task contexts well.
Tested on x86_64-linux.  Ok for gomp-4_0-branch and trunk (just the part
without the testcase for now, because Aldy's backport of the stuff this
is fixing is already on the trunk)?

2013-09-12  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/58392
	* tree-cfg.c (move_sese_region_to_fn): Rename loop variable
	to avoid shadowing of outer loop variable.  If
	saved_cfun->has_simduid_loops or saved_cfun->has_force_vect_loops,
	replace_by_duplicate_decl simduid of loops that have it set and
	set dest_cfun->has_simduid_loops and/or
	dest_cfun->has_force_vect_loops.
	* omp-low.c (build_outer_var_ref): Call maybe_lookup_decl_in_outer_ctx
	instead of maybe_lookup_decl.
	* tree-inline.c (copy_loops): Change blocks_to_copy argument to id.
	Use id->blocks_to_copy instead of blocks_to_copy.  Adjust recursive
	call.  Copy over force_vect and copy and remap simduid.  Set
	cfun->has_simduid_loops and/or cfun->has_force_vect_loops.
	(copy_cfg_body): Remove blocks_to_copy argument.  Use
	id->blocks_to_copy instead of blocks_to_copy.  Adjust copy_loops
	caller.  Don't set cfun->has_simduid_loops and/or
	cfun->has_force_vect_loops here.
	(copy_body): Remove blocks_to_copy argument.  Adjust copy_cfg_body
	caller.
	(expand_call_inline, tree_function_versioning): Adjust copy_body
	callers.

	* testsuite/libgomp.c/pr58392.c: New test.


	Jakub
Richard Guenther - Sept. 13, 2013, 7:34 a.m.
On Thu, 12 Sep 2013, Jakub Jelinek wrote:

> Hi!
> 
> This patch fixes PR58392 and some related issues I've discovered.
> move_sese_region_to_fn wasn't remapping loop->simduid VAR_DECL,
> and wasn't setting dest_cfun->has_{simduid,force_vect}_loops
> when needed, the inliner wasn't copying/remapping simduid nor
> force_vect (just conservatively setting cfun->has_{simduid,force_vect}_loops
> if src_cfun had those set) and omp-low.c didn't handle outer var refs in
> simd loops nested in non-parallel/task contexts well.
> Tested on x86_64-linux.  Ok for gomp-4_0-branch and trunk (just the part
> without the testcase for now, because Aldy's backport of the stuff this
> is fixing is already on the trunk)?

Ok.

Thanks,
Richard.

> 2013-09-12  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR tree-optimization/58392
> 	* tree-cfg.c (move_sese_region_to_fn): Rename loop variable
> 	to avoid shadowing of outer loop variable.  If
> 	saved_cfun->has_simduid_loops or saved_cfun->has_force_vect_loops,
> 	replace_by_duplicate_decl simduid of loops that have it set and
> 	set dest_cfun->has_simduid_loops and/or
> 	dest_cfun->has_force_vect_loops.
> 	* omp-low.c (build_outer_var_ref): Call maybe_lookup_decl_in_outer_ctx
> 	instead of maybe_lookup_decl.
> 	* tree-inline.c (copy_loops): Change blocks_to_copy argument to id.
> 	Use id->blocks_to_copy instead of blocks_to_copy.  Adjust recursive
> 	call.  Copy over force_vect and copy and remap simduid.  Set
> 	cfun->has_simduid_loops and/or cfun->has_force_vect_loops.
> 	(copy_cfg_body): Remove blocks_to_copy argument.  Use
> 	id->blocks_to_copy instead of blocks_to_copy.  Adjust copy_loops
> 	caller.  Don't set cfun->has_simduid_loops and/or
> 	cfun->has_force_vect_loops here.
> 	(copy_body): Remove blocks_to_copy argument.  Adjust copy_cfg_body
> 	caller.
> 	(expand_call_inline, tree_function_versioning): Adjust copy_body
> 	callers.
> 
> 	* testsuite/libgomp.c/pr58392.c: New test.
> 
> --- gcc/tree-cfg.c.jj	2013-09-05 09:19:03.000000000 +0200
> +++ gcc/tree-cfg.c	2013-09-12 15:19:40.103267115 +0200
> @@ -6777,10 +6777,10 @@ move_sese_region_to_fn (struct function
>        if (bb->loop_father->header == bb
>  	  && loop_outer (bb->loop_father) == loop)
>  	{
> -	  struct loop *loop = bb->loop_father;
> +	  struct loop *this_loop = bb->loop_father;
>  	  flow_loop_tree_node_remove (bb->loop_father);
> -	  flow_loop_tree_node_add (get_loop (dest_cfun, 0), loop);
> -	  fixup_loop_arrays_after_move (saved_cfun, cfun, loop);
> +	  flow_loop_tree_node_add (get_loop (dest_cfun, 0), this_loop);
> +	  fixup_loop_arrays_after_move (saved_cfun, cfun, this_loop);
>  	}
>  
>        /* Remove loop exits from the outlined region.  */
> @@ -6835,6 +6835,23 @@ move_sese_region_to_fn (struct function
>         outer; outer = loop_outer (outer))
>      outer->num_nodes -= bbs.length ();
>  
> +  if (saved_cfun->has_simduid_loops || saved_cfun->has_force_vect_loops)
> +    {
> +      struct loop *aloop;
> +      for (i = 0; vec_safe_iterate (loops->larray, i, &aloop); i++)
> +	if (aloop != NULL)
> +	  {
> +	    if (aloop->simduid)
> +	      {
> +		replace_by_duplicate_decl (&aloop->simduid, d.vars_map,
> +					   d.to_context);
> +		dest_cfun->has_simduid_loops = true;
> +	      }
> +	    if (aloop->force_vect)
> +	      dest_cfun->has_force_vect_loops = true;
> +	  }
> +    }
> +
>    /* Rewire BLOCK_SUBBLOCKS of orig_block.  */
>    if (orig_block)
>      {
> --- gcc/omp-low.c.jj	2013-09-11 18:19:39.000000000 +0200
> +++ gcc/omp-low.c	2013-09-12 13:55:34.081148580 +0200
> @@ -976,7 +976,7 @@ build_outer_var_ref (tree var, omp_conte
>        if (ctx->outer && is_taskreg_ctx (ctx))
>  	x = lookup_decl (var, ctx->outer);
>        else if (ctx->outer)
> -	x = maybe_lookup_decl (var, ctx->outer);
> +	x = maybe_lookup_decl_in_outer_ctx (var, ctx);
>        if (x == NULL_TREE)
>  	x = var;
>      }
> --- gcc/tree-inline.c.jj	2013-08-27 20:53:28.000000000 +0200
> +++ gcc/tree-inline.c	2013-09-12 16:05:58.348124256 +0200
> @@ -2254,14 +2254,14 @@ maybe_move_debug_stmts_to_successors (co
>     as siblings of DEST_PARENT.  */
>  
>  static void
> -copy_loops (bitmap blocks_to_copy,
> +copy_loops (copy_body_data *id,
>  	    struct loop *dest_parent, struct loop *src_parent)
>  {
>    struct loop *src_loop = src_parent->inner;
>    while (src_loop)
>      {
> -      if (!blocks_to_copy
> -	  || bitmap_bit_p (blocks_to_copy, src_loop->header->index))
> +      if (!id->blocks_to_copy
> +	  || bitmap_bit_p (id->blocks_to_copy, src_loop->header->index))
>  	{
>  	  struct loop *dest_loop = alloc_loop ();
>  
> @@ -2285,8 +2285,19 @@ copy_loops (bitmap blocks_to_copy,
>  	  place_new_loop (cfun, dest_loop);
>  	  flow_loop_tree_node_add (dest_parent, dest_loop);
>  
> +	  if (src_loop->simduid)
> +	    {
> +	      dest_loop->simduid = remap_decl (src_loop->simduid, id);
> +	      cfun->has_simduid_loops = true;
> +	    }
> +	  if (src_loop->force_vect)
> +	    {
> +	      dest_loop->force_vect = true;
> +	      cfun->has_force_vect_loops = true;
> +	    }
> +
>  	  /* Recurse.  */
> -	  copy_loops (blocks_to_copy, dest_loop, src_loop);
> +	  copy_loops (id, dest_loop, src_loop);
>  	}
>        src_loop = src_loop->next;
>      }
> @@ -2315,7 +2326,7 @@ redirect_all_calls (copy_body_data * id,
>  static tree
>  copy_cfg_body (copy_body_data * id, gcov_type count, int frequency_scale,
>  	       basic_block entry_block_map, basic_block exit_block_map,
> -	       bitmap blocks_to_copy, basic_block new_entry)
> +	       basic_block new_entry)
>  {
>    tree callee_fndecl = id->src_fn;
>    /* Original cfun for the callee, doesn't change.  */
> @@ -2379,7 +2390,7 @@ copy_cfg_body (copy_body_data * id, gcov
>  
>    /* Use aux pointers to map the original blocks to copy.  */
>    FOR_EACH_BB_FN (bb, cfun_to_copy)
> -    if (!blocks_to_copy || bitmap_bit_p (blocks_to_copy, bb->index))
> +    if (!id->blocks_to_copy || bitmap_bit_p (id->blocks_to_copy, bb->index))
>        {
>  	basic_block new_bb = copy_bb (id, bb, frequency_scale, count_scale);
>  	bb->aux = new_bb;
> @@ -2393,8 +2404,8 @@ copy_cfg_body (copy_body_data * id, gcov
>    bool can_make_abormal_goto
>      = id->gimple_call && stmt_can_make_abnormal_goto (id->gimple_call);
>    FOR_ALL_BB_FN (bb, cfun_to_copy)
> -    if (!blocks_to_copy
> -        || (bb->index > 0 && bitmap_bit_p (blocks_to_copy, bb->index)))
> +    if (!id->blocks_to_copy
> +	|| (bb->index > 0 && bitmap_bit_p (id->blocks_to_copy, bb->index)))
>        need_debug_cleanup |= copy_edges_for_bb (bb, count_scale, exit_block_map,
>  					       can_make_abormal_goto);
>  
> @@ -2409,12 +2420,10 @@ copy_cfg_body (copy_body_data * id, gcov
>    if (loops_for_fn (src_cfun) != NULL
>        && current_loops != NULL)
>      {
> -      copy_loops (blocks_to_copy, entry_block_map->loop_father,
> +      copy_loops (id, entry_block_map->loop_father,
>  		  get_loop (src_cfun, 0));
>        /* Defer to cfgcleanup to update loop-father fields of basic-blocks.  */
>        loops_state_set (LOOPS_NEED_FIXUP);
> -      cfun->has_force_vect_loops |= src_cfun->has_force_vect_loops;
> -      cfun->has_simduid_loops |= src_cfun->has_simduid_loops;
>      }
>  
>    /* If the loop tree in the source function needed fixup, mark the
> @@ -2424,8 +2433,8 @@ copy_cfg_body (copy_body_data * id, gcov
>  
>    if (gimple_in_ssa_p (cfun))
>      FOR_ALL_BB_FN (bb, cfun_to_copy)
> -      if (!blocks_to_copy
> -	  || (bb->index > 0 && bitmap_bit_p (blocks_to_copy, bb->index)))
> +      if (!id->blocks_to_copy
> +	  || (bb->index > 0 && bitmap_bit_p (id->blocks_to_copy, bb->index)))
>  	copy_phis_for_bb (bb, id);
>  
>    FOR_ALL_BB_FN (bb, cfun_to_copy)
> @@ -2597,7 +2606,7 @@ copy_tree_body (copy_body_data *id)
>  static tree
>  copy_body (copy_body_data *id, gcov_type count, int frequency_scale,
>  	   basic_block entry_block_map, basic_block exit_block_map,
> -	   bitmap blocks_to_copy, basic_block new_entry)
> +	   basic_block new_entry)
>  {
>    tree fndecl = id->src_fn;
>    tree body;
> @@ -2605,7 +2614,7 @@ copy_body (copy_body_data *id, gcov_type
>    /* If this body has a CFG, walk CFG and copy.  */
>    gcc_assert (ENTRY_BLOCK_PTR_FOR_FUNCTION (DECL_STRUCT_FUNCTION (fndecl)));
>    body = copy_cfg_body (id, count, frequency_scale, entry_block_map, exit_block_map,
> -		        blocks_to_copy, new_entry);
> +			new_entry);
>    copy_debug_stmts (id);
>  
>    return body;
> @@ -4209,7 +4218,7 @@ expand_call_inline (basic_block bb, gimp
>       duplicate our body before altering anything.  */
>    copy_body (id, bb->count,
>    	     GCOV_COMPUTE_SCALE (cg_edge->frequency, CGRAPH_FREQ_BASE),
> -	     bb, return_block, NULL, NULL);
> +	     bb, return_block, NULL);
>  
>    /* Reset the escaped solution.  */
>    if (cfun->gimple_df)
> @@ -5336,7 +5345,7 @@ tree_function_versioning (tree old_decl,
>  
>    /* Copy the Function's body.  */
>    copy_body (&id, old_entry_block->count, REG_BR_PROB_BASE,
> -	     ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, blocks_to_copy, new_entry);
> +	     ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, new_entry);
>  
>    /* Renumber the lexical scoping (non-code) blocks consecutively.  */
>    number_blocks (new_decl);
> --- libgomp/testsuite/libgomp.c/pr58392.c.jj	2013-09-12 16:09:10.493065328 +0200
> +++ libgomp/testsuite/libgomp.c/pr58392.c	2013-09-12 16:08:57.000000000 +0200
> @@ -0,0 +1,58 @@
> +/* PR tree-optimization/58392 */
> +/* { dg-do run } */
> +/* { dg-options "-O2" } */
> +/* { dg-additional-options "-msse2" { target sse2_runtime } } */
> +/* { dg-additional-options "-mavx" { target avx_runtime } } */
> +
> +extern void abort (void);
> +int d[32 * 32];
> +
> +__attribute__((noinline, noclone)) int
> +foo (int a, int b)
> +{
> +  int j, c = 0;
> +  #pragma omp parallel for reduction(+: c)
> +    for (j = 0; j < a; j += 32)
> +      {
> +	int l;
> +	#pragma omp simd reduction(+: c)
> +	  for (l = 0; l < b; ++l)
> +	    c += d[j + l];
> +      }
> +  return c;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +bar (int a)
> +{
> +  int j, c = 0;
> +  #pragma omp parallel for simd reduction(+: c)
> +    for (j = 0; j < a; ++j)
> +      c += d[j];
> +  return c;
> +}
> +
> +__attribute__((noinline)) static int
> +baz (int a)
> +{
> +  int j, c = 0;
> +  #pragma omp simd reduction(+: c)
> +    for (j = 0; j < a; ++j)
> +      c += d[j];
> +  return c;
> +}
> +
> +int
> +main ()
> +{
> +  int i;
> +  for (i = 0; i < 32 * 32; i++)
> +    d[i] = (i & 31);
> +  if (foo (32 * 32, 32) != (31 * 32 / 2) * 32)
> +    abort ();
> +  if (bar (32 * 32) != (31 * 32 / 2) * 32)
> +    abort ();
> +  if (baz (32 * 32) != (31 * 32 / 2) * 32)
> +    abort ();
> +  return 0;
> +}
> 
> 	Jakub
> 
>

Patch

--- gcc/tree-cfg.c.jj	2013-09-05 09:19:03.000000000 +0200
+++ gcc/tree-cfg.c	2013-09-12 15:19:40.103267115 +0200
@@ -6777,10 +6777,10 @@  move_sese_region_to_fn (struct function
       if (bb->loop_father->header == bb
 	  && loop_outer (bb->loop_father) == loop)
 	{
-	  struct loop *loop = bb->loop_father;
+	  struct loop *this_loop = bb->loop_father;
 	  flow_loop_tree_node_remove (bb->loop_father);
-	  flow_loop_tree_node_add (get_loop (dest_cfun, 0), loop);
-	  fixup_loop_arrays_after_move (saved_cfun, cfun, loop);
+	  flow_loop_tree_node_add (get_loop (dest_cfun, 0), this_loop);
+	  fixup_loop_arrays_after_move (saved_cfun, cfun, this_loop);
 	}
 
       /* Remove loop exits from the outlined region.  */
@@ -6835,6 +6835,23 @@  move_sese_region_to_fn (struct function
        outer; outer = loop_outer (outer))
     outer->num_nodes -= bbs.length ();
 
+  if (saved_cfun->has_simduid_loops || saved_cfun->has_force_vect_loops)
+    {
+      struct loop *aloop;
+      for (i = 0; vec_safe_iterate (loops->larray, i, &aloop); i++)
+	if (aloop != NULL)
+	  {
+	    if (aloop->simduid)
+	      {
+		replace_by_duplicate_decl (&aloop->simduid, d.vars_map,
+					   d.to_context);
+		dest_cfun->has_simduid_loops = true;
+	      }
+	    if (aloop->force_vect)
+	      dest_cfun->has_force_vect_loops = true;
+	  }
+    }
+
   /* Rewire BLOCK_SUBBLOCKS of orig_block.  */
   if (orig_block)
     {
--- gcc/omp-low.c.jj	2013-09-11 18:19:39.000000000 +0200
+++ gcc/omp-low.c	2013-09-12 13:55:34.081148580 +0200
@@ -976,7 +976,7 @@  build_outer_var_ref (tree var, omp_conte
       if (ctx->outer && is_taskreg_ctx (ctx))
 	x = lookup_decl (var, ctx->outer);
       else if (ctx->outer)
-	x = maybe_lookup_decl (var, ctx->outer);
+	x = maybe_lookup_decl_in_outer_ctx (var, ctx);
       if (x == NULL_TREE)
 	x = var;
     }
--- gcc/tree-inline.c.jj	2013-08-27 20:53:28.000000000 +0200
+++ gcc/tree-inline.c	2013-09-12 16:05:58.348124256 +0200
@@ -2254,14 +2254,14 @@  maybe_move_debug_stmts_to_successors (co
    as siblings of DEST_PARENT.  */
 
 static void
-copy_loops (bitmap blocks_to_copy,
+copy_loops (copy_body_data *id,
 	    struct loop *dest_parent, struct loop *src_parent)
 {
   struct loop *src_loop = src_parent->inner;
   while (src_loop)
     {
-      if (!blocks_to_copy
-	  || bitmap_bit_p (blocks_to_copy, src_loop->header->index))
+      if (!id->blocks_to_copy
+	  || bitmap_bit_p (id->blocks_to_copy, src_loop->header->index))
 	{
 	  struct loop *dest_loop = alloc_loop ();
 
@@ -2285,8 +2285,19 @@  copy_loops (bitmap blocks_to_copy,
 	  place_new_loop (cfun, dest_loop);
 	  flow_loop_tree_node_add (dest_parent, dest_loop);
 
+	  if (src_loop->simduid)
+	    {
+	      dest_loop->simduid = remap_decl (src_loop->simduid, id);
+	      cfun->has_simduid_loops = true;
+	    }
+	  if (src_loop->force_vect)
+	    {
+	      dest_loop->force_vect = true;
+	      cfun->has_force_vect_loops = true;
+	    }
+
 	  /* Recurse.  */
-	  copy_loops (blocks_to_copy, dest_loop, src_loop);
+	  copy_loops (id, dest_loop, src_loop);
 	}
       src_loop = src_loop->next;
     }
@@ -2315,7 +2326,7 @@  redirect_all_calls (copy_body_data * id,
 static tree
 copy_cfg_body (copy_body_data * id, gcov_type count, int frequency_scale,
 	       basic_block entry_block_map, basic_block exit_block_map,
-	       bitmap blocks_to_copy, basic_block new_entry)
+	       basic_block new_entry)
 {
   tree callee_fndecl = id->src_fn;
   /* Original cfun for the callee, doesn't change.  */
@@ -2379,7 +2390,7 @@  copy_cfg_body (copy_body_data * id, gcov
 
   /* Use aux pointers to map the original blocks to copy.  */
   FOR_EACH_BB_FN (bb, cfun_to_copy)
-    if (!blocks_to_copy || bitmap_bit_p (blocks_to_copy, bb->index))
+    if (!id->blocks_to_copy || bitmap_bit_p (id->blocks_to_copy, bb->index))
       {
 	basic_block new_bb = copy_bb (id, bb, frequency_scale, count_scale);
 	bb->aux = new_bb;
@@ -2393,8 +2404,8 @@  copy_cfg_body (copy_body_data * id, gcov
   bool can_make_abormal_goto
     = id->gimple_call && stmt_can_make_abnormal_goto (id->gimple_call);
   FOR_ALL_BB_FN (bb, cfun_to_copy)
-    if (!blocks_to_copy
-        || (bb->index > 0 && bitmap_bit_p (blocks_to_copy, bb->index)))
+    if (!id->blocks_to_copy
+	|| (bb->index > 0 && bitmap_bit_p (id->blocks_to_copy, bb->index)))
       need_debug_cleanup |= copy_edges_for_bb (bb, count_scale, exit_block_map,
 					       can_make_abormal_goto);
 
@@ -2409,12 +2420,10 @@  copy_cfg_body (copy_body_data * id, gcov
   if (loops_for_fn (src_cfun) != NULL
       && current_loops != NULL)
     {
-      copy_loops (blocks_to_copy, entry_block_map->loop_father,
+      copy_loops (id, entry_block_map->loop_father,
 		  get_loop (src_cfun, 0));
       /* Defer to cfgcleanup to update loop-father fields of basic-blocks.  */
       loops_state_set (LOOPS_NEED_FIXUP);
-      cfun->has_force_vect_loops |= src_cfun->has_force_vect_loops;
-      cfun->has_simduid_loops |= src_cfun->has_simduid_loops;
     }
 
   /* If the loop tree in the source function needed fixup, mark the
@@ -2424,8 +2433,8 @@  copy_cfg_body (copy_body_data * id, gcov
 
   if (gimple_in_ssa_p (cfun))
     FOR_ALL_BB_FN (bb, cfun_to_copy)
-      if (!blocks_to_copy
-	  || (bb->index > 0 && bitmap_bit_p (blocks_to_copy, bb->index)))
+      if (!id->blocks_to_copy
+	  || (bb->index > 0 && bitmap_bit_p (id->blocks_to_copy, bb->index)))
 	copy_phis_for_bb (bb, id);
 
   FOR_ALL_BB_FN (bb, cfun_to_copy)
@@ -2597,7 +2606,7 @@  copy_tree_body (copy_body_data *id)
 static tree
 copy_body (copy_body_data *id, gcov_type count, int frequency_scale,
 	   basic_block entry_block_map, basic_block exit_block_map,
-	   bitmap blocks_to_copy, basic_block new_entry)
+	   basic_block new_entry)
 {
   tree fndecl = id->src_fn;
   tree body;
@@ -2605,7 +2614,7 @@  copy_body (copy_body_data *id, gcov_type
   /* If this body has a CFG, walk CFG and copy.  */
   gcc_assert (ENTRY_BLOCK_PTR_FOR_FUNCTION (DECL_STRUCT_FUNCTION (fndecl)));
   body = copy_cfg_body (id, count, frequency_scale, entry_block_map, exit_block_map,
-		        blocks_to_copy, new_entry);
+			new_entry);
   copy_debug_stmts (id);
 
   return body;
@@ -4209,7 +4218,7 @@  expand_call_inline (basic_block bb, gimp
      duplicate our body before altering anything.  */
   copy_body (id, bb->count,
   	     GCOV_COMPUTE_SCALE (cg_edge->frequency, CGRAPH_FREQ_BASE),
-	     bb, return_block, NULL, NULL);
+	     bb, return_block, NULL);
 
   /* Reset the escaped solution.  */
   if (cfun->gimple_df)
@@ -5336,7 +5345,7 @@  tree_function_versioning (tree old_decl,
 
   /* Copy the Function's body.  */
   copy_body (&id, old_entry_block->count, REG_BR_PROB_BASE,
-	     ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, blocks_to_copy, new_entry);
+	     ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, new_entry);
 
   /* Renumber the lexical scoping (non-code) blocks consecutively.  */
   number_blocks (new_decl);
--- libgomp/testsuite/libgomp.c/pr58392.c.jj	2013-09-12 16:09:10.493065328 +0200
+++ libgomp/testsuite/libgomp.c/pr58392.c	2013-09-12 16:08:57.000000000 +0200
@@ -0,0 +1,58 @@ 
+/* PR tree-optimization/58392 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+/* { dg-additional-options "-msse2" { target sse2_runtime } } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+extern void abort (void);
+int d[32 * 32];
+
+__attribute__((noinline, noclone)) int
+foo (int a, int b)
+{
+  int j, c = 0;
+  #pragma omp parallel for reduction(+: c)
+    for (j = 0; j < a; j += 32)
+      {
+	int l;
+	#pragma omp simd reduction(+: c)
+	  for (l = 0; l < b; ++l)
+	    c += d[j + l];
+      }
+  return c;
+}
+
+__attribute__((noinline, noclone)) int
+bar (int a)
+{
+  int j, c = 0;
+  #pragma omp parallel for simd reduction(+: c)
+    for (j = 0; j < a; ++j)
+      c += d[j];
+  return c;
+}
+
+__attribute__((noinline)) static int
+baz (int a)
+{
+  int j, c = 0;
+  #pragma omp simd reduction(+: c)
+    for (j = 0; j < a; ++j)
+      c += d[j];
+  return c;
+}
+
+int
+main ()
+{
+  int i;
+  for (i = 0; i < 32 * 32; i++)
+    d[i] = (i & 31);
+  if (foo (32 * 32, 32) != (31 * 32 / 2) * 32)
+    abort ();
+  if (bar (32 * 32) != (31 * 32 / 2) * 32)
+    abort ();
+  if (baz (32 * 32) != (31 * 32 / 2) * 32)
+    abort ();
+  return 0;
+}