Patchwork [fortran] Fix PR 50690

login
register
mail settings
Submitter Thomas Koenig
Date Dec. 8, 2011, 7:48 p.m.
Message ID <4EE1148F.8000408@netcologne.de>
Download mbox | patch
Permalink /patch/130218/
State New
Headers show

Comments

Thomas Koenig - Dec. 8, 2011, 7:48 p.m.
Hello world,

this is what I hope is the final round of the OMP front-end optimization
patch.  This one ignores outer workshares when doing function
elimination within omp do and similar blocks.

Regression-tested.  OK for trunk?

	Thomas


2011-12-02  Thomas Koenig  <tkoenig@gcc.gnu.org>

         PR fortran/50690
         * frontend-passes.c (omp_level):  New variable.
         (omp_size):  New variable.
         (omp_block):  New variable.
         (gfc_run_passes):  Allocate and deallocate omp_block, set
         omp_size.
         (cfe_expr_0):  Don't eliminiate common function if it would put
         the variable immediately into a WORKSHARE construct.
         (optimize_namespace):  Set omp_level.
         (gfc_code_walker):  Keep track of OMP PARALLEL and OMP WORKSHARE
         constructs.

2011-12-02  Thomas Koenig  <tkoenig@gcc.gnu.org>

         PR fortran/50690
         * gfortran.dg/gomp/workshare2.f90:  New test.
         * gfortran.dg/gomp/workshare3.f90:  New test.
! { dg-do compile }
! { dg-options "-ffrontend-optimize -fdump-tree-original" }
! Test that common function elimination is done within the OMP parallel
! blocks even if there is a workshare around it.
program foo
  implicit none
  integer, parameter :: n = 100000000
  real, parameter :: eps = 3e-7
  integer :: i,j
  real :: A(n), B(5), C(n)
  real :: tmp
  B(1) = 3.344
  tmp = B(1)
  do i=1,10
     call random_number(a)
     c = a
     !$omp parallel workshare
     !$omp parallel default(shared)
     !$omp do
     do j=1,n
       A(j) = A(j)*cos(B(1))+A(j)*cos(B(1))
     end do
     !$omp end do
     !$omp end parallel
     !$omp end parallel workshare
  end do

  c = c*cos(b(1))+ c*cos(b(1))

  do j=1,n
     if (abs(a(j)-c(j)) > eps) then
        print *,1,j,a(j), c(j)
        call abort
     end if
  end do

end program foo
! { dg-final { scan-tree-dump-times "__builtin_cosf" 2 "original" } }
! { dg-final { cleanup-tree-dump "original" } }
! { dg-do compile }
! { dg-options "-ffrontend-optimize -fdump-tree-original" }
! PR 50690 - this used to ICE because workshare could not handle
! BLOCKs.
! To test for correct execution, run this program (but don't forget
! to unset the stack limit).
program foo
  implicit none
  integer, parameter :: n = 100000000
  real, parameter :: eps = 3e-7
  integer :: i,j
  real :: A(n), B(5), C(n)
  real :: tmp
  B(1) = 3.344
  tmp = B(1)
  do i=1,10
     call random_number(a)
     c = a
     !$omp parallel default(shared)
     !$omp workshare
     A(:) = A(:)*cos(B(1))+A(:)*cos(B(1))
     !$omp end workshare nowait
     !$omp end parallel ! sync is implied here
  end do

  c = c*tmp + c*tmp

  do j=1,n
     if (abs(a(j)-c(j)) > eps) then
        print *,1,j,a(j), c(j)
        call abort
     end if
  end do

  do i=1,10
     call random_number(a)
     c = a
     !$omp parallel workshare default(shared)
     A(:) = A(:)*cos(B(1))+A(:)*cos(B(1))
     !$omp end parallel workshare
  end do

  c = c*tmp + c*tmp
  do j=1,n
     if (abs(a(j)-c(j)) > eps) then
        print *,2,j,a(j), c(j)
        call abort
     end if
  end do

end program foo
! { dg-final { scan-tree-dump-times "__var" 0 "original" } }
! { dg-final { cleanup-tree-dump "original" } }
Jakub Jelinek - Dec. 8, 2011, 8:02 p.m.
On Thu, Dec 08, 2011 at 08:48:31PM +0100, Thomas Koenig wrote:
>  /* Entry point - run all passes for a namespace.  So far, only an
>     optimization pass is run.  */
>  
> @@ -76,12 +83,15 @@ gfc_run_passes (gfc_namespace *ns)
>      {
>        expr_size = 20;
>        expr_array = XNEWVEC(gfc_expr **, expr_size);
> +      omp_size = 20;
> +      omp_block = XCNEWVEC(gfc_code *, omp_size);

Both of these arrays should be really vec.h vectors, it doesn't
make any sense to handcode the same thing everywhere.
You can then start with NULL vectors and push something using VEC_safe_push
only when needed and let it handle reallocation etc.

	Jakub
Thomas Koenig - Dec. 8, 2011, 8:36 p.m.
Hi Jakub,

> Both of these arrays should be really vec.h vectors, it doesn't
> make any sense to handcode the same thing everywhere.
> You can then start with NULL vectors and push something using VEC_safe_push
> only when needed and let it handle reallocation etc.

I tried that originally, but could not get it to work; getting the
macros right just didn't happen.

A cleanup patch would be OK, though.

	Thomas
Jakub Jelinek - Dec. 8, 2011, 9 p.m.
On Thu, Dec 08, 2011 at 09:36:13PM +0100, Thomas Koenig wrote:
> >Both of these arrays should be really vec.h vectors, it doesn't
> >make any sense to handcode the same thing everywhere.
> >You can then start with NULL vectors and push something using VEC_safe_push
> >only when needed and let it handle reallocation etc.
>
> I tried that originally, but could not get it to work; getting the
> macros right just didn't happen.

Untested:

#include "vec.h"
...
/* static int omp_level;
   static int omp_size;
   static gfc_code **omp_block; */
typedef gfc_code *gfc_codep;
DEF_VEC_P(gfc_codep);
DEF_VEC_ALLOC_P(gfc_codep,heap);
static VEC(gfc_codep, heap) *omp_block;
...
    /* omp_size = 20;
       omp_block = XCNEWVEC(gfc_code *, omp_size); - Just remove these, VEC_free clears omp_block.  */
...
    /* XDELETEVEC (omp_block); */
    VEC_free (gfc_codep, heap, omp_block);
...
  /* if (omp_level > 0)
       {
	 gfc_exec_op op;
	 op = omp_block[omp_level - 1]->op; */
  if (!VEC_empty (gfc_codep, omp_block))
    {
      gfc_exec_op op;
      op = VEC_last (gfc_codep, omp_block)->op;
...
  /* omp_level = 0; */
  VEC_truncate (gfc_codep, omp_block, 0);
...
   /* if (omp_level >= omp_size)
	{
	  omp_size += omp_size;
	  omp_block = XRESIZEVEC(gfc_code *, omp_block, omp_size);
	}

      omp_block[omp_level] = co;
      omp_level ++; */
    VEC_safe_push (gfc_codep, heap, omp_block, op);
...
  /* if (in_omp)
      omp_level --; */
  if (in_omp)
    VEC_pop (gfc_codep, omp_block);

	Jakub
Jakub Jelinek - Dec. 8, 2011, 9:57 p.m.
On Thu, Dec 08, 2011 at 08:48:31PM +0100, Thomas Koenig wrote:
> this is what I hope is the final round of the OMP front-end optimization
> patch.  This one ignores outer workshares when doing function
> elimination within omp do and similar blocks.

Sorry, stopped reading the patch details once noticing you are
reimplementing vec.h.  Reading it again, isn't it overkill to keep the
vector?  All you need is a bool and a way to restore its previous state.

static bool in_omp_workshare;

and in gfc_code_walker:

  bool save_in_omp_workshare = in_omp_workshare;
  switch (...)
    case EXEC_OMP_WORKSHARE:
    case EXEC_OMP_PARALLEL_WORKSHARE:
      in_omp_workshare = true;
      ...
    case EXEC_OMP_PARALLEL:
    case EXEC_OMP_PARALLEL_DO:
    case EXEC_OMP_PARALLEL_SECTIONS:
      in_omp_workshare = false;
      ...
  ...
  in_omp_workshare = save_in_omp_workshare;

That said, it would be nice if the other vector-ish array got vec.h-ized.

> ! { dg-do compile }
> ! { dg-options "-ffrontend-optimize -fdump-tree-original" }
> ! PR 50690 - this used to ICE because workshare could not handle
> ! BLOCKs.
> ! To test for correct execution, run this program (but don't forget
> ! to unset the stack limit).
> program foo
>   implicit none
>   integer, parameter :: n = 100000000

Why can't you use a reasonable size, like 100000?

	Jakub

Patch

Index: frontend-passes.c
===================================================================
--- frontend-passes.c	(Revision 181809)
+++ frontend-passes.c	(Arbeitskopie)
@@ -66,6 +66,13 @@  static gfc_namespace *current_ns;
 
 static int forall_level;
 
+/* Keep track of the OMP blocks, so we can mark variables introduced
+   by optimizations as private.  */
+
+static int omp_level;
+static int omp_size;
+static gfc_code **omp_block;
+
 /* Entry point - run all passes for a namespace.  So far, only an
    optimization pass is run.  */
 
@@ -76,12 +83,15 @@  gfc_run_passes (gfc_namespace *ns)
     {
       expr_size = 20;
       expr_array = XNEWVEC(gfc_expr **, expr_size);
+      omp_size = 20;
+      omp_block = XCNEWVEC(gfc_code *, omp_size);
 
       optimize_namespace (ns);
       if (gfc_option.dump_fortran_optimized)
 	gfc_dump_parse_tree (ns, stdout);
 
       XDELETEVEC (expr_array);
+      XDELETEVEC (omp_block);
     }
 }
 
@@ -367,6 +377,23 @@  cfe_expr_0 (gfc_expr **e, int *walk_subtrees,
   int i,j;
   gfc_expr *newvar;
 
+  /* If we are within an OMP WORKSHARE or OMP PARALLEL WORKSHARE
+     construct, don't do this optimization.  Only look at the
+     innermost level because an EXEC_OMP_PARALLEL{,_DO,_SECTIONS}
+     nested in an EXEC_OMP_WORKSHARE/EXEC_OMP_PARALLEL_WORKSHARE
+     is OK.  */
+  if (omp_level > 0)
+    {
+      gfc_exec_op op;
+      op = omp_block[omp_level - 1]->op;
+
+      if (op == EXEC_OMP_WORKSHARE || op == EXEC_OMP_PARALLEL_WORKSHARE)
+	{
+	  *walk_subtrees = 0;
+	  return 0;
+	}
+    }
+
   expr_count = 0;
 
   gfc_expr_walker (e, cfe_register_funcs, NULL);
@@ -505,6 +532,7 @@  optimize_namespace (gfc_namespace *ns)
 
   current_ns = ns;
   forall_level = 0;
+  omp_level = 0;
 
   gfc_code_walker (&ns->code, convert_do_while, dummy_expr_callback, NULL);
   gfc_code_walker (&ns->code, cfe_code, cfe_expr_0, NULL);
@@ -1150,11 +1178,13 @@  gfc_code_walker (gfc_code **c, walk_code_fn_t code
 	  gfc_actual_arglist *a;
 	  gfc_code *co;
 	  gfc_association_list *alist;
+	  bool in_omp;
 
 	  /* There might be statement insertions before the current code,
 	     which must not affect the expression walker.  */
 
 	  co = *c;
+	  in_omp = false;
 
 	  switch (co->op)
 	    {
@@ -1330,14 +1360,32 @@  gfc_code_walker (gfc_code **c, walk_code_fn_t code
 	      WALK_SUBEXPR (co->ext.dt->extra_comma);
 	      break;
 
-	    case EXEC_OMP_DO:
 	    case EXEC_OMP_PARALLEL:
 	    case EXEC_OMP_PARALLEL_DO:
 	    case EXEC_OMP_PARALLEL_SECTIONS:
 	    case EXEC_OMP_PARALLEL_WORKSHARE:
+	    case EXEC_OMP_WORKSHARE:
+
+	      /* Register all OMP PARALLEL and WORKSHARE constructs
+		 on a stack so they can be handled separately for
+		 common function elimination.  */
+
+	      in_omp = 1;
+
+	      if (omp_level >= omp_size)
+		{
+		  omp_size += omp_size;
+		  omp_block = XRESIZEVEC(gfc_code *, omp_block, omp_size);
+		}
+
+	      omp_block[omp_level] = co;
+	      omp_level ++;
+
+	      /* Fall through. */
+
 	    case EXEC_OMP_SECTIONS:
+	    case EXEC_OMP_DO:
 	    case EXEC_OMP_SINGLE:
-	    case EXEC_OMP_WORKSHARE:
 	    case EXEC_OMP_END_SINGLE:
 	    case EXEC_OMP_TASK:
 	      if (co->ext.omp_clauses)
@@ -1366,6 +1414,9 @@  gfc_code_walker (gfc_code **c, walk_code_fn_t code
 	  if (co->op == EXEC_FORALL)
 	    forall_level --;
 
+	  if (in_omp)
+	    omp_level --;
+
 	}
     }
   return 0;