diff mbox

Fix !$omp workshare (PR fortran/69128)

Message ID 20160107214926.GR18720@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek Jan. 7, 2016, 9:49 p.m. UTC
Hi!

As the testcase shows, gfc_trans_scalarized_loop_end can be called
multiple times (and not just for different dimensions of the same loop)
on a single assignment, and we could in that case when inside of
!$omp workshare generate nested !$omp do, which is obviously incorrect.

Fixed by making sure we do it only in the toplevel loop nest generated from
the statement.  Bootstrapped/regtested on x86_64-linux and i686-linux, will
commit tomorrow.

2016-01-07  Jakub Jelinek  <jakub@redhat.com>

	PR fortran/69128
	* trans.h (OMPWS_SCALARIZER_BODY): Define.
	(OMPWS_NOWAIT): Renumber.
	* trans-stmt.c (gfc_trans_where_3): Only set OMPWS_SCALARIZER_WS
	if OMPWS_SCALARIZER_BODY is not set already, and set also
	OMPWS_SCALARIZER_BODY until the final loop creation.
	* trans-expr.c (gfc_trans_assignment_1): Likewise.
	* trans-openmp.c (gfc_trans_omp_workshare): Also clear
	OMPWS_SCALARIZER_BODY.
	* trans-array.c (gfc_trans_scalarized_loop_end): Don't create
	OMP_FOR if OMPWS_SCALARIZER_BODY is set.

	* gfortran.dg/gomp/pr69128.f90: New test.


	Jakub

Comments

Paul Richard Thomas Jan. 7, 2016, 10:05 p.m. UTC | #1
Hi Jakub,

Thanks for responding so quickly - it looks good to me.

The PR is marked as a 4.9/5/6 regression. Do you intend to backport to
5, at least?

Cheers

Paul

On 7 January 2016 at 22:49, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> As the testcase shows, gfc_trans_scalarized_loop_end can be called
> multiple times (and not just for different dimensions of the same loop)
> on a single assignment, and we could in that case when inside of
> !$omp workshare generate nested !$omp do, which is obviously incorrect.
>
> Fixed by making sure we do it only in the toplevel loop nest generated from
> the statement.  Bootstrapped/regtested on x86_64-linux and i686-linux, will
> commit tomorrow.
>
> 2016-01-07  Jakub Jelinek  <jakub@redhat.com>
>
>         PR fortran/69128
>         * trans.h (OMPWS_SCALARIZER_BODY): Define.
>         (OMPWS_NOWAIT): Renumber.
>         * trans-stmt.c (gfc_trans_where_3): Only set OMPWS_SCALARIZER_WS
>         if OMPWS_SCALARIZER_BODY is not set already, and set also
>         OMPWS_SCALARIZER_BODY until the final loop creation.
>         * trans-expr.c (gfc_trans_assignment_1): Likewise.
>         * trans-openmp.c (gfc_trans_omp_workshare): Also clear
>         OMPWS_SCALARIZER_BODY.
>         * trans-array.c (gfc_trans_scalarized_loop_end): Don't create
>         OMP_FOR if OMPWS_SCALARIZER_BODY is set.
>
>         * gfortran.dg/gomp/pr69128.f90: New test.
>
> --- gcc/fortran/trans.h.jj      2016-01-07 18:38:20.274008188 +0100
> +++ gcc/fortran/trans.h 2016-01-07 18:42:25.187630832 +0100
> @@ -1039,7 +1039,9 @@ extern const char gfc_msg_wrong_return[]
>                                            construct is not workshared.  */
>  #define OMPWS_SCALARIZER_WS    4       /* Set if scalarizer should attempt
>                                            to create parallel loops.  */
> -#define OMPWS_NOWAIT           8       /* Use NOWAIT on OMP_FOR.  */
> +#define OMPWS_SCALARIZER_BODY  8       /* Set if handling body of potential
> +                                          parallel loop.  */
> +#define OMPWS_NOWAIT           16      /* Use NOWAIT on OMP_FOR.  */
>  extern int ompws_flags;
>
>  #endif /* GFC_TRANS_H */
> --- gcc/fortran/trans-stmt.c.jj 2016-01-07 18:38:20.269008257 +0100
> +++ gcc/fortran/trans-stmt.c    2016-01-07 18:42:25.186630846 +0100
> @@ -5057,10 +5057,15 @@ gfc_trans_where_3 (gfc_code * cblock, gf
>    gfc_loopinfo loop;
>    gfc_ss *edss = 0;
>    gfc_ss *esss = 0;
> +  bool maybe_workshare = false;
>
>    /* Allow the scalarizer to workshare simple where loops.  */
> -  if (ompws_flags & OMPWS_WORKSHARE_FLAG)
> -    ompws_flags |= OMPWS_SCALARIZER_WS;
> +  if ((ompws_flags & (OMPWS_WORKSHARE_FLAG | OMPWS_SCALARIZER_BODY))
> +      == OMPWS_WORKSHARE_FLAG)
> +    {
> +      maybe_workshare = true;
> +      ompws_flags |= OMPWS_SCALARIZER_WS | OMPWS_SCALARIZER_BODY;
> +    }
>
>    cond = cblock->expr1;
>    tdst = cblock->next->expr1;
> @@ -5160,6 +5165,8 @@ gfc_trans_where_3 (gfc_code * cblock, gf
>    gfc_add_expr_to_block (&body, tmp);
>    gfc_add_block_to_block (&body, &cse.post);
>
> +  if (maybe_workshare)
> +    ompws_flags &= ~OMPWS_SCALARIZER_BODY;
>    gfc_trans_scalarizing_loops (&loop, &body);
>    gfc_add_block_to_block (&block, &loop.pre);
>    gfc_add_block_to_block (&block, &loop.post);
> --- gcc/fortran/trans-openmp.c.jj       2016-01-07 18:38:20.279008119 +0100
> +++ gcc/fortran/trans-openmp.c  2016-01-07 18:42:25.188630818 +0100
> @@ -4297,7 +4297,7 @@ gfc_trans_omp_workshare (gfc_code *code,
>
>        /* By default, every gfc_code is a single unit of work.  */
>        ompws_flags |= OMPWS_CURR_SINGLEUNIT;
> -      ompws_flags &= ~OMPWS_SCALARIZER_WS;
> +      ompws_flags &= ~(OMPWS_SCALARIZER_WS | OMPWS_SCALARIZER_BODY);
>
>        switch (code->op)
>         {
> --- gcc/fortran/trans-array.c.jj        2016-01-07 18:38:20.284008050 +0100
> +++ gcc/fortran/trans-array.c   2016-01-07 18:42:25.191630777 +0100
> @@ -3601,7 +3601,8 @@ gfc_trans_scalarized_loop_end (gfc_loopi
>    tree init;
>    tree incr;
>
> -  if ((ompws_flags & (OMPWS_WORKSHARE_FLAG | OMPWS_SCALARIZER_WS))
> +  if ((ompws_flags & (OMPWS_WORKSHARE_FLAG | OMPWS_SCALARIZER_WS
> +                     | OMPWS_SCALARIZER_BODY))
>        == (OMPWS_WORKSHARE_FLAG | OMPWS_SCALARIZER_WS)
>        && n == loop->dimen - 1)
>      {
> --- gcc/fortran/trans-expr.c.jj 2016-01-07 18:38:20.289007981 +0100
> +++ gcc/fortran/trans-expr.c    2016-01-07 18:42:25.193630749 +0100
> @@ -9160,6 +9160,7 @@ gfc_trans_assignment_1 (gfc_expr * expr1
>    bool scalar_to_array;
>    tree string_length;
>    int n;
> +  bool maybe_workshare = false;
>
>    /* Assignment of the form lhs = rhs.  */
>    gfc_start_block (&block);
> @@ -9234,8 +9235,13 @@ gfc_trans_assignment_1 (gfc_expr * expr1
>         }
>
>        /* Allow the scalarizer to workshare array assignments.  */
> -      if ((ompws_flags & OMPWS_WORKSHARE_FLAG) && loop.temp_ss == NULL)
> -       ompws_flags |= OMPWS_SCALARIZER_WS;
> +      if ((ompws_flags & (OMPWS_WORKSHARE_FLAG | OMPWS_SCALARIZER_BODY))
> +         == OMPWS_WORKSHARE_FLAG
> +         && loop.temp_ss == NULL)
> +       {
> +         maybe_workshare = true;
> +         ompws_flags |= OMPWS_SCALARIZER_WS | OMPWS_SCALARIZER_BODY;
> +       }
>
>        /* Start the scalarized loop body.  */
>        gfc_start_scalarized_body (&loop, &body);
> @@ -9384,6 +9390,9 @@ gfc_trans_assignment_1 (gfc_expr * expr1
>             gfc_add_expr_to_block (&loop.code[expr1->rank - 1], tmp);
>         }
>
> +      if (maybe_workshare)
> +       ompws_flags &= ~OMPWS_SCALARIZER_BODY;
> +
>        /* Generate the copying loops.  */
>        gfc_trans_scalarizing_loops (&loop, &body);
>
> --- gcc/testsuite/gfortran.dg/gomp/pr69128.f90.jj       2016-01-07 18:58:59.044893885 +0100
> +++ gcc/testsuite/gfortran.dg/gomp/pr69128.f90  2016-01-07 18:58:51.000000000 +0100
> @@ -0,0 +1,23 @@
> +! PR fortran/69128
> +! { dg-do compile }
> +
> +program test
> +  implicit none
> +  interface
> +    subroutine use(b, c)
> +      real, allocatable :: b(:), c(:)
> +    end subroutine
> +  end interface
> +  real, allocatable :: a(:,:), b(:), c(:)
> +  integer :: dim1, dim2, i,j
> +  dim1=10000
> +  dim2=500
> +  allocate(a(dim1,dim2),b(dim1),c(dim1))
> +  call random_number(a)
> +
> +!$omp parallel workshare
> +  b(:) = maxval(a(:,:), dim=2)
> +  c(:) = sum(a(:,:), dim=2)
> +!$omp end parallel workshare
> +  call use(b, c)
> +end program
>
>         Jakub
diff mbox

Patch

--- gcc/fortran/trans.h.jj	2016-01-07 18:38:20.274008188 +0100
+++ gcc/fortran/trans.h	2016-01-07 18:42:25.187630832 +0100
@@ -1039,7 +1039,9 @@  extern const char gfc_msg_wrong_return[]
 					   construct is not workshared.  */
 #define OMPWS_SCALARIZER_WS	4	/* Set if scalarizer should attempt
 					   to create parallel loops.  */
-#define OMPWS_NOWAIT		8	/* Use NOWAIT on OMP_FOR.  */
+#define OMPWS_SCALARIZER_BODY	8	/* Set if handling body of potential
+					   parallel loop.  */
+#define OMPWS_NOWAIT		16	/* Use NOWAIT on OMP_FOR.  */
 extern int ompws_flags;
 
 #endif /* GFC_TRANS_H */
--- gcc/fortran/trans-stmt.c.jj	2016-01-07 18:38:20.269008257 +0100
+++ gcc/fortran/trans-stmt.c	2016-01-07 18:42:25.186630846 +0100
@@ -5057,10 +5057,15 @@  gfc_trans_where_3 (gfc_code * cblock, gf
   gfc_loopinfo loop;
   gfc_ss *edss = 0;
   gfc_ss *esss = 0;
+  bool maybe_workshare = false;
 
   /* Allow the scalarizer to workshare simple where loops.  */
-  if (ompws_flags & OMPWS_WORKSHARE_FLAG)
-    ompws_flags |= OMPWS_SCALARIZER_WS;
+  if ((ompws_flags & (OMPWS_WORKSHARE_FLAG | OMPWS_SCALARIZER_BODY))
+      == OMPWS_WORKSHARE_FLAG)
+    {
+      maybe_workshare = true;
+      ompws_flags |= OMPWS_SCALARIZER_WS | OMPWS_SCALARIZER_BODY;
+    }
 
   cond = cblock->expr1;
   tdst = cblock->next->expr1;
@@ -5160,6 +5165,8 @@  gfc_trans_where_3 (gfc_code * cblock, gf
   gfc_add_expr_to_block (&body, tmp);
   gfc_add_block_to_block (&body, &cse.post);
 
+  if (maybe_workshare)
+    ompws_flags &= ~OMPWS_SCALARIZER_BODY;
   gfc_trans_scalarizing_loops (&loop, &body);
   gfc_add_block_to_block (&block, &loop.pre);
   gfc_add_block_to_block (&block, &loop.post);
--- gcc/fortran/trans-openmp.c.jj	2016-01-07 18:38:20.279008119 +0100
+++ gcc/fortran/trans-openmp.c	2016-01-07 18:42:25.188630818 +0100
@@ -4297,7 +4297,7 @@  gfc_trans_omp_workshare (gfc_code *code,
 
       /* By default, every gfc_code is a single unit of work.  */
       ompws_flags |= OMPWS_CURR_SINGLEUNIT;
-      ompws_flags &= ~OMPWS_SCALARIZER_WS;
+      ompws_flags &= ~(OMPWS_SCALARIZER_WS | OMPWS_SCALARIZER_BODY);
 
       switch (code->op)
 	{
--- gcc/fortran/trans-array.c.jj	2016-01-07 18:38:20.284008050 +0100
+++ gcc/fortran/trans-array.c	2016-01-07 18:42:25.191630777 +0100
@@ -3601,7 +3601,8 @@  gfc_trans_scalarized_loop_end (gfc_loopi
   tree init;
   tree incr;
 
-  if ((ompws_flags & (OMPWS_WORKSHARE_FLAG | OMPWS_SCALARIZER_WS))
+  if ((ompws_flags & (OMPWS_WORKSHARE_FLAG | OMPWS_SCALARIZER_WS
+		      | OMPWS_SCALARIZER_BODY))
       == (OMPWS_WORKSHARE_FLAG | OMPWS_SCALARIZER_WS)
       && n == loop->dimen - 1)
     {
--- gcc/fortran/trans-expr.c.jj	2016-01-07 18:38:20.289007981 +0100
+++ gcc/fortran/trans-expr.c	2016-01-07 18:42:25.193630749 +0100
@@ -9160,6 +9160,7 @@  gfc_trans_assignment_1 (gfc_expr * expr1
   bool scalar_to_array;
   tree string_length;
   int n;
+  bool maybe_workshare = false;
 
   /* Assignment of the form lhs = rhs.  */
   gfc_start_block (&block);
@@ -9234,8 +9235,13 @@  gfc_trans_assignment_1 (gfc_expr * expr1
 	}
 
       /* Allow the scalarizer to workshare array assignments.  */
-      if ((ompws_flags & OMPWS_WORKSHARE_FLAG) && loop.temp_ss == NULL)
-	ompws_flags |= OMPWS_SCALARIZER_WS;
+      if ((ompws_flags & (OMPWS_WORKSHARE_FLAG | OMPWS_SCALARIZER_BODY))
+	  == OMPWS_WORKSHARE_FLAG
+	  && loop.temp_ss == NULL)
+	{
+	  maybe_workshare = true;
+	  ompws_flags |= OMPWS_SCALARIZER_WS | OMPWS_SCALARIZER_BODY;
+	}
 
       /* Start the scalarized loop body.  */
       gfc_start_scalarized_body (&loop, &body);
@@ -9384,6 +9390,9 @@  gfc_trans_assignment_1 (gfc_expr * expr1
 	    gfc_add_expr_to_block (&loop.code[expr1->rank - 1], tmp);
 	}
 
+      if (maybe_workshare)
+	ompws_flags &= ~OMPWS_SCALARIZER_BODY;
+
       /* Generate the copying loops.  */
       gfc_trans_scalarizing_loops (&loop, &body);
 
--- gcc/testsuite/gfortran.dg/gomp/pr69128.f90.jj	2016-01-07 18:58:59.044893885 +0100
+++ gcc/testsuite/gfortran.dg/gomp/pr69128.f90	2016-01-07 18:58:51.000000000 +0100
@@ -0,0 +1,23 @@ 
+! PR fortran/69128
+! { dg-do compile }
+
+program test
+  implicit none
+  interface
+    subroutine use(b, c)
+      real, allocatable :: b(:), c(:)
+    end subroutine
+  end interface
+  real, allocatable :: a(:,:), b(:), c(:)
+  integer :: dim1, dim2, i,j
+  dim1=10000
+  dim2=500
+  allocate(a(dim1,dim2),b(dim1),c(dim1))
+  call random_number(a)
+
+!$omp parallel workshare
+  b(:) = maxval(a(:,:), dim=2)
+  c(:) = sum(a(:,:), dim=2)
+!$omp end parallel workshare
+  call use(b, c)
+end program