Patchwork [fortran,5/5] PR 45648: Inline transpose part 2: Do dependency analysis in case of transpose optimization.

login
register
mail settings
Submitter Mikael Morin
Date Sept. 20, 2010, 10:21 p.m.
Message ID <20100920222130.27269.70079@gimli.local>
Download mbox | patch
Permalink /patch/65263/
State New
Headers show

Comments

Mikael Morin - Sept. 20, 2010, 10:21 p.m.
This disables the transpose optimization in case there is an alias between arguments.
For example:
!!!
call foo(a, transpose(a))
!!!
I couldn't see any better way than setting a force_tmp field in the gfc_se struct before entering gfc_conv_array_parameter for argument evaluation, and then using it to set the need_tmp flag in gfc_conv_expr_descriptor.

Ok for trunk ?
2010-09-20  Mikael Morin  <mikael@gcc.gnu.org>

	PR fortran/45648
	* trans.h (gfc_se): New field force_tmp. 
	* trans-expr.c (gfc_conv_procedure_call): Check for argument alias
	and set parmse.force_tmp if some alias is found. 
	* trans-array.c (gfc_conv_expr_descriptor): Force a temporary creation
	if se->force_tmp is set.

2010-09-20  Mikael Morin  <mikael@gcc.gnu.org>

	PR fortran/45648
	* gfortran.dg/inline_transpose_1.f90: Add function calls with aliasing
	arguments checks. Update temporary counts.
	* gfortran.dg/transpose_optimization_1.f90: New.
diff --git a/inline_transpose_1.f90 b/inline_transpose_1.f90
index 36198f8..a364842 100644
--- a/inline_transpose_1.f90
+++ b/inline_transpose_1.f90
@@ -168,6 +168,19 @@
   call toto2 (c, transpose (a))
   if (any (c /= 2 * q + 13)) call abort
 
+  call toto2 (e, transpose(e))           ! { dg-warning "Creating array temporary" }
+  if (any (e /= 4 * r + 13)) call abort
+
+  call toto2 (e, transpose(transpose(e)))           ! { dg-warning "Creating array temporary" }
+  if (any (e /= 4 * r + 14)) call abort
+
+
+  call toto3 (e, transpose(e))
+  if (any (e /= 4 * r + 14)) call abort
+
+
+  call titi (nx, e, transpose(e))           ! { dg-warning "Creating array temporary" }
+  if (any (e /= 4 * s + 17)) call abort
 
   contains
 
@@ -199,17 +212,26 @@
     x = y + 1
   end subroutine toto2
 
+  subroutine toto3 (x, y)
+    integer, dimension(:,:), intent(in) :: x, y
+  end subroutine toto3
+
 end
 
+subroutine titi (n, x, y)
+  integer :: n, x(n,n), y(n,n)
+  x = y + 3
+end subroutine titi
+
 ! No call to transpose
 ! { dg-final { scan-tree-dump-times "_gfortran_transpose" 0 "original" } }
 !
-! 21 temporaries
-! { dg-final { scan-tree-dump-times "struct\[^\\n\]*atmp" 21 "original" } }
+! 24 temporaries
+! { dg-final { scan-tree-dump-times "struct\[^\\n\]*atmp" 24 "original" } }
 !
 ! 2 tests optimized out
-! { dg-final { scan-tree-dump-times "_gfortran_abort" 35 "original" } }
-! { # Commented out as failing at -O0: dg-final { scan-tree-dump-times "_gfortran_abort" 33 "optimized" } }
+! { dg-final { scan-tree-dump-times "_gfortran_abort" 39 "original" } }
+! { # Commented out as failing at -O0: dg-final { scan-tree-dump-times "_gfortran_abort" 37 "optimized" } }
 !
 ! cleanup
 ! { dg-final { cleanup-tree-dump "original" } }
diff --git a/transpose_optimization_1.f90 b/transpose_optimization_1.f90
new file mode 100644
index 0000000..885ff7c
--- /dev/null
+++ b/transpose_optimization_1.f90
@@ -0,0 +1,106 @@
+! { dg-do compile }
+! { dg-options "-Warray-temporaries -fdump-tree-original" }
+!
+! PR fortran/45648
+! Non-copying descriptor transpose optimization (for function call args).
+!
+! Contributed by Richard Sandiford <richard@codesourcery.com>
+
+module foo
+  interface
+    subroutine ext1 (a, b)
+      real, intent (in), dimension (:, :) :: a, b
+    end subroutine ext1
+    subroutine ext2 (a, b)
+      real, intent (in), dimension (:, :) :: a
+      real, intent (out), dimension (:, :) :: b
+    end subroutine ext2
+    subroutine ext3 (a, b)
+      real, dimension (:, :) :: a, b
+    end subroutine ext3
+  end interface
+contains
+  ! No temporary needed here.
+  subroutine test1 (n, a, b, c)
+    integer :: n
+    real, dimension (n, n) :: a, b, c
+    a = matmul (transpose (b), c)
+  end subroutine test1
+
+  ! No temporary either, as we know the arguments to matmul are intent(in)
+  subroutine test2 (n, a, b)
+    integer :: n
+    real, dimension (n, n) :: a, b
+    a = matmul (transpose (b), b)
+  end subroutine test2
+
+  ! No temporary needed.
+  subroutine test3 (n, a, b, c)
+    integer :: n
+    real, dimension (n, n) :: a, c
+    real, dimension (n+4, n+4) :: b
+    a = matmul (transpose (b (2:n+1, 3:n+2)), c)
+  end subroutine test3
+
+  ! A temporary is needed for the result of either the transpose or matmul.
+  subroutine test4 (n, a, b)
+    integer :: n
+    real, dimension (n, n) :: a, b
+    a = matmul (transpose (a), b)       ! { dg-warning "Creating array temporary" }
+  end subroutine test4
+
+  ! The temporary is needed here since the second argument to imp1
+  ! has unknown intent.
+  subroutine test5 (n, a)
+    integer :: n
+    real, dimension (n, n) :: a
+    call imp1 (transpose (a), a)        ! { dg-warning "Creating array temporary" }
+  end subroutine test5
+
+  ! No temporaries are needed here; imp1 can't modify either argument.
+  ! We have to pack the arguments, however. 
+  subroutine test6 (n, a, b)
+    integer :: n
+    real, dimension (n, n) :: a, b
+    call imp1 (transpose (a), transpose (b))    ! { dg-warning "Creating array temporary" }
+  end subroutine test6
+
+  ! No temporaries are needed here; imp1 can't modify either argument.
+  ! We don't have to pack the arguments. 
+  subroutine test6_bis (n, a, b)
+    integer :: n
+    real, dimension (n, n) :: a, b
+    call ext3 (transpose (a), transpose (b))
+  end subroutine test6_bis
+
+  ! No temporary is neede here; the second argument is intent(in).
+  subroutine test7 (n, a)
+    integer :: n
+    real, dimension (n, n) :: a
+    call ext1 (transpose (a), a)
+  end subroutine test7
+
+  ! The temporary is needed here though.
+  subroutine test8 (n, a)
+    integer :: n
+    real, dimension (n, n) :: a
+    call ext2 (transpose (a), a)        ! { dg-warning "Creating array temporary" } 
+  end subroutine test8
+
+  ! Silly, but we don't need any temporaries here.
+  subroutine test9 (n, a)
+    integer :: n
+    real, dimension (n, n) :: a
+    call ext1 (transpose (transpose (a)), a)
+  end subroutine test9
+
+  ! The outer transpose needs a temporary; the inner one doesn't.
+  subroutine test10 (n, a)
+    integer :: n
+    real, dimension (n, n) :: a
+    call ext2 (transpose (transpose (a)), a)    ! { dg-warning "Creating array temporary" }
+  end subroutine test10
+end module foo
+
+! { dg-final { scan-tree-dump-times "struct\[^\\n\]*atmp" 4 "original" } }
+! { dg-final { cleanup-tree-dump "original" } }
Mikael Morin - Sept. 21, 2010, 4:07 p.m.
On Tuesday 21 September 2010 16:05:33 Paul Richard Thomas wrote:
> Mikael,
> 
> The complete patch and the testcase are OK for trunk.  Please commit
> the whole lot in one go.
Will do.
Thanks for the reviews.

Mikael

Patch

diff --git a/trans-array.c b/trans-array.c
index 947ed4b..1bb4429 100644
--- a/trans-array.c
+++ b/trans-array.c
@@ -5136,7 +5136,6 @@  get_array_charlen (gfc_expr *expr, gfc_se *se)
 }
 
 
-
 /* Convert an array for passing as an actual argument.  Expressions and
    vector subscripts are evaluated and stored in a temporary, which is then
    passed.  For whole arrays the descriptor is passed.  For array sections
@@ -5158,7 +5157,13 @@  get_array_charlen (gfc_expr *expr, gfc_se *se)
 	 EXPR is the right-hand side of a pointer assignment and
 	 se->expr is the descriptor for the previously-evaluated
 	 left-hand side.  The function creates an assignment from
-	 EXPR to se->expr.  */
+	 EXPR to se->expr.  
+
+
+   The se->force_tmp flag disables the non-copying descriptor optimization
+   that is used for transpose. It may be used in cases where there is an
+   alias between the transpose argument and another argument in the same
+   function call.  */
 
 void
 gfc_conv_expr_descriptor (gfc_se * se, gfc_expr * expr, gfc_ss * ss)
@@ -5198,6 +5203,9 @@  gfc_conv_expr_descriptor (gfc_se * se, gfc_expr * expr, gfc_ss * ss)
       need_tmp = gfc_ref_needs_temporary_p (expr->ref)
 			&& !subref_array_target;
 
+      if (se->force_tmp)
+	need_tmp = 1;
+
       if (need_tmp)
 	full = 0;
       else if (GFC_ARRAY_TYPE_P (TREE_TYPE (desc)))
@@ -5327,6 +5335,11 @@  gfc_conv_expr_descriptor (gfc_se * se, gfc_expr * expr, gfc_ss * ss)
       break;
     }
 
+  /* If we are creating a temporary, we don't need to bother about aliases
+     anymore.  */
+  if (need_tmp)
+    se->force_tmp = 0;
+
   gfc_init_loopinfo (&loop);
 
   /* Associate the SS with the loop.  */
diff --git a/trans-expr.c b/trans-expr.c
index 8d4295f..abeaa36 100644
--- a/trans-expr.c
+++ b/trans-expr.c
@@ -2770,7 +2770,7 @@  conv_isocbinding_procedure (gfc_se * se, gfc_symbol * sym,
 
 int
 gfc_conv_procedure_call (gfc_se * se, gfc_symbol * sym,
-			 gfc_actual_arglist * arg, gfc_expr * expr,
+			 gfc_actual_arglist * args, gfc_expr * expr,
 			 VEC(tree,gc) *append_args)
 {
   gfc_interface_mapping mapping;
@@ -2789,6 +2789,7 @@  gfc_conv_procedure_call (gfc_se * se, gfc_symbol * sym,
   VEC(tree,gc) *stringargs;
   tree result = NULL;
   gfc_formal_arglist *formal;
+  gfc_actual_arglist *arg;
   int has_alternate_specifier = 0;
   bool need_interface_mapping;
   bool callee_alloc;
@@ -2809,7 +2810,7 @@  gfc_conv_procedure_call (gfc_se * se, gfc_symbol * sym,
   gfc_clear_ts (&ts);
 
   if (sym->from_intmod == INTMOD_ISO_C_BINDING
-      && conv_isocbinding_procedure (se, sym, arg))
+      && conv_isocbinding_procedure (se, sym, args))
     return 0;
 
   gfc_is_proc_ptr_comp (expr, &comp);
@@ -2859,7 +2860,8 @@  gfc_conv_procedure_call (gfc_se * se, gfc_symbol * sym,
     }
 
   /* Evaluate the arguments.  */
-  for (; arg != NULL; arg = arg->next, formal = formal ? formal->next : NULL)
+  for (arg = args; arg != NULL;
+       arg = arg->next, formal = formal ? formal->next : NULL)
     {
       e = arg->expr;
       fsym = formal ? formal->sym : NULL;
@@ -3040,6 +3042,24 @@  gfc_conv_procedure_call (gfc_se * se, gfc_symbol * sym,
 	      else
 		f = f || !sym->attr.always_explicit;
 
+	      /* If the argument is a function call that may not create
+		 a temporary for the result, we have to check that we
+		 can do it, i.e. that there is no alias between this 
+		 argument and another one.  */
+	      if (gfc_get_noncopying_intrinsic_argument (e) != NULL)
+		{
+		  sym_intent intent;
+
+		  if (fsym != NULL)
+		    intent = fsym->attr.intent;
+		  else
+		    intent = INTENT_UNKNOWN;
+
+		  if (gfc_check_fncall_dependency (e, intent, sym, args,
+						   NOT_ELEMENTAL))
+		    parmse.force_tmp = 1;
+		}
+
 	      if (e->expr_type == EXPR_VARIABLE
 		    && is_subref_array (e))
 		/* The actual argument is a component reference to an
diff --git a/trans.h b/trans.h
index acdd3e3..a883cf5 100644
--- a/trans.h
+++ b/trans.h
@@ -81,6 +81,11 @@  typedef struct gfc_se
   /* If set, gfc_conv_procedure_call does not put byref calls into se->pre.  */
   unsigned no_function_call:1;
 
+  /* If set, we will force the creation of a temporary. Useful to disable
+     non-copying procedure argument passing optimizations, when some function
+     args alias.  */
+  unsigned force_tmp:1;
+
   /* Scalarization parameters.  */
   struct gfc_se *parent;
   struct gfc_ss *ss;