diff mbox series

[fortran] Two tweaks to argument repacking

Message ID e8c57f4d-a197-d127-4d16-162ef9684ce7@netcologne.de
State New
Headers show
Series [fortran] Two tweaks to argument repacking | expand

Commit Message

Thomas Koenig June 2, 2019, 1:34 p.m. UTC
Hello world,

this patch adds two tweaks to the argument repacking.

First, when the size of an argument is known to be one, as in a(n1:n1),
we can directly pass a pointer - the stride may not be one, but it
does not matter.

Second, the case where the array passed is actually contiguous is
more likely in practice, so it should get the fast path. I have
done this by defining a new predictor and setting the estimated
likelyhood at 75%, which ensured a path without jumps when the
arguments passed to bar were contiguous:

module y
contains
   subroutine bar(a,b)
     real, dimension(:) :: a,b
     call foo(a,b,size(a))
   end subroutine bar
end module y

Test case is only for the first part - making one for the second
part would have been a bit too much.

Regression-tested. OK for trunk?

Regards

	Thomas

2019-06-02  Thomas Koenig  <tkoenig@gcc.gnu.org>

	PR fortran/90539
	* trans-expr.c (gfc_conv_subref_array_arg): If the size of the
	expression can be determined to be one, treat it as contiguous.
	Set likelyhood of presence of an actual argument according to
	PRED_FORTRAN_ABSENT_DUMMY and likelyhood of being contiguous
	according to PRED_FORTRAN_CONTIGUOUS.

2019-06-02  Thomas Koenig  <tkoenig@gcc.gnu.org>

	PR fortran/90539
	* predict.def (PRED_FORTRAN_CONTIGUOUS): New predictor.

Comments

Paul Richard Thomas June 2, 2019, 3:09 p.m. UTC | #1
Hi Thomas,

It looks good to me. OK for trunk.

Thanks

Paul

On Sun, 2 Jun 2019 at 14:34, Thomas Koenig <tkoenig@netcologne.de> wrote:
>
> Hello world,
>
> this patch adds two tweaks to the argument repacking.
>
> First, when the size of an argument is known to be one, as in a(n1:n1),
> we can directly pass a pointer - the stride may not be one, but it
> does not matter.
>
> Second, the case where the array passed is actually contiguous is
> more likely in practice, so it should get the fast path. I have
> done this by defining a new predictor and setting the estimated
> likelyhood at 75%, which ensured a path without jumps when the
> arguments passed to bar were contiguous:
>
> module y
> contains
>    subroutine bar(a,b)
>      real, dimension(:) :: a,b
>      call foo(a,b,size(a))
>    end subroutine bar
> end module y
>
> Test case is only for the first part - making one for the second
> part would have been a bit too much.
>
> Regression-tested. OK for trunk?
>
> Regards
>
>         Thomas
>
> 2019-06-02  Thomas Koenig  <tkoenig@gcc.gnu.org>
>
>         PR fortran/90539
>         * trans-expr.c (gfc_conv_subref_array_arg): If the size of the
>         expression can be determined to be one, treat it as contiguous.
>         Set likelyhood of presence of an actual argument according to
>         PRED_FORTRAN_ABSENT_DUMMY and likelyhood of being contiguous
>         according to PRED_FORTRAN_CONTIGUOUS.
>
> 2019-06-02  Thomas Koenig  <tkoenig@gcc.gnu.org>
>
>         PR fortran/90539
>         * predict.def (PRED_FORTRAN_CONTIGUOUS): New predictor.
diff mbox series

Patch

Index: fortran/trans-expr.c
===================================================================
--- fortran/trans-expr.c	(Revision 271751)
+++ fortran/trans-expr.c	(Arbeitskopie)
@@ -4922,16 +4922,36 @@  class_array_fcn:
 	  gfc_se cont_se, array_se;
 	  stmtblock_t if_block, else_block;
 	  tree if_stmt, else_stmt;
+	  mpz_t size;
+	  bool size_set;
 
 	  cont_var = gfc_create_var (boolean_type_node, "contiguous");
 
-	  /* cont_var = is_contiguous (expr); .  */
-	  gfc_init_se (&cont_se, parmse);
-	  gfc_conv_is_contiguous_expr (&cont_se, expr);
-	  gfc_add_block_to_block (&se->pre, &(&cont_se)->pre);
-	  gfc_add_modify (&se->pre, cont_var, cont_se.expr);
-	  gfc_add_block_to_block (&se->pre, &(&cont_se)->post);
+	  /* If the size is known to be one at compile-time, set
+	     cont_var to true unconditionally.  This may look
+	     inelegant, but we're only doing this during
+	     optimization, so the statements will be optimized away,
+	     and this saves complexity here.  */
 
+	  size_set = gfc_array_size (expr, &size);
+	  if (size_set && mpz_cmp_ui (size, 1) == 0)
+	    {
+	      gfc_add_modify (&se->pre, cont_var,
+			      build_one_cst (boolean_type_node));
+	    }
+	  else
+	    {
+	      /* cont_var = is_contiguous (expr); .  */
+	      gfc_init_se (&cont_se, parmse);
+	      gfc_conv_is_contiguous_expr (&cont_se, expr);
+	      gfc_add_block_to_block (&se->pre, &(&cont_se)->pre);
+	      gfc_add_modify (&se->pre, cont_var, cont_se.expr);
+	      gfc_add_block_to_block (&se->pre, &(&cont_se)->post);
+	    }
+
+	  if (size_set)
+	    mpz_clear (size);
+
 	  /* arrayse->expr = descriptor of a.  */
 	  gfc_init_se (&array_se, se);
 	  gfc_conv_expr_descriptor (&array_se, expr);
@@ -4953,7 +4973,9 @@  class_array_fcn:
 
 	  /* And put the above into an if statement.  */
 	  pre_stmts = fold_build3_loc (input_location, COND_EXPR, void_type_node,
-				      cont_var, if_stmt, else_stmt);
+				       gfc_likely (cont_var,
+						   PRED_FORTRAN_CONTIGUOUS),
+				       if_stmt, else_stmt);
 	}
       else
 	{
@@ -4976,11 +4998,11 @@  class_array_fcn:
 	  gfc_add_modify (&else_block, pointer, build_int_cst (type, 0));
 	  else_stmt = gfc_finish_block (&else_block);
 
-	  tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node, present_var,
+	  tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node,
+				 gfc_likely (present_var,
+					     PRED_FORTRAN_ABSENT_DUMMY),
 				 pre_stmts, else_stmt);
 	  gfc_add_expr_to_block (&se->pre, tmp);
-
-
 	}
       else
 	gfc_add_expr_to_block (&se->pre, pre_stmts);
@@ -4995,9 +5017,16 @@  class_array_fcn:
 	  tmp = fold_build2_loc (input_location, EQ_EXPR, boolean_type_node,
 				 cont_var,
 				 build_zero_cst (boolean_type_node));
+	  tmp = gfc_unlikely (tmp, PRED_FORTRAN_CONTIGUOUS);
+
 	  if (pass_optional)
-	    post_cond = fold_build2_loc (input_location, TRUTH_ANDIF_EXPR,
-					 boolean_type_node, present_var, tmp);
+	    {
+	      tree present_likely = gfc_likely (present_var,
+						PRED_FORTRAN_ABSENT_DUMMY);
+	      post_cond = fold_build2_loc (input_location, TRUTH_ANDIF_EXPR,
+					   boolean_type_node, present_likely,
+					   tmp);
+	    }
 	  else
 	    post_cond = tmp;
 	}
Index: predict.def
===================================================================
--- predict.def	(Revision 271629)
+++ predict.def	(Arbeitskopie)
@@ -229,3 +229,10 @@  DEF_PREDICTOR (PRED_FORTRAN_ABSENT_DUMMY, "Fortran
    to be very likely.  */
 DEF_PREDICTOR (PRED_FORTRAN_LOOP_PREHEADER, "Fortran loop preheader", \
 	       HITRATE (99), 0)
+
+/* Fortran assumed size arrays can be non-contiguous, so they need
+   to be repacked.  */
+
+DEF_PREDICTOR (PRED_FORTRAN_CONTIGUOUS, "Fortran contiguous", \
+	       HITRATE (75), 0)
+