diff mbox series

[fortran] Also use size estimate for vector-matrix matmul

Message ID 01315155-a238-c1ff-c08a-b0319422f001@netcologne.de
State New
Headers show
Series [fortran] Also use size estimate for vector-matrix matmul | expand

Commit Message

Thomas Koenig March 19, 2021, 5:37 p.m. UTC
Hello world,

here is the patch I talked about earlier.  It passes regression testing.

OK for trunk?

Best regards

	Thomas

Add size check to vector-matrix matmul.

It turns out the library version is much faster for vector-matrix
multiplications for large sizes than what inlining can produce.
Use size checks for switching between this and inlining for
that case too.

gcc/fortran/ChangeLog:

	* frontend-passes.c (inline_limit_check): Add rank_a
	argument. If a is rank 1, set the second dimension to 1.
	(inline_matmul_assign): Pass rank_a argument to inline_limit_check.
	(call_external_blas): Likewise.

gcc/testsuite/ChangeLog:

	* gfortran.dg/inline_matmul_6.f90: Adjust count for _gfortran_matmul.

Comments

Jerry DeLisle March 19, 2021, 7:49 p.m. UTC | #1
Yes Ok for trunk.

Thanks much!

On 3/19/21 10:37 AM, Thomas Koenig via Fortran wrote:
> Hello world,
>
> here is the patch I talked about earlier.  It passes regression testing.
>
> OK for trunk?
>
> Best regards
>
>     Thomas
>
> Add size check to vector-matrix matmul.
>
> It turns out the library version is much faster for vector-matrix
> multiplications for large sizes than what inlining can produce.
> Use size checks for switching between this and inlining for
> that case too.
>
> gcc/fortran/ChangeLog:
>
>     * frontend-passes.c (inline_limit_check): Add rank_a
>     argument. If a is rank 1, set the second dimension to 1.
>     (inline_matmul_assign): Pass rank_a argument to inline_limit_check.
>     (call_external_blas): Likewise.
>
> gcc/testsuite/ChangeLog:
>
>     * gfortran.dg/inline_matmul_6.f90: Adjust count for _gfortran_matmul.
Thomas Koenig March 20, 2021, 9:57 a.m. UTC | #2
Hi Jerry and Steve,

> Yes Ok for trunk.

Thanks for the heads-up and the review, committed as r11-7742.

Best regards

	Thomas
diff mbox series

Patch

diff --git a/gcc/fortran/frontend-passes.c b/gcc/fortran/frontend-passes.c
index cfc47471cf1..7d3eae67632 100644
--- a/gcc/fortran/frontend-passes.c
+++ b/gcc/fortran/frontend-passes.c
@@ -3307,7 +3307,7 @@  get_operand (gfc_intrinsic_op op, gfc_expr *e1, gfc_expr *e2)
    removed by DCE. Only called for rank-two matrices A and B.  */
 
 static gfc_code *
-inline_limit_check (gfc_expr *a, gfc_expr *b, int limit)
+inline_limit_check (gfc_expr *a, gfc_expr *b, int limit, int rank_a)
 {
   gfc_expr *inline_limit;
   gfc_code *if_1, *if_2, *else_2;
@@ -3315,16 +3315,28 @@  inline_limit_check (gfc_expr *a, gfc_expr *b, int limit)
   gfc_typespec ts;
   gfc_expr *cond;
 
+  gcc_assert (rank_a == 1 || rank_a == 2);
+
   /* Calculation is done in real to avoid integer overflow.  */
 
   inline_limit = gfc_get_constant_expr (BT_REAL, gfc_default_real_kind,
 					&a->where);
   mpfr_set_si (inline_limit->value.real, limit, GFC_RND_MODE);
-  mpfr_pow_ui (inline_limit->value.real, inline_limit->value.real, 3,
+
+  /* Set the limit according to the rank.  */
+  mpfr_pow_ui (inline_limit->value.real, inline_limit->value.real, rank_a + 1,
 	       GFC_RND_MODE);
 
   a1 = get_array_inq_function (GFC_ISYM_SIZE, a, 1);
-  a2 = get_array_inq_function (GFC_ISYM_SIZE, a, 2);
+
+  /* For rank_a == 1, use one as the size of a along the second
+     dimension so as to avoid too much code duplication.  */
+
+  if (rank_a == 2)
+    a2 = get_array_inq_function (GFC_ISYM_SIZE, a, 2);
+  else
+    a2 = gfc_get_int_expr (gfc_index_integer_kind, &a->where, 1);
+
   b2 = get_array_inq_function (GFC_ISYM_SIZE, b, 2);
 
   gfc_clear_ts (&ts);
@@ -4243,11 +4255,13 @@  inline_matmul_assign (gfc_code **c, int *walk_subtrees,
   /* Take care of the inline flag.  If the limit check evaluates to a
      constant, dead code elimination will eliminate the unneeded branch.  */
 
-  if (flag_inline_matmul_limit > 0 && matrix_a->rank == 2
+  if (flag_inline_matmul_limit > 0
+      && (matrix_a->rank == 1 || matrix_a->rank == 2)
       && matrix_b->rank == 2)
     {
       if_limit = inline_limit_check (matrix_a, matrix_b,
-				     flag_inline_matmul_limit);
+				     flag_inline_matmul_limit,
+				     matrix_a->rank);
 
       /* Insert the original statement into the else branch.  */
       if_limit->block->block->next = co;
@@ -4757,7 +4771,7 @@  call_external_blas (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED,
     return 0;
 
   /* Generate the if statement and hang it into the tree.  */
-  if_limit = inline_limit_check (matrix_a, matrix_b, flag_blas_matmul_limit);
+  if_limit = inline_limit_check (matrix_a, matrix_b, flag_blas_matmul_limit, 2);
   co_next = co->next;
   (*current_code) = if_limit;
   co->next = NULL;
diff --git a/gcc/testsuite/gfortran.dg/inline_matmul_6.f90 b/gcc/testsuite/gfortran.dg/inline_matmul_6.f90
index 491a7215258..da717bda017 100644
--- a/gcc/testsuite/gfortran.dg/inline_matmul_6.f90
+++ b/gcc/testsuite/gfortran.dg/inline_matmul_6.f90
@@ -45,4 +45,4 @@  program main
   if (any(abs(c2 - (/39., -61., 75./)) > 1e-3)) STOP 2
 end program main
 
-! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "original" } }
+! { dg-final { scan-tree-dump-times "_gfortran_matmul" 1 "original" } }