diff mbox series

RISC-V: Refine the condition for add additional vars in RVV cost model

Message ID 20240328103138.297823-1-demin.han@starfivetech.com
State New
Headers show
Series RISC-V: Refine the condition for add additional vars in RVV cost model | expand

Commit Message

demin.han March 28, 2024, 10:31 a.m. UTC
The adjacent_dr_p is sufficient and unnecessary condition for contiguous access.
So unnecessary live-ranges are added and result in spill.

This patch uses MEMORY_ACCESS_TYPE as condition and constrains segment
load/store.

Tested on RV64 and no regression.

	PR target/114506

gcc/ChangeLog:

	* config/riscv/riscv-vector-costs.cc (non_contiguous_memory_access_p): Rename
	(need_additional_vector_vars_p): Rename and refine condition

gcc/testsuite/ChangeLog:

	* gcc.dg/vect/costmodel/riscv/rvv/pr114506.c: New test.

Signed-off-by: demin.han <demin.han@starfivetech.com>
---
 gcc/config/riscv/riscv-vector-costs.cc        | 25 ++++++++++++-------
 .../vect/costmodel/riscv/rvv/pr114506.c       | 23 +++++++++++++++++
 2 files changed, 39 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr114506.c

Comments

juzhe.zhong@rivai.ai March 28, 2024, 10:44 a.m. UTC | #1
Thanks a lot for trying to optimize the dynamic LMUL cost model.

The need_additional_vector_vars_p looks good to me.

But
-		= (*program_points_per_bb.get (bb)).length () - 1;
+		= (*program_points_per_bb.get (bb)).length ();
I wonder why you remove - 1?



juzhe.zhong@rivai.ai
 
From: demin.han
Date: 2024-03-28 18:31
To: gcc-patches
CC: juzhe.zhong; kito.cheng; pan2.li; jeffreyalaw; rdapp.gcc
Subject: [PATCH] RISC-V: Refine the condition for add additional vars in RVV cost model
The adjacent_dr_p is sufficient and unnecessary condition for contiguous access.
So unnecessary live-ranges are added and result in spill.
 
This patch uses MEMORY_ACCESS_TYPE as condition and constrains segment
load/store.
 
Tested on RV64 and no regression.
 
PR target/114506
 
gcc/ChangeLog:
 
* config/riscv/riscv-vector-costs.cc (non_contiguous_memory_access_p): Rename
(need_additional_vector_vars_p): Rename and refine condition
 
gcc/testsuite/ChangeLog:
 
* gcc.dg/vect/costmodel/riscv/rvv/pr114506.c: New test.
 
Signed-off-by: demin.han <demin.han@starfivetech.com>
---
gcc/config/riscv/riscv-vector-costs.cc        | 25 ++++++++++++-------
.../vect/costmodel/riscv/rvv/pr114506.c       | 23 +++++++++++++++++
2 files changed, 39 insertions(+), 9 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr114506.c
 
diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc
index f462c272a6e..9f7fe936a29 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -563,14 +563,24 @@ get_store_value (gimple *stmt)
     return gimple_assign_rhs1 (stmt);
}
-/* Return true if it is non-contiguous load/store.  */
+/* Return true if addtional vector vars needed.  */
static bool
-non_contiguous_memory_access_p (stmt_vec_info stmt_info)
+need_additional_vector_vars_p (stmt_vec_info stmt_info)
{
   enum stmt_vec_info_type type
     = STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info));
-  return ((type == load_vec_info_type || type == store_vec_info_type)
-   && !adjacent_dr_p (STMT_VINFO_DATA_REF (stmt_info)));
+  if (type == load_vec_info_type || type == store_vec_info_type)
+    {
+      if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
+   && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER)
+ return true;
+
+      machine_mode mode = TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info));
+      int lmul = riscv_get_v_regno_alignment (mode);
+      if (DR_GROUP_SIZE (stmt_info) * lmul > RVV_M8)
+ return true;
+    }
+  return false;
}
/* Return the LMUL of the current analysis.  */
@@ -739,10 +749,7 @@ update_local_live_ranges (
  stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
  enum stmt_vec_info_type type
    = STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info));
-   if (non_contiguous_memory_access_p (stmt_info)
-       /* LOAD_LANES/STORE_LANES doesn't need a perm indice.  */
-       && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info)
-    != VMAT_LOAD_STORE_LANES)
+   if (need_additional_vector_vars_p (stmt_info))
    {
      /* For non-adjacent load/store STMT, we will potentially
convert it into:
@@ -752,7 +759,7 @@ update_local_live_ranges (
We will be likely using one more vector variable.  */
      unsigned int max_point
- = (*program_points_per_bb.get (bb)).length () - 1;
+ = (*program_points_per_bb.get (bb)).length ();
      auto *live_ranges = live_ranges_per_bb.get (bb);
      bool existed_p = false;
      tree var = type == load_vec_info_type
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr114506.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr114506.c
new file mode 100644
index 00000000000..a88d24b2d2d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr114506.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -mrvv-max-lmul=dynamic -fdump-tree-vect-details" } */
+
+float a[32000], b[32000], c[32000], d[32000];
+float aa[256][256], bb[256][256], cc[256][256];
+
+void
+s2275 ()
+{
+  for (int i = 0; i < 256; i++)
+    {
+      for (int j = 0; j < 256; j++)
+ {
+   aa[j][i] = aa[j][i] + bb[j][i] * cc[j][i];
+ }
+      a[i] = b[i] + c[i] * d[i];
+    }
+}
+
+/* { dg-final { scan-assembler-times {e32,m8} 1 } } */
+/* { dg-final { scan-assembler-not {e32,m4} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it has unexpected spills" "vect" } } */
demin.han March 28, 2024, 11:06 a.m. UTC | #2
Hi,
the point starts from 1. the max_point should equal to length();

Should I prepare an individual patch for this?

From: juzhe.zhong@rivai.ai <juzhe.zhong@rivai.ai>
Sent: 2024年3月28日 18:45
To: Demin Han <demin.han@starfivetech.com>; gcc-patches <gcc-patches@gcc.gnu.org>
Cc: kito.cheng <kito.cheng@gmail.com>; pan2.li <pan2.li@intel.com>; jeffreyalaw <jeffreyalaw@gmail.com>; Robin Dapp <rdapp.gcc@gmail.com>
Subject: Re: [PATCH] RISC-V: Refine the condition for add additional vars in RVV cost model

Thanks a lot for trying to optimize the dynamic LMUL cost model.

The need_additional_vector_vars_p looks good to me.


But

-              = (*program_points_per_bb.get (bb)).length () - 1;

+              = (*program_points_per_bb.get (bb)).length ();
I wonder why you remove - 1?
Jeff Law March 28, 2024, 2:37 p.m. UTC | #3
On 3/28/24 4:31 AM, demin.han wrote:
> The adjacent_dr_p is sufficient and unnecessary condition for contiguous access.
> So unnecessary live-ranges are added and result in spill.
> 
> This patch uses MEMORY_ACCESS_TYPE as condition and constrains segment
> load/store.
> 
> Tested on RV64 and no regression.
> 
> 	PR target/114506
> 
> gcc/ChangeLog:
> 
> 	* config/riscv/riscv-vector-costs.cc (non_contiguous_memory_access_p): Rename
> 	(need_additional_vector_vars_p): Rename and refine condition
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.dg/vect/costmodel/riscv/rvv/pr114506.c: New test.
Note I think this should defer to gcc-15.  It doesn't affect code 
correctness AFAICT and it's not a regression relative to gcc-13.

jeff
diff mbox series

Patch

diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc
index f462c272a6e..9f7fe936a29 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -563,14 +563,24 @@  get_store_value (gimple *stmt)
     return gimple_assign_rhs1 (stmt);
 }
 
-/* Return true if it is non-contiguous load/store.  */
+/* Return true if addtional vector vars needed.  */
 static bool
-non_contiguous_memory_access_p (stmt_vec_info stmt_info)
+need_additional_vector_vars_p (stmt_vec_info stmt_info)
 {
   enum stmt_vec_info_type type
     = STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info));
-  return ((type == load_vec_info_type || type == store_vec_info_type)
-	  && !adjacent_dr_p (STMT_VINFO_DATA_REF (stmt_info)));
+  if (type == load_vec_info_type || type == store_vec_info_type)
+    {
+      if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
+	  && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER)
+	return true;
+
+      machine_mode mode = TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info));
+      int lmul = riscv_get_v_regno_alignment (mode);
+      if (DR_GROUP_SIZE (stmt_info) * lmul > RVV_M8)
+	return true;
+    }
+  return false;
 }
 
 /* Return the LMUL of the current analysis.  */
@@ -739,10 +749,7 @@  update_local_live_ranges (
 	  stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
 	  enum stmt_vec_info_type type
 	    = STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info));
-	  if (non_contiguous_memory_access_p (stmt_info)
-	      /* LOAD_LANES/STORE_LANES doesn't need a perm indice.  */
-	      && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info)
-		   != VMAT_LOAD_STORE_LANES)
+	  if (need_additional_vector_vars_p (stmt_info))
 	    {
 	      /* For non-adjacent load/store STMT, we will potentially
 		 convert it into:
@@ -752,7 +759,7 @@  update_local_live_ranges (
 
 		We will be likely using one more vector variable.  */
 	      unsigned int max_point
-		= (*program_points_per_bb.get (bb)).length () - 1;
+		= (*program_points_per_bb.get (bb)).length ();
 	      auto *live_ranges = live_ranges_per_bb.get (bb);
 	      bool existed_p = false;
 	      tree var = type == load_vec_info_type
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr114506.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr114506.c
new file mode 100644
index 00000000000..a88d24b2d2d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr114506.c
@@ -0,0 +1,23 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -mrvv-max-lmul=dynamic -fdump-tree-vect-details" } */
+
+float a[32000], b[32000], c[32000], d[32000];
+float aa[256][256], bb[256][256], cc[256][256];
+
+void
+s2275 ()
+{
+  for (int i = 0; i < 256; i++)
+    {
+      for (int j = 0; j < 256; j++)
+	{
+	  aa[j][i] = aa[j][i] + bb[j][i] * cc[j][i];
+	}
+      a[i] = b[i] + c[i] * d[i];
+    }
+}
+
+/* { dg-final { scan-assembler-times {e32,m8} 1 } } */
+/* { dg-final { scan-assembler-not {e32,m4} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it has unexpected spills" "vect" } } */