diff mbox series

[3/4] RISC-V: tune: Add setting for overlapping mem ops to tuning struct

Message ID 20240508051756.3999080-4-christoph.muellner@vrull.eu
State New
Headers show
Series RISC-V: Enhance unaligned/overlapping codegen | expand

Commit Message

Christoph Müllner May 8, 2024, 5:17 a.m. UTC
This patch adds the field overlap_op_by_pieces to the struct
riscv_tune_param. The value of this field is returned by the
TARGET_OVERLAP_OP_BY_PIECES_P() hook, which the by-pieces
infrastructure uses to decide if overlapping memory accesses should
be emitted.

The new property is set to false in all tune structs except for
generic-ooo.

The changes in the expansion can be seen in the adjustments of the
cpymem test cases. These tests also reveal a limitation in the
RISC-V cpymem expansion: it prevents this optimization, because only
by-pieces cpymem expansions emit overlapping memory accesses.

gcc/ChangeLog:

	* config/riscv/riscv.cc (struct riscv_tune_param): New field
	overlap_op_by_pieces.
	(riscv_overlap_op_by_pieces): New function.
	(TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
	riscv_overlap_op_by_pieces.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/cpymem-32-ooo.c: Adjust for overlapping
	access.
	* gcc.target/riscv/cpymem-64-ooo.c: Likewise.

Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>
---
 gcc/config/riscv/riscv.cc                     | 20 +++++++++++
 .../gcc.target/riscv/cpymem-32-ooo.c          | 20 +++++------
 .../gcc.target/riscv/cpymem-64-ooo.c          | 33 +++++++------------
 3 files changed, 40 insertions(+), 33 deletions(-)

Comments

Jeff Law May 10, 2024, 10:40 p.m. UTC | #1
On 5/7/24 11:17 PM, Christoph Müllner wrote:
> This patch adds the field overlap_op_by_pieces to the struct
> riscv_tune_param, which is used by the TARGET_OVERLAP_OP_BY_PIECES_P()
> hook. This hook is used by the by-pieces infrastructure to decide
> if overlapping memory accesses should be emitted.
> 
> The new property is set to false in all tune structs except for
> generic-ooo.
> 
> The changes in the expansion can be seen in the adjustments of the
> cpymem test cases. These tests also reveal a limitation in the
> RISC-V cpymem expansion that prevents this optimization as only
> by-pieces cpymem expansions emit overlapping memory accesses.
> 
> gcc/ChangeLog:
> 
> 	* config/riscv/riscv.cc (struct riscv_tune_param): New field
> 	overlap_op_by_pieces.
> 	(riscv_overlap_op_by_pieces): New function.
> 	(TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
> 	riscv_overlap_op_by_pieces.
I think these are redundant with the changes I installed earlier this 
week :-)

> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/riscv/cpymem-32-ooo.c: Adjust for overlapping
> 	access.
> 	* gcc.target/riscv/cpymem-64-ooo.c: Likewise.
OK once prereqs are in.

jeff
diff mbox series

Patch

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 44945d47fd6..793ec3155b9 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -286,6 +286,7 @@  struct riscv_tune_param
   unsigned short memory_cost;
   unsigned short fmv_cost;
   bool slow_unaligned_access;
+  bool overlap_op_by_pieces;
   bool use_divmod_expansion;
   unsigned int fusible_ops;
   const struct cpu_vector_cost *vec_costs;
@@ -425,6 +426,7 @@  static const struct riscv_tune_param rocket_tune_info = {
   5,						/* memory_cost */
   8,						/* fmv_cost */
   true,						/* slow_unaligned_access */
+  false,					/* overlap_op_by_pieces */
   false,					/* use_divmod_expansion */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
   NULL,						/* vector cost */
@@ -442,6 +444,7 @@  static const struct riscv_tune_param sifive_7_tune_info = {
   3,						/* memory_cost */
   8,						/* fmv_cost */
   true,						/* slow_unaligned_access */
+  false,					/* overlap_op_by_pieces */
   false,					/* use_divmod_expansion */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
   NULL,						/* vector cost */
@@ -459,6 +462,7 @@  static const struct riscv_tune_param sifive_p400_tune_info = {
   3,						/* memory_cost */
   4,						/* fmv_cost */
   true,						/* slow_unaligned_access */
+  false,					/* overlap_op_by_pieces */
   false,					/* use_divmod_expansion */
   RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI,  /* fusible_ops */
   &generic_vector_cost,				/* vector cost */
@@ -476,6 +480,7 @@  static const struct riscv_tune_param sifive_p600_tune_info = {
   3,						/* memory_cost */
   4,						/* fmv_cost */
   true,						/* slow_unaligned_access */
+  false,					/* overlap_op_by_pieces */
   false,					/* use_divmod_expansion */
   RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI,  /* fusible_ops */
   &generic_vector_cost,				/* vector cost */
@@ -493,6 +498,7 @@  static const struct riscv_tune_param thead_c906_tune_info = {
   5,            /* memory_cost */
   8,		/* fmv_cost */
   false,            /* slow_unaligned_access */
+  false,					/* overlap_op_by_pieces */
   false,	/* use_divmod_expansion */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
   NULL,						/* vector cost */
@@ -510,6 +516,7 @@  static const struct riscv_tune_param xiangshan_nanhu_tune_info = {
   3,						/* memory_cost */
   3,						/* fmv_cost */
   true,						/* slow_unaligned_access */
+  false,					/* overlap_op_by_pieces */
   false,					/* use_divmod_expansion */
   RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH,          /* fusible_ops */
   NULL,						/* vector cost */
@@ -527,6 +534,7 @@  static const struct riscv_tune_param generic_ooo_tune_info = {
   4,						/* memory_cost */
   4,						/* fmv_cost */
   false,					/* slow_unaligned_access */
+  true,						/* overlap_op_by_pieces */
   false,					/* use_divmod_expansion */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
   &generic_vector_cost,				/* vector cost */
@@ -544,6 +552,7 @@  static const struct riscv_tune_param optimize_size_tune_info = {
   2,						/* memory_cost */
   8,						/* fmv_cost */
   false,					/* slow_unaligned_access */
+  false,					/* overlap_op_by_pieces */
   false,					/* use_divmod_expansion */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
   NULL,						/* vector cost */
@@ -9923,6 +9932,14 @@  riscv_slow_unaligned_access (machine_mode, unsigned int)
   return riscv_slow_unaligned_access_p;
 }
 
+/* Implement TARGET_OVERLAP_OP_BY_PIECES_P.  */
+
+static bool
+riscv_overlap_op_by_pieces (void)
+{
+  return tune_param->overlap_op_by_pieces;
+}
+
 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
 
 static bool
@@ -11340,6 +11357,9 @@  riscv_get_raw_result_mode (int regno)
 #undef TARGET_SLOW_UNALIGNED_ACCESS
 #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
 
+#undef TARGET_OVERLAP_OP_BY_PIECES_P
+#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
+
 #undef TARGET_SECONDARY_MEMORY_NEEDED
 #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
 
diff --git a/gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c b/gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c
index 946a773f77a..947d58c30fa 100644
--- a/gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c
+++ b/gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c
@@ -24,9 +24,8 @@  void copy_aligned_##N (void *to, void *from)		\
 **    ...
 **    lw\t[at][0-9],0\([at][0-9]\)
 **    sw\t[at][0-9],0\([at][0-9]\)
-**    ...
-**    lbu\t[at][0-9],6\([at][0-9]\)
-**    sb\t[at][0-9],6\([at][0-9]\)
+**    lw\t[at][0-9],3\([at][0-9]\)
+**    sw\t[at][0-9],3\([at][0-9]\)
 **    ...
 */
 COPY_N(7)
@@ -36,9 +35,8 @@  COPY_N(7)
 **    ...
 **    lw\t[at][0-9],0\([at][0-9]\)
 **    sw\t[at][0-9],0\([at][0-9]\)
-**    ...
-**    lbu\t[at][0-9],6\([at][0-9]\)
-**    sb\t[at][0-9],6\([at][0-9]\)
+**    lw\t[at][0-9],3\([at][0-9]\)
+**    sw\t[at][0-9],3\([at][0-9]\)
 **    ...
 */
 COPY_ALIGNED_N(7)
@@ -66,11 +64,10 @@  COPY_ALIGNED_N(8)
 **    ...
 **    ...
 **    lw\t[at][0-9],0\([at][0-9]\)
-**    ...
 **    sw\t[at][0-9],0\([at][0-9]\)
 **    ...
-**    lbu\t[at][0-9],10\([at][0-9]\)
-**    sb\t[at][0-9],10\([at][0-9]\)
+**    lw\t[at][0-9],7\([at][0-9]\)
+**    sw\t[at][0-9],7\([at][0-9]\)
 **    ...
 */
 COPY_N(11)
@@ -79,11 +76,10 @@  COPY_N(11)
 **copy_aligned_11:
 **    ...
 **    lw\t[at][0-9],0\([at][0-9]\)
-**    ...
 **    sw\t[at][0-9],0\([at][0-9]\)
 **    ...
-**    lbu\t[at][0-9],10\([at][0-9]\)
-**    sb\t[at][0-9],10\([at][0-9]\)
+**    lw\t[at][0-9],7\([at][0-9]\)
+**    sw\t[at][0-9],7\([at][0-9]\)
 **    ...
 */
 COPY_ALIGNED_N(11)
diff --git a/gcc/testsuite/gcc.target/riscv/cpymem-64-ooo.c b/gcc/testsuite/gcc.target/riscv/cpymem-64-ooo.c
index 08a927b9483..108748690cd 100644
--- a/gcc/testsuite/gcc.target/riscv/cpymem-64-ooo.c
+++ b/gcc/testsuite/gcc.target/riscv/cpymem-64-ooo.c
@@ -24,9 +24,8 @@  void copy_aligned_##N (void *to, void *from)		\
 **    ...
 **    lw\t[at][0-9],0\([at][0-9]\)
 **    sw\t[at][0-9],0\([at][0-9]\)
-**    ...
-**    lbu\t[at][0-9],6\([at][0-9]\)
-**    sb\t[at][0-9],6\([at][0-9]\)
+**    lw\t[at][0-9],3\([at][0-9]\)
+**    sw\t[at][0-9],3\([at][0-9]\)
 **    ...
 */
 COPY_N(7)
@@ -36,9 +35,8 @@  COPY_N(7)
 **    ...
 **    lw\t[at][0-9],0\([at][0-9]\)
 **    sw\t[at][0-9],0\([at][0-9]\)
-**    ...
-**    lbu\t[at][0-9],6\([at][0-9]\)
-**    sb\t[at][0-9],6\([at][0-9]\)
+**    lw\t[at][0-9],3\([at][0-9]\)
+**    sw\t[at][0-9],3\([at][0-9]\)
 **    ...
 */
 COPY_ALIGNED_N(7)
@@ -66,9 +64,8 @@  COPY_ALIGNED_N(8)
 **    ...
 **    ld\t[at][0-9],0\([at][0-9]\)
 **    sd\t[at][0-9],0\([at][0-9]\)
-**    ...
-**    lbu\t[at][0-9],10\([at][0-9]\)
-**    sb\t[at][0-9],10\([at][0-9]\)
+**    lw\t[at][0-9],7\([at][0-9]\)
+**    sw\t[at][0-9],7\([at][0-9]\)
 **    ...
 */
 COPY_N(11)
@@ -77,11 +74,9 @@  COPY_N(11)
 **copy_aligned_11:
 **    ...
 **    ld\t[at][0-9],0\([at][0-9]\)
-**    ...
 **    sd\t[at][0-9],0\([at][0-9]\)
-**    ...
-**    lbu\t[at][0-9],10\([at][0-9]\)
-**    sb\t[at][0-9],10\([at][0-9]\)
+**    lw\t[at][0-9],7\([at][0-9]\)
+**    sw\t[at][0-9],7\([at][0-9]\)
 **    ...
 */
 COPY_ALIGNED_N(11)
@@ -90,11 +85,9 @@  COPY_ALIGNED_N(11)
 **copy_15:
 **    ...
 **    ld\t[at][0-9],0\([at][0-9]\)
-**    ...
 **    sd\t[at][0-9],0\([at][0-9]\)
-**    ...
-**    lbu\t[at][0-9],14\([at][0-9]\)
-**    sb\t[at][0-9],14\([at][0-9]\)
+**    ld\t[at][0-9],7\([at][0-9]\)
+**    sd\t[at][0-9],7\([at][0-9]\)
 **    ...
 */
 COPY_N(15)
@@ -103,11 +96,9 @@  COPY_N(15)
 **copy_aligned_15:
 **    ...
 **    ld\t[at][0-9],0\([at][0-9]\)
-**    ...
 **    sd\t[at][0-9],0\([at][0-9]\)
-**    ...
-**    lbu\t[at][0-9],14\([at][0-9]\)
-**    sb\t[at][0-9],14\([at][0-9]\)
+**    ld\t[at][0-9],7\([at][0-9]\)
+**    sd\t[at][0-9],7\([at][0-9]\)
 **    ...
 */
 COPY_ALIGNED_N(15)