Message ID | 20231227015222.3393770-1-juzhe.zhong@rivai.ai |
---|---|
State | New |
Headers | show |
Series | RISC-V: Disallow transformation into VLMAX AVL for cond_len_xxx when length is in range [0, 31] | expand |
Sent V2 with test tweak:
https://gcc.gnu.org/pipermail/gcc-patches/2023-December/641447.html
juzhe.zhong@rivai.ai
From: Juzhe-Zhong
Date: 2023-12-27 09:52
To: gcc-patches
CC: kito.cheng; kito.cheng; jeffreyalaw; rdapp.gcc; Juzhe-Zhong
Subject: [PATCH] RISC-V: Disallow transformation into VLMAX AVL for cond_len_xxx when length is in range [0,31]
Notice that we have the following situation:
vsetivli zero,4,e32,m1,ta,ma
vlseg4e32.v v4,(a5)
vlseg4e32.v v12,(a3)
vsetvli a5,zero,e32,m1,tu,ma ---> This is redundant since VLMAX AVL = 4 when it is fixed-vlmax
vfadd.vf v3,v13,fa0
vfadd.vf v1,v12,fa1
vfmul.vv v17,v3,v5
vfmul.vv v16,v1,v5
The root cause is that we blindly transform COND_LEN_xxx into VLMAX AVL whenever len == NUNITS.
However, we don't need to transform all of them: when len is in the range [0, 31], it can be encoded
directly as the vsetivli immediate, so no scalar register is consumed.
After this patch:
vsetivli zero,4,e32,m1,tu,ma
addi a4,a5,400
vlseg4e32.v v12,(a3)
vfadd.vf v3,v13,fa0
vfadd.vf v1,v12,fa1
vlseg4e32.v v4,(a4)
vfadd.vf v2,v14,fa1
vfmul.vv v17,v3,v5
vfmul.vv v16,v1,v5
Tested on both RV32 and RV64 with no regressions.
OK for trunk?
gcc/ChangeLog:
* config/riscv/riscv-v.cc (is_vlmax_len_p): New function.
(expand_load_store): Disallow transformation into VLMAX when len is in range of [0,31]
(expand_cond_len_op): Ditto.
(expand_gather_scatter): Ditto.
(expand_lanes_load_store): Ditto.
(expand_fold_extract_last): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/post-ra-avl.c: Adapt test.
* gcc.target/riscv/rvv/base/vf_avl-2.c: New test.
---
gcc/config/riscv/riscv-v.cc | 21 +++++++++++++------
.../riscv/rvv/autovec/post-ra-avl.c | 2 +-
.../gcc.target/riscv/rvv/base/vf_avl-2.c | 21 +++++++++++++++++++
3 files changed, 37 insertions(+), 7 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-2.c
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 038ab084a37..0cc7af58da6 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -68,6 +68,16 @@ imm_avl_p (machine_mode mode)
: false;
}
+/* Return true if LEN is equal to NUNITS that outbounds range of [0, 31]. */
+static bool
+is_vlmax_len_p (machine_mode mode, rtx len)
+{
+ poly_int64 value;
+ return poly_int_rtx_p (len, &value)
+ && known_eq (value, GET_MODE_NUNITS (mode))
+ && !satisfies_constraint_K (len);
+}
+
/* Helper functions for insn_flags && insn_types */
/* Return true if caller need pass mask operand for insn pattern with
@@ -3776,7 +3786,7 @@ expand_load_store (rtx *ops, bool is_load)
rtx len = ops[3];
machine_mode mode = GET_MODE (ops[0]);
- if (poly_int_rtx_p (len, &value) && known_eq (value, GET_MODE_NUNITS (mode)))
+ if (is_vlmax_len_p (mode, len))
{
/* If the length operand is equal to VF, it is VLMAX load/store. */
if (is_load)
@@ -3842,8 +3852,7 @@ expand_cond_len_op (unsigned icode, insn_flags op_type, rtx *ops, rtx len)
machine_mode mask_mode = GET_MODE (mask);
poly_int64 value;
bool is_dummy_mask = rtx_equal_p (mask, CONSTM1_RTX (mask_mode));
- bool is_vlmax_len
- = poly_int_rtx_p (len, &value) && known_eq (value, GET_MODE_NUNITS (mode));
+ bool is_vlmax_len = is_vlmax_len_p (mode, len);
unsigned insn_flags = HAS_DEST_P | HAS_MASK_P | HAS_MERGE_P | op_type;
if (is_dummy_mask)
@@ -4012,7 +4021,7 @@ expand_gather_scatter (rtx *ops, bool is_load)
unsigned inner_offsize = GET_MODE_BITSIZE (inner_idx_mode);
poly_int64 nunits = GET_MODE_NUNITS (vec_mode);
poly_int64 value;
- bool is_vlmax = poly_int_rtx_p (len, &value) && known_eq (value, nunits);
+ bool is_vlmax = is_vlmax_len_p (vec_mode, len);
/* Extend the offset element to address width. */
if (inner_offsize < BITS_PER_WORD)
@@ -4199,7 +4208,7 @@ expand_lanes_load_store (rtx *ops, bool is_load)
rtx reg = is_load ? ops[0] : ops[1];
machine_mode mode = GET_MODE (ops[0]);
- if (poly_int_rtx_p (len, &value) && known_eq (value, GET_MODE_NUNITS (mode)))
+ if (is_vlmax_len_p (mode, len))
{
/* If the length operand is equal to VF, it is VLMAX load/store. */
if (is_load)
@@ -4252,7 +4261,7 @@ expand_fold_extract_last (rtx *ops)
rtx slide_vect = gen_reg_rtx (mode);
insn_code icode;
- if (poly_int_rtx_p (len, &value) && known_eq (value, GET_MODE_NUNITS (mode)))
+ if (is_vlmax_len_p (mode, len))
len = NULL_RTX;
/* Calculate the number of 1-bit in mask. */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/post-ra-avl.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/post-ra-avl.c
index f3d12bac7cd..c77b2d187fe 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/post-ra-avl.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/post-ra-avl.c
@@ -13,4 +13,4 @@ int foo() {
return a;
}
-/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero} 1 } } */
+/* { dg-final { scan-assembler-not {vsetvli} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-2.c
new file mode 100644
index 00000000000..5a94a51f308
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-2.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d --param riscv-autovec-preference=fixed-vlmax" } */
+
+float f[12][100];
+
+void bad1(float v1, float v2)
+{
+ for (int r = 0; r < 100; r += 4)
+ {
+ int i = r + 1;
+ f[0][r] = f[1][r] * (f[2][r] + v2) - f[1][i] * (f[2][i] + v1);
+ f[0][i] = f[1][r] * (f[2][i] + v1) + f[1][i] * (f[2][r] + v2);
+ f[0][r+2] = f[1][r+2] * (f[2][r+2] + v2) - f[1][i+2] * (f[2][i+2] + v1);
+ f[0][i+2] = f[1][r+2] * (f[2][i+2] + v1) + f[1][i+2] * (f[2][r+2] + v2);
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*4,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*1,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli} 2 } } */
+/* { dg-final { scan-assembler-not {vsetvli} } } */
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 038ab084a37..0cc7af58da6 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -68,6 +68,16 @@ imm_avl_p (machine_mode mode) : false; } +/* Return true if LEN is equal to NUNITS that outbounds range of [0, 31]. */ +static bool +is_vlmax_len_p (machine_mode mode, rtx len) +{ + poly_int64 value; + return poly_int_rtx_p (len, &value) + && known_eq (value, GET_MODE_NUNITS (mode)) + && !satisfies_constraint_K (len); +} + /* Helper functions for insn_flags && insn_types */ /* Return true if caller need pass mask operand for insn pattern with @@ -3776,7 +3786,7 @@ expand_load_store (rtx *ops, bool is_load) rtx len = ops[3]; machine_mode mode = GET_MODE (ops[0]); - if (poly_int_rtx_p (len, &value) && known_eq (value, GET_MODE_NUNITS (mode))) + if (is_vlmax_len_p (mode, len)) { /* If the length operand is equal to VF, it is VLMAX load/store. */ if (is_load) @@ -3842,8 +3852,7 @@ expand_cond_len_op (unsigned icode, insn_flags op_type, rtx *ops, rtx len) machine_mode mask_mode = GET_MODE (mask); poly_int64 value; bool is_dummy_mask = rtx_equal_p (mask, CONSTM1_RTX (mask_mode)); - bool is_vlmax_len - = poly_int_rtx_p (len, &value) && known_eq (value, GET_MODE_NUNITS (mode)); + bool is_vlmax_len = is_vlmax_len_p (mode, len); unsigned insn_flags = HAS_DEST_P | HAS_MASK_P | HAS_MERGE_P | op_type; if (is_dummy_mask) @@ -4012,7 +4021,7 @@ expand_gather_scatter (rtx *ops, bool is_load) unsigned inner_offsize = GET_MODE_BITSIZE (inner_idx_mode); poly_int64 nunits = GET_MODE_NUNITS (vec_mode); poly_int64 value; - bool is_vlmax = poly_int_rtx_p (len, &value) && known_eq (value, nunits); + bool is_vlmax = is_vlmax_len_p (vec_mode, len); /* Extend the offset element to address width. */ if (inner_offsize < BITS_PER_WORD) @@ -4199,7 +4208,7 @@ expand_lanes_load_store (rtx *ops, bool is_load) rtx reg = is_load ? 
ops[0] : ops[1]; machine_mode mode = GET_MODE (ops[0]); - if (poly_int_rtx_p (len, &value) && known_eq (value, GET_MODE_NUNITS (mode))) + if (is_vlmax_len_p (mode, len)) { /* If the length operand is equal to VF, it is VLMAX load/store. */ if (is_load) @@ -4252,7 +4261,7 @@ expand_fold_extract_last (rtx *ops) rtx slide_vect = gen_reg_rtx (mode); insn_code icode; - if (poly_int_rtx_p (len, &value) && known_eq (value, GET_MODE_NUNITS (mode))) + if (is_vlmax_len_p (mode, len)) len = NULL_RTX; /* Calculate the number of 1-bit in mask. */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/post-ra-avl.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/post-ra-avl.c index f3d12bac7cd..c77b2d187fe 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/post-ra-avl.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/post-ra-avl.c @@ -13,4 +13,4 @@ int foo() { return a; } -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero} 1 } } */ +/* { dg-final { scan-assembler-not {vsetvli} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-2.c new file mode 100644 index 00000000000..5a94a51f308 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/vf_avl-2.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d --param riscv-autovec-preference=fixed-vlmax" } */ + +float f[12][100]; + +void bad1(float v1, float v2) +{ + for (int r = 0; r < 100; r += 4) + { + int i = r + 1; + f[0][r] = f[1][r] * (f[2][r] + v2) - f[1][i] * (f[2][i] + v1); + f[0][i] = f[1][r] * (f[2][i] + v1) + f[1][i] * (f[2][r] + v2); + f[0][r+2] = f[1][r+2] * (f[2][r+2] + v2) - f[1][i+2] * (f[2][i+2] + v1); + f[0][i+2] = f[1][r+2] * (f[2][i+2] + v1) + f[1][i+2] * (f[2][r+2] + v2); + } +} + +/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*4,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 } } */ +/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*1,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 } } */ +/* { 
dg-final { scan-assembler-times {vsetivli} 2 } } */ +/* { dg-final { scan-assembler-not {vsetvli} } } */