diff mbox series

RISC-V: Fix error combine of pred_mov pattern

Message ID 20230808115709.380001-1-lehua.ding@rivai.ai
State New
Headers show
Series RISC-V: Fix error combine of pred_mov pattern | expand

Commit Message

Lehua Ding Aug. 8, 2023, 11:57 a.m. UTC
Hi,

This patch fix PR110943 which will produce some error code. This is because
the error combine of some pred_mov pattern. Consider this code:

```
#include <riscv_vector.h>

void foo9 (void *base, void *out, size_t vl)
{
    int64_t scalar = *(int64_t*)(base + 100);
    vint64m2_t v = __riscv_vmv_v_x_i64m2 (0, 1);
    *(vint64m2_t*)out = v;
}
```

RTL before combine pass:

```
(insn 11 10 12 2 (set (reg/v:RVVM2DI 134 [ v ])
        (if_then_else:RVVM2DI (unspec:RVVMF32BI [
                    (const_vector:RVVMF32BI repeat [
                            (const_int 1 [0x1])
                        ])
                    (const_int 1 [0x1])
                    (const_int 2 [0x2]) repeated x2
                    (const_int 0 [0])
                    (reg:SI 66 vl)
                    (reg:SI 67 vtype)
                ] UNSPEC_VPREDICATE)
            (const_vector:RVVM2DI repeat [
                    (const_int 0 [0])
                ])
            (unspec:RVVM2DI [
                    (reg:SI 0 zero)
                ] UNSPEC_VUNDEF))) "/app/example.c":6:20 1089 {pred_movrvvm2di})
(insn 14 13 0 2 (set (mem:RVVM2DI (reg/v/f:DI 136 [ out ]) [1 MEM[(vint64m2_t *)out_4(D)]+0 S[32, 32] A128])
        (reg/v:RVVM2DI 134 [ v ])) "/app/example.c":7:23 717 {*movrvvm2di_whole})
```

RTL after combine pass:
```
(insn 14 13 0 2 (set (mem:RVVM2DI (reg:DI 138) [1 MEM[(vint64m2_t *)out_4(D)]+0 S[32, 32] A128])
        (if_then_else:RVVM2DI (unspec:RVVMF32BI [
                    (const_vector:RVVMF32BI repeat [
                            (const_int 1 [0x1])
                        ])
                    (const_int 1 [0x1])
                    (const_int 2 [0x2]) repeated x2
                    (const_int 0 [0])
                    (reg:SI 66 vl)
                    (reg:SI 67 vtype)
                ] UNSPEC_VPREDICATE)
            (const_vector:RVVM2DI repeat [
                    (const_int 0 [0])
                ])
            (unspec:RVVM2DI [
                    (reg:SI 0 zero)
                ] UNSPEC_VUNDEF))) "/app/example.c":7:23 1089 {pred_movrvvm2di})
```

This combine change the semantics of insn 14. I refine the conditon of @pred_mov
pattern to a more restrict. It's Ok for trunk?

Best,
Lehua


	PR target/110943

gcc/ChangeLog:

	* config/riscv/riscv-vector-builtins.cc (function_expander::function_expander):
	  force_reg mem operand.
	* config/riscv/vector.md: Refine condition.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/base/zvfhmin-intrinsic.c: Update.
	* gcc.target/riscv/rvv/base/pr110943.c: New test.

---
 gcc/config/riscv/riscv-vector-builtins.cc     |  8 ++++-
 gcc/config/riscv/vector.md                    |  5 +--
 .../gcc.target/riscv/rvv/base/pr110943.c      | 33 +++++++++++++++++++
 .../riscv/rvv/base/zvfhmin-intrinsic.c        | 10 +++---
 4 files changed, 48 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr110943.c

Comments

Jeff Law Aug. 8, 2023, 4:10 p.m. UTC | #1
On 8/8/23 05:57, Lehua Ding wrote:
> Hi,
> 
> This patch fix PR110943 which will produce some error code. This is because
> the error combine of some pred_mov pattern. Consider this code:
> 
> ```
> #include <riscv_vector.h>
> 
> void foo9 (void *base, void *out, size_t vl)
> {
>      int64_t scalar = *(int64_t*)(base + 100);
>      vint64m2_t v = __riscv_vmv_v_x_i64m2 (0, 1);
>      *(vint64m2_t*)out = v;
> }
> ```
> 
> RTL before combine pass:
> 
> ```
> (insn 11 10 12 2 (set (reg/v:RVVM2DI 134 [ v ])
>          (if_then_else:RVVM2DI (unspec:RVVMF32BI [
>                      (const_vector:RVVMF32BI repeat [
>                              (const_int 1 [0x1])
>                          ])
>                      (const_int 1 [0x1])
>                      (const_int 2 [0x2]) repeated x2
>                      (const_int 0 [0])
>                      (reg:SI 66 vl)
>                      (reg:SI 67 vtype)
>                  ] UNSPEC_VPREDICATE)
>              (const_vector:RVVM2DI repeat [
>                      (const_int 0 [0])
>                  ])
>              (unspec:RVVM2DI [
>                      (reg:SI 0 zero)
>                  ] UNSPEC_VUNDEF))) "/app/example.c":6:20 1089 {pred_movrvvm2di})
> (insn 14 13 0 2 (set (mem:RVVM2DI (reg/v/f:DI 136 [ out ]) [1 MEM[(vint64m2_t *)out_4(D)]+0 S[32, 32] A128])
>          (reg/v:RVVM2DI 134 [ v ])) "/app/example.c":7:23 717 {*movrvvm2di_whole})
> ```
> 
> RTL after combine pass:
> ```
> (insn 14 13 0 2 (set (mem:RVVM2DI (reg:DI 138) [1 MEM[(vint64m2_t *)out_4(D)]+0 S[32, 32] A128])
>          (if_then_else:RVVM2DI (unspec:RVVMF32BI [
>                      (const_vector:RVVMF32BI repeat [
>                              (const_int 1 [0x1])
>                          ])
>                      (const_int 1 [0x1])
>                      (const_int 2 [0x2]) repeated x2
>                      (const_int 0 [0])
>                      (reg:SI 66 vl)
>                      (reg:SI 67 vtype)
>                  ] UNSPEC_VPREDICATE)
>              (const_vector:RVVM2DI repeat [
>                      (const_int 0 [0])
>                  ])
>              (unspec:RVVM2DI [
>                      (reg:SI 0 zero)
>                  ] UNSPEC_VUNDEF))) "/app/example.c":7:23 1089 {pred_movrvvm2di})
> ```
> 
> This combine change the semantics of insn 14. I refine the conditon of @pred_mov
> pattern to a more restrict. It's Ok for trunk?
> 
> Best,
> Lehua
> 
> 
> 	PR target/110943
> 
> gcc/ChangeLog:
> 
> 	* config/riscv/riscv-vector-builtins.cc (function_expander::function_expander):
> 	  force_reg mem operand.
> 	* config/riscv/vector.md: Refine condition.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/riscv/rvv/base/zvfhmin-intrinsic.c: Update.
> 	* gcc.target/riscv/rvv/base/pr110943.c: New test.
So at a high level this doesn't look correct to me.

The pattern's operand 0 explicitly allows MEMs as do the constraints. 
So forcing the operand into a register just seems like it's papering 
over the real problem.

I wonder if we should just remove the memory destination from this 
pattern.  Ultimately isn't that case just trying to optimize a constant 
store into memory -- perhaps we just need a distinct pattern for that. 
We generally try to avoid that for movXX patterns, but this seems a bit 
different.


>   create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr110943.c
> 
> diff --git a/gcc/config/riscv/riscv-vector-builtins.cc b/gcc/config/riscv/riscv-vector-builtins.cc
> index 528dca7ae85..cd40fb2060f 100644
> --- a/gcc/config/riscv/riscv-vector-builtins.cc
> +++ b/gcc/config/riscv/riscv-vector-builtins.cc
> @@ -3471,7 +3471,13 @@ function_expander::function_expander (const function_instance &instance,
>       exp (exp_in), target (target_in), opno (0)
>   {
>     if (!function_returns_void_p ())
> -    create_output_operand (&m_ops[opno++], target, TYPE_MODE (TREE_TYPE (exp)));
> +    {
> +      if (target != NULL_RTX && MEM_P (target))
> +	/* Use force_reg to prevent illegal mem-to-mem pattern on -O0.  */
This comment doesn't make sense in conjuction with your earlier details. 
  In particular combine doesn't run at -O0, so your earlier comment that 
combine creates the problem seems inconsistent with the comment above.


> diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
> index e56a2bf4bed..f0484b1162c 100644
> --- a/gcc/config/riscv/vector.md
> +++ b/gcc/config/riscv/vector.md
> @@ -1509,8 +1509,9 @@
>            (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
>         (match_operand:V_VLS 3 "vector_move_operand"   "    m,     m,     m,    vr,    vr,    vr, viWc0, viWc0")
>         (match_operand:V_VLS 2 "vector_merge_operand"  "    0,    vu,    vu,    vu,    vu,     0,    vu,     0")))]
> -  "TARGET_VECTOR && (MEM_P (operands[0]) || MEM_P (operands[3])
> -   || CONST_VECTOR_P (operands[1]))"
> +  "TARGET_VECTOR && ((register_operand (operands[0], <MODE>mode) && MEM_P (operands[3])) ||
> +                     (MEM_P (operands[0]) && register_operand (operands[3], <MODE>mode)) ||
> +                     (register_operand (operands[0], <MODE>mode) && satisfies_constraint_Wc1 (operands[1])))"
Umm, wow.  I haven't thought deeply about this, but the complexity of 
that insn condition is a huge red flag that our operand predicates 
aren't correct for this pattern.

 From a formatting standpoint bring the wrapped operator down and 
indent.  ie

   (condition 1
    || condition 2
    || (condition 3
        && other test 4))


Jeff
diff mbox series

Patch

diff --git a/gcc/config/riscv/riscv-vector-builtins.cc b/gcc/config/riscv/riscv-vector-builtins.cc
index 528dca7ae85..cd40fb2060f 100644
--- a/gcc/config/riscv/riscv-vector-builtins.cc
+++ b/gcc/config/riscv/riscv-vector-builtins.cc
@@ -3471,7 +3471,13 @@  function_expander::function_expander (const function_instance &instance,
     exp (exp_in), target (target_in), opno (0)
 {
   if (!function_returns_void_p ())
-    create_output_operand (&m_ops[opno++], target, TYPE_MODE (TREE_TYPE (exp)));
+    {
+      if (target != NULL_RTX && MEM_P (target))
+	/* Use force_reg to prevent illegal mem-to-mem pattern on -O0.  */
+	target = force_reg (GET_MODE (target), target);
+      create_output_operand (&m_ops[opno++], target,
+			     TYPE_MODE (TREE_TYPE (exp)));
+    }
 }
 
 /* Take argument ARGNO from EXP's argument list and convert it into
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index e56a2bf4bed..f0484b1162c 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -1509,8 +1509,9 @@ 
          (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
       (match_operand:V_VLS 3 "vector_move_operand"   "    m,     m,     m,    vr,    vr,    vr, viWc0, viWc0")
       (match_operand:V_VLS 2 "vector_merge_operand"  "    0,    vu,    vu,    vu,    vu,     0,    vu,     0")))]
-  "TARGET_VECTOR && (MEM_P (operands[0]) || MEM_P (operands[3])
-   || CONST_VECTOR_P (operands[1]))"
+  "TARGET_VECTOR && ((register_operand (operands[0], <MODE>mode) && MEM_P (operands[3])) ||
+                     (MEM_P (operands[0]) && register_operand (operands[3], <MODE>mode)) ||
+                     (register_operand (operands[0], <MODE>mode) && satisfies_constraint_Wc1 (operands[1])))"
   "@
    vle<sew>.v\t%0,%3%p1
    vle<sew>.v\t%0,%3
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr110943.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr110943.c
new file mode 100644
index 00000000000..8a6c00fc94d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr110943.c
@@ -0,0 +1,33 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <riscv_vector.h>
+
+/*
+** foo9:
+**   vsetivli\tzero,1,e64,m2,t[au],m[au]
+**   ...
+**   vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
+**   ret
+*/
+void foo9 (void *base, void *out, size_t vl)
+{
+    int64_t scalar = *(int64_t*)(base + 100);
+    vint64m2_t v = __riscv_vmv_v_x_i64m2 (0, 1);
+    *(vint64m2_t*)out = v;
+}
+
+/*
+** foo10:
+**   vsetivli\tzero,1,e64,m2,t[au],m[au]
+**   ...
+**   vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
+**   ret
+*/
+void foo10 (void *base, void *out, size_t vl)
+{
+    int64_t scalar = *(int64_t*)(base + 100);
+    vint64m2_t v = __riscv_vmv_s_x_i64m2 (0, 1);
+    *(vint64m2_t*)out = v;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/zvfhmin-intrinsic.c b/gcc/testsuite/gcc.target/riscv/rvv/base/zvfhmin-intrinsic.c
index fc70c54c7fc..500748b8e79 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/zvfhmin-intrinsic.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/zvfhmin-intrinsic.c
@@ -194,12 +194,12 @@  vfloat16m4_t test_vget_v_f16m8_f16m4(vfloat16m8_t src, size_t index) {
 /* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e16,\s*m8,\s*t[au],\s*m[au]} 5 } } */
 /* { dg-final { scan-assembler-times {vfwcvt\.f\.f\.v\s+v[0-9]+,\s*v[0-9]+} 5 } } */
 /* { dg-final { scan-assembler-times {vfncvt\.f\.f\.w\s+v[0-9]+,\s*v[0-9]+} 5 } } */
-/* { dg-final { scan-assembler-times {vle16\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 20 } } */
+/* { dg-final { scan-assembler-times {vle16\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 15 } } */
 /* { dg-final { scan-assembler-times {vse16\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 15 } } */
-/* { dg-final { scan-assembler-times {vl1re16\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 5 } } */
-/* { dg-final { scan-assembler-times {vl2re16\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 4 } } */
-/* { dg-final { scan-assembler-times {vl8re16\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 5 } } */
-/* { dg-final { scan-assembler-times {vl4re16\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 5 } } */
+/* { dg-final { scan-assembler-times {vl1re16\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 7 } } */
+/* { dg-final { scan-assembler-times {vl2re16\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 5 } } */
+/* { dg-final { scan-assembler-times {vl8re16\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 6 } } */
+/* { dg-final { scan-assembler-times {vl4re16\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 6 } } */
 /* { dg-final { scan-assembler-times {vs1r\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 5 } } */
 /* { dg-final { scan-assembler-times {vs2r\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 5 } } */
 /* { dg-final { scan-assembler-times {vs4r\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 5 } } */