diff mbox series

[5/5] RISC-V: Support vmsxx.vx for autovec comparison of vec and imm

Message ID 20240301062711.207137-6-demin.han@starfivetech.com
State New
Headers show
Series RISC-V: Support vf and vx for autovec comparison of | expand

Commit Message

Demin Han March 1, 2024, 6:27 a.m. UTC
Similar to the previous float change, vmsxx.vx is needed for integer comparisons.
1. Only comparisons that cannot match the vi form should use vx.
2. DImode is processed by sew64_scalar_helper.

Tested on RV32 and RV64.

gcc/ChangeLog:

	* config/riscv/riscv-v.cc (get_cmp_insn_code): Select scalar pattern.
	(expand_vec_cmp): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/cmp/vcond-1.c: Update expected counts.

Signed-off-by: demin.han <demin.han@starfivetech.com>
---
 gcc/config/riscv/riscv-v.cc                   | 33 ++++++++-----------
 .../riscv/rvv/autovec/cmp/vcond-1.c           | 14 ++++++--
 2 files changed, 26 insertions(+), 21 deletions(-)

Comments

juzhe.zhong@rivai.ai March 1, 2024, 7:50 a.m. UTC | #1
Hi, Han. My comment for this patch is the same as for

[PATCH 3/5] RISC-V: Support vmfxx.vf for autovec comparison of vec and imm

&nbsp;
&nbsp;
------------------ Original ------------------
From: "demin.han"<demin.han@starfivetech.com>;
Date: Fri, Mar 1, 2024 02:27 PM
To: "gcc-patches"<gcc-patches@gcc.gnu.org>;
Cc: "juzhe.zhong"<juzhe.zhong@rivai.ai>; "kito.cheng"<kito.cheng@gmail.com>; "Li, Pan2"<pan2.li@intel.com>; "jeffreyalaw"<jeffreyalaw@gmail.com>;
Subject: [PATCH 5/5] RISC-V: Support vmsxx.vx for autovec comparison of vec and imm

&nbsp;

Similar to previous float change, vmsxx.vx is needed.
1. Only those which can't match vi should use vx.
2. DImode is processed by sew64_scalar_helper.

Tested on RV32 and RV64.

gcc/ChangeLog:

	* config/riscv/riscv-v.cc (get_cmp_insn_code): Select scalar pattern
	(expand_vec_cmp): Ditto

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/cmp/vcond-1.c: Update expect

Signed-off-by: demin.han <demin.han@starfivetech.com>
---
 gcc/config/riscv/riscv-v.cc                   | 33 ++++++++-----------
 .../riscv/rvv/autovec/cmp/vcond-1.c           | 14 ++++++--
 2 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 2a188ac78e0..9b601a4a8ff 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -2619,26 +2619,18 @@ get_cmp_insn_code (rtx_code code, machine_mode mode, bool scalar_p)
 			: code_for_pred_cmp_scalar (mode);
       return icode;
     }
-  switch (code)
+  if (scalar_p)
     {
-    case EQ:
-    case NE:
-    case LE:
-    case LEU:
-    case GT:
-    case GTU:
-    case LTGT:
-      icode = code_for_pred_cmp (mode);
-      break;
-    case LT:
-    case LTU:
-    case GE:
-    case GEU:
-      icode = code_for_pred_ltge (mode);
-      break;
-    default:
-      gcc_unreachable ();
+      if (code == GE || code == GEU)
+	  icode = code_for_pred_ge_scalar (mode);
+      else
+	  icode = code_for_pred_cmp_scalar (mode);
+      return icode;
     }
+  if (code == LT || code == LTU || code == GE || code == GEU)
+    icode = code_for_pred_ltge (mode);
+  else
+    icode = code_for_pred_cmp (mode);
   return icode;
 }
&nbsp;
@@ -2775,7 +2767,10 @@ expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx op1, rtx mask,
&nbsp;
   rtx elt;
   machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (op1));
-  bool scalar_p = const_vec_duplicate_p (op1, &elt) && FLOAT_MODE_P (data_mode);
+  bool scalar_p
+    = const_vec_duplicate_p (op1, &elt)
+      && (FLOAT_MODE_P (data_mode)
+	  || (scalar_mode != DImode && !has_vi_variant_p (code, elt)));
   if (scalar_p)
     op1 = force_reg (scalar_mode, elt);
   insn_code icode = get_cmp_insn_code (code, data_mode, scalar_p);
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c
index 7f6738518ee..e04c2a0cfbd 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c
@@ -180,9 +180,19 @@ TEST_IMM_FLOAT_ALL (DEF_VCOND_IMM)
 /* { dg-final { scan-assembler-times {\tvmseq} 78 } } */
 /* { dg-final { scan-assembler-times {\tvmsne} 78 } } */
 /* { dg-final { scan-assembler-times {\tvmsgt} 82 } } */
-/* { dg-final { scan-assembler-times {\tvmslt} 38 } } */
-/* { dg-final { scan-assembler-times {\tvmsge} 38 } } */
+/* { dg-final { scan-assembler-times {\tvmslt} 50 } } */
+/* { dg-final { scan-assembler-times {\tvmsge} 26 } } */
 /* { dg-final { scan-assembler-times {\tvmsle} 82 } } */
+/* { dg-final { scan-assembler-times {\tvmseq\.vx} 16 } } */
+/* { dg-final { scan-assembler-times {\tvmsne\.vx} 16 } } */
+/* { dg-final { scan-assembler-times {\tvmsgt\.vx} 4 } } */
+/* { dg-final { scan-assembler-times {\tvmsgtu\.vx} 14 } } */
+/* { dg-final { scan-assembler-times {\tvmslt\.vx} 24 } } */
+/* { dg-final { scan-assembler-times {\tvmsltu\.vx} 0 } } */
+/* { dg-final { scan-assembler-times {\tvmsge\.vx} 0 } } */
+/* { dg-final { scan-assembler-times {\tvmsgeu\.vx} 0 } } */
+/* { dg-final { scan-assembler-times {\tvmsle\.vx} 4 } } */
+/* { dg-final { scan-assembler-times {\tvmsleu\.vx} 14 } } */
 /* { dg-final { scan-assembler-times {\tvmfgt.vf} 6 } } */
 /* { dg-final { scan-assembler-times {\tvmflt.vf} 6 } } */
 /* { dg-final { scan-assembler-times {\tvmfge.vf} 6 } } */
Robin Dapp March 1, 2024, 12:07 p.m. UTC | #2
Hi Han,

in addition to what Juzhe mentioned (and that late-combine is going
to handle such cases) it should be noted that register pressure
should not be the only consideration here.  Many uarchs have a higher
latency for register-file-crossing moves.  At least without spilling
the vv variant is preferable, with spilling it very much depends.

Regards
 Robin
Andrew Waterman March 2, 2024, 12:24 a.m. UTC | #3
On Fri, Mar 1, 2024 at 4:07 AM Robin Dapp <rdapp.gcc@gmail.com> wrote:
>
> Hi Han,
>
> in addition to what Juzhe mentioned (and that late-combine is going
> to handle such cases) it should be noted that register pressure
> should not be the only consideration here.  Many uarchs have a higher
> latency for register-file-crossing moves.  At least without spilling
> the vv variant is preferable, with spilling it very much depends.

And of course there are uarches for which this is not the case (e.g.
post-commit decoupled vector unit), in which case the .vx and .vf
versions are preferable to the .vv form regardless of vector register
pressure, because they reduce vector regfile access energy (especially
if a splat can be avoided).  So it's a job for -mtune.

>
>
> Regards
>  Robin
>
diff mbox series

Patch

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 2a188ac78e0..9b601a4a8ff 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -2619,26 +2619,18 @@  get_cmp_insn_code (rtx_code code, machine_mode mode, bool scalar_p)
 			: code_for_pred_cmp_scalar (mode);
       return icode;
     }
-  switch (code)
+  if (scalar_p)
     {
-    case EQ:
-    case NE:
-    case LE:
-    case LEU:
-    case GT:
-    case GTU:
-    case LTGT:
-      icode = code_for_pred_cmp (mode);
-      break;
-    case LT:
-    case LTU:
-    case GE:
-    case GEU:
-      icode = code_for_pred_ltge (mode);
-      break;
-    default:
-      gcc_unreachable ();
+      if (code == GE || code == GEU)
+	  icode = code_for_pred_ge_scalar (mode);
+      else
+	  icode = code_for_pred_cmp_scalar (mode);
+      return icode;
     }
+  if (code == LT || code == LTU || code == GE || code == GEU)
+    icode = code_for_pred_ltge (mode);
+  else
+    icode = code_for_pred_cmp (mode);
   return icode;
 }
 
@@ -2775,7 +2767,10 @@  expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx op1, rtx mask,
 
   rtx elt;
   machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (op1));
-  bool scalar_p = const_vec_duplicate_p (op1, &elt) && FLOAT_MODE_P (data_mode);
+  bool scalar_p
+    = const_vec_duplicate_p (op1, &elt)
+      && (FLOAT_MODE_P (data_mode)
+	  || (scalar_mode != DImode && !has_vi_variant_p (code, elt)));
   if (scalar_p)
     op1 = force_reg (scalar_mode, elt);
   insn_code icode = get_cmp_insn_code (code, data_mode, scalar_p);
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c
index 7f6738518ee..e04c2a0cfbd 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c
@@ -180,9 +180,19 @@  TEST_IMM_FLOAT_ALL (DEF_VCOND_IMM)
 /* { dg-final { scan-assembler-times {\tvmseq} 78 } } */
 /* { dg-final { scan-assembler-times {\tvmsne} 78 } } */
 /* { dg-final { scan-assembler-times {\tvmsgt} 82 } } */
-/* { dg-final { scan-assembler-times {\tvmslt} 38 } } */
-/* { dg-final { scan-assembler-times {\tvmsge} 38 } } */
+/* { dg-final { scan-assembler-times {\tvmslt} 50 } } */
+/* { dg-final { scan-assembler-times {\tvmsge} 26 } } */
 /* { dg-final { scan-assembler-times {\tvmsle} 82 } } */
+/* { dg-final { scan-assembler-times {\tvmseq\.vx} 16 } } */
+/* { dg-final { scan-assembler-times {\tvmsne\.vx} 16 } } */
+/* { dg-final { scan-assembler-times {\tvmsgt\.vx} 4 } } */
+/* { dg-final { scan-assembler-times {\tvmsgtu\.vx} 14 } } */
+/* { dg-final { scan-assembler-times {\tvmslt\.vx} 24 } } */
+/* { dg-final { scan-assembler-times {\tvmsltu\.vx} 0 } } */
+/* { dg-final { scan-assembler-times {\tvmsge\.vx} 0 } } */
+/* { dg-final { scan-assembler-times {\tvmsgeu\.vx} 0 } } */
+/* { dg-final { scan-assembler-times {\tvmsle\.vx} 4 } } */
+/* { dg-final { scan-assembler-times {\tvmsleu\.vx} 14 } } */
 /* { dg-final { scan-assembler-times {\tvmfgt.vf} 6 } } */
 /* { dg-final { scan-assembler-times {\tvmflt.vf} 6 } } */
 /* { dg-final { scan-assembler-times {\tvmfge.vf} 6 } } */