diff mbox series

[3/5] RISC-V: Support vmfxx.vf for autovec comparison of vec and imm

Message ID 20240301062711.207137-4-demin.han@starfivetech.com
State New
Headers show
Series RISC-V: Support vf and vx for autovec comparison of | expand

Commit Message

Demin Han March 1, 2024, 6:27 a.m. UTC
Currently, the following instructions are generated by auto-vectorization:
    flw
    vsetvli
    vfmv.v.f
    ...
    vmfxx.vv
Two issues:
  1. Additional vsetvli and vfmv.v.f instructions are emitted
  2. The splat occupies one vector register, which may result in a smaller LMUL

We expect:
    flw
    ...
    vmfxx.vf

Tested on RV32 and RV64

gcc/ChangeLog:

	* config/riscv/autovec.md: Accept imm
	* config/riscv/riscv-v.cc (get_cmp_insn_code): Select scalar pattern
	(expand_vec_cmp): Ditto
	* config/riscv/riscv.cc (riscv_const_insns): Exclude float mode

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/cmp/vcond-1.c: Add new tests

Signed-off-by: demin.han <demin.han@starfivetech.com>
---
 gcc/config/riscv/autovec.md                   |  2 +-
 gcc/config/riscv/riscv-v.cc                   | 23 +++++++++----
 gcc/config/riscv/riscv.cc                     |  2 +-
 .../riscv/rvv/autovec/cmp/vcond-1.c           | 34 +++++++++++++++++++
 4 files changed, 52 insertions(+), 9 deletions(-)

Comments

juzhe.zhong@rivai.ai March 1, 2024, 7:48 a.m. UTC | #1
Hi, Han. I understand you are trying to optimize a vector/splat-vector operation into a vector-scalar operation at the "expand" stage, that is,


vv -> vx or vv -> vf.


It's a known issue that we have been aware of for a long time.


This patch transforms vv->vf when the splat vector is duplicated from a constant (by recognizing that it is a CONST_VECTOR at the expand stage),
but it can't transform vv->vf when the splat vector is duplicated from a register.


For example, in a[i] = b[i] > x ? c[i] : d[i], where x is a register, the comparison cannot be optimized by your patch.


Actually, we have a solution that performs all possible transformations (including the case I mentioned above) from vv to vx or vf via the late-combine pass, which
was contributed by Richard Sandiford of ARM: https://patchwork.ozlabs.org/project/gcc/patch/mptr0ljn9eh.fsf@arm.com/
You can apply that patch and experiment with it locally yourself.


And I believe it will land in GCC 15, so I don't think we need this patch to do the optimization.


Thanks.
&nbsp;
------------------ Original ------------------
From:  "demin.han" <demin.han@starfivetech.com>;
Date:  Fri, Mar 1, 2024 02:27 PM
To:  "gcc-patches" <gcc-patches@gcc.gnu.org>;
Cc:  "juzhe.zhong" <juzhe.zhong@rivai.ai>; "kito.cheng" <kito.cheng@gmail.com>; "Li, Pan2" <pan2.li@intel.com>; "jeffreyalaw" <jeffreyalaw@gmail.com>;
Subject:  [PATCH 3/5] RISC-V: Support vmfxx.vf for autovec comparison of vec and imm

&nbsp;

Currently, the following instructions are generated by auto-vectorization:
    flw
    vsetvli
    vfmv.v.f
    ...
    vmfxx.vv
Two issues:
  1. Additional vsetvli and vfmv.v.f instructions are emitted
  2. The splat occupies one vector register, which may result in a smaller LMUL

We expect:
    flw
    ...
    vmfxx.vf

Tested on RV32 and RV64

gcc/ChangeLog:

	* config/riscv/autovec.md: Accept imm
	* config/riscv/riscv-v.cc (get_cmp_insn_code): Select scalar pattern
	(expand_vec_cmp): Ditto
	* config/riscv/riscv.cc (riscv_const_insns): Exclude float mode

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/cmp/vcond-1.c: Add new tests

Signed-off-by: demin.han <demin.han@starfivetech.com>
---
 gcc/config/riscv/autovec.md                   |  2 +-
 gcc/config/riscv/riscv-v.cc                   | 23 +++++++++----
 gcc/config/riscv/riscv.cc                     |  2 +-
 .../riscv/rvv/autovec/cmp/vcond-1.c           | 34 +++++++++++++++++++
 4 files changed, 52 insertions(+), 9 deletions(-)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 3b32369f68c..6cfb0800c45 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -690,7 +690,7 @@ (define_expand "vec_cmp<mode&gt;<vm&gt;"
&nbsp;&nbsp; [(set (match_operand:<VM&gt; 0 "register_operand")
&nbsp;	(match_operator:<VM&gt; 1 "comparison_operator"
&nbsp;	&nbsp; [(match_operand:V_VLSF 2 "register_operand")
-	&nbsp;&nbsp; (match_operand:V_VLSF 3 "register_operand")]))]
+	&nbsp;&nbsp; (match_operand:V_VLSF 3 "nonmemory_operand")]))]
&nbsp;&nbsp; "TARGET_VECTOR"
&nbsp;&nbsp; {
&nbsp;&nbsp;&nbsp;&nbsp; riscv_vector::expand_vec_cmp_float (operands[0], GET_CODE (operands[1]),
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 14e75b9a117..2a188ac78e0 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -2610,9 +2610,15 @@ expand_vec_init (rtx target, rtx vals)
&nbsp;/* Get insn code for corresponding comparison.&nbsp; */
&nbsp;
&nbsp;static insn_code
-get_cmp_insn_code (rtx_code code, machine_mode mode)
+get_cmp_insn_code (rtx_code code, machine_mode mode, bool scalar_p)
&nbsp;{
&nbsp;&nbsp; insn_code icode;
+&nbsp; if (FLOAT_MODE_P (mode))
+&nbsp;&nbsp;&nbsp; {
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; icode = !scalar_p ? code_for_pred_cmp (mode)
+			: code_for_pred_cmp_scalar (mode);
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; return icode;
+&nbsp;&nbsp;&nbsp; }
&nbsp;&nbsp; switch (code)
&nbsp;&nbsp;&nbsp;&nbsp; {
&nbsp;&nbsp;&nbsp;&nbsp; case EQ:
@@ -2628,10 +2634,7 @@ get_cmp_insn_code (rtx_code code, machine_mode mode)
&nbsp;&nbsp;&nbsp;&nbsp; case LTU:
&nbsp;&nbsp;&nbsp;&nbsp; case GE:
&nbsp;&nbsp;&nbsp;&nbsp; case GEU:
-&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; if (FLOAT_MODE_P (mode))
-	icode = code_for_pred_cmp (mode);
-&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; else
-	icode = code_for_pred_ltge (mode);
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; icode = code_for_pred_ltge (mode);
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; break;
&nbsp;&nbsp;&nbsp;&nbsp; default:
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; gcc_unreachable ();
@@ -2757,7 +2760,6 @@ expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx op1, rtx mask,
&nbsp;{
&nbsp;&nbsp; machine_mode mask_mode = GET_MODE (target);
&nbsp;&nbsp; machine_mode data_mode = GET_MODE (op0);
-&nbsp; insn_code icode = get_cmp_insn_code (code, data_mode);
&nbsp;
&nbsp;&nbsp; if (code == LTGT)
&nbsp;&nbsp;&nbsp;&nbsp; {
@@ -2765,12 +2767,19 @@ expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx op1, rtx mask,
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; rtx gt = gen_reg_rtx (mask_mode);
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; expand_vec_cmp (lt, LT, op0, op1, mask, maskoff);
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; expand_vec_cmp (gt, GT, op0, op1, mask, maskoff);
-&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; icode = code_for_pred (IOR, mask_mode);
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; insn_code icode = code_for_pred (IOR, mask_mode);
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; rtx ops[] = {target, lt, gt};
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; emit_vlmax_insn (icode, BINARY_MASK_OP, ops);
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; return;
&nbsp;&nbsp;&nbsp;&nbsp; }
&nbsp;
+&nbsp; rtx elt;
+&nbsp; machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (op1));
+&nbsp; bool scalar_p = const_vec_duplicate_p (op1, &amp;elt) &amp;&amp; FLOAT_MODE_P (data_mode);
+&nbsp; if (scalar_p)
+&nbsp;&nbsp;&nbsp; op1 = force_reg (scalar_mode, elt);
+&nbsp; insn_code icode = get_cmp_insn_code (code, data_mode, scalar_p);
+
&nbsp;&nbsp; rtx cmp = gen_rtx_fmt_ee (code, mask_mode, op0, op1);
&nbsp;&nbsp; if (!mask &amp;&amp; !maskoff)
&nbsp;&nbsp;&nbsp;&nbsp; {
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 4100abc9dd1..1ffe4865c19 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -1760,7 +1760,7 @@ riscv_const_insns (rtx x)
&nbsp;		&nbsp;&nbsp; register vec_duplicate into vmv.v.x.&nbsp; */
&nbsp;		scalar_mode smode = GET_MODE_INNER (GET_MODE (x));
&nbsp;		if (maybe_gt (GET_MODE_SIZE (smode), UNITS_PER_WORD)
-		&nbsp;&nbsp;&nbsp; &amp;&amp; !immediate_operand (elt, Pmode))
+		&nbsp;&nbsp;&nbsp; &amp;&amp; !FLOAT_MODE_P (smode) &amp;&amp; !immediate_operand (elt, Pmode))
&nbsp;		&nbsp; return 0;
&nbsp;		/* Constants from -16 to 15 can be loaded with vmv.v.i.
&nbsp;		&nbsp;&nbsp; The Wc0, Wc1 constraints are already covered by the
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c
index 99a230d1c8a..7f6738518ee 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c
@@ -141,6 +141,34 @@
&nbsp;TEST_VAR_ALL (DEF_VCOND_VAR)
&nbsp;TEST_IMM_ALL (DEF_VCOND_IMM)
&nbsp;
+#define TEST_COND_IMM_FLOAT(T, COND, IMM, SUFFIX)			\
+&nbsp; T (float, float, COND, IMM, SUFFIX##_float_float)			\
+&nbsp; T (double, double, COND, IMM, SUFFIX##_double_double)
+
+#define TEST_IMM_FLOAT_ALL(T)						\
+&nbsp; TEST_COND_IMM_FLOAT (T, &gt;, 0.0, _gt)					\
+&nbsp; TEST_COND_IMM_FLOAT (T, <, 0.0, _lt)					\
+&nbsp; TEST_COND_IMM_FLOAT (T, &gt;=, 0.0, _ge)					\
+&nbsp; TEST_COND_IMM_FLOAT (T, <=, 0.0, _le)					\
+&nbsp; TEST_COND_IMM_FLOAT (T, ==, 0.0, _eq)					\
+&nbsp; TEST_COND_IMM_FLOAT (T, !=, 0.0, _ne)					\
+									\
+&nbsp; TEST_COND_IMM_FLOAT (T, &gt;, 1.0, _gt1)					\
+&nbsp; TEST_COND_IMM_FLOAT (T, <, 1.0, _lt1)					\
+&nbsp; TEST_COND_IMM_FLOAT (T, &gt;=, 1.0, _ge1)				\
+&nbsp; TEST_COND_IMM_FLOAT (T, <=, 1.0, _le1)				\
+&nbsp; TEST_COND_IMM_FLOAT (T, ==, 1.0, _eq1)				\
+&nbsp; TEST_COND_IMM_FLOAT (T, !=, 1.0, _ne1)				\
+									\
+&nbsp; TEST_COND_IMM_FLOAT (T, &gt;, -1.0, _gt2)				\
+&nbsp; TEST_COND_IMM_FLOAT (T, <, -1.0, _lt2)				\
+&nbsp; TEST_COND_IMM_FLOAT (T, &gt;=, -1.0, _ge2)				\
+&nbsp; TEST_COND_IMM_FLOAT (T, <=, -1.0, _le2)				\
+&nbsp; TEST_COND_IMM_FLOAT (T, ==, -1.0, _eq2)				\
+&nbsp; TEST_COND_IMM_FLOAT (T, !=, -1.0, _ne2)
+
+TEST_IMM_FLOAT_ALL (DEF_VCOND_IMM)
+
&nbsp;/* { dg-final { scan-assembler-times {\tvmseq\.vi} 42 } } */
&nbsp;/* { dg-final { scan-assembler-times {\tvmsne\.vi} 42 } } */
&nbsp;/* { dg-final { scan-assembler-times {\tvmsgt\.vi} 30 } } */
@@ -155,3 +183,9 @@ TEST_IMM_ALL (DEF_VCOND_IMM)
&nbsp;/* { dg-final { scan-assembler-times {\tvmslt} 38 } } */
&nbsp;/* { dg-final { scan-assembler-times {\tvmsge} 38 } } */
&nbsp;/* { dg-final { scan-assembler-times {\tvmsle} 82 } } */
+/* { dg-final { scan-assembler-times {\tvmfgt.vf} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmflt.vf} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmfge.vf} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmfle.vf} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmfeq.vf} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmfne.vf} 6 } } */
Demin Han March 1, 2024, 8:26 a.m. UTC | #2
Hi juzhe,

Yes, this patch does not work for a comparison between a vector and a scalar variable, because the scalar is duplicated in the loop vectorizer pass.
I have not found a solution for that situation yet, so I addressed the vector-imm comparison first.
Thanks for the reminder; I will try that patch.

Thanks.

From: 钟居哲 <juzhe.zhong@rivai.ai>
Sent: 2024年3月1日 15:49
To: Demin Han <demin.han@starfivetech.com>; gcc-patches <gcc-patches@gcc.gnu.org>
Cc: kito.cheng <kito.cheng@gmail.com>; Li, Pan2 <pan2.li@intel.com>; jeffreyalaw <jeffreyalaw@gmail.com>; Robin Dapp <rdapp.gcc@gmail.com>; richard.sandiford <richard.sandiford@arm.com>
Subject: Re:[PATCH 3/5] RISC-V: Support vmfxx.vf for autovec comparison of vec and imm

Hi, han. I understand you are trying to support optimize vector-splat_vector into vector-scalar in "expand" stage, that is,

vv -> vx or vv -> vf.

It's a known issue that we know for a long time.

This patch is trying to transform vv->vf when the splat vector is duplicate from a constant (by recognize it is a CONST_VECTOR in expand stage),
but can't transform vv->vf when splat vector is duplicate from a register.

For example, like a[i] = b[i] > x ? c[i] : d[i], the x is a register, this case can not be optimized with your patch.

Actually, we have a solution to do all possible transformation (including the case I mentioned above) from vv to vx or vf by late-combine PASS which
is contributed by ARM Richard Sandiford: https://patchwork.ozlabs.org/project/gcc/patch/mptr0ljn9eh.fsf@arm.com/
You can try to apply this patch and experiment it locally yourself.

And I believe it will be landed in GCC-15. So I don't think we need this patch to do the optimization.

Thanks.

------------------ Original ------------------
From:  "demin.han"<demin.han@starfivetech.com<mailto:demin.han@starfivetech.com>>;
Date:  Fri, Mar 1, 2024 02:27 PM
To:  "gcc-patches"<gcc-patches@gcc.gnu.org<mailto:gcc-patches@gcc.gnu.org>>;
Cc:  "juzhe.zhong"<juzhe.zhong@rivai.ai<mailto:juzhe.zhong@rivai.ai>>; "kito.cheng"<kito.cheng@gmail.com<mailto:kito.cheng@gmail.com>>; "Li, Pan2"<pan2.li@intel.com<mailto:pan2.li@intel.com>>; "jeffreyalaw"<jeffreyalaw@gmail.com<mailto:jeffreyalaw@gmail.com>>;
Subject:  [PATCH 3/5] RISC-V: Support vmfxx.vf for autovec comparison of vec and imm

Currently, following instructions generated in autovector:
    flw
    vsetvli
    vfmv.v.f
    ...
    vmfxx.vv
Two issues:
  1. Additional vsetvl and vfmv instructions
  2. Occupy one vector register and may results in smaller lmul

We expect:
    flw
    ...
    vmfxx.vf

Tested on RV32 and RV64

gcc/ChangeLog:

* config/riscv/autovec.md: Accept imm
* config/riscv/riscv-v.cc (get_cmp_insn_code): Select scalar pattern
(expand_vec_cmp): Ditto
* config/riscv/riscv.cc (riscv_const_insns): Exclude float mode

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/cmp/vcond-1.c: Add new tests

Signed-off-by: demin.han <demin.han@starfivetech.com<mailto:demin.han@starfivetech.com>>
---
 gcc/config/riscv/autovec.md                   |  2 +-
 gcc/config/riscv/riscv-v.cc                   | 23 +++++++++----
 gcc/config/riscv/riscv.cc                     |  2 +-
 .../riscv/rvv/autovec/cmp/vcond-1.c           | 34 +++++++++++++++++++
 4 files changed, 52 insertions(+), 9 deletions(-)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 3b32369f68c..6cfb0800c45 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -690,7 +690,7 @@ (define_expand "vec_cmp<mode><vm>"
   [(set (match_operand:<VM> 0 "register_operand")
  (match_operator:<VM> 1 "comparison_operator"
    [(match_operand:V_VLSF 2 "register_operand")
-    (match_operand:V_VLSF 3 "register_operand")]))]
+    (match_operand:V_VLSF 3 "nonmemory_operand")]))]
   "TARGET_VECTOR"
   {
     riscv_vector::expand_vec_cmp_float (operands[0], GET_CODE (operands[1]),
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 14e75b9a117..2a188ac78e0 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -2610,9 +2610,15 @@ expand_vec_init (rtx target, rtx vals)
 /* Get insn code for corresponding comparison.  */

 static insn_code
-get_cmp_insn_code (rtx_code code, machine_mode mode)
+get_cmp_insn_code (rtx_code code, machine_mode mode, bool scalar_p)
 {
   insn_code icode;
+  if (FLOAT_MODE_P (mode))
+    {
+      icode = !scalar_p ? code_for_pred_cmp (mode)
+ : code_for_pred_cmp_scalar (mode);
+      return icode;
+    }
   switch (code)
     {
     case EQ:
@@ -2628,10 +2634,7 @@ get_cmp_insn_code (rtx_code code, machine_mode mode)
     case LTU:
     case GE:
     case GEU:
-      if (FLOAT_MODE_P (mode))
- icode = code_for_pred_cmp (mode);
-      else
- icode = code_for_pred_ltge (mode);
+      icode = code_for_pred_ltge (mode);
       break;
     default:
       gcc_unreachable ();
@@ -2757,7 +2760,6 @@ expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx op1, rtx mask,
 {
   machine_mode mask_mode = GET_MODE (target);
   machine_mode data_mode = GET_MODE (op0);
-  insn_code icode = get_cmp_insn_code (code, data_mode);

   if (code == LTGT)
     {
@@ -2765,12 +2767,19 @@ expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx op1, rtx mask,
       rtx gt = gen_reg_rtx (mask_mode);
       expand_vec_cmp (lt, LT, op0, op1, mask, maskoff);
       expand_vec_cmp (gt, GT, op0, op1, mask, maskoff);
-      icode = code_for_pred (IOR, mask_mode);
+      insn_code icode = code_for_pred (IOR, mask_mode);
       rtx ops[] = {target, lt, gt};
       emit_vlmax_insn (icode, BINARY_MASK_OP, ops);
       return;
     }

+  rtx elt;
+  machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (op1));
+  bool scalar_p = const_vec_duplicate_p (op1, &elt) && FLOAT_MODE_P (data_mode);
+  if (scalar_p)
+    op1 = force_reg (scalar_mode, elt);
+  insn_code icode = get_cmp_insn_code (code, data_mode, scalar_p);
+
   rtx cmp = gen_rtx_fmt_ee (code, mask_mode, op0, op1);
   if (!mask && !maskoff)
     {
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 4100abc9dd1..1ffe4865c19 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -1760,7 +1760,7 @@ riscv_const_insns (rtx x)
     register vec_duplicate into vmv.v.x.  */
  scalar_mode smode = GET_MODE_INNER (GET_MODE (x));
  if (maybe_gt (GET_MODE_SIZE (smode), UNITS_PER_WORD)
-     && !immediate_operand (elt, Pmode))
+     && !FLOAT_MODE_P (smode) && !immediate_operand (elt, Pmode))
    return 0;
  /* Constants from -16 to 15 can be loaded with vmv.v.i.
     The Wc0, Wc1 constraints are already covered by the
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c
index 99a230d1c8a..7f6738518ee 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c
@@ -141,6 +141,34 @@
 TEST_VAR_ALL (DEF_VCOND_VAR)
 TEST_IMM_ALL (DEF_VCOND_IMM)

+#define TEST_COND_IMM_FLOAT(T, COND, IMM, SUFFIX) \
+  T (float, float, COND, IMM, SUFFIX##_float_float) \
+  T (double, double, COND, IMM, SUFFIX##_double_double)
+
+#define TEST_IMM_FLOAT_ALL(T) \
+  TEST_COND_IMM_FLOAT (T, >, 0.0, _gt) \
+  TEST_COND_IMM_FLOAT (T, <, 0.0, _lt) \
+  TEST_COND_IMM_FLOAT (T, >=, 0.0, _ge) \
+  TEST_COND_IMM_FLOAT (T, <=, 0.0, _le) \
+  TEST_COND_IMM_FLOAT (T, ==, 0.0, _eq) \
+  TEST_COND_IMM_FLOAT (T, !=, 0.0, _ne) \
+ \
+  TEST_COND_IMM_FLOAT (T, >, 1.0, _gt1) \
+  TEST_COND_IMM_FLOAT (T, <, 1.0, _lt1) \
+  TEST_COND_IMM_FLOAT (T, >=, 1.0, _ge1) \
+  TEST_COND_IMM_FLOAT (T, <=, 1.0, _le1) \
+  TEST_COND_IMM_FLOAT (T, ==, 1.0, _eq1) \
+  TEST_COND_IMM_FLOAT (T, !=, 1.0, _ne1) \
+ \
+  TEST_COND_IMM_FLOAT (T, >, -1.0, _gt2) \
+  TEST_COND_IMM_FLOAT (T, <, -1.0, _lt2) \
+  TEST_COND_IMM_FLOAT (T, >=, -1.0, _ge2) \
+  TEST_COND_IMM_FLOAT (T, <=, -1.0, _le2) \
+  TEST_COND_IMM_FLOAT (T, ==, -1.0, _eq2) \
+  TEST_COND_IMM_FLOAT (T, !=, -1.0, _ne2)
+
+TEST_IMM_FLOAT_ALL (DEF_VCOND_IMM)
+
 /* { dg-final { scan-assembler-times {\tvmseq\.vi} 42 } } */
 /* { dg-final { scan-assembler-times {\tvmsne\.vi} 42 } } */
 /* { dg-final { scan-assembler-times {\tvmsgt\.vi} 30 } } */
@@ -155,3 +183,9 @@ TEST_IMM_ALL (DEF_VCOND_IMM)
 /* { dg-final { scan-assembler-times {\tvmslt} 38 } } */
 /* { dg-final { scan-assembler-times {\tvmsge} 38 } } */
 /* { dg-final { scan-assembler-times {\tvmsle} 82 } } */
+/* { dg-final { scan-assembler-times {\tvmfgt.vf} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmflt.vf} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmfge.vf} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmfle.vf} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmfeq.vf} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmfne.vf} 6 } } */
--
2.43.2
Demin Han March 5, 2024, 8:40 a.m. UTC | #3
Hi,

I applied the mentioned late-combine patch (https://patchwork.ozlabs.org/project/gcc/patch/mptbka7em9w.fsf@arm.com/)
and did some initial tests.

Found that:

1.      Float vector-scalar and vector-imm are OK

2.      Integer vector-scalar is OK

3.      Integer vector-imm(e.g. a[i] > 16) is not OK.

When the late-combine pass is reached, the vec_duplicate(0x10) form is still present, but no pattern matches it,

because all scalar patterns use the "register_operand" predicate.


I think the MD file or the RVV expand functions need to change to handle this situation.

Regards,
Demin
juzhe.zhong@rivai.ai March 5, 2024, 9:02 a.m. UTC | #4
Yes. I think we are lacking some of the combine patterns needed to cover all vector-scalar combinations.

If you are interested in this topic, you can investigate it (I believe nobody is working on it at the moment).
I bet we should add some patterns for the late-combine pass, for example:

(set (plus : (vec_duplicate) (reg))) 



juzhe.zhong@rivai.ai
 
From: Demin Han
Date: 2024-03-05 16:40
To: 钟居哲; gcc-patches
CC: kito.cheng; Li, Pan2; jeffreyalaw; Robin Dapp; richard.sandiford
Subject: RE: Re:[PATCH 3/5] RISC-V: Support vmfxx.vf for autovec comparison of vec and imm
Hi,
 
I applied the mentioned last_combine patch(https://patchwork.ozlabs.org/project/gcc/patch/mptbka7em9w.fsf@arm.com/).
And did some initial tests. 
 
Found that:
1.      Float vector-scalar and vector-imm are OK
2.      Integer vector-scalar is OK
3.      Integer vector-imm(e.g. a[i] > 16) is not OK.
When reaches last_combine pass, vec_duplicate(0x10) form is still kept, but no pattern match this now, 
because  all scalar patterns  have “register_operand” predication. 
 
I think MD file or expand function of rvv need to change for this situation.
 
Regards,
Demin
Demin Han March 5, 2024, 10:40 a.m. UTC | #5
OK, I will solve the comparison operations first and then check the other operations.



Regards,

Demin


From: juzhe.zhong@rivai.ai <juzhe.zhong@rivai.ai>
Sent: 2024年3月5日 17:02
To: Demin Han <demin.han@starfivetech.com>; gcc-patches <gcc-patches@gcc.gnu.org>
Cc: kito.cheng <kito.cheng@gmail.com>; pan2.li <pan2.li@intel.com>; jeffreyalaw <jeffreyalaw@gmail.com>; Robin Dapp <rdapp.gcc@gmail.com>; richard.sandiford <richard.sandiford@arm.com>
Subject: Re: RE:[PATCH 3/5] RISC-V: Support vmfxx.vf for autovec comparison of vec and imm

Yes. I think we are lacking some combine patterns to do all vector-scalar combinations.

If you are interested at this topic, you can do some investigations on that (I believe currently no body works on it for now).
I bet we should add some patterns for late-combine PASS for example:

(set (plus : (vec_duplicate) (reg)))
diff mbox series

Patch

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 3b32369f68c..6cfb0800c45 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -690,7 +690,7 @@  (define_expand "vec_cmp<mode><vm>"
   [(set (match_operand:<VM> 0 "register_operand")
 	(match_operator:<VM> 1 "comparison_operator"
 	  [(match_operand:V_VLSF 2 "register_operand")
-	   (match_operand:V_VLSF 3 "register_operand")]))]
+	   (match_operand:V_VLSF 3 "nonmemory_operand")]))]
   "TARGET_VECTOR"
   {
     riscv_vector::expand_vec_cmp_float (operands[0], GET_CODE (operands[1]),
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 14e75b9a117..2a188ac78e0 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -2610,9 +2610,15 @@  expand_vec_init (rtx target, rtx vals)
 /* Get insn code for corresponding comparison.  */
 
 static insn_code
-get_cmp_insn_code (rtx_code code, machine_mode mode)
+get_cmp_insn_code (rtx_code code, machine_mode mode, bool scalar_p)
 {
   insn_code icode;
+  if (FLOAT_MODE_P (mode))
+    {
+      icode = !scalar_p ? code_for_pred_cmp (mode)
+			: code_for_pred_cmp_scalar (mode);
+      return icode;
+    }
   switch (code)
     {
     case EQ:
@@ -2628,10 +2634,7 @@  get_cmp_insn_code (rtx_code code, machine_mode mode)
     case LTU:
     case GE:
     case GEU:
-      if (FLOAT_MODE_P (mode))
-	icode = code_for_pred_cmp (mode);
-      else
-	icode = code_for_pred_ltge (mode);
+      icode = code_for_pred_ltge (mode);
       break;
     default:
       gcc_unreachable ();
@@ -2757,7 +2760,6 @@  expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx op1, rtx mask,
 {
   machine_mode mask_mode = GET_MODE (target);
   machine_mode data_mode = GET_MODE (op0);
-  insn_code icode = get_cmp_insn_code (code, data_mode);
 
   if (code == LTGT)
     {
@@ -2765,12 +2767,19 @@  expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx op1, rtx mask,
       rtx gt = gen_reg_rtx (mask_mode);
       expand_vec_cmp (lt, LT, op0, op1, mask, maskoff);
       expand_vec_cmp (gt, GT, op0, op1, mask, maskoff);
-      icode = code_for_pred (IOR, mask_mode);
+      insn_code icode = code_for_pred (IOR, mask_mode);
       rtx ops[] = {target, lt, gt};
       emit_vlmax_insn (icode, BINARY_MASK_OP, ops);
       return;
     }
 
+  rtx elt;
+  machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (op1));
+  bool scalar_p = const_vec_duplicate_p (op1, &elt) && FLOAT_MODE_P (data_mode);
+  if (scalar_p)
+    op1 = force_reg (scalar_mode, elt);
+  insn_code icode = get_cmp_insn_code (code, data_mode, scalar_p);
+
   rtx cmp = gen_rtx_fmt_ee (code, mask_mode, op0, op1);
   if (!mask && !maskoff)
     {
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 4100abc9dd1..1ffe4865c19 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -1760,7 +1760,7 @@  riscv_const_insns (rtx x)
 		   register vec_duplicate into vmv.v.x.  */
 		scalar_mode smode = GET_MODE_INNER (GET_MODE (x));
 		if (maybe_gt (GET_MODE_SIZE (smode), UNITS_PER_WORD)
-		    && !immediate_operand (elt, Pmode))
+		    && !FLOAT_MODE_P (smode) && !immediate_operand (elt, Pmode))
 		  return 0;
 		/* Constants from -16 to 15 can be loaded with vmv.v.i.
 		   The Wc0, Wc1 constraints are already covered by the
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c
index 99a230d1c8a..7f6738518ee 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c
@@ -141,6 +141,34 @@ 
 TEST_VAR_ALL (DEF_VCOND_VAR)
 TEST_IMM_ALL (DEF_VCOND_IMM)
 
+#define TEST_COND_IMM_FLOAT(T, COND, IMM, SUFFIX)			\
+  T (float, float, COND, IMM, SUFFIX##_float_float)			\
+  T (double, double, COND, IMM, SUFFIX##_double_double)
+
+#define TEST_IMM_FLOAT_ALL(T)						\
+  TEST_COND_IMM_FLOAT (T, >, 0.0, _gt)					\
+  TEST_COND_IMM_FLOAT (T, <, 0.0, _lt)					\
+  TEST_COND_IMM_FLOAT (T, >=, 0.0, _ge)					\
+  TEST_COND_IMM_FLOAT (T, <=, 0.0, _le)					\
+  TEST_COND_IMM_FLOAT (T, ==, 0.0, _eq)					\
+  TEST_COND_IMM_FLOAT (T, !=, 0.0, _ne)					\
+									\
+  TEST_COND_IMM_FLOAT (T, >, 1.0, _gt1)					\
+  TEST_COND_IMM_FLOAT (T, <, 1.0, _lt1)					\
+  TEST_COND_IMM_FLOAT (T, >=, 1.0, _ge1)				\
+  TEST_COND_IMM_FLOAT (T, <=, 1.0, _le1)				\
+  TEST_COND_IMM_FLOAT (T, ==, 1.0, _eq1)				\
+  TEST_COND_IMM_FLOAT (T, !=, 1.0, _ne1)				\
+									\
+  TEST_COND_IMM_FLOAT (T, >, -1.0, _gt2)				\
+  TEST_COND_IMM_FLOAT (T, <, -1.0, _lt2)				\
+  TEST_COND_IMM_FLOAT (T, >=, -1.0, _ge2)				\
+  TEST_COND_IMM_FLOAT (T, <=, -1.0, _le2)				\
+  TEST_COND_IMM_FLOAT (T, ==, -1.0, _eq2)				\
+  TEST_COND_IMM_FLOAT (T, !=, -1.0, _ne2)
+
+TEST_IMM_FLOAT_ALL (DEF_VCOND_IMM)
+
 /* { dg-final { scan-assembler-times {\tvmseq\.vi} 42 } } */
 /* { dg-final { scan-assembler-times {\tvmsne\.vi} 42 } } */
 /* { dg-final { scan-assembler-times {\tvmsgt\.vi} 30 } } */
@@ -155,3 +183,9 @@  TEST_IMM_ALL (DEF_VCOND_IMM)
 /* { dg-final { scan-assembler-times {\tvmslt} 38 } } */
 /* { dg-final { scan-assembler-times {\tvmsge} 38 } } */
 /* { dg-final { scan-assembler-times {\tvmsle} 82 } } */
+/* { dg-final { scan-assembler-times {\tvmfgt.vf} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmflt.vf} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmfge.vf} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmfle.vf} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmfeq.vf} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmfne.vf} 6 } } */