diff mbox series

[35/44] RISC-V: Avoid extraneous integer comparison for FP comparisons

Message ID alpine.DEB.2.20.2311182249380.5892@tpp.orcam.me.uk
State New
Headers show
Series RISC-V: Various if-conversion fixes and improvements | expand

Commit Message

Maciej W. Rozycki Nov. 19, 2023, 5:42 a.m. UTC
We have floating-point conditional-set machine instructions for a subset 
of FP comparisons, so avoid going through a comparison against constant 
zero in `riscv_expand_float_scc' where not necessary, preventing an 
extraneous RTL instruction from being produced that counts against the 
cost of the replacement branchless code sequence in if-conversion, e.g.:

(insn 29 6 30 2 (set (reg:DI 142)
        (ge:DI (reg/v:DF 135 [ w ])
            (reg/v:DF 136 [ x ]))) 297 {*cstoredfdi4}
     (nil))
(insn 30 29 31 2 (set (reg:DI 143)
        (ne:DI (reg:DI 142)
            (const_int 0 [0]))) 319 {*sne_zero_didi}
     (nil))
(insn 31 30 32 2 (set (reg:DI 141)
        (reg:DI 143)) 206 {*movdi_64bit}
     (nil))
(insn 32 31 33 2 (set (reg:DI 144)
        (neg:DI (reg:DI 141))) 15 {negdi2}
     (nil))
(insn 33 32 34 2 (set (reg:DI 145)
        (and:DI (reg:DI 144)
            (reg/v:DI 137 [ y ]))) 102 {*anddi3}
     (nil))
(insn 34 33 35 2 (set (reg:DI 146)
        (not:DI (reg:DI 144))) 111 {one_cmpldi2}
     (nil))
(insn 35 34 36 2 (set (reg:DI 147)
        (and:DI (reg:DI 146)
            (reg/v:DI 138 [ z ]))) 102 {*anddi3}
     (nil))
(insn 36 35 21 2 (set (reg/v:DI 138 [ z ])
        (ior:DI (reg:DI 145)
            (reg:DI 147))) 105 {iordi3}
     (nil))

where the second insn effectively just copies its input.  This now gets 
simplified to:

(insn 29 6 30 2 (set (reg:DI 141)
        (ge:DI (reg/v:DF 135 [ w ])
            (reg/v:DF 136 [ x ]))) 297 {*cstoredfdi4}
     (nil))
(insn 30 29 31 2 (set (reg:DI 142)
        (neg:DI (reg:DI 141))) 15 {negdi2}
     (nil))
(insn 31 30 32 2 (set (reg:DI 143)
        (and:DI (reg:DI 142)
            (reg/v:DI 137 [ y ]))) 102 {*anddi3}
     (nil))
(insn 32 31 33 2 (set (reg:DI 144)
        (not:DI (reg:DI 142))) 111 {one_cmpldi2}
     (nil))
(insn 33 32 34 2 (set (reg:DI 145)
        (and:DI (reg:DI 144)
            (reg/v:DI 138 [ z ]))) 102 {*anddi3}
     (nil))
(insn 34 33 21 2 (set (reg/v:DI 138 [ z ])
        (ior:DI (reg:DI 143)
            (reg:DI 145))) 105 {iordi3}
     (nil))

lowering the cost of the code sequence produced (even though combine 
would swallow the second insn anyway).

We still need to produce a comparison against constant zero where the 
instruction following a floating-point conditional-set operation is a 
branch, so add canonicalization to `riscv_expand_conditional_branch' 
instead.

	gcc/
	* config/riscv/riscv.cc (riscv_emit_float_compare) <NE>: Handle 
	separately.
	<EQ, LE, LT, GE, GT>: Return operands supplied as is.
	(riscv_expand_float_scc): Call `riscv_emit_binary' directly rather 
	than going through a temporary register for word-mode targets.
	(riscv_expand_conditional_branch): Canonicalize the comparison 
	if not against constant zero.
---
 gcc/config/riscv/riscv.cc |   29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

gcc-riscv-emit-float-compare-fcmp.diff

Comments

Jeff Law Nov. 19, 2023, 7:44 p.m. UTC | #1
On 11/18/23 22:42, Maciej W. Rozycki wrote:
> We have floating-point coditional-set machine instructions for a subset
> of FP comparisons, so avoid going through a comparison against constant
> zero in `riscv_expand_float_scc' where not necessary, preventing an
> extraneous RTL instruction from being produced that counts against the
> cost of the replacement branchless code sequence in if-conversion, e.g.:
> 
> (insn 29 6 30 2 (set (reg:DI 142)
>          (ge:DI (reg/v:DF 135 [ w ])
>              (reg/v:DF 136 [ x ]))) 297 {*cstoredfdi4}
>       (nil))
> (insn 30 29 31 2 (set (reg:DI 143)
>          (ne:DI (reg:DI 142)
>              (const_int 0 [0]))) 319 {*sne_zero_didi}
>       (nil))
> (insn 31 30 32 2 (set (reg:DI 141)
>          (reg:DI 143)) 206 {*movdi_64bit}
>       (nil))
> (insn 32 31 33 2 (set (reg:DI 144)
>          (neg:DI (reg:DI 141))) 15 {negdi2}
>       (nil))
> (insn 33 32 34 2 (set (reg:DI 145)
>          (and:DI (reg:DI 144)
>              (reg/v:DI 137 [ y ]))) 102 {*anddi3}
>       (nil))
> (insn 34 33 35 2 (set (reg:DI 146)
>          (not:DI (reg:DI 144))) 111 {one_cmpldi2}
>       (nil))
> (insn 35 34 36 2 (set (reg:DI 147)
>          (and:DI (reg:DI 146)
>              (reg/v:DI 138 [ z ]))) 102 {*anddi3}
>       (nil))
> (insn 36 35 21 2 (set (reg/v:DI 138 [ z ])
>          (ior:DI (reg:DI 145)
>              (reg:DI 147))) 105 {iordi3}
>       (nil))
> 
> where the second insn effectively just copies its input.  This now gets
> simplified to:
> 
> (insn 29 6 30 2 (set (reg:DI 141)
>          (ge:DI (reg/v:DF 135 [ w ])
>              (reg/v:DF 136 [ x ]))) 297 {*cstoredfdi4}
>       (nil))
> (insn 30 29 31 2 (set (reg:DI 142)
>          (neg:DI (reg:DI 141))) 15 {negdi2}
>       (nil))
> (insn 31 30 32 2 (set (reg:DI 143)
>          (and:DI (reg:DI 142)
>              (reg/v:DI 137 [ y ]))) 102 {*anddi3}
>       (nil))
> (insn 32 31 33 2 (set (reg:DI 144)
>          (not:DI (reg:DI 142))) 111 {one_cmpldi2}
>       (nil))
> (insn 33 32 34 2 (set (reg:DI 145)
>          (and:DI (reg:DI 144)
>              (reg/v:DI 138 [ z ]))) 102 {*anddi3}
>       (nil))
> (insn 34 33 21 2 (set (reg/v:DI 138 [ z ])
>          (ior:DI (reg:DI 143)
>              (reg:DI 145))) 105 {iordi3}
>       (nil))
> 
> lowering the cost of the code sequence produced (even though combine
> would swallow the second insn anyway).
> 
> We still need to produce a comparison against constant zero where the
> instruction following a floating-point coditional-set operation is a
> branch, so add canonicalization to `riscv_expand_conditional_branch'
> instead.
> 
> 	gcc/
> 	* config/riscv/riscv.cc (riscv_emit_float_compare) <NE>: Handle
> 	separately.
> 	<EQ, LE, LT, GE, GT>: Return operands supplied as is.
> 	(riscv_emit_binary): Call `riscv_emit_binary' directly rather
> 	than going through a temporary register for word-mode targets.
> 	(riscv_expand_conditional_branch): Canonicalize the comparison
> 	if not against constant zero.
OK
jeff
diff mbox series

Patch

Index: gcc/gcc/config/riscv/riscv.cc
===================================================================
--- gcc.orig/gcc/config/riscv/riscv.cc
+++ gcc/gcc/config/riscv/riscv.cc
@@ -4029,9 +4029,10 @@  riscv_emit_float_compare (enum rtx_code
 #undef UNORDERED_COMPARISON
 
     case NE:
-      fp_code = EQ;
       *code = EQ;
-      /* Fall through.  */
+      *op0 = riscv_force_binary (word_mode, EQ, cmp_op0, cmp_op1);
+      *op1 = const0_rtx;
+      break;
 
     case EQ:
     case LE:
@@ -4039,8 +4040,9 @@  riscv_emit_float_compare (enum rtx_code
     case GE:
     case GT:
       /* We have instructions for these cases.  */
-      *op0 = riscv_force_binary (word_mode, fp_code, cmp_op0, cmp_op1);
-      *op1 = const0_rtx;
+      *code = fp_code;
+      *op0 = cmp_op0;
+      *op1 = cmp_op1;
       break;
 
     case LTGT:
@@ -4080,10 +4082,14 @@  riscv_expand_float_scc (rtx target, enum
 {
   riscv_emit_float_compare (&code, &op0, &op1);
 
-  rtx cmp = riscv_force_binary (word_mode, code, op0, op1);
-  if (GET_MODE (target) != word_mode)
-    cmp = lowpart_subreg (GET_MODE (target), cmp, word_mode);
-  riscv_emit_set (target, cmp);
+  machine_mode mode = GET_MODE (target);
+  if (mode != word_mode)
+    {
+      rtx cmp = riscv_force_binary (word_mode, code, op0, op1);
+      riscv_emit_set (target, lowpart_subreg (mode, cmp, word_mode));
+    }
+  else
+    riscv_emit_binary (code, target, op0, op1);
 }
 
 /* Jump to LABEL if (CODE OP0 OP1) holds.  */
@@ -4096,6 +4102,13 @@  riscv_expand_conditional_branch (rtx lab
   else
     riscv_emit_int_compare (&code, &op0, &op1);
 
+  if (FLOAT_MODE_P (GET_MODE (op0)))
+    {
+      op0 = riscv_force_binary (word_mode, code, op0, op1);
+      op1 = const0_rtx;
+      code = NE;
+    }
+
   rtx condition = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
   emit_jump_insn (gen_condjump (condition, label));
 }