Message ID | alpine.DEB.2.20.2311182249380.5892@tpp.orcam.me.uk |
---|---|
State | New |
Headers | show |
Series | RISC-V: Various if-conversion fixes and improvements | expand |
On 11/18/23 22:42, Maciej W. Rozycki wrote: > We have floating-point conditional-set machine instructions for a subset > of FP comparisons, so avoid going through a comparison against constant > zero in `riscv_expand_float_scc' where not necessary, preventing an > extraneous RTL instruction from being produced that counts against the > cost of the replacement branchless code sequence in if-conversion, e.g.: > > (insn 29 6 30 2 (set (reg:DI 142) > (ge:DI (reg/v:DF 135 [ w ]) > (reg/v:DF 136 [ x ]))) 297 {*cstoredfdi4} > (nil)) > (insn 30 29 31 2 (set (reg:DI 143) > (ne:DI (reg:DI 142) > (const_int 0 [0]))) 319 {*sne_zero_didi} > (nil)) > (insn 31 30 32 2 (set (reg:DI 141) > (reg:DI 143)) 206 {*movdi_64bit} > (nil)) > (insn 32 31 33 2 (set (reg:DI 144) > (neg:DI (reg:DI 141))) 15 {negdi2} > (nil)) > (insn 33 32 34 2 (set (reg:DI 145) > (and:DI (reg:DI 144) > (reg/v:DI 137 [ y ]))) 102 {*anddi3} > (nil)) > (insn 34 33 35 2 (set (reg:DI 146) > (not:DI (reg:DI 144))) 111 {one_cmpldi2} > (nil)) > (insn 35 34 36 2 (set (reg:DI 147) > (and:DI (reg:DI 146) > (reg/v:DI 138 [ z ]))) 102 {*anddi3} > (nil)) > (insn 36 35 21 2 (set (reg/v:DI 138 [ z ]) > (ior:DI (reg:DI 145) > (reg:DI 147))) 105 {iordi3} > (nil)) > > where the second insn effectively just copies its input. 
This now gets > simplified to: > > (insn 29 6 30 2 (set (reg:DI 141) > (ge:DI (reg/v:DF 135 [ w ]) > (reg/v:DF 136 [ x ]))) 297 {*cstoredfdi4} > (nil)) > (insn 30 29 31 2 (set (reg:DI 142) > (neg:DI (reg:DI 141))) 15 {negdi2} > (nil)) > (insn 31 30 32 2 (set (reg:DI 143) > (and:DI (reg:DI 142) > (reg/v:DI 137 [ y ]))) 102 {*anddi3} > (nil)) > (insn 32 31 33 2 (set (reg:DI 144) > (not:DI (reg:DI 142))) 111 {one_cmpldi2} > (nil)) > (insn 33 32 34 2 (set (reg:DI 145) > (and:DI (reg:DI 144) > (reg/v:DI 138 [ z ]))) 102 {*anddi3} > (nil)) > (insn 34 33 21 2 (set (reg/v:DI 138 [ z ]) > (ior:DI (reg:DI 143) > (reg:DI 145))) 105 {iordi3} > (nil)) > > lowering the cost of the code sequence produced (even though combine > would swallow the second insn anyway). > > We still need to produce a comparison against constant zero where the > instruction following a floating-point conditional-set operation is a > branch, so add canonicalization to `riscv_expand_conditional_branch' > instead. > > gcc/ > * config/riscv/riscv.cc (riscv_emit_float_compare) <NE>: Handle > separately. > <EQ, LE, LT, GE, GT>: Return operands supplied as is. > (riscv_expand_float_scc): Call `riscv_emit_binary' directly rather > than going through a temporary register for word-mode targets. > (riscv_expand_conditional_branch): Canonicalize the comparison > if not against constant zero. OK jeff
Index: gcc/gcc/config/riscv/riscv.cc =================================================================== --- gcc.orig/gcc/config/riscv/riscv.cc +++ gcc/gcc/config/riscv/riscv.cc @@ -4029,9 +4029,10 @@ riscv_emit_float_compare (enum rtx_code #undef UNORDERED_COMPARISON case NE: - fp_code = EQ; *code = EQ; - /* Fall through. */ + *op0 = riscv_force_binary (word_mode, EQ, cmp_op0, cmp_op1); + *op1 = const0_rtx; + break; case EQ: case LE: @@ -4039,8 +4040,9 @@ riscv_emit_float_compare (enum rtx_code case GE: case GT: /* We have instructions for these cases. */ - *op0 = riscv_force_binary (word_mode, fp_code, cmp_op0, cmp_op1); - *op1 = const0_rtx; + *code = fp_code; + *op0 = cmp_op0; + *op1 = cmp_op1; break; case LTGT: @@ -4080,10 +4082,14 @@ riscv_expand_float_scc (rtx target, enum { riscv_emit_float_compare (&code, &op0, &op1); - rtx cmp = riscv_force_binary (word_mode, code, op0, op1); - if (GET_MODE (target) != word_mode) - cmp = lowpart_subreg (GET_MODE (target), cmp, word_mode); - riscv_emit_set (target, cmp); + machine_mode mode = GET_MODE (target); + if (mode != word_mode) + { + rtx cmp = riscv_force_binary (word_mode, code, op0, op1); + riscv_emit_set (target, lowpart_subreg (mode, cmp, word_mode)); + } + else + riscv_emit_binary (code, target, op0, op1); } /* Jump to LABEL if (CODE OP0 OP1) holds. */ @@ -4096,6 +4102,13 @@ riscv_expand_conditional_branch (rtx lab else riscv_emit_int_compare (&code, &op0, &op1); + if (FLOAT_MODE_P (GET_MODE (op0))) + { + op0 = riscv_force_binary (word_mode, code, op0, op1); + op1 = const0_rtx; + code = NE; + } + rtx condition = gen_rtx_fmt_ee (code, VOIDmode, op0, op1); emit_jump_insn (gen_condjump (condition, label)); }