diff mbox series

[committed] Support limited setcc for H8

Message ID 3248b49d-a34e-6f3c-ff2e-a193a7caa85a@gmail.com
State New
Headers show
Series [committed] Support limited setcc for H8 | expand

Commit Message

Jeff Law Aug. 27, 2021, 9:06 p.m. UTC
One of the (few) remaining issues related to cc0 elimination from the H8 
port was the fact that I dropped all setcc support.  While we only had 
setcc on the H8/SX variant, it seems a shame to regress like that.

Meanwhile my primary motivation behind the recent work on the H8 port is 
to speed up testing, particularly the builtin-arith tests.  While I have 
some support that significantly improves things, it's still not as good 
as it could be.

"Naturally" those two issues started to intersect and I started thinking 
a lot about setcc on the H8 last weekend and cobbled together some code 
that makes another notable improvement in those tests.

This patch was extracted from the weekend's work and stands on its own 
as a nice little improvement.  In particular it allows us to support 
setcc style insns that move the state of the C bit into a GPR.

Handling the C bit is the easiest to implement.  In fact, we can 
implement C bit handling on all the H8 variants and the implementation 
is actually better than what the H8/SX was previously doing.

We have a variety of ways to get C bit information into GPRs.  We can 
insert it into an arbitrary position in an 8 bit register, we can use it 
in an 8 bit {add,subtract}-with-carry instruction, or we can use it in 
8, 16 or 32 bit rotates.

The first decision is whether setcc should generate -1, 0 or 1, 0.  -1, 0 is 
easiest (subx), but we're often going to need to negate the result.  -1, 
0 is also going to be painful for the other bits we're going to want to 
support (Z, N, V) as we don't have as much flexibility in getting the 
bit into a GPR.

1, 0 isn't too hard.  We can use xor to clear the target register (of 
any size), then do a bst to insert C into the low bit.  Good. We can 
also use bist to insert inverted C into the low bit.  This allows us to 
implement efficient setcc for geu/ltu.  At worst it's equivalent to a 
branchy sequence in terms of speed and space and it's often better, even 
in isolation.  It also often allows combinations with subsequent 
instructions that extend the result to wider types which further 
improves things.

While this could be easily extended to put the C bit at an arbitrary 
location in an 8 or 16 bit register or in the sign bit of a 32bit 
register, I haven't seen enough code that would significantly benefit.  
So I haven't implemented this.

While this does occasionally make things larger by twiddling register 
allocation decisions, these are relatively rare and dwarfed by the regular 
and larger improvements we see in general.

Tested without regressions on the H8.  Installing onto the trunk.

Jeff
commit ee914ec4f811243ad72aceea4748687c74f38bc6
Author: Jeff Law <jlaw@localhost.localdomain>
Date:   Fri Aug 27 17:01:37 2021 -0400

    Support limited setcc for H8
    
    gcc/
    
            * config/h8300/bitfield.md (cstore<mode>4): Remove expander.
            * config/h8300/h8300.c (h8300_expand_branch): Remove function.
            * config/h8300/h8300-protos.h (h8300_expand_branch): Remove prototype.
            * config/h8300/h8300.md (eqne): New code iterator.
            (geultu, geultu_to_c): Similarly.
            * config/h8300/testcompare.md (cstore<mode>4): Dummy expander.
            (store_c_<mode>, store_c_i_<mode>): New patterns.
            (cmp<mode>_c): New pattern.
diff mbox series

Patch

diff --git a/gcc/config/h8300/bitfield.md b/gcc/config/h8300/bitfield.md
index 82cb161d126..0d28c750a6a 100644
--- a/gcc/config/h8300/bitfield.md
+++ b/gcc/config/h8300/bitfield.md
@@ -338,17 +338,6 @@ 
 }
   [(set_attr "length_table" "bitfield")])
 
-;;(define_expand "cstore<mode>4"
-;;  [(use (match_operator 1 "eqne_operator"
-;;         [(match_operand:QHSI 2 "h8300_dst_operand" "")
-;;          (match_operand:QHSI 3 "h8300_src_operand" "")]))
-;;   (clobber (match_operand:QHSI 0 "register_operand"))]
-;;  "TARGET_H8300SX"
-;;  {
-;;    h8300_expand_store (operands);
-;;    DONE;
-;;  })
-
 ;;(define_insn "*bstzhireg"
 ;;  [(set (match_operand:HI 0 "register_operand" "=r")
 ;;	(match_operator:HI 1 "eqne_operator" [(cc0) (const_int 0)]))]
diff --git a/gcc/config/h8300/h8300-protos.h b/gcc/config/h8300/h8300-protos.h
index 3d344018ff2..4a9624f91d6 100644
--- a/gcc/config/h8300/h8300-protos.h
+++ b/gcc/config/h8300/h8300-protos.h
@@ -45,7 +45,6 @@  extern int compute_a_shift_cc (rtx *, rtx_code);
 #ifdef HAVE_ATTR_cc
 extern enum attr_cc compute_plussi_cc (rtx *);
 #endif
-extern void h8300_expand_branch (rtx[]);
 extern void h8300_expand_store (rtx[]);
 extern bool expand_a_shift (machine_mode, enum rtx_code, rtx[]);
 extern int h8300_shift_needs_scratch_p (int, machine_mode, rtx_code);
diff --git a/gcc/config/h8300/h8300.c b/gcc/config/h8300/h8300.c
index 5f7251ab78d..a63c3220e66 100644
--- a/gcc/config/h8300/h8300.c
+++ b/gcc/config/h8300/h8300.c
@@ -3256,30 +3256,8 @@  compute_logical_op_length (machine_mode mode, rtx_code code, rtx *operands, rtx_
   return length;
 }
 
-
 #if 0
-/* Expand a conditional branch.  */
-
-void
-h8300_expand_branch (rtx operands[])
-{
-  enum rtx_code code = GET_CODE (operands[0]);
-  rtx op0 = operands[1];
-  rtx op1 = operands[2];
-  rtx label = operands[3];
-  rtx tmp;
-
-  tmp = gen_rtx_COMPARE (VOIDmode, op0, op1);
-  emit_insn (gen_rtx_SET (cc0_rtx, tmp));
-
-  tmp = gen_rtx_fmt_ee (code, VOIDmode, cc0_rtx, const0_rtx);
-  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
-			      gen_rtx_LABEL_REF (VOIDmode, label),
-			      pc_rtx);
-  emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
-}
-
-
+
 /* Expand a conditional store.  */
 
 void
diff --git a/gcc/config/h8300/h8300.md b/gcc/config/h8300/h8300.md
index 7f49e4284f2..89bfcf11126 100644
--- a/gcc/config/h8300/h8300.md
+++ b/gcc/config/h8300/h8300.md
@@ -233,6 +233,14 @@ 
 (define_code_iterator logicals [ior xor and])
 
 (define_code_iterator ors [ior xor])
+
+(define_code_iterator eqne [eq ne])
+
+;; For storing the C flag, map from the unsigned comparison to the right
+;; code for testing the C bit.
+(define_code_iterator geultu [geu ltu])
+(define_code_attr geultu_to_c [(geu "eq") (ltu "ne")])
+
 
 (include "movepush.md")
 (include "mova.md")
diff --git a/gcc/config/h8300/testcompare.md b/gcc/config/h8300/testcompare.md
index 29190532e49..9ff7a51077e 100644
--- a/gcc/config/h8300/testcompare.md
+++ b/gcc/config/h8300/testcompare.md
@@ -70,6 +70,22 @@ 
   "mov.w	%e0,%e0"
   [(set_attr "length" "2")])
 
+(define_insn "*cmp<mode>_c"
+  [(set (reg:CCC CC_REG)
+	(ltu (match_operand:QHSI 0 "h8300_dst_operand" "rQ")
+	     (match_operand:QHSI 1 "h8300_src_operand" "rQi")))]
+  "reload_completed"
+  {
+    if (<MODE>mode == QImode)
+      return "cmp.b	%X1,%X0";
+    else if (<MODE>mode == HImode)
+      return "cmp.w	%T1,%T0";
+    else if (<MODE>mode == SImode)
+      return "cmp.l	%S1,%S0";
+    gcc_unreachable ();
+  }
+  [(set_attr "length_table" "add")])
+
 (define_insn "*cmpqi"
   [(set (reg:CC CC_REG)
 	(compare (match_operand:QI 0 "h8300_dst_operand" "rQ")
@@ -144,3 +160,67 @@ 
   [(parallel [(set (reg:CCZN CC_REG) (compare:CCZN (match_dup 1) (const_int 0)))
 	      (set (match_dup 0) (match_dup 1))])])
 
+;; This exists solely to convince ifcvt to try some store-flag sequences.
+;;
+;; Essentially we don't want to expose a general store-flag capability.
+;; The only generally useful/profitable case is when we want to test the
+;; C bit.  In that case we can use addx, subx, bst, or bist to get the bit
+;; into a GPR.
+;;
+;; Others could be handled with stc, shifts and masking, but it likely isn't
+;; profitable.
+;;
+(define_expand "cstore<mode>4"
+  [(use (match_operator 1 "eqne_operator"
+         [(match_operand:QHSI 2 "h8300_dst_operand" "")
+          (match_operand:QHSI 3 "h8300_src_operand" "")]))
+   (clobber (match_operand:QHSI 0 "register_operand"))]
+  ""
+  {
+    FAIL;
+  })
+
+;; Storing the C bit is pretty simple since there are many ways to
+;; introduce it into a GPR: addx, subx and a variety of bit manipulation
+;; instructions.
+;;
+(define_insn "*store_c_<mode>"
+  [(set (match_operand:QHSI 0 "register_operand" "=r")
+	(eqne:QHSI (reg:CCC CC_REG) (const_int 0)))]
+  "reload_completed"
+  {
+    if (<CODE> == NE)
+      {
+	if (<MODE>mode == QImode)
+	  return "xor.b\t%X0,%X0\;bst\t#0,%X0";
+	else if (<MODE>mode == HImode)
+	  return "xor.w\t%T0,%T0\;bst\t#0,%s0";
+	else if (<MODE>mode == SImode)
+	  return "xor.l\t%S0,%S0\;bst\t#0,%w0";
+	gcc_unreachable ();
+      }
+    else if (<CODE> == EQ)
+      {
+	if (<MODE>mode == QImode)
+	  return "xor.b\t%X0,%X0\;bist\t#0,%X0";
+	else if (<MODE>mode == HImode)
+	  return "xor.w\t%T0,%T0\;bist\t#0,%s0";
+	else if (<MODE>mode == SImode)
+	  return "xor.l\t%S0,%S0\;bist\t#0,%w0";
+	gcc_unreachable ();
+      }
+  }
+  [(set (attr "length") (symbol_ref "<MODE>mode == SImode ? 6 : 4"))])
+
+;; Recognize this scc and generate code we can match
+(define_insn_and_split "*store_c_i_<mode>"
+  [(set (match_operand:QHSI 0 "register_operand" "=r")
+	(geultu:QHSI (match_operand:QHSI 1 "register_operand" "r")
+		     (match_operand:QHSI 2 "register_operand" "r")))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(set (reg:CCC CC_REG)
+	(ltu:CCC (match_dup 1) (match_dup 2)))
+   (set (match_dup 0)
+	(<geultu_to_c>:QHSI (reg:CCC CC_REG) (const_int 0)))])