diff mbox

[AArch64] Improve bit-test-branch pattern to avoid unnecessary register clobber

Message ID 548EFFE8.9040703@arm.com
State New
Headers show

Commit Message

Jiong Wang Dec. 15, 2014, 3:36 p.m. UTC
from the discussion here
   https://gcc.gnu.org/ml/gcc-patches/2014-11/msg01949.html

The other problem it exposed is the unnecessary clobber of register x19,
which is a callee-saved register; as a result there is an unnecessary
push/pop in the prologue/epilogue.

The cause is the following two patterns:

(define_insn "tb<optab><mode>1"
(define_insn "cb<optab><mode>1"

They always declare "(clobber (match_scratch:DI 3 "=r"))", even though that
register is used only when "get_attr_length (insn) == 8".

Actually, we could clobber the CC register instead of a scratch register to
avoid wasting a general-purpose register.

This patch fixes this and gives a slight improvement on spec2k.
Bootstrap OK, no regressions on aarch64 bare-metal.

ok for trunk?

The testcase included in the patch is for verification purposes only.
It verifies the long-branch situation, but because the code is very big
it takes a couple of seconds to compile, so I will not commit it.

gcc/
   2014-12-15 Ramana Radhakrishnan ramana.radhakrishnan@arm.com
             Jiong Wang jiong.wang@arm.com

   * config/aarch64/aarch64.md (tb<optab><mode>1): Clobber CC reg 
instead of scratch reg.
   (cb<optab><mode>1): Likewise.
   * config/aarch64/iterators.md (bcond): New define_code_attr.

Comments

Jiong Wang Jan. 14, 2015, 3:51 p.m. UTC | #1
On 15/12/14 15:36, Jiong Wang wrote:

> from the discussion here
>     https://gcc.gnu.org/ml/gcc-patches/2014-11/msg01949.html
>
> the other problem it exposed is the unnecessary clobber of register x19
> which
> is a callee-saved register, then there are unnecessary push/pop in
> pro/epilogue.
>
> the reason comes from the following pattern:
>
> (define_insn "tb<optab><mode>1"
> (define_insn "cb<optab><mode>1"
>
> they always declare "(clobber (match_scratch:DI 3 "=r"))" while that
> register is
> used only when "get_attr_length (insn) == 8".
>
> actually, we could clobber CC register instead of scratch register to
> avoid wasting
> of general purpose registers.
>
> this patch fix this, and give slightly improvement on spec2k.
> bootstrap OK, no regression on aarch64 bare-metal.
>
> ok for trunk?
>
> the testcase included in the patch is for verification purpose only.
> it could verify the long branch situation, while because of the code is
> very big,
> it takes a couple of seconds to compile. will not commit it.
>
> gcc/
>     2014-12-15 Ramana Radhakrishnan ramana.radhakrishnan@arm.com
>               Jiong Wang jiong.wang@arm.com
>
>     * config/aarch64/aarch64.md (tb<optab><mode>1): Clobber CC reg
> instead of scratch reg.
>     (cb<optab><mode>1): Likewise.
>     * config/aarch64/iterators.md (bcond): New define_code_attr.

Ping~
Richard Henderson Jan. 14, 2015, 10:59 p.m. UTC | #2
On 12/15/2014 07:36 AM, Jiong Wang wrote:
> +	char buf[64];
> +	uint64_t val = ((uint64_t) 1) << UINTVAL (operands[1]);
> +	sprintf (buf, "tst\t%%<w>0, %"PRId64, val);
> +	output_asm_insn (buf, operands);
> +	return "<bcond>\t%l2";

Better to simply modify the operand, as in

  operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
  return "tst\t%<w>0, %1\;<bcond>\t%l2";


r~
diff mbox

Patch

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 597ff8c..abf8e3f 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -466,13 +466,20 @@ 
 		   (const_int 0))
 	     (label_ref (match_operand 2 "" ""))
 	     (pc)))
-   (clobber (match_scratch:DI 3 "=r"))]
+   (clobber (reg:CC CC_REGNUM))]
   ""
-  "*
-  if (get_attr_length (insn) == 8)
-    return \"ubfx\\t%<w>3, %<w>0, %1, #1\;<cbz>\\t%<w>3, %l2\";
-  return \"<tbz>\\t%<w>0, %1, %l2\";
-  "
+  {
+    if (get_attr_length (insn) == 8)
+      {
+	char buf[64];
+	uint64_t val = ((uint64_t) 1) << UINTVAL (operands[1]);
+	sprintf (buf, "tst\t%%<w>0, %"PRId64, val);
+	output_asm_insn (buf, operands);
+	return "<bcond>\t%l2";
+      }
+    else
+      return "<tbz>\t%<w>0, %1, %l2";
+  }
   [(set_attr "type" "branch")
    (set (attr "length")
 	(if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -32768))
@@ -486,13 +493,21 @@ 
 				 (const_int 0))
 			   (label_ref (match_operand 1 "" ""))
 			   (pc)))
-   (clobber (match_scratch:DI 2 "=r"))]
+   (clobber (reg:CC CC_REGNUM))]
   ""
-  "*
-  if (get_attr_length (insn) == 8)
-    return \"ubfx\\t%<w>2, %<w>0, <sizem1>, #1\;<cbz>\\t%<w>2, %l1\";
-  return \"<tbz>\\t%<w>0, <sizem1>, %l1\";
-  "
+  {
+    if (get_attr_length (insn) == 8)
+      {
+	char buf[64];
+	uint64_t val = ((uint64_t ) 1)
+			<< (GET_MODE_SIZE (<MODE>mode) * BITS_PER_UNIT - 1);
+	sprintf (buf, "tst\t%%<w>0, %"PRId64, val);
+	output_asm_insn (buf, operands);
+	return "<bcond>\t%l1";
+      }
+    else
+      return "<tbz>\t%<w>0, <sizem1>, %l1";
+  }
   [(set_attr "type" "branch")
    (set (attr "length")
 	(if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -32768))
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 7dd3917..bd144f9 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -823,6 +823,9 @@ 
 		      (smax "s") (umax "u")
 		      (smin "s") (umin "u")])
 
+;; Emit conditional branch instructions.
+(define_code_attr bcond [(eq "beq") (ne "bne") (lt "bne") (ge "beq")])
+
 ;; Emit cbz/cbnz depending on comparison type.
 (define_code_attr cbz [(eq "cbz") (ne "cbnz") (lt "cbnz") (ge "cbz")])
 
diff --git a/gcc/testsuite/gcc.target/aarch64/long_range_bit_test_branch_1.c b/gcc/testsuite/gcc.target/aarch64/long_range_bit_test_branch_1.c
new file mode 100644
index 0000000..d4782e9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/long_range_bit_test_branch_1.c
@@ -0,0 +1,166 @@ 
+int dec (int);
+
+#define CASE_ENTRY(n) \
+  case n: \
+    sum = a / n; \
+    sum = sum * (n - 1); \
+    sum = dec (sum); \
+    sum = sum / (n + 1); \
+    sum = dec (sum); \
+    sum = sum / (n + 2); \
+    sum = dec (sum); \
+    sum = sum / (n + 3); \
+    sum = dec (sum); \
+    sum = sum / (n + 4); \
+    sum = dec (sum); \
+    sum = sum / (n + 5); \
+    sum = dec (sum); \
+    sum = sum / (n + 6); \
+    sum = dec (sum); \
+    sum = sum / (n + 7); \
+    sum = dec (sum); \
+    sum = sum / (n + 8); \
+    sum = dec (sum); \
+    sum = sum / (n + 9); \
+    sum = dec (sum); \
+    sum = sum / (n + 10); \
+    sum = dec (sum); \
+    sum = sum / (n + 11); \
+    sum = dec (sum); \
+    sum = sum / (n + 12); \
+    sum = dec (sum); \
+    sum = sum / (n + 13); \
+    sum = dec (sum); \
+    sum = sum / (n + 14); \
+    sum = dec (sum); \
+    sum = sum / (n + 15); \
+    sum = dec (sum); \
+    sum = sum / (n + 16); \
+    sum = dec (sum); \
+    sum = sum / (n + 17); \
+    sum = dec (sum); \
+    sum = sum / (n + 18); \
+    sum = dec (sum); \
+    sum = sum / (n + 19); \
+    sum = dec (sum); \
+    sum = sum / (n + 20); \
+    sum = dec (sum); \
+    sum = sum / (n + 21); \
+    sum = dec (sum); \
+    sum = sum / (n + 22); \
+    sum = dec (sum); \
+    sum = sum / (n + 23); \
+    sum = dec (sum); \
+    sum = sum / (n + 24); \
+    sum = dec (sum); \
+    sum = sum / (n + 25); \
+    sum = dec (sum); \
+    sum = sum / (n + 26); \
+    sum = dec (sum); \
+    sum = sum / (n + 27); \
+    sum = dec (sum); \
+    sum = sum / (n + 28); \
+    sum = dec (sum); \
+    sum = sum / (n + 29); \
+    sum = dec (sum); \
+    sum = sum / (n + 30); \
+    sum = dec (sum); \
+    sum = sum / (n + 31); \
+    break;
+
+int
+cbranch (int a, int b, int c, int d, long long addend)
+{
+  long long sum;
+  if (a & 0x2)
+    {
+start:
+      sum = b * c;
+      sum = sum + b + c;
+#ifdef CB
+      if (sum < 0)
+	goto start;
+#endif
+    }
+  else
+    {
+      switch (a/d)
+	{
+	  CASE_ENTRY (1)
+	  CASE_ENTRY (2)
+	  CASE_ENTRY (3)
+	  CASE_ENTRY (4)
+	  CASE_ENTRY (5)
+	  CASE_ENTRY (6)
+	  CASE_ENTRY (7)
+	  CASE_ENTRY (8)
+	  CASE_ENTRY (9)
+	  CASE_ENTRY (10)
+	  CASE_ENTRY (11)
+	  CASE_ENTRY (12)
+	  CASE_ENTRY (13)
+	  CASE_ENTRY (14)
+	  CASE_ENTRY (15)
+	  CASE_ENTRY (16)
+	  CASE_ENTRY (17)
+	  CASE_ENTRY (18)
+	  CASE_ENTRY (19)
+	  CASE_ENTRY (20)
+	  CASE_ENTRY (21)
+	  CASE_ENTRY (22)
+	  CASE_ENTRY (23)
+	  CASE_ENTRY (24)
+	  CASE_ENTRY (25)
+	  CASE_ENTRY (26)
+	  CASE_ENTRY (27)
+	  CASE_ENTRY (28)
+	  CASE_ENTRY (29)
+	  CASE_ENTRY (30)
+	  CASE_ENTRY (31)
+	  CASE_ENTRY (32)
+	  CASE_ENTRY (33)
+	  CASE_ENTRY (34)
+	  CASE_ENTRY (35)
+	  CASE_ENTRY (36)
+	  CASE_ENTRY (37)
+	  CASE_ENTRY (38)
+	  CASE_ENTRY (39)
+	  CASE_ENTRY (40)
+	  CASE_ENTRY (41)
+	  CASE_ENTRY (42)
+	  CASE_ENTRY (43)
+	  CASE_ENTRY (44)
+	  CASE_ENTRY (45)
+	  CASE_ENTRY (46)
+	  CASE_ENTRY (47)
+	  CASE_ENTRY (48)
+	  CASE_ENTRY (49)
+	  CASE_ENTRY (50)
+	  CASE_ENTRY (51)
+	  CASE_ENTRY (52)
+	  CASE_ENTRY (53)
+	  CASE_ENTRY (54)
+	  CASE_ENTRY (55)
+	  CASE_ENTRY (56)
+	  CASE_ENTRY (57)
+	  CASE_ENTRY (58)
+	  CASE_ENTRY (59)
+	  CASE_ENTRY (60)
+	  CASE_ENTRY (61)
+	  CASE_ENTRY (62)
+	  CASE_ENTRY (63)
+	  CASE_ENTRY (64)
+	}
+
+#ifdef CB
+      if (sum < 0)
+	goto start;
+#else
+
+      if (sum & 0x2000000000)
+	goto start;
+#endif
+    }
+
+  return sum;
+}