diff mbox

[AARCH64] Fix for branch offsets over 1 MiB

Message ID 55DC69B9.4040009@arm.com
State New
Headers show

Commit Message

Andre Vieira (lists) Aug. 25, 2015, 1:12 p.m. UTC
On 25/08/15 10:52, Andrew Pinski wrote:
> On Tue, Aug 25, 2015 at 5:50 PM, Andrew Pinski <pinskia@gmail.com> wrote:
>> On Tue, Aug 25, 2015 at 5:37 PM, Andre Vieira
>> <Andre.SimoesDiasVieira@arm.com> wrote:
>>> Conditional branches have a maximum range of [-1048576, 1048572]. Any
>>> destination further away can not be reached by these.
>>> To be able to have conditional branches in very large functions, we invert
>>> the condition and change the destination to jump over an unconditional
>>> branch to the original, far away, destination.
>>>
>>> gcc/ChangeLog:
>>> 2015-08-07  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
>>>              Andre Vieira  <andre.simoesdiasvieira@arm.com>
>>>
>>>          * config/aarch64/aarch64.md (*condjump): Handle functions > 1
>>>          Mib.
>>>          (*cb<optab><mode>1): Idem.
>>>          (*tb<optab><mode>1): Idem.
>>>          (*cb<optab><mode>1): Idem.
>>>          * config/aarch64/iterators.md (inv_cb): New code attribute.
>>>          (inv_tb): Idem.
>>>          * config/aarch64/aarch64.c (aarch64_gen_far_branch): New.
>>>          * config/aarch64/aarch64-protos.h (aarch64_gen_far_branch): New.
>>>
>>> gcc/testsuite/ChangeLog:
>>> 2015-08-07  Andre Vieira  <andre.simoesdiasvieira@arm.com>
>>>
>>>          * gcc.target/aarch64/long-branch.c: New test.
>>
>> Just a few comments about the testcase.  You could improve the size
>> (on disk) of the testcase by using the preprocessor some more:
>> Something like:
>> #define CASE_ENTRY2 (x) CASE_ENTRY ((x)) CASE_ENTRY ((x)+1)
>> #define CASE_ENTRY4 (x) CASE_ENTRY2 ((x)) CASE_ENTRY2 ((x)+2+1)
>> #define CASE_ENTRY8 (x) CASE_ENTRY4 ((x)) CASE_ENTRY4 ((x)+4+1)
>> #define CASE_ENTRY16 (x) CASE_ENTRY8 ((x)) CASE_ENTRY8 ((x)+8+1)
>> #define CASE_ENTRY32 (x) CASE_ENTRY16 ((x)) CASE_ENTRY16 ((x)+16)
>> #define CASE_ENTRY64 (x) CASE_ENTRY32 ((x)) CASE_ENTRY32 ((x)+32+1)
>> #define CASE_ENTRY128 (x) CASE_ENTRY64 ((x)) CASE_ENTRY16 ((x)+64+1)
>> #define CASE_ENTRY256 (x) CASE_ENTRY128 ((x)) CASE_ENTRY128 ((x)+128+1)
>
>
> I do have an off by one error but you should get the idea.  Basically
> instead of 200 lines, we only have 9 lines (log2(256) == 8).
>
> Thanks,
> Andrew
>
>>
>> And then use
>> CASE_ENTRY256 (1)
>>
>> You can do the same trick to reduce the size of CASE_ENTRY too.
>>
>> Thanks,
>> Andrew Pinski
>

Conditional branches have a maximum range of [-1048576, 1048572] bytes
relative to the branch instruction. Any destination further away cannot
be reached by them.
To support conditional branches in very large functions, we invert the
condition and make the conditional branch jump over an unconditional
branch to the original, far-away destination.

gcc/ChangeLog:
2015-08-07  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
             Andre Vieira  <andre.simoesdiasvieira@arm.com>

         * config/aarch64/aarch64.md (*condjump): Handle functions > 1 MiB.
         (*cb<optab><mode>1): Likewise.
         (*tb<optab><mode>1): Likewise.
         (*cb<optab><mode>1): Likewise.
         * config/aarch64/iterators.md (inv_cb): New code attribute.
         (inv_tb): Likewise.
         * config/aarch64/aarch64.c (aarch64_gen_far_branch): New.
         * config/aarch64/aarch64-protos.h (aarch64_gen_far_branch): New.

gcc/testsuite/ChangeLog:
2015-08-07  Andre Vieira  <andre.simoesdiasvieira@arm.com>

         * gcc.target/aarch64/long_branch_1.c: New test.

Comments

Marcus Shawcroft Aug. 26, 2015, 12:42 p.m. UTC | #1
On 25 August 2015 at 14:12, Andre Vieira <Andre.SimoesDiasVieira@arm.com> wrote:

> gcc/ChangeLog:
> 2015-08-07  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
>             Andre Vieira  <andre.simoesdiasvieira@arm.com>
>
>         * config/aarch64/aarch64.md (*condjump): Handle functions > 1 MiB.
>         (*cb<optab><mode>1): Likewise.
>         (*tb<optab><mode>1): Likewise.
>         (*cb<optab><mode>1): Likewise.
>         * config/aarch64/iterators.md (inv_cb): New code attribute.
>         (inv_tb): Likewise.
>         * config/aarch64/aarch64.c (aarch64_gen_far_branch): New.
>         * config/aarch64/aarch64-protos.h (aarch64_gen_far_branch): New.
>
> gcc/testsuite/ChangeLog:
> 2015-08-07  Andre Vieira  <andre.simoesdiasvieira@arm.com>
>
>         * gcc.target/aarch64/long_branch_1.c: New test.

OK /Marcus
diff mbox

Patch

From e34022ecd6f914b5a713594ca5b21b33929a3a1f Mon Sep 17 00:00:00 2001
From: Andre Simoes Dias Vieira <andsim01@arm.com>
Date: Tue, 25 Aug 2015 13:12:11 +0100
Subject: [PATCH] fix for far branches

---
 gcc/config/aarch64/aarch64-protos.h              |  1 +
 gcc/config/aarch64/aarch64.c                     | 23 ++++++
 gcc/config/aarch64/aarch64.md                    | 89 +++++++++++++++++++----
 gcc/config/aarch64/iterators.md                  |  6 ++
 gcc/testsuite/gcc.target/aarch64/long_branch_1.c | 91 ++++++++++++++++++++++++
 5 files changed, 195 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/long_branch_1.c

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 4b3cbedbd0a5fa186619e05c0c0b400c8257b1c0..9afb7ef9afadf2b3dfeb24db230829344201deba 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -322,6 +322,7 @@  unsigned aarch64_trampoline_size (void);
 void aarch64_asm_output_labelref (FILE *, const char *);
 void aarch64_cpu_cpp_builtins (cpp_reader *);
 void aarch64_elf_asm_named_section (const char *, unsigned, tree);
+const char * aarch64_gen_far_branch (rtx *, int, const char *, const char *);
 void aarch64_err_no_fpadvsimd (machine_mode, const char *);
 void aarch64_expand_epilogue (bool);
 void aarch64_expand_mov_immediate (rtx, rtx);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 87bbf6e7988e4ef796c09075ee584822483cbbce..188d0dd555d3d765aff7e78623a4e938497bec3f 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -586,6 +586,29 @@  static const char * const aarch64_condition_codes[] =
   "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
 };
 
+/* Generate code to enable conditional branches in functions over 1 MiB.  */
+const char *
+aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest,
+			const char * branch_format)
+{
+    rtx_code_label * tmp_label = gen_label_rtx ();
+    char label_buf[256];
+    char buffer[128];
+    ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
+				 CODE_LABEL_NUMBER (tmp_label));
+    const char *label_ptr = targetm.strip_name_encoding (label_buf);
+    rtx dest_label = operands[pos_label];
+    operands[pos_label] = tmp_label;
+
+    snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
+    output_asm_insn (buffer, operands);
+
+    snprintf (buffer, sizeof (buffer), "b\t%%l%d\n%s:", pos_label, label_ptr);
+    operands[pos_label] = dest_label;
+    output_asm_insn (buffer, operands);
+    return "";
+}
+
 void
 aarch64_err_no_fpadvsimd (machine_mode mode, const char *msg)
 {
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index f8c44683752130d8557dc160f7bb2b62147fb2c2..8067c5a4e9231a39ded8b65f2fc2a52d47ba508b 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -181,6 +181,13 @@ 
 	     (const_string "no")
 	] (const_string "yes")))
 
+;; Attribute that specifies whether we are dealing with a branch to a
+;; label that is far away, i.e. further away than the maximum/minimum
+;; representable in a signed 21-bit number.
+;; 0 :=: no
+;; 1 :=: yes
+(define_attr "far_branch" "" (const_int 0))
+
 ;; -------------------------------------------------------------------
 ;; Pipeline descriptions and scheduling
 ;; -------------------------------------------------------------------
@@ -308,8 +315,23 @@ 
 			   (label_ref (match_operand 2 "" ""))
 			   (pc)))]
   ""
-  "b%m0\\t%l2"
-  [(set_attr "type" "branch")]
+  {
+    if (get_attr_length (insn) == 8)
+      return aarch64_gen_far_branch (operands, 2, "Lbcond", "b%M0\\t");
+    else
+      return  "b%m0\\t%l2";
+  }
+  [(set_attr "type" "branch")
+   (set (attr "length")
+	(if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576))
+			   (lt (minus (match_dup 2) (pc)) (const_int 1048572)))
+		      (const_int 4)
+		      (const_int 8)))
+   (set (attr "far_branch")
+	(if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576))
+			   (lt (minus (match_dup 2) (pc)) (const_int 1048572)))
+		      (const_int 0)
+		      (const_int 1)))]
 )
 
 (define_expand "casesi"
@@ -488,9 +510,23 @@ 
 			   (label_ref (match_operand 1 "" ""))
 			   (pc)))]
   ""
-  "<cbz>\\t%<w>0, %l1"
-  [(set_attr "type" "branch")]
-
+  {
+    if (get_attr_length (insn) == 8)
+      return aarch64_gen_far_branch (operands, 1, "Lcb", "<inv_cb>\\t%<w>0, ");
+    else
+      return "<cbz>\\t%<w>0, %l1";
+  }
+  [(set_attr "type" "branch")
+   (set (attr "length")
+	(if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -1048576))
+			   (lt (minus (match_dup 1) (pc)) (const_int 1048572)))
+		      (const_int 4)
+		      (const_int 8)))
+   (set (attr "far_branch")
+	(if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -1048576))
+			   (lt (minus (match_dup 1) (pc)) (const_int 1048572)))
+		      (const_int 0)
+		      (const_int 1)))]
 )
 
 (define_insn "*tb<optab><mode>1"
@@ -506,8 +542,14 @@ 
   {
     if (get_attr_length (insn) == 8)
       {
-	operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
-	return "tst\t%<w>0, %1\;<bcond>\t%l2";
+	if (get_attr_far_branch (insn) == 1)
+	  return aarch64_gen_far_branch (operands, 2, "Ltb",
+					 "<inv_tb>\\t%<w>0, %1, ");
+	else
+	  {
+	    operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
+	    return "tst\t%<w>0, %1\;<bcond>\t%l2";
+	  }
       }
     else
       return "<tbz>\t%<w>0, %1, %l2";
@@ -517,7 +559,13 @@ 
 	(if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -32768))
 			   (lt (minus (match_dup 2) (pc)) (const_int 32764)))
 		      (const_int 4)
-		      (const_int 8)))]
+		      (const_int 8)))
+   (set (attr "far_branch")
+	(if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576))
+			   (lt (minus (match_dup 2) (pc)) (const_int 1048572)))
+		      (const_int 0)
+		      (const_int 1)))]
+
 )
 
 (define_insn "*cb<optab><mode>1"
@@ -530,12 +578,18 @@ 
   {
     if (get_attr_length (insn) == 8)
       {
-	char buf[64];
-	uint64_t val = ((uint64_t ) 1)
-			<< (GET_MODE_SIZE (<MODE>mode) * BITS_PER_UNIT - 1);
-	sprintf (buf, "tst\t%%<w>0, %" PRId64, val);
-	output_asm_insn (buf, operands);
-	return "<bcond>\t%l1";
+	if (get_attr_far_branch (insn) == 1)
+	  return aarch64_gen_far_branch (operands, 1, "Ltb",
+					 "<inv_tb>\\t%<w>0, <sizem1>, ");
+	else
+	  {
+	    char buf[64];
+	    uint64_t val = ((uint64_t) 1)
+		<< (GET_MODE_SIZE (<MODE>mode) * BITS_PER_UNIT - 1);
+	    sprintf (buf, "tst\t%%<w>0, %" PRId64, val);
+	    output_asm_insn (buf, operands);
+	    return "<bcond>\t%l1";
+	  }
       }
     else
       return "<tbz>\t%<w>0, <sizem1>, %l1";
@@ -545,7 +599,12 @@ 
 	(if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -32768))
 			   (lt (minus (match_dup 1) (pc)) (const_int 32764)))
 		      (const_int 4)
-		      (const_int 8)))]
+		      (const_int 8)))
+   (set (attr "far_branch")
+	(if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -1048576))
+			   (lt (minus (match_dup 1) (pc)) (const_int 1048572)))
+		      (const_int 0)
+		      (const_int 1)))]
 )
 
 ;; -------------------------------------------------------------------
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index b8a45d1d6ed8bbcfcb9a2b37edd0e24ba171649f..475aa6e6d37c78a455e22e26990504b3d96a2f80 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -817,9 +817,15 @@ 
 ;; Emit cbz/cbnz depending on comparison type.
 (define_code_attr cbz [(eq "cbz") (ne "cbnz") (lt "cbnz") (ge "cbz")])
 
+;; Emit inverted cbz/cbnz depending on comparison type.
+(define_code_attr inv_cb [(eq "cbnz") (ne "cbz") (lt "cbz") (ge "cbnz")])
+
 ;; Emit tbz/tbnz depending on comparison type.
 (define_code_attr tbz [(eq "tbz") (ne "tbnz") (lt "tbnz") (ge "tbz")])
 
+;; Emit inverted tbz/tbnz depending on comparison type.
+(define_code_attr inv_tb [(eq "tbnz") (ne "tbz") (lt "tbz") (ge "tbnz")])
+
 ;; Max/min attributes.
 (define_code_attr maxmin [(smax "max")
 			  (smin "min")
diff --git a/gcc/testsuite/gcc.target/aarch64/long_branch_1.c b/gcc/testsuite/gcc.target/aarch64/long_branch_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..46f500d36a2d9ff04f71ae0bcc7c47e3d0b92c1b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/long_branch_1.c
@@ -0,0 +1,91 @@ 
+/* { dg-do assemble } */
+/* { dg-timeout-factor 2.0 } */
+/* { dg-options "-O1 -fno-reorder-blocks -fno-tree-cselim --save-temps" } */
+
+
+__attribute__((noinline, noclone)) int
+restore (int a, int b)
+{
+  return a * b;
+}
+
+__attribute__((noinline, noclone)) void
+do_nothing (int *input)
+{
+  *input = restore (*input, 1);
+  return;
+}
+#define ENTRY_SUM(n, x) \
+    sum = sum / ((n) + (x)); \
+    sum = restore (sum, (n) + (x));
+
+#define ENTRY_SUM2(n, x) ENTRY_SUM ((n), (x)) ENTRY_SUM ((n), (x)+1)
+#define ENTRY_SUM4(n, x) ENTRY_SUM2 ((n), (x)) ENTRY_SUM2 ((n), (x)+2)
+#define ENTRY_SUM8(n, x) ENTRY_SUM4 ((n), (x)) ENTRY_SUM4 ((n), (x)+4)
+#define ENTRY_SUM16(n, x) ENTRY_SUM8 ((n), (x)) ENTRY_SUM8 ((n), (x)+8)
+#define ENTRY_SUM32(n, x) ENTRY_SUM16 ((n), (x)) ENTRY_SUM16 ((n), (x)+16)
+#define ENTRY_SUM64(n, x) ENTRY_SUM32 ((n), (x)) ENTRY_SUM32 ((n), (x)+32)
+#define ENTRY_SUM128(n, x) ENTRY_SUM64 ((n), (x)) ENTRY_SUM64 ((n), (x)+64)
+
+#define CASE_ENTRY(n) \
+  case n: \
+    sum = sum / (n + 1); \
+    sum = restore (sum, n + 1); \
+    if (sum == (n + addend)) \
+      break;\
+    ENTRY_SUM128 ((n), 2) \
+    ENTRY_SUM16 ((n), 130) \
+    break;
+
+#define CASE_ENTRY2(n) CASE_ENTRY ((n)) CASE_ENTRY ((n)+1)
+#define CASE_ENTRY4(n) CASE_ENTRY2 ((n)) CASE_ENTRY2 ((n)+2)
+#define CASE_ENTRY8(n) CASE_ENTRY4 ((n)) CASE_ENTRY4 ((n)+4)
+#define CASE_ENTRY16(n) CASE_ENTRY8 ((n)) CASE_ENTRY8 ((n)+8)
+#define CASE_ENTRY32(n) CASE_ENTRY16 ((n)) CASE_ENTRY16 ((n)+16)
+#define CASE_ENTRY64(n) CASE_ENTRY32 ((n)) CASE_ENTRY32 ((n)+32)
+#define CASE_ENTRY128(n) CASE_ENTRY64 ((n)) CASE_ENTRY64 ((n)+64)
+
+__attribute__((noinline, noclone)) long long
+test_and_branch (int selector, int addend, int cond)
+{
+  long long sum = selector + 1;
+
+  if (selector > 200)
+    {
+start0:
+      return sum - 1;
+start1:
+      return sum + 1;
+start2:
+      return sum;
+start3:
+      return sum - 2;
+    }
+  else
+    {
+      switch (selector)
+	{
+	  CASE_ENTRY128 (1)
+	  CASE_ENTRY64 (129)
+	  CASE_ENTRY16 (193)
+	}
+
+      do_nothing ((int *)&sum);
+
+      if (cond == 0)
+	goto start0;
+      else if (cond < 0)
+	goto start1;
+      else if ((cond & 0x010) != 0)
+	goto start2;
+      else if (cond >= 14)
+	goto start3;
+
+    }
+
+  return -1;
+}
+
+/* { dg-final { scan-assembler "Lbcond" } } */
+/* { dg-final { scan-assembler "Lcb" } } */
+/* { dg-final { scan-assembler "Ltb" } } */
-- 
1.9.1