==============================================================================
        bcl 20,31,$+4                           bl $+8
.L3:                                    .L3:
                                                b $+8
                                                blr
        mflr 9                                  mflr 9
        addis 9,9,.LCTOC1-.L3@ha                addis 9,9,.LCTOC1-.L3@ha
        addi 9,9,.LCTOC1-.L3@l                  addi 9,9,.LCTOC1-.L3@l
2) Normal Code:                         New 476 Code:
==============================================================================
        bcl 20,31,$+8                           bl $+12
        .long _GLOBAL_OFFSET_TABLE_-$           b $+12
                                                .long _GLOBAL_OFFSET_TABLE_-$
                                                blr
        mflr 9                                  mflr 9
                                                addi 9,9,4
        lwz 3,0(9)                              lwz 3,0(9)
I have bootstrapped and regtested the following patch with no regressions.
To test the code even more, I modified the patch so that we default to always
using -mpreserve-link-stack and that bootstrapped and regtested with no
regressions too.
Ok for mainline?
Peter
* config/rs6000/rs6000.opt (mpreserve-link-stack): New option.
* config/rs6000/rs6000.c (rs6000_option_override_internal): Enable
TARGET_LINK_STACK for -mtune=476 and -mtune=476fp.
(rs6000_legitimize_tls_address): Emit the link stack preserving GOT
code if TARGET_LINK_STACK.
(rs6000_emit_load_toc_table): Likewise.
(output_function_profiler): Likewise.
(macho_branch_islands): Likewise.
(machopic_output_stub): Likewise.
* config/rs6000/rs6000.md (load_toc_v4_PIC_1, load_toc_v4_PIC_1b):
Convert to a define_expand.
(load_toc_v4_PIC_1_normal): New define_insn.
(load_toc_v4_PIC_1_476): Likewise.
(load_toc_v4_PIC_1b_normal): Likewise.
(load_toc_v4_PIC_1b_476): Likewise.
===================================================================
@@ -528,3 +528,7 @@ Use/do not use r11 to hold the static li
msave-toc-indirect
Target Undocumented Var(TARGET_SAVE_TOC_INDIRECT) Save Init(1)
; Control whether we save the TOC in the prologue for indirect calls or generate the save inline
+
+mpreserve-link-stack
+Target Report Var(TARGET_LINK_STACK) Init(-1) Save
+Preserve the link stack on some cpus (eg, 476) by matching up a blr with the bcl/bl insns used for GOT accesses
===================================================================
@@ -3246,6 +3246,11 @@ rs6000_option_override_internal (bool gl
target_option_default_node = target_option_current_node
= build_target_option_node ();
+ /* If not explicitly specified via option, decide whether to generate the
+ extra blr's required to preserve the link stack on some cpus (eg, 476). */
+ if (TARGET_LINK_STACK == -1)
+ TARGET_LINK_STACK = (rs6000_cpu == PROCESSOR_PPC476);
+
return ret;
}
@@ -5930,6 +5935,8 @@ rs6000_legitimize_tls_address (rtx addr,
lab = gen_label_rtx ();
emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
+ if (TARGET_LINK_STACK)
+ emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
emit_move_insn (tmp2, mem);
last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
set_unique_reg_note (last, REG_EQUAL, gsym);
@@ -18927,6 +18934,8 @@ rs6000_emit_load_toc_table (int fromprol
lab = gen_label_rtx ();
emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
+ if (TARGET_LINK_STACK)
+ emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
}
emit_insn (gen_addsi3 (dest, temp0, dest));
@@ -22529,7 +22538,11 @@ output_function_profiler (FILE *file, in
}
else if (TARGET_SECURE_PLT && flag_pic)
{
- asm_fprintf (file, "\tbcl 20,31,1f\n1:\n\t{st|stw} %s,4(%s)\n",
+ if (TARGET_LINK_STACK)
+ asm_fprintf (file, "\tbl 1f\n\tb 2f\n1:\n\tblr\n2:\n");
+ else
+ asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
+ asm_fprintf (file, "\t{st|stw} %s,4(%s)\n",
reg_names[0], reg_names[1]);
asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
asm_fprintf (file, "\t{cau|addis} %s,%s,",
@@ -22554,10 +22567,22 @@ output_function_profiler (FILE *file, in
asm_fprintf (file, "\t{st|stw} %s,4(%s)\n",
reg_names[0], reg_names[1]);
/* Now, we need to get the address of the label. */
- fputs ("\tbcl 20,31,1f\n\t.long ", file);
- assemble_name (file, buf);
- fputs ("-.\n1:", file);
- asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
+ if (TARGET_LINK_STACK)
+ {
+ fputs ("\tbl 1f\n\tb 2f\n\t.long ", file);
+ assemble_name (file, buf);
+ fputs ("-.\n1:\n\tblr\n2:", file);
+ asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
+ asm_fprintf (file, "\taddi %s,%s,4\n",
+ reg_names[11], reg_names[11]);
+ }
+ else
+ {
+ fputs ("\tbcl 20,31,1f\n\t.long ", file);
+ assemble_name (file, buf);
+ fputs ("-.\n1:", file);
+ asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
+ }
asm_fprintf (file, "\t{l|lwz} %s,0(%s)\n",
reg_names[0], reg_names[11]);
asm_fprintf (file, "\t{cax|add} %s,%s,%s\n",
@@ -25040,11 +25065,22 @@ macho_branch_islands (void)
#endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
if (flag_pic)
{
- strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
- strcat (tmp_buf, label);
- strcat (tmp_buf, "_pic\n");
- strcat (tmp_buf, label);
- strcat (tmp_buf, "_pic:\n\tmflr r11\n");
+ if (TARGET_LINK_STACK)
+ {
+ strcat (tmp_buf, ":\n\tmflr r0\n\tbl $+8\n");
+ strcat (tmp_buf, label);
+ strcat (tmp_buf, "_pic:\n\tb $+8\n");
+ strcat (tmp_buf, "\tblr\n");
+ strcat (tmp_buf, "\tmflr r11\n");
+ }
+ else
+ {
+ strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
+ strcat (tmp_buf, label);
+ strcat (tmp_buf, "_pic\n");
+ strcat (tmp_buf, label);
+ strcat (tmp_buf, "_pic:\n\tmflr r11\n");
+ }
strcat (tmp_buf, "\taddis r11,r11,ha16(");
strcat (tmp_buf, name_buf);
@@ -25190,8 +25226,16 @@ machopic_output_stub (FILE *file, const
sprintf (local_label_0, "\"L%011d$spb\"", label);
fprintf (file, "\tmflr r0\n");
- fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
- fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
+ if (TARGET_LINK_STACK)
+ {
+ fprintf (file, "\tbl $+8\n");
+ fprintf (file, "%s:\n\tb $+8\n\tblr\n\tmflr r11\n", local_label_0);
+ }
+ else
+ {
+ fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
+ fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
+ }
fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
lazy_ptr_name, local_label_0);
fprintf (file, "\tmtlr r0\n");
===================================================================
@@ -12081,27 +12081,65 @@ (define_insn "load_toc_v4_pic_si"
[(set_attr "type" "branch")
(set_attr "length" "4")])
-(define_insn "load_toc_v4_PIC_1"
+(define_expand "load_toc_v4_PIC_1"
+ [(parallel [(set (reg:SI LR_REGNO)
+ (match_operand:SI 0 "immediate_operand" "s"))
+ (use (unspec [(match_dup 0)] UNSPEC_TOC))])]
+ "TARGET_ELF && DEFAULT_ABI != ABI_AIX
+ && (flag_pic == 2 || (flag_pic && TARGET_SECURE_PLT))"
+ "")
+
+(define_insn "load_toc_v4_PIC_1_normal"
[(set (reg:SI LR_REGNO)
(match_operand:SI 0 "immediate_operand" "s"))
(use (unspec [(match_dup 0)] UNSPEC_TOC))]
- "TARGET_ELF && DEFAULT_ABI != ABI_AIX
+ "!TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI != ABI_AIX
&& (flag_pic == 2 || (flag_pic && TARGET_SECURE_PLT))"
"bcl 20,31,%0\\n%0:"
[(set_attr "type" "branch")
(set_attr "length" "4")])
-(define_insn "load_toc_v4_PIC_1b"
+(define_insn "load_toc_v4_PIC_1_476"
+ [(set (reg:SI LR_REGNO)
+ (match_operand:SI 0 "immediate_operand" "s"))
+ (use (unspec [(match_dup 0)] UNSPEC_TOC))]
+ "TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI != ABI_AIX
+ && (flag_pic == 2 || (flag_pic && TARGET_SECURE_PLT))"
+ "bl $+8\n%0:\n\tb $+8\n\tblr"
+ [(set_attr "type" "branch")
+ (set_attr "length" "12")])
+
+(define_expand "load_toc_v4_PIC_1b"
+ [(parallel [(set (reg:SI LR_REGNO)
+ (unspec:SI [(match_operand:SI 0 "immediate_operand" "s")
+ (label_ref (match_operand 1 "" ""))]
+ UNSPEC_TOCPTR))
+ (match_dup 1)])]
+ "TARGET_ELF && DEFAULT_ABI != ABI_AIX && flag_pic == 2"
+ "")
+
+(define_insn "load_toc_v4_PIC_1b_normal"
[(set (reg:SI LR_REGNO)
(unspec:SI [(match_operand:SI 0 "immediate_operand" "s")
(label_ref (match_operand 1 "" ""))]
UNSPEC_TOCPTR))
(match_dup 1)]
- "TARGET_ELF && DEFAULT_ABI != ABI_AIX && flag_pic == 2"
+ "!TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI != ABI_AIX && flag_pic == 2"
"bcl 20,31,$+8\;.long %0-$"
[(set_attr "type" "branch")
(set_attr "length" "8")])
+(define_insn "load_toc_v4_PIC_1b_476"
+ [(set (reg:SI LR_REGNO)
+ (unspec:SI [(match_operand:SI 0 "immediate_operand" "s")
+ (label_ref (match_operand 1 "" ""))]
+ UNSPEC_TOCPTR))
+ (match_dup 1)]
+ "TARGET_LINK_STACK && TARGET_ELF && DEFAULT_ABI != ABI_AIX && flag_pic == 2"
+ "bl $+12\n\tb $+12\n\t.long %0-$\n\tblr"
+ [(set_attr "type" "branch")
+ (set_attr "length" "16")])
+
(define_insn "load_toc_v4_PIC_2"
[(set (match_operand:SI 0 "gpc_reg_operand" "=r")
(mem:SI (plus:SI (match_operand:SI 1 "gpc_reg_operand" "b")