diff mbox series

rs6000: Separate shrink-wrapping for the TOC register

Message ID d46d8cca7ef9c91b00ef06fbcb975cb39ade0540.1510224242.git.segher@kernel.crashing.org
State New
Headers show
Series rs6000: Separate shrink-wrapping for the TOC register | expand

Commit Message

Segher Boessenkool Nov. 9, 2017, 11:01 a.m. UTC
This makes the TOC register save a separate shrink-wrapping component.
Unless -msave-toc-indirect was explicitly set or unset, the patch enables
it (when doing separate shrink-wrapping and optimizing for speed), and
then moves the prologue code generated for the TOC save to a better place.
So far this only matters for indirect calls (for direct calls the save is
done in the PLT stub).  The restore is always done directly after the bl
insn (the compiler generates a nop there, which the linker replaces with
a load).

Tested on powerpc64-linux {-m32,-m64}; will also test on LE, and then
commit.


Segher


2017-11-09  Segher Boessenkool  <segher@kernel.crashing.org>

	* config/rs6000/rs6000.c (machine_function): Add a bool,
	"toc_is_wrapped_separately".
	(rs6000_option_override_internal): Enable OPTION_MASK_SAVE_TOC_INDIRECT
	if it wasn't explicitly set or unset, we are optimizing for speed, and
	doing separate shrink-wrapping.
	(rs6000_get_separate_components): Enable the TOC component if
	saving the TOC register in the prologue.
	(rs6000_components_for_bb): Handle the TOC component.
	(rs6000_emit_prologue_components): Store the TOC register where needed.
	(rs6000_set_handled_components): Mark TOC as handled, if handled.
	(rs6000_emit_prologue): Don't save the TOC if that is already done.

---
 gcc/config/rs6000/rs6000.c | 33 ++++++++++++++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)
diff mbox series

Patch

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index aacf3f1..2c80a2f 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -171,6 +171,7 @@  typedef struct GTY(()) machine_function
   bool gpr_is_wrapped_separately[32];
   bool fpr_is_wrapped_separately[32];
   bool lr_is_wrapped_separately;
+  bool toc_is_wrapped_separately;
 } machine_function;
 
 /* Support targetm.vectorize.builtin_mask_for_load.  */
@@ -4426,6 +4427,13 @@  rs6000_option_override_internal (bool global_init_p)
       && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
     rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
 
+  /* If we can shrink-wrap the TOC register save separately, then use
+     -msave-toc-indirect unless explicitly disabled.  */
+  if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
+      && flag_shrink_wrap_separate
+      && optimize_function_for_speed_p (cfun))
+    rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
+
   /* Enable power8 fusion if we are tuning for power8, even if we aren't
      generating power8 instructions.  */
   if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
@@ -26721,6 +26729,7 @@  rs6000_get_separate_components (void)
 	      && !(info->savres_strategy & REST_MULTIPLE));
 
   /* Component 0 is the save/restore of LR (done via GPR0).
+     Component 2 is the save of the TOC (GPR2).
      Components 13..31 are the save/restore of GPR13..GPR31.
      Components 46..63 are the save/restore of FPR14..FPR31.  */
 
@@ -26795,6 +26804,10 @@  rs6000_get_separate_components (void)
 	bitmap_set_bit (components, 0);
     }
 
+  /* Optimize saving the TOC.  This is component 2.  */
+  if (cfun->machine->save_toc_in_prologue)
+    bitmap_set_bit (components, 2);
+
   return components;
 }
 
@@ -26833,6 +26846,12 @@  rs6000_components_for_bb (basic_block bb)
       || bitmap_bit_p (kill, LR_REGNO))
     bitmap_set_bit (components, 0);
 
+  /* The TOC save.  */
+  if (bitmap_bit_p (in, TOC_REGNUM)
+      || bitmap_bit_p (gen, TOC_REGNUM)
+      || bitmap_bit_p (kill, TOC_REGNUM))
+    bitmap_set_bit (components, 2);
+
   return components;
 }
 
@@ -26887,6 +26906,14 @@  rs6000_emit_prologue_components (sbitmap components)
       add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, lr));
     }
 
+  /* Prologue for TOC.  */
+  if (bitmap_bit_p (components, 2))
+    {
+      rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
+      rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
+      emit_insn (gen_frame_store (reg, sp_reg, RS6000_TOC_SAVE_SLOT));
+    }
+
   /* Prologue for the GPRs.  */
   int offset = info->gp_save_offset;
   if (info->push_p)
@@ -27011,6 +27038,9 @@  rs6000_set_handled_components (sbitmap components)
 
   if (bitmap_bit_p (components, 0))
     cfun->machine->lr_is_wrapped_separately = true;
+
+  if (bitmap_bit_p (components, 2))
+    cfun->machine->toc_is_wrapped_separately = true;
 }
 
 /* VRSAVE is a bit vector representing which AltiVec registers
@@ -27968,7 +27998,8 @@  rs6000_emit_prologue (void)
      unwinder to interpret it.  R2 changes, apart from the
      calls_eh_return case earlier in this function, are handled by
      linux-unwind.h frob_update_context.  */
-  if (rs6000_save_toc_in_prologue_p ())
+  if (rs6000_save_toc_in_prologue_p ()
+      && !cfun->machine->toc_is_wrapped_separately)
     {
       rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
       emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));