From patchwork Sat Oct 16 06:48:21 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: PR 46030, powerpc -Os Date: Fri, 15 Oct 2010 20:48:21 -0000 From: Alan Modra X-Patchwork-Id: 68025 Message-Id: <20101016064821.GC26553@bubble.grove.modra.org> To: gcc-patches@gcc.gnu.org Cc: David Edelsohn I noticed recently that we don't call the out-of-line gpr save/restore functions for -Os in leaf functions, due to not setting lr_save_p. We do use out-of-line save/restore for fprs in leaf functions, so it seemed odd to me that gprs were not treated similarly. Fixing that was relatively easy, but when looking at rs6000_savres_strategy I saw that the saved_all code was quite useless and worried that we might hit the case that saved_all was trying to protect against. Sure enough, the assertions I added triggered a number of times running the gcc testsuite. A little analysis showed the reason for confusion in rs6000_savres_strategy was that the call from rs6000_emit_epilogue lacked the "using_static_chain_p" arg passed by the call in rs6000_emit_prologue, so the epilogue call didn't correctly deduce what happened in the prologue. This patch cures that fragility by calculating both prologue and epilogue save/restore strategy at the same time. At least, I calculate the normal prologue/epilogue strategy, leaving rs6000_emit_epilogue to handle a special case for sibcalls. I also implement caching of the stack info. Bootstrapped and regression tested powerpc-linux. PR target/46030 * config/rs6000/rs6000.c (struct rs6000_stack): Add reload_completed and savres_strategy. (stack_info): New file scope var. (rs6000_init_machine_status): Init stack_info. (SAVRES_INLINE_FPRS, SAVRES_INLINE_GPRS, SAVRES_NOINLINE_GPRS_SAVES_LR, SAVRES_NOINLINE_FPRS_SAVES_LR, SAVRES_NOINLINE_FPRS_DOESNT_RESTORE_LR): Replace with.. 
(SAVE_INLINE_FPRS, SAVE_INLINE_GPRS, REST_INLINE_FPRS, REST_INLINE_GPRS, SAVE_NOINLINE_GPRS_SAVES_LR, SAVE_NOINLINE_FPRS_SAVES_LR, REST_NOINLINE_FPRS_DOESNT_RESTORE_LR): ..this. Update all refs. (rs6000_savres_strategy): Rewrite. (rs6000_stack_info): Use "stack_info" in place of local "info". Return cached stack info when reload_completed unless ENABLE_CHECKING in which case confirm newly calculated stack info matches old info. Delay calculation of lr_save_p, set it for out of line gp save/restore as well as fp. Call rs6000_savres_strategy from here.. (rs6000_emit_prologue, rs6000_emit_epilogue): ..rather than here. (rs6000_output_function_prologue): Use info->savres_strategy to determine whether fp save/restore externs need to be emitted. Index: gcc/config/rs6000/rs6000.c =================================================================== --- gcc/config/rs6000/rs6000.c (revision 165450) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -73,6 +73,7 @@ /* Structure used to define the rs6000 stack */ typedef struct rs6000_stack { + int reload_completed; /* stack info won't change from here on */ int first_gp_reg_save; /* first callee saved GP register used */ int first_fp_reg_save; /* first callee saved FP register used */ int first_altivec_reg_save; /* first callee saved AltiVec register used */ @@ -109,6 +110,7 @@ typedef struct rs6000_stack { int spe_padding_size; HOST_WIDE_INT total_size; /* total bytes allocated for stack */ int spe_64bit_regs_used; + int savres_strategy; } rs6000_stack_t; /* A C structure for machine-specific, per-function data. 
@@ -994,7 +996,6 @@ static rtx rs6000_make_savres_rtx (rs600 static bool rs6000_reg_live_or_pic_offset_p (int); static tree rs6000_builtin_vectorized_libmass (tree, tree, tree); static tree rs6000_builtin_vectorized_function (tree, tree, tree); -static int rs6000_savres_strategy (rs6000_stack_t *, bool, int, int); static void rs6000_restore_saved_cr (rtx, int); static bool rs6000_output_addr_const_extra (FILE *, rtx); static void rs6000_output_function_prologue (FILE *, HOST_WIDE_INT); @@ -15128,6 +15137,8 @@ rs6000_got_register (rtx value ATTRIBUTE return pic_offset_table_rtx; } +static rs6000_stack_t stack_info; + /* Function to init struct machine_function. This will be called, via a pointer variable, from push_function_context. */ @@ -15135,6 +15146,7 @@ rs6000_got_register (rtx value ATTRIBUTE static struct machine_function * rs6000_init_machine_status (void) { + stack_info.reload_completed = 0; return ggc_alloc_cleared_machine_function (); } @@ -18017,6 +18029,107 @@ is_altivec_return_reg (rtx reg, void *xy } +/* Determine the strategy for savings/restoring registers. 
*/ + +enum { + SAVRES_MULTIPLE = 0x1, + SAVE_INLINE_FPRS = 0x2, + SAVE_INLINE_GPRS = 0x4, + REST_INLINE_FPRS = 0x8, + REST_INLINE_GPRS = 0x10, + SAVE_NOINLINE_GPRS_SAVES_LR = 0x20, + SAVE_NOINLINE_FPRS_SAVES_LR = 0x40, + REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x80 +}; + +static int +rs6000_savres_strategy (rs6000_stack_t *info, + bool using_static_chain_p) +{ + int strategy = 0; + + if (TARGET_MULTIPLE + && !TARGET_POWERPC64 + && !(TARGET_SPE_ABI && info->spe_64bit_regs_used) + && info->first_gp_reg_save < 31 + && no_global_regs_above (info->first_gp_reg_save, /*gpr=*/true)) + strategy |= SAVRES_MULTIPLE; + + if (crtl->calls_eh_return + || cfun->machine->ra_need_lr + || info->total_size > 32767) + strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS + | SAVE_INLINE_GPRS | REST_INLINE_GPRS); + + if (info->first_fp_reg_save == 64 + || FP_SAVE_INLINE (info->first_fp_reg_save) + /* The out-of-line FP routines use double-precision stores; + we can't use those routines if we don't have such stores. */ + || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT) + || !no_global_regs_above (info->first_fp_reg_save, /*gpr=*/false)) + strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS; + + if (info->first_gp_reg_save == 32 + || GP_SAVE_INLINE (info->first_gp_reg_save) + || !((strategy & SAVRES_MULTIPLE) + || no_global_regs_above (info->first_gp_reg_save, /*gpr=*/true))) + strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS; + + /* Don't bother to try to save things out-of-line if r11 is occupied + by the static chain. It would require too much fiddling and the + static chain is rarely used anyway. */ + if (using_static_chain_p) + strategy |= SAVE_INLINE_FPRS | SAVE_INLINE_GPRS; + + /* If we are going to use store multiple, then don't even bother + with the out-of-line routines, since the store-multiple + instruction will always be smaller. */ + if ((strategy & SAVRES_MULTIPLE)) + strategy |= SAVE_INLINE_GPRS; + + /* The situation is more complicated with load multiple. 
We'd + prefer to use the out-of-line routines for restores, since the + "exit" out-of-line routines can handle the restore of LR and the + frame teardown. However it doesn't make sense to use the + out-of-line routine if that is the only reason we'd need to save + LR, and we can't use the "exit" out-of-line gpr restore if we + have saved some fprs; In those cases it is advantageous to use + load multiple when available. */ + if ((strategy & SAVRES_MULTIPLE) + && (!info->lr_save_p + || info->first_fp_reg_save != 64)) + strategy |= REST_INLINE_GPRS; + + /* We can only use load multiple or the out-of-line routines to + restore if we've used store multiple or out-of-line routines + in the prologue, i.e. if we've saved all the registers from + first_gp_reg_save. Otherwise, we risk loading garbage. */ + if ((strategy & (SAVE_INLINE_GPRS | SAVRES_MULTIPLE)) == SAVE_INLINE_GPRS) + strategy |= REST_INLINE_GPRS; + + /* Saving CR interferes with the exit routines used on the SPE, so + just punt here. */ + if (TARGET_SPE_ABI + && info->spe_64bit_regs_used + && info->cr_save_p) + strategy |= REST_INLINE_GPRS; + +#ifdef POWERPC_LINUX + if (TARGET_64BIT) + { + if (!(strategy & SAVE_INLINE_FPRS)) + strategy |= SAVE_NOINLINE_FPRS_SAVES_LR; + else if (!(strategy & SAVE_INLINE_GPRS) + && info->first_fp_reg_save == 64) + strategy |= SAVE_NOINLINE_GPRS_SAVES_LR; + } +#else + if (TARGET_AIX && !(strategy & REST_INLINE_FPRS)) + strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR; +#endif + return strategy; +} + /* Calculate the stack information for the current function. This is complicated by having two separate calling sequences, the AIX calling sequence and the V.4 calling sequence. 
@@ -18117,15 +18231,26 @@ is_altivec_return_reg (rtx reg, void *xy static rs6000_stack_t * rs6000_stack_info (void) { - static rs6000_stack_t info; - rs6000_stack_t *info_ptr = &info; +#ifdef ENABLE_CHECKING + static rs6000_stack_t info_save; +#endif + rs6000_stack_t *info_ptr = &stack_info; int reg_size = TARGET_32BIT ? 4 : 8; int ehrd_size; int save_align; int first_gp; HOST_WIDE_INT non_fixed_size; + bool using_static_chain_p; - memset (&info, 0, sizeof (info)); +#ifdef ENABLE_CHECKING + memcpy (&info_save, &stack_info, sizeof stack_info); +#else + if (reload_completed && info_ptr->reload_completed) + return info_ptr; +#endif + + memset (&stack_info, 0, sizeof (stack_info)); + info_ptr->reload_completed = reload_completed; if (TARGET_SPE) { @@ -18182,23 +18307,6 @@ rs6000_stack_info (void) info_ptr->calls_p = (! current_function_is_leaf || cfun->machine->ra_needs_full_frame); - /* Determine if we need to save the link register. */ - if ((DEFAULT_ABI == ABI_AIX - && crtl->profile - && !TARGET_PROFILE_KERNEL) -#ifdef TARGET_RELOCATABLE - || (TARGET_RELOCATABLE && (get_pool_size () != 0)) -#endif - || (info_ptr->first_fp_reg_save != 64 - && !FP_SAVE_INLINE (info_ptr->first_fp_reg_save)) - || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca) - || info_ptr->calls_p - || rs6000_ra_ever_killed ()) - { - info_ptr->lr_save_p = 1; - df_set_regs_ever_live (LR_REGNO, true); - } - /* Determine if we need to save the condition code registers. */ if (df_regs_ever_live_p (CR2_REGNO) || df_regs_ever_live_p (CR3_REGNO) @@ -18367,6 +18475,33 @@ rs6000_stack_info (void) info_ptr->total_size = RS6000_ALIGN (non_fixed_size + info_ptr->fixed_size, ABI_STACK_BOUNDARY / BITS_PER_UNIT); + /* Determine if we need to save the link register. 
*/ + if (info_ptr->calls_p + || (DEFAULT_ABI == ABI_AIX + && crtl->profile + && !TARGET_PROFILE_KERNEL) + || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca) +#ifdef TARGET_RELOCATABLE + || (TARGET_RELOCATABLE && (get_pool_size () != 0)) +#endif + || rs6000_ra_ever_killed ()) + info_ptr->lr_save_p = 1; + + using_static_chain_p = (cfun->static_chain_decl != NULL_TREE + && df_regs_ever_live_p (STATIC_CHAIN_REGNUM) + && call_used_regs[STATIC_CHAIN_REGNUM]); + info_ptr->savres_strategy = rs6000_savres_strategy (info_ptr, + using_static_chain_p); + + if (!(info_ptr->savres_strategy & SAVE_INLINE_GPRS) + || !(info_ptr->savres_strategy & SAVE_INLINE_FPRS) + || !(info_ptr->savres_strategy & REST_INLINE_GPRS) + || !(info_ptr->savres_strategy & REST_INLINE_FPRS)) + info_ptr->lr_save_p = 1; + + if (info_ptr->lr_save_p) + df_set_regs_ever_live (LR_REGNO, true); + /* Determine if we need to allocate any stack frame: For AIX we need to push the stack if a frame pointer is needed @@ -18418,6 +18553,10 @@ rs6000_stack_info (void) if (! info_ptr->cr_save_p) info_ptr->cr_save_offset = 0; +#ifdef ENABLE_CHECKING + gcc_assert (!(reload_completed && info_save.reload_completed) + || memcmp (&info_save, &stack_info, sizeof stack_info) == 0); +#endif return info_ptr; } @@ -19676,106 +19815,6 @@ rs6000_reg_live_or_pic_offset_p (int reg || (DEFAULT_ABI == ABI_DARWIN && flag_pic)))); } -enum { - SAVRES_MULTIPLE = 0x1, - SAVRES_INLINE_FPRS = 0x2, - SAVRES_INLINE_GPRS = 0x4, - SAVRES_NOINLINE_GPRS_SAVES_LR = 0x8, - SAVRES_NOINLINE_FPRS_SAVES_LR = 0x10, - SAVRES_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x20 -}; - -/* Determine the strategy for savings/restoring registers. 
*/ - -static int -rs6000_savres_strategy (rs6000_stack_t *info, bool savep, - int using_static_chain_p, int sibcall) -{ - bool using_multiple_p; - bool common; - bool savres_fprs_inline; - bool savres_gprs_inline; - bool noclobber_global_gprs - = no_global_regs_above (info->first_gp_reg_save, /*gpr=*/true); - int strategy; - - using_multiple_p = (TARGET_MULTIPLE && ! TARGET_POWERPC64 - && (!TARGET_SPE_ABI - || info->spe_64bit_regs_used == 0) - && info->first_gp_reg_save < 31 - && noclobber_global_gprs); - /* Don't bother to try to save things out-of-line if r11 is occupied - by the static chain. It would require too much fiddling and the - static chain is rarely used anyway. */ - common = (using_static_chain_p - || sibcall - || crtl->calls_eh_return - || !info->lr_save_p - || cfun->machine->ra_need_lr - || info->total_size > 32767); - savres_fprs_inline = (common - || info->first_fp_reg_save == 64 - || !no_global_regs_above (info->first_fp_reg_save, - /*gpr=*/false) - /* The out-of-line FP routines use - double-precision stores; we can't use those - routines if we don't have such stores. */ - || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT) - || FP_SAVE_INLINE (info->first_fp_reg_save)); - savres_gprs_inline = (common - /* Saving CR interferes with the exit routines - used on the SPE, so just punt here. */ - || (!savep - && TARGET_SPE_ABI - && info->spe_64bit_regs_used != 0 - && info->cr_save_p != 0) - || info->first_gp_reg_save == 32 - || !noclobber_global_gprs - || GP_SAVE_INLINE (info->first_gp_reg_save)); - - if (savep) - /* If we are going to use store multiple, then don't even bother - with the out-of-line routines, since the store-multiple instruction - will always be smaller. */ - savres_gprs_inline = savres_gprs_inline || using_multiple_p; - else - { - /* The situation is more complicated with load multiple. 
We'd - prefer to use the out-of-line routines for restores, since the - "exit" out-of-line routines can handle the restore of LR and - the frame teardown. But we can only use the out-of-line - routines if we know that we've used store multiple or - out-of-line routines in the prologue, i.e. if we've saved all - the registers from first_gp_reg_save. Otherwise, we risk - loading garbage from the stack. Furthermore, we can only use - the "exit" out-of-line gpr restore if we haven't saved any - fprs. */ - bool saved_all = !savres_gprs_inline || using_multiple_p; - - if (saved_all && info->first_fp_reg_save != 64) - /* We can't use the exit routine; use load multiple if it's - available. */ - savres_gprs_inline = savres_gprs_inline || using_multiple_p; - } - - strategy = (using_multiple_p - | (savres_fprs_inline << 1) - | (savres_gprs_inline << 2)); -#ifdef POWERPC_LINUX - if (TARGET_64BIT) - { - if (!savres_fprs_inline) - strategy |= SAVRES_NOINLINE_FPRS_SAVES_LR; - else if (!savres_gprs_inline && info->first_fp_reg_save == 64) - strategy |= SAVRES_NOINLINE_GPRS_SAVES_LR; - } -#else - if (TARGET_AIX && !savres_fprs_inline) - strategy |= SAVRES_NOINLINE_FPRS_DOESNT_RESTORE_LR; -#endif - return strategy; -} - /* Emit function prologue as insns. */ void @@ -19824,12 +19863,10 @@ rs6000_emit_prologue (void) reg_size = 8; } - strategy = rs6000_savres_strategy (info, /*savep=*/true, - /*static_chain_p=*/using_static_chain_p, - /*sibcall=*/0); + strategy = info->savres_strategy; using_store_multiple = strategy & SAVRES_MULTIPLE; - saving_FPRs_inline = strategy & SAVRES_INLINE_FPRS; - saving_GPRs_inline = strategy & SAVRES_INLINE_GPRS; + saving_FPRs_inline = strategy & SAVE_INLINE_FPRS; + saving_GPRs_inline = strategy & SAVE_INLINE_GPRS; /* For V.4, update stack before we do any saving and set back pointer. */ if (! 
WORLD_SAVE_P (info) @@ -19999,8 +20036,8 @@ rs6000_emit_prologue (void) gen_rtx_REG (Pmode, LR_REGNO)); RTX_FRAME_RELATED_P (insn) = 1; - if (!(strategy & (SAVRES_NOINLINE_GPRS_SAVES_LR - | SAVRES_NOINLINE_FPRS_SAVES_LR))) + if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR + | SAVE_NOINLINE_FPRS_SAVES_LR))) { addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, GEN_INT (info->lr_save_offset + sp_offset)); @@ -20060,7 +20097,7 @@ rs6000_emit_prologue (void) DFmode, /*savep=*/true, /*gpr=*/false, /*lr=*/(strategy - & SAVRES_NOINLINE_FPRS_SAVES_LR) + & SAVE_NOINLINE_FPRS_SAVES_LR) != 0); insn = emit_insn (par); rs6000_frame_related (insn, frame_ptr_rtx, info->total_size, @@ -20187,7 +20224,7 @@ rs6000_emit_prologue (void) reg_mode, /*savep=*/true, /*gpr=*/true, /*lr=*/(strategy - & SAVRES_NOINLINE_GPRS_SAVES_LR) + & SAVE_NOINLINE_GPRS_SAVES_LR) != 0); insn = emit_insn (par); rs6000_frame_related (insn, frame_ptr_rtx, info->total_size, @@ -20495,19 +20532,23 @@ rs6000_output_function_prologue (FILE *f /* Write .extern for any function we will call to save and restore fp values. 
*/ - if (info->first_fp_reg_save < 64 - && !FP_SAVE_INLINE (info->first_fp_reg_save)) + if (info->first_fp_reg_save < 64) { char *name; int regno = info->first_fp_reg_save - 32; - name = rs6000_savres_routine_name (info, regno, /*savep=*/true, - /*gpr=*/false, /*lr=*/false); - fprintf (file, "\t.extern %s\n", name); - - name = rs6000_savres_routine_name (info, regno, /*savep=*/false, - /*gpr=*/false, /*lr=*/true); - fprintf (file, "\t.extern %s\n", name); + if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0) + { + name = rs6000_savres_routine_name (info, regno, /*savep=*/true, + /*gpr=*/false, /*lr=*/false); + fprintf (file, "\t.extern %s\n", name); + } + if ((info->savres_strategy & REST_INLINE_FPRS) == 0) + { + name = rs6000_savres_routine_name (info, regno, /*savep=*/false, + /*gpr=*/false, /*lr=*/true); + fprintf (file, "\t.extern %s\n", name); + } } /* Write .extern for AIX common mode routines, if needed. */ @@ -20653,11 +20694,10 @@ rs6000_emit_epilogue (int sibcall) reg_size = 8; } - strategy = rs6000_savres_strategy (info, /*savep=*/false, - /*static_chain_p=*/0, sibcall); + strategy = info->savres_strategy; using_load_multiple = strategy & SAVRES_MULTIPLE; - restoring_FPRs_inline = strategy & SAVRES_INLINE_FPRS; - restoring_GPRs_inline = strategy & SAVRES_INLINE_GPRS; + restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS); + restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS); using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601 || rs6000_cpu == PROCESSOR_PPC603 || rs6000_cpu == PROCESSOR_PPC750 @@ -20675,7 +20715,7 @@ rs6000_emit_epilogue (int sibcall) && !frame_pointer_needed)); restore_lr = (info->lr_save_p && (restoring_FPRs_inline - || (strategy & SAVRES_NOINLINE_FPRS_DOESNT_RESTORE_LR)) + || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR)) && (restoring_GPRs_inline || info->first_fp_reg_save < 64)); @@ -21314,7 +21354,7 @@ rs6000_emit_epilogue (int sibcall) if (!sibcall) { rtvec p; - bool lr = (strategy & 
SAVRES_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0; + bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0; if (! restoring_FPRs_inline) p = rtvec_alloc (4 + 64 - info->first_fp_reg_save); else