From patchwork Mon Nov 7 09:48:03 2011
X-Patchwork-Submitter: Sameera Deshpande
X-Patchwork-Id: 124027
Subject: Re: Ping! Re: [RFA/ARM][Patch 02/02]: ARM epilogues in RTL
From: Sameera Deshpande
To: "gcc-patches@gcc.gnu.org"
Cc: "nickc@redhat.com", Richard Earnshaw, "paul@codesourcery.com", Ramana Radhakrishnan
References: <003701cc7df9$e465d770$ad318650$@deshpande@arm.com> <1317830686.11108.27.camel@e102549-lin.cambridge.arm.com>
In-Reply-To: <1317830686.11108.27.camel@e102549-lin.cambridge.arm.com>
Date: Mon, 07 Nov 2011 09:48:03 +0000
Message-ID: <1320659283.30897.87.camel@e102549-lin.cambridge.arm.com>

Hi!

Please find attached the reworked patch. This patch fixes the formatting
issues that were raised in reviews of the other patches.

The patch has been tested with check-gcc, check-gdb, and a bootstrap,
with no regressions. Ok for trunk?
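
For illustration, here is roughly the RTL the new arm_expand_epilogue /
arm_expand_return path emits for a simple ARM-mode function whose only
saved registers are {r4, lr} and which has no local frame (a simplified
sketch under those assumptions; real dumps carry register numbers and
frame-related notes):

    ;; Multi-register pop with return, built by arm_emit_multi_reg_pop.
    ;; Element 0 is the return, element 1 the stack-pointer update, and
    ;; the remaining elements load the saved registers, with the saved
    ;; LR popped straight into the PC.
    (parallel [(return)
               (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 8)))
               (set (reg:SI r4) (mem:SI (reg:SI sp)))
               (set (reg:SI pc) (mem:SI (plus:SI (reg:SI sp)
                                                 (const_int 4))))])

This is matched by the *pop_multiple_with_stack_update_and_return
pattern and output as a single "ldmfd sp!, {r4, pc}" in ARM mode, or
"pop {r4, pc}" for Thumb-2.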
- Thanks and regards, Sameera diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 2c38883..67fcdac 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -28,7 +28,8 @@ extern int use_return_insn (int, rtx); extern enum reg_class arm_regno_class (int); extern void arm_load_pic_register (unsigned long); extern int arm_volatile_func (void); -extern const char *arm_output_epilogue (rtx); +extern void arm_expand_epilogue (bool); +extern void arm_expand_return (bool); extern void arm_expand_prologue (void); extern const char *arm_strip_name_encoding (const char *); extern void arm_asm_output_labelref (FILE *, const char *); @@ -181,7 +182,7 @@ extern const char *thumb1_unexpanded_epilogue (void); extern void thumb1_expand_prologue (void); extern void thumb1_expand_epilogue (void); extern const char *thumb1_output_interwork (void); -extern void thumb2_expand_epilogue (void); +extern void thumb2_expand_epilogue (bool); extern void thumb2_output_return (rtx); extern void thumb2_expand_return (void); #ifdef TREE_CODE diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index ec87892..944b79b 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -13500,86 +13500,6 @@ fp_const_from_val (REAL_VALUE_TYPE *r) gcc_unreachable (); } -/* Output the operands of a LDM/STM instruction to STREAM. - MASK is the ARM register set mask of which only bits 0-15 are important. - REG is the base register, either the frame pointer or the stack pointer, - INSTR is the possibly suffixed load or store instruction. - RFE is nonzero if the instruction should also copy spsr to cpsr. */ - -static void -print_multi_reg (FILE *stream, const char *instr, unsigned reg, - unsigned long mask, int rfe) -{ - unsigned i; - bool not_first = FALSE; - - gcc_assert (!rfe || (mask & (1 << PC_REGNUM))); - fputc ('\t', stream); - asm_fprintf (stream, instr, reg); - fputc ('{', stream); - - for (i = 0; i <= LAST_ARM_REGNUM; i++) - if (mask & (1 << i)) - { - if (not_first) - fprintf (stream, ", "); - - asm_fprintf (stream, "%r", i); - not_first = TRUE; - } - - if (rfe) - fprintf (stream, "}^\n"); - else - fprintf (stream, "}\n"); -} - - -/* Output a FLDMD instruction to STREAM. - BASE if the register containing the address. - REG and COUNT specify the register range. - Extra registers may be added to avoid hardware bugs. - - We output FLDMD even for ARMv5 VFP implementations. Although - FLDMD is technically not supported until ARMv6, it is believed - that all VFP implementations support its use in this context. */ - -static void -vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count) -{ - int i; - - /* Workaround ARM10 VFPr1 bug. */ - if (count == 2 && !arm_arch6) - { - if (reg == 15) - reg--; - count++; - } - - /* FLDMD may not load more than 16 doubleword registers at a time. Split the - load into multiple parts if we have to handle more than 16 registers. */ - if (count > 16) - { - vfp_output_fldmd (stream, base, reg, 16); - vfp_output_fldmd (stream, base, reg + 16, count - 16); - return; - } - - fputc ('\t', stream); - asm_fprintf (stream, "fldmfdd\t%r!, {", base); - - for (i = reg; i < reg + count; i++) - { - if (i > reg) - fputs (", ", stream); - asm_fprintf (stream, "d%d", i); - } - fputs ("}\n", stream); - -} - - /* Output the assembly for a store multiple. 
*/ const char * @@ -15282,10 +15202,7 @@ output_return_instruction (rtx operand, int really_return, int reverse) { char conditional[10]; char instr[100]; - unsigned reg; - unsigned long live_regs_mask; unsigned long func_type; - arm_stack_offsets *offsets; func_type = arm_current_func_type (); @@ -15312,147 +15229,10 @@ output_return_instruction (rtx operand, int really_return, int reverse) return ""; } - gcc_assert (!cfun->calls_alloca || really_return); - sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd'); cfun->machine->return_used_this_function = 1; - offsets = arm_get_frame_offsets (); - live_regs_mask = offsets->saved_regs_mask; - - if (live_regs_mask) - { - const char * return_reg; - - /* If we do not have any special requirements for function exit - (e.g. interworking) then we can load the return address - directly into the PC. Otherwise we must load it into LR. */ - if (really_return - && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK)) - return_reg = reg_names[PC_REGNUM]; - else - return_reg = reg_names[LR_REGNUM]; - - if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM)) - { - /* There are three possible reasons for the IP register - being saved. 1) a stack frame was created, in which case - IP contains the old stack pointer, or 2) an ISR routine - corrupted it, or 3) it was saved to align the stack on - iWMMXt. In case 1, restore IP into SP, otherwise just - restore IP. */ - if (frame_pointer_needed) - { - live_regs_mask &= ~ (1 << IP_REGNUM); - live_regs_mask |= (1 << SP_REGNUM); - } - else - gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT); - } - - /* On some ARM architectures it is faster to use LDR rather than - LDM to load a single register. On other architectures, the - cost is the same. In 26 bit mode, or for exception handlers, - we have to use LDM to load the PC so that the CPSR is also - restored. */ - for (reg = 0; reg <= LAST_ARM_REGNUM; reg++) - if (live_regs_mask == (1U << reg)) - break; - - if (reg <= LAST_ARM_REGNUM - && (reg != LR_REGNUM - || ! really_return - || ! IS_INTERRUPT (func_type))) - { - sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional, - (reg == LR_REGNUM) ? return_reg : reg_names[reg]); - } - else - { - char *p; - int first = 1; - - /* Generate the load multiple instruction to restore the - registers. Note we can get here, even if - frame_pointer_needed is true, but only if sp already - points to the base of the saved core registers. */ - if (live_regs_mask & (1 << SP_REGNUM)) - { - unsigned HOST_WIDE_INT stack_adjust; - - stack_adjust = offsets->outgoing_args - offsets->saved_regs; - gcc_assert (stack_adjust == 0 || stack_adjust == 4); - - if (stack_adjust && arm_arch5 && TARGET_ARM) - if (TARGET_UNIFIED_ASM) - sprintf (instr, "ldmib%s\t%%|sp, {", conditional); - else - sprintf (instr, "ldm%sib\t%%|sp, {", conditional); - else - { - /* If we can't use ldmib (SA110 bug), - then try to pop r3 instead. 
*/ - if (stack_adjust) - live_regs_mask |= 1 << 3; - - if (TARGET_UNIFIED_ASM) - sprintf (instr, "ldmfd%s\t%%|sp, {", conditional); - else - sprintf (instr, "ldm%sfd\t%%|sp, {", conditional); - } - } - else - if (TARGET_UNIFIED_ASM) - sprintf (instr, "pop%s\t{", conditional); - else - sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional); - - p = instr + strlen (instr); - - for (reg = 0; reg <= SP_REGNUM; reg++) - if (live_regs_mask & (1 << reg)) - { - int l = strlen (reg_names[reg]); - - if (first) - first = 0; - else - { - memcpy (p, ", ", 2); - p += 2; - } - - memcpy (p, "%|", 2); - memcpy (p + 2, reg_names[reg], l); - p += l + 2; - } - - if (live_regs_mask & (1 << LR_REGNUM)) - { - sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg); - /* If returning from an interrupt, restore the CPSR. */ - if (IS_INTERRUPT (func_type)) - strcat (p, "^"); - } - else - strcpy (p, "}"); - } - - output_asm_insn (instr, & operand); - - /* See if we need to generate an extra instruction to - perform the actual function return. */ - if (really_return - && func_type != ARM_FT_INTERWORKED - && (live_regs_mask & (1 << LR_REGNUM)) != 0) - { - /* The return has already been handled - by loading the LR into the PC. */ - really_return = 0; - } - } - if (really_return) { switch ((int) ARM_FUNC_TYPE (func_type)) @@ -15591,451 +15371,6 @@ arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size) } -const char * -arm_output_epilogue (rtx sibling) -{ - int reg; - unsigned long saved_regs_mask; - unsigned long func_type; - /* Floats_offset is the offset from the "virtual" frame. In an APCS - frame that is $fp + 4 for a non-variadic function. */ - int floats_offset = 0; - rtx operands[3]; - FILE * f = asm_out_file; - unsigned int lrm_count = 0; - int really_return = (sibling == NULL); - int start_reg; - arm_stack_offsets *offsets; - - /* If we have already generated the return instruction - then it is futile to generate anything else. */ - if (use_return_insn (FALSE, sibling) && - (cfun->machine->return_used_this_function != 0)) - return ""; - - func_type = arm_current_func_type (); - - if (IS_NAKED (func_type)) - /* Naked functions don't have epilogues. */ - return ""; - - if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN) - { - rtx op; - - /* A volatile function should never return. Call abort. */ - op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort"); - assemble_external_libcall (op); - output_asm_insn ("bl\t%a0", &op); - - return ""; - } - - /* If we are throwing an exception, then we really must be doing a - return, so we can't tail-call. */ - gcc_assert (!crtl->calls_eh_return || really_return); - - offsets = arm_get_frame_offsets (); - saved_regs_mask = offsets->saved_regs_mask; - - if (TARGET_IWMMXT) - lrm_count = bit_count (saved_regs_mask); - - floats_offset = offsets->saved_args; - /* Compute how far away the floats will be. */ - for (reg = 0; reg <= LAST_ARM_REGNUM; reg++) - if (saved_regs_mask & (1 << reg)) - floats_offset += 4; - - if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM) - { - /* This variable is for the Virtual Frame Pointer, not VFP regs. 
*/ - int vfp_offset = offsets->frame; - - if (TARGET_FPA_EMU2) - { - for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--) - if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) - { - floats_offset += 12; - asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n", - reg, FP_REGNUM, floats_offset - vfp_offset); - } - } - else - { - start_reg = LAST_FPA_REGNUM; - - for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--) - { - if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) - { - floats_offset += 12; - - /* We can't unstack more than four registers at once. */ - if (start_reg - reg == 3) - { - asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n", - reg, FP_REGNUM, floats_offset - vfp_offset); - start_reg = reg - 1; - } - } - else - { - if (reg != start_reg) - asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n", - reg + 1, start_reg - reg, - FP_REGNUM, floats_offset - vfp_offset); - start_reg = reg - 1; - } - } - - /* Just in case the last register checked also needs unstacking. */ - if (reg != start_reg) - asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n", - reg + 1, start_reg - reg, - FP_REGNUM, floats_offset - vfp_offset); - } - - if (TARGET_HARD_FLOAT && TARGET_VFP) - { - int saved_size; - - /* The fldmd insns do not have base+offset addressing - modes, so we use IP to hold the address. */ - saved_size = arm_get_vfp_saved_size (); - - if (saved_size > 0) - { - floats_offset += saved_size; - asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM, - FP_REGNUM, floats_offset - vfp_offset); - } - start_reg = FIRST_VFP_REGNUM; - for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2) - { - if ((!df_regs_ever_live_p (reg) || call_used_regs[reg]) - && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1])) - { - if (start_reg != reg) - vfp_output_fldmd (f, IP_REGNUM, - (start_reg - FIRST_VFP_REGNUM) / 2, - (reg - start_reg) / 2); - start_reg = reg + 2; - } - } - if (start_reg != reg) - vfp_output_fldmd (f, IP_REGNUM, - (start_reg - FIRST_VFP_REGNUM) / 2, - (reg - start_reg) / 2); - } - - if (TARGET_IWMMXT) - { - /* The frame pointer is guaranteed to be non-double-word aligned. - This is because it is set to (old_stack_pointer - 4) and the - old_stack_pointer was double word aligned. Thus the offset to - the iWMMXt registers to be loaded must also be non-double-word - sized, so that the resultant address *is* double-word aligned. - We can ignore floats_offset since that was already included in - the live_regs_mask. */ - lrm_count += (lrm_count % 2 ? 2 : 1); - - for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--) - if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) - { - asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n", - reg, FP_REGNUM, lrm_count * 4); - lrm_count += 2; - } - } - - /* saved_regs_mask should contain the IP, which at the time of stack - frame generation actually contains the old stack pointer. So a - quick way to unwind the stack is just pop the IP register directly - into the stack pointer. */ - gcc_assert (saved_regs_mask & (1 << IP_REGNUM)); - saved_regs_mask &= ~ (1 << IP_REGNUM); - saved_regs_mask |= (1 << SP_REGNUM); - - /* There are two registers left in saved_regs_mask - LR and PC. We - only need to restore the LR register (the return address), but to - save time we can load it directly into the PC, unless we need a - special function exit sequence, or we are not really returning. 
*/ - if (really_return - && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL - && !crtl->calls_eh_return) - /* Delete the LR from the register mask, so that the LR on - the stack is loaded into the PC in the register mask. */ - saved_regs_mask &= ~ (1 << LR_REGNUM); - else - saved_regs_mask &= ~ (1 << PC_REGNUM); - - /* We must use SP as the base register, because SP is one of the - registers being restored. If an interrupt or page fault - happens in the ldm instruction, the SP might or might not - have been restored. That would be bad, as then SP will no - longer indicate the safe area of stack, and we can get stack - corruption. Using SP as the base register means that it will - be reset correctly to the original value, should an interrupt - occur. If the stack pointer already points at the right - place, then omit the subtraction. */ - if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask)) - || cfun->calls_alloca) - asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM, - 4 * bit_count (saved_regs_mask)); - print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0); - - if (IS_INTERRUPT (func_type)) - /* Interrupt handlers will have pushed the - IP onto the stack, so restore it now. */ - print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0); - } - else - { - /* This branch is executed for ARM mode (non-apcs frames) and - Thumb-2 mode. Frame layout is essentially the same for those - cases, except that in ARM mode frame pointer points to the - first saved register, while in Thumb-2 mode the frame pointer points - to the last saved register. - - It is possible to make frame pointer point to last saved - register in both cases, and remove some conditionals below. - That means that fp setup in prologue would be just "mov fp, sp" - and sp restore in epilogue would be just "mov sp, fp", whereas - now we have to use add/sub in those cases. However, the value - of that would be marginal, as both mov and add/sub are 32-bit - in ARM mode, and it would require extra conditionals - in arm_expand_prologue to distingish ARM-apcs-frame case - (where frame pointer is required to point at first register) - and ARM-non-apcs-frame. Therefore, such change is postponed - until real need arise. */ - unsigned HOST_WIDE_INT amount; - int rfe; - /* Restore stack pointer if necessary. */ - if (TARGET_ARM && frame_pointer_needed) - { - operands[0] = stack_pointer_rtx; - operands[1] = hard_frame_pointer_rtx; - - operands[2] = GEN_INT (offsets->frame - offsets->saved_regs); - output_add_immediate (operands); - } - else - { - if (frame_pointer_needed) - { - /* For Thumb-2 restore sp from the frame pointer. - Operand restrictions mean we have to incrememnt FP, then copy - to SP. */ - amount = offsets->locals_base - offsets->saved_regs; - operands[0] = hard_frame_pointer_rtx; - } - else - { - unsigned long count; - operands[0] = stack_pointer_rtx; - amount = offsets->outgoing_args - offsets->saved_regs; - /* pop call clobbered registers if it avoids a - separate stack adjustment. */ - count = offsets->saved_regs - offsets->saved_args; - if (optimize_size - && count != 0 - && !crtl->calls_eh_return - && bit_count(saved_regs_mask) * 4 == count - && !IS_INTERRUPT (func_type) - && !IS_STACKALIGN (func_type) - && !crtl->tail_call_emit) - { - unsigned long mask; - /* Preserve return values, of any size. 
*/ - mask = (1 << ((arm_size_return_regs() + 3) / 4)) - 1; - mask ^= 0xf; - mask &= ~saved_regs_mask; - reg = 0; - while (bit_count (mask) * 4 > amount) - { - while ((mask & (1 << reg)) == 0) - reg++; - mask &= ~(1 << reg); - } - if (bit_count (mask) * 4 == amount) { - amount = 0; - saved_regs_mask |= mask; - } - } - } - - if (amount) - { - operands[1] = operands[0]; - operands[2] = GEN_INT (amount); - output_add_immediate (operands); - } - if (frame_pointer_needed) - asm_fprintf (f, "\tmov\t%r, %r\n", - SP_REGNUM, HARD_FRAME_POINTER_REGNUM); - } - - if (TARGET_FPA_EMU2) - { - for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++) - if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) - asm_fprintf (f, "\tldfe\t%r, [%r], #12\n", - reg, SP_REGNUM); - } - else - { - start_reg = FIRST_FPA_REGNUM; - - for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++) - { - if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) - { - if (reg - start_reg == 3) - { - asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n", - start_reg, SP_REGNUM); - start_reg = reg + 1; - } - } - else - { - if (reg != start_reg) - asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n", - start_reg, reg - start_reg, - SP_REGNUM); - - start_reg = reg + 1; - } - } - - /* Just in case the last register checked also needs unstacking. */ - if (reg != start_reg) - asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n", - start_reg, reg - start_reg, SP_REGNUM); - } - - if (TARGET_HARD_FLOAT && TARGET_VFP) - { - int end_reg = LAST_VFP_REGNUM + 1; - - /* Scan the registers in reverse order. We need to match - any groupings made in the prologue and generate matching - pop operations. */ - for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2) - { - if ((!df_regs_ever_live_p (reg) || call_used_regs[reg]) - && (!df_regs_ever_live_p (reg + 1) - || call_used_regs[reg + 1])) - { - if (end_reg > reg + 2) - vfp_output_fldmd (f, SP_REGNUM, - (reg + 2 - FIRST_VFP_REGNUM) / 2, - (end_reg - (reg + 2)) / 2); - end_reg = reg; - } - } - if (end_reg > reg + 2) - vfp_output_fldmd (f, SP_REGNUM, 0, - (end_reg - (reg + 2)) / 2); - } - - if (TARGET_IWMMXT) - for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++) - if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) - asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM); - - /* If we can, restore the LR into the PC. */ - if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED - && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL) - && !IS_STACKALIGN (func_type) - && really_return - && crtl->args.pretend_args_size == 0 - && saved_regs_mask & (1 << LR_REGNUM) - && !crtl->calls_eh_return) - { - saved_regs_mask &= ~ (1 << LR_REGNUM); - saved_regs_mask |= (1 << PC_REGNUM); - rfe = IS_INTERRUPT (func_type); - } - else - rfe = 0; - - /* Load the registers off the stack. If we only have one register - to load use the LDR instruction - it is faster. For Thumb-2 - always use pop and the assembler will pick the best instruction.*/ - if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM) - && !IS_INTERRUPT(func_type)) - { - asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM); - } - else if (saved_regs_mask) - { - if (saved_regs_mask & (1 << SP_REGNUM)) - /* Note - write back to the stack register is not enabled - (i.e. "ldmfd sp!..."). We know that the stack pointer is - in the list of registers and if we add writeback the - instruction becomes UNPREDICTABLE. 
*/ - print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, - rfe); - else if (TARGET_ARM) - print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask, - rfe); - else - print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0); - } - - if (crtl->args.pretend_args_size) - { - /* Unwind the pre-pushed regs. */ - operands[0] = operands[1] = stack_pointer_rtx; - operands[2] = GEN_INT (crtl->args.pretend_args_size); - output_add_immediate (operands); - } - } - - /* We may have already restored PC directly from the stack. */ - if (!really_return || saved_regs_mask & (1 << PC_REGNUM)) - return ""; - - /* Stack adjustment for exception handler. */ - if (crtl->calls_eh_return) - asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM, - ARM_EH_STACKADJ_REGNUM); - - /* Generate the return instruction. */ - switch ((int) ARM_FUNC_TYPE (func_type)) - { - case ARM_FT_ISR: - case ARM_FT_FIQ: - asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM); - break; - - case ARM_FT_EXCEPTION: - asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM); - break; - - case ARM_FT_INTERWORKED: - asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM); - break; - - default: - if (IS_STACKALIGN (func_type)) - { - /* See comment in arm_expand_prologue. */ - asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0); - } - if (arm_arch5 || arm_arch4t) - asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM); - else - asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM); - break; - } - - return ""; -} - static void arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED) @@ -16228,13 +15563,9 @@ emit_multi_reg_push (unsigned long mask) Unfortunately, since this insn does not reflect very well the actual semantics of the operation, we need to annotate the insn for the benefit - of DWARF2 frame unwind information. - - There's no reason why this couldn't be used for Thumb-1 or ARM, in theory, - but currently the pattern that matches this in the MD file is only enabled - for Thumb-2. */ + of DWARF2 frame unwind information. */ static void -thumb2_emit_multi_reg_pop (unsigned long saved_regs_mask, bool really_return) +arm_emit_multi_reg_pop (unsigned long saved_regs_mask, bool really_return) { int num_regs = 0; int i, j; @@ -16303,7 +15634,7 @@ thumb2_emit_multi_reg_pop (unsigned long saved_regs_mask, bool really_return) semantics of the operation, we need to annotate the insn for the benefit of DWARF2 frame unwind information. */ static void -thumb2_emit_vfp_multi_reg_pop (int first_reg, int num_regs) +arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg) { int i, j; rtx par; @@ -16314,8 +15645,8 @@ thumb2_emit_vfp_multi_reg_pop (int first_reg, int num_regs) if (num_regs > 16) { - thumb2_emit_vfp_multi_reg_pop (first_reg, 16); - thumb2_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16); + arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg); + arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg); return; } @@ -16326,8 +15657,8 @@ thumb2_emit_vfp_multi_reg_pop (int first_reg, int num_regs) /* Increment the stack pointer, based on there being num_regs 8-byte registers to restore. 
*/ tmp = gen_rtx_SET (VOIDmode, - stack_pointer_rtx, - plus_constant (stack_pointer_rtx, 8 * num_regs)); + base_reg, + plus_constant (base_reg, 8 * num_regs)); RTX_FRAME_RELATED_P (tmp) = 1; XVECEXP (par, 0, 0) = tmp; @@ -16340,8 +15671,7 @@ thumb2_emit_vfp_multi_reg_pop (int first_reg, int num_regs) reg, gen_frame_mem (DFmode, - plus_constant (stack_pointer_rtx, - 8 * j))); + plus_constant (base_reg, 8 * j))); RTX_FRAME_RELATED_P (tmp) = 1; XVECEXP (par, 0, j + 1) = tmp; @@ -22510,6 +21840,139 @@ thumb1_expand_epilogue (void) /* Generate pattern *pop_multiple_with_stack_update_and_return if single POP instruction can be generated. LR should be replaced by PC. All the checks required are already done by USE_RETURN_INSN (). Hence, + all we need to do here is generate multi-reg pop and let return pattern + handle other instructions to be generated, if any. */ +void +arm_expand_return (bool really_return) +{ + int i, num_regs, return_reg; + unsigned long saved_regs_mask, func_type; + arm_stack_offsets *offsets; + unsigned HOST_WIDE_INT stack_adjust; + + func_type = arm_current_func_type (); + + if (IS_NAKED (func_type) + || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)) + { + emit_jump_insn (ret_rtx); + return; + } + + gcc_assert (!cfun->calls_alloca || really_return); + + offsets = arm_get_frame_offsets (); + saved_regs_mask = offsets->saved_regs_mask; + for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++) + if (saved_regs_mask & (1 << i)) + num_regs++; + + if (saved_regs_mask) + { + /* If we do not have any special requirements for function exit (e.g. + interworking), then we can load the return address directly into + the PC. Otherwise we must load it into LR. */ + if (really_return + && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK) + && (saved_regs_mask & (1 << LR_REGNUM))) + return_reg = PC_REGNUM; + else + return_reg = LR_REGNUM; + + if (saved_regs_mask & (1 << IP_REGNUM)) + { + /* There are three possible reasons for the IP register being saved. + 1) a stack frame was created, in which case IP contains the old + stack pointer, or + 2) an ISR routine corrupted it, or + 3) it was saved to align the stack on iWMMXt. + In case 1, restore IP into SP, otherwise just restore IP. */ + if (frame_pointer_needed) + { + saved_regs_mask &= ~ (1 << IP_REGNUM); + saved_regs_mask |= (1 << SP_REGNUM); + } + else + { + gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT); + } + } + + for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++) + /* On some ARM architectures it is faster to use LDR rather than LDM + to load a single register. So identify single-register-restore + case. */ + if (saved_regs_mask == (1 << i)) + break; + + if (i <= LAST_ARM_REGNUM + && (i != LR_REGNUM + || ! really_return + || ! IS_INTERRUPT (func_type))) + { + rtx addr; + rtx reg; + + reg = gen_rtx_REG (SImode, ((i == LR_REGNUM) ? 
return_reg : i)); + if (saved_regs_mask & (1 << SP_REGNUM)) + addr = gen_rtx_MEM (SImode, stack_pointer_rtx); + else + addr = gen_rtx_MEM (SImode, + gen_rtx_POST_INC (SImode, + stack_pointer_rtx)); + + set_mem_alias_set (addr, get_frame_alias_set ()); + + if (saved_regs_mask & (1 << PC_REGNUM) || return_reg == PC_REGNUM) + { + rtx insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2)); + XVECEXP (insn, 0, 0) = ret_rtx; + XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode, reg, addr); + RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1; + emit_jump_insn (insn); + } + else + { + rtx insn = gen_rtx_SET (SImode, reg, addr); + RTX_FRAME_RELATED_P (insn) = 1; + emit_insn (insn); + } + } + else + { + if (saved_regs_mask & (1 << SP_REGNUM)) + { + stack_adjust = offsets->outgoing_args - offsets->saved_regs; + gcc_assert (stack_adjust == 0 || stack_adjust == 4); + + if (stack_adjust && !(arm_arch5 && TARGET_ARM)) + saved_regs_mask |= 1 << 3; + } + + arm_emit_multi_reg_pop (((saved_regs_mask & (~ (1 << LR_REGNUM))) + | (1 << return_reg)), + (saved_regs_mask & (1 << PC_REGNUM) + || return_reg == PC_REGNUM)); + } + + /* Check if we need to generate extra instruction to perform actual + function return. */ + if (really_return + && func_type != ARM_FT_INTERWORKED + && (saved_regs_mask & (1 << LR_REGNUM)) != 0) + /* The return has already been handled by loading LR in PC. */ + really_return = false; + } + + /* output_return_instruction () will take care of actual instruction to be + emited to return from function. */ + if (really_return) + emit_jump_insn (ret_rtx); +} + +/* Generate pattern *pop_multiple_with_stack_update_and_return if single + POP instruction can be generated. LR should be replaced by PC. All + the checks required are already done by USE_RETURN_INSN (). Hence, all we really need to check here is if single register is to be returned, or multiple register return. */ void @@ -22544,7 +22007,7 @@ thumb2_expand_return (void) { saved_regs_mask &= ~ (1 << LR_REGNUM); saved_regs_mask |= (1 << PC_REGNUM); - thumb2_emit_multi_reg_pop (saved_regs_mask, true); + arm_emit_multi_reg_pop (saved_regs_mask, true); } } else @@ -22553,13 +22016,405 @@ thumb2_expand_return (void) } } +/* Generate RTL to represent ARM epilogue. Really_return is true if the + function is not a sibcall. */ +void +arm_expand_epilogue (bool really_return) +{ + unsigned long func_type; + unsigned long saved_regs_mask; + int num_regs = 0; + int i; + int amount; + int floats_from_frame = 0; + arm_stack_offsets *offsets; + + func_type = arm_current_func_type (); + + /* Naked functions don't have epilogue. Hence, generate return pattern, and + let output_return_instruction take care of instruction emition if any. */ + if (IS_NAKED (func_type) + || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)) + { + emit_jump_insn (ret_rtx); + return; + } + + gcc_assert (!crtl->calls_eh_return || really_return); + + /* Get frame offsets for ARM. */ + offsets = arm_get_frame_offsets (); + saved_regs_mask = offsets->saved_regs_mask; + + /* Find offset of floating point register from frame pointer. + The initialization is done in this way to take care of frame pointer and + static-chain register, if stored. 
*/ + floats_from_frame = offsets->saved_regs - offsets->frame; + + for (i = 0; i <= LAST_ARM_REGNUM; i++) + if (saved_regs_mask & (1 << i)) + { + num_regs++; + floats_from_frame += 4; + } + + if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM) + { + if (TARGET_FPA_EMU2) + { + for (i = LAST_FPA_REGNUM; i >= FIRST_FPA_REGNUM; i--) + if (df_regs_ever_live_p (i) && !call_used_regs[i]) + { + rtx addr; + floats_from_frame += 12; + addr = gen_rtx_MEM (XFmode, + gen_rtx_PLUS (SImode, + hard_frame_pointer_rtx, + GEN_INT (- floats_from_frame))); + set_mem_alias_set (addr, get_frame_alias_set ()); + emit_insn (gen_rtx_SET (XFmode, + gen_rtx_REG (XFmode, i), + addr)); + } + } + else + { + int idx = 0; + rtx load_seq[4]; + + for (i = LAST_FPA_REGNUM; i >= FIRST_FPA_REGNUM; i--) + { + floats_from_frame += 12; + + if (idx == 4) + { + emit_insn (gen_rtx_PARALLEL (VOIDmode, + gen_rtvec_v (idx, load_seq))); + idx = 0; + } + + if (df_regs_ever_live_p (i) && !call_used_regs[i]) + { + load_seq[idx] = gen_rtx_SET (VOIDmode, + gen_rtx_REG (XFmode, i), + gen_frame_mem (XFmode, + plus_constant (hard_frame_pointer_rtx, + - floats_from_frame))); + idx++; + } + else + { + if (idx) + { + /* Create parallel and emit. */ + emit_insn (gen_rtx_PARALLEL (VOIDmode, + gen_rtvec_v (idx, load_seq))); + idx = 0; + } + } + } + + if (idx) + emit_insn (gen_rtx_PARALLEL (VOIDmode, + gen_rtvec_v (idx, load_seq))); + } + + if (TARGET_HARD_FLOAT && TARGET_VFP) + { + int start_reg; + /* The offset is from IP_REGNUM. */ + emit_insn (gen_addsi3 (gen_rtx_REG (SImode, IP_REGNUM), + hard_frame_pointer_rtx, + GEN_INT (- floats_from_frame))); + + /* Generate VFP register multi-pop. */ + start_reg = FIRST_VFP_REGNUM; + + for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2) + /* Look for a case where a reg does not need restoring. */ + if ((!df_regs_ever_live_p (i) || call_used_regs[i]) + && (!df_regs_ever_live_p (i + 1) + || call_used_regs[i + 1])) + { + if (start_reg != i) + arm_emit_vfp_multi_reg_pop (start_reg, + ((i - start_reg)) / 2, + gen_rtx_REG (SImode, + IP_REGNUM)); + start_reg = i; + } + + /* Restore the remaining regs that we have discovered (or possibly + even all of them, if the conditional in the for loop never + fired). */ + if (start_reg != i) + arm_emit_vfp_multi_reg_pop (start_reg, + (i - start_reg) / 2, + gen_rtx_REG (SImode, IP_REGNUM)); + } + + if (TARGET_IWMMXT) + { + /* The frame pointer is guaranteed to be non-double-word aligned, as + it is set to double-word-aligned old_stack_pointer - 4. */ + int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1); + for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--) + if (df_regs_ever_live_p (i) && !call_used_regs[i]) + { + rtx addr = gen_frame_mem (V2SImode, + plus_constant (hard_frame_pointer_rtx, + - lrm_count * 4)); + emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr)); + lrm_count += 2; + } + } + + /* saved_regs_mask should contain the IP which contains old stack pointer + at the time of activation creation. To unwind stack quickly, pop IP + in SP. */ + gcc_assert (saved_regs_mask & (1 << IP_REGNUM)); + saved_regs_mask &= ~ (1 << IP_REGNUM); + saved_regs_mask |= (1 << SP_REGNUM); + + if (really_return + && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL + && !crtl->calls_eh_return) + saved_regs_mask &= ~ (1 << LR_REGNUM); + else + saved_regs_mask &= ~ (1 << PC_REGNUM); + + if (offsets->outgoing_args != (1 + num_regs) + || cfun->calls_alloca) + /* Unwind the stack till saved registers. 
*/ + gen_addsi3 (stack_pointer_rtx, + hard_frame_pointer_rtx, + GEN_INT (- 4 * num_regs)); + + arm_emit_multi_reg_pop (saved_regs_mask, really_return); + + if (IS_INTERRUPT (func_type)) + { + rtx addr = gen_rtx_MEM (SImode, + gen_rtx_POST_INC (SImode, + stack_pointer_rtx)); + set_mem_alias_set (addr, get_frame_alias_set ()); + emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr)); + } + + if (!really_return || saved_regs_mask & (1 << PC_REGNUM)) + return; + + if (crtl->calls_eh_return) + emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, + GEN_INT (ARM_EH_STACKADJ_REGNUM))); + + emit_jump_insn (ret_rtx); + } + else + { + if (frame_pointer_needed) /* Handles case for TARGET_ARM only. */ + { + /* In ARM mode, frame pointer points to first saved register. + Restore stack pointer to last saved register. */ + emit_insn (gen_addsi3 (hard_frame_pointer_rtx, + hard_frame_pointer_rtx, + GEN_INT (offsets->frame - offsets->saved_regs))); + emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx)); + /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not + deleted. */ + emit_insn (gen_prologue_use (stack_pointer_rtx)); + } + else + { + /* Pop off outgoing args and local frame to adjust stack pointer to + last saved register. */ + amount = offsets->outgoing_args - offsets->saved_regs; + if (amount) + { + emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, + GEN_INT (amount))); + /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is + not deleted. */ + emit_insn (gen_prologue_use (stack_pointer_rtx)); + } + } + + if (TARGET_FPA_EMU2) + { + for (i = FIRST_FPA_REGNUM; i <= LAST_FPA_REGNUM; i++) + if (df_regs_ever_live_p (i) && !call_used_regs[i]) + { + /* Generate memory reference with write-back to SP. */ + rtx addr = gen_rtx_MEM (XFmode, + gen_rtx_POST_INC (SImode, + stack_pointer_rtx)); + set_mem_alias_set (addr, get_frame_alias_set ()); + emit_insn (gen_movxf (gen_rtx_REG (XFmode, i), addr)); + } + } + else + { + int idx = 0; + rtx load_seq[5]; + + for (i = FIRST_FPA_REGNUM; i <= LAST_FPA_REGNUM; i++) + { + if (idx == 4) + { + load_seq[0] = gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (stack_pointer_rtx, + 12 * idx)); + emit_insn (gen_rtx_PARALLEL (VOIDmode, + gen_rtvec_v (idx + 1, load_seq))); + idx = 0; + } + + if (df_regs_ever_live_p (i) && !call_used_regs[i]) + { + load_seq[idx + 1] = gen_rtx_SET (VOIDmode, + gen_rtx_REG (XFmode, i), + gen_frame_mem (XFmode, + plus_constant (stack_pointer_rtx, + 12 * idx))); + idx++; + } + else + { + if (idx) + { + /* Create parallel and emit. */ + load_seq[0] = gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (stack_pointer_rtx, + 12 * idx)); + emit_insn (gen_rtx_PARALLEL (VOIDmode, + gen_rtvec_v (idx + 1, + load_seq))); + idx = 0; + } + } + } + + if (idx) + { + load_seq[0] = gen_rtx_SET (VOIDmode, + stack_pointer_rtx, + plus_constant (stack_pointer_rtx, + 12 * idx)); + emit_insn (gen_rtx_PARALLEL (VOIDmode, + gen_rtvec_v (idx + 1, load_seq))); + idx = 0; + } + } + + if (TARGET_HARD_FLOAT && TARGET_VFP) + { + /* Generate VFP register multi-pop. */ + int end_reg = LAST_VFP_REGNUM + 1; + + /* Scan the registers in reverse order. We need to match + any groupings made in the prologue and generate matching + fldmdd operations. The need to match groups is because, + unlike pop, fldmdd can only do consecutive regs. */ + for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2) + /* Look for a case where a reg does not need restoring. 
*/ + if ((!df_regs_ever_live_p (i) || call_used_regs[i]) + && (!df_regs_ever_live_p (i + 1) + || call_used_regs[i + 1])) + { + /* Restore the regs discovered so far (from reg+2 to + end_reg). */ + if (end_reg > i + 2) + arm_emit_vfp_multi_reg_pop (i + 2, + (end_reg - (i + 2)) / 2, + stack_pointer_rtx); + end_reg = i; + } + + /* Restore the remaining regs that we have discovered (or possibly + even all of them, if the conditional in the for loop never + fired). */ + if (end_reg > i + 2) + arm_emit_vfp_multi_reg_pop (i + 2, + (end_reg - (i + 2)) / 2, + stack_pointer_rtx); + } + + if (TARGET_IWMMXT) + for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++) + if (df_regs_ever_live_p (i) && !call_used_regs[i]) + { + rtx addr = gen_rtx_MEM (V2SImode, + gen_rtx_POST_INC (SImode, + stack_pointer_rtx)); + set_mem_alias_set (addr, get_frame_alias_set ()); + emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr)); + } + + if (saved_regs_mask) + { + bool return_in_pc = false; + + if (num_regs == 1) + { + for (i = 0; i <= LAST_ARM_REGNUM; i++) + if (saved_regs_mask & (1 << i)) + { + rtx addr = gen_rtx_MEM (SImode, + gen_rtx_POST_INC (SImode, + stack_pointer_rtx)); + set_mem_alias_set (addr, get_frame_alias_set ()); + emit_insn (gen_movsi (gen_rtx_REG (SImode, i), addr)); + } + } + else + { + if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED + && TARGET_ARM + && !IS_STACKALIGN (func_type) + && really_return + && crtl->args.pretend_args_size == 0 + && saved_regs_mask & (1 << LR_REGNUM) + && !crtl->calls_eh_return) + { + saved_regs_mask &= ~ (1 << LR_REGNUM); + saved_regs_mask |= (1 << PC_REGNUM); + return_in_pc = true; + } + + arm_emit_multi_reg_pop (saved_regs_mask, return_in_pc); + if (return_in_pc == true) + return; + } + } + + if (crtl->args.pretend_args_size) + emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, + GEN_INT (crtl->args.pretend_args_size))); + + if (!really_return) + return; + + if (crtl->calls_eh_return) + emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, + GEN_INT (ARM_EH_STACKADJ_REGNUM))); + + emit_jump_insn (ret_rtx); + } +} + /* Generate RTL to represent a Thumb-2 epilogue. Note that this RTL does not include the Return insn, which is created separately and handled in thumb2_output_return. */ void -thumb2_expand_epilogue (void) +thumb2_expand_epilogue (bool is_sibling) { HOST_WIDE_INT amount; int reg; @@ -22628,20 +22483,23 @@ thumb2_expand_epilogue (void) { /* Restore the regs discovered so far (from reg+2 to end_reg). */ if (end_reg > reg + 2) - thumb2_emit_vfp_multi_reg_pop (reg + 2, - (end_reg - (reg + 2)) / 2); + arm_emit_vfp_multi_reg_pop (reg + 2, + (end_reg - (reg + 2)) / 2, + stack_pointer_rtx); end_reg = reg; } /* Restore the remaining regs that we have discovered (or possibly even all of them, if the conditional in the for loop never fired). */ if (end_reg > reg + 2) - thumb2_emit_vfp_multi_reg_pop (reg + 2, (end_reg - (reg + 2)) / 2); + arm_emit_vfp_multi_reg_pop (reg + 2, + (end_reg - (reg + 2)) / 2, + stack_pointer_rtx); } /* iWMMXt is not supported when Thumb-2 in use. If it were, we would want to be restoring the appropriate iWMMXt regs here, in a similar - way to arm_output_epilogue. */ + way to arm_expand_epilogue. */ /* If there are registers to restore, make it happen. */ if (saved_regs_mask) @@ -22667,6 +22525,7 @@ thumb2_expand_epilogue (void) return-address' instruction. Instead, pop LR in PC. 
*/ if (ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL && !IS_STACKALIGN (func_type) + && !is_sibling && crtl->args.pretend_args_size == 0 && saved_regs_mask & (1 << LR_REGNUM) && !crtl->calls_eh_return) @@ -22676,7 +22535,7 @@ thumb2_expand_epilogue (void) really_return = true; } - thumb2_emit_multi_reg_pop (saved_regs_mask, really_return); + arm_emit_multi_reg_pop (saved_regs_mask, really_return); if (really_return == true) return; } @@ -22688,6 +22547,9 @@ thumb2_expand_epilogue (void) stack_pointer_rtx, GEN_INT (crtl->args.pretend_args_size))); + if (is_sibling) + return; + /* Stack adjustment for exception handler. */ if (crtl->calls_eh_return) emit_insn (gen_addsi3 (stack_pointer_rtx, diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 64444f2..fb2db69 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -6682,26 +6682,43 @@ (plus:SI (match_dup 1) (match_operand:SI 2 "const_int_operand" "I"))) ])] - "TARGET_THUMB2" + "TARGET_32BIT" "* { int i; char pattern[100]; int num_saves = XVECLEN (operands[0], 0); - if (REGNO (operands[1]) == SP_REGNUM) + if (TARGET_THUMB2) { - strcpy (pattern, \"pop\\t{\"); + if (REGNO (operands[1]) == SP_REGNUM) + { + strcpy (pattern, \"pop%?\\t{\"); + } + else + { + strcpy (pattern, \"ldm%(ia%)\\t\"); + strcat (pattern, reg_names[REGNO (operands[1])]); + strcat (pattern, \"!, {\"); + } } else { - strcpy (pattern, \"ldm%(ia%)\\t\"); + strcpy (pattern, \"ldm%(fd%)\\t\"); strcat (pattern, reg_names[REGNO (operands[1])]); - strcat (pattern, \"!, {\"); + + for (i = 2; i < num_saves; i++) + if (REGNO (XEXP (XVECEXP (operands[0], 0, i), 0)) == SP_REGNUM) + break; + + if (i != num_saves) + strcat (pattern, \", {\"); + else + strcat (pattern, \"!, {\"); } strcat (pattern, - reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, 1), 0))]); + reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, 1), 0))]); /* Skip over the first element and the one we just generated. 
*/ for (i = 2; i < (num_saves); i++) @@ -6717,7 +6734,8 @@ return \"\"; } " - [(set_attr "type" "load4")] + [(set_attr "type" "load4") + (set_attr "predicable" "yes")] ) ;; Pop with return (as used in epilogue RTL) @@ -6733,14 +6751,31 @@ (plus:SI (match_dup 1) (match_operand:SI 2 "const_int_operand" "I"))) ])] - "TARGET_THUMB2" + "TARGET_32BIT" "* { int i; char pattern[100]; int num_saves = XVECLEN (operands[0], 0); - strcpy (pattern, \"pop\\t{\"); + if (TARGET_THUMB2) + { + strcpy (pattern, \"pop%?\\t{\"); + } + else + { + strcpy (pattern, \"ldm%(fd%)\\t\"); + strcat (pattern, reg_names[REGNO (operands[1])]); + for (i = 3; i < num_saves; i++) + if (REGNO (XEXP (XVECEXP (operands[0], 0, i), 0)) == SP_REGNUM) + break; + + if (i != num_saves) + strcat (pattern, \", {\"); + else + strcat (pattern, \"!, {\"); + } + strcat (pattern, reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, 2), 0))]); @@ -6753,19 +6788,24 @@ } strcat (pattern, \"}\"); + + if (IS_INTERRUPT (arm_current_func_type ())) + strcat (pattern, \"^\"); + output_asm_insn (pattern, operands); return \"\"; } " - [(set_attr "type" "load4")] + [(set_attr "type" "load4") + (set_attr "predicable" "yes")] ) -(define_insn "*thumb2_ldr_with_return" +(define_insn "*ldr_with_return" [(return) (set (reg:SI PC_REGNUM) (mem:SI (post_inc:SI (match_operand:SI 0 "s_register_operand" "+k"))))] - "TARGET_THUMB2" + "TARGET_32BIT" "ldr%?\t%|pc, [%0], #4" [(set_attr "type" "load1") (set_attr "predicable" "yes")] @@ -6773,12 +6813,12 @@ (define_insn "*vfp_pop_multiple_with_stack_update" [(match_parallel 0 "load_multiple_operation_fp" - [(set (match_operand:SI 1 "s_register_operand" "+k") + [(set (match_operand:SI 1 "s_register_operand" "+rk") (plus:SI (match_dup 1) (match_operand:SI 2 "const_int_operand" "I"))) (set (match_operand:DF 3 "arm_hard_register_operand" "") (mem:DF (match_dup 1)))])] - "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP" + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" "* { int num_regs = XVECLEN (operands[0], 0); @@ -6811,7 +6851,9 @@ return \"\"; } " - [(set_attr "type" "load4")] + [(set_attr "type" "load4") + (set_attr "conds" "unconditional") + (set_attr "predicable" "no")] ) (define_expand "store_multiple" @@ -8642,13 +8684,18 @@ thumb2_expand_return (); DONE; } + else if (TARGET_ARM) + { + arm_expand_return (true); + DONE; + } } ") ;; Often the return insn will be the same as loading from memory, so set attr (define_insn "*arm_return" [(return)] - "TARGET_ARM && USE_RETURN_INSN (FALSE)" + "TARGET_ARM" "* { if (arm_ccfsm_state == 2) @@ -8658,53 +8705,11 @@ } return output_return_instruction (const_true_rtx, TRUE, FALSE); }" - [(set_attr "type" "load1") - (set_attr "length" "12") + [(set_attr "type" "branch") + (set_attr "length" "4") (set_attr "predicable" "yes")] ) -(define_insn "*cond_return" - [(set (pc) - (if_then_else (match_operator 0 "arm_comparison_operator" - [(match_operand 1 "cc_register" "") (const_int 0)]) - (return) - (pc)))] - "TARGET_ARM && USE_RETURN_INSN (TRUE)" - "* - { - if (arm_ccfsm_state == 2) - { - arm_ccfsm_state += 2; - return \"\"; - } - return output_return_instruction (operands[0], TRUE, FALSE); - }" - [(set_attr "conds" "use") - (set_attr "length" "12") - (set_attr "type" "load1")] -) - -(define_insn "*cond_return_inverted" - [(set (pc) - (if_then_else (match_operator 0 "arm_comparison_operator" - [(match_operand 1 "cc_register" "") (const_int 0)]) - (pc) - (return)))] - "TARGET_ARM && USE_RETURN_INSN (TRUE)" - "* - { - if (arm_ccfsm_state == 2) - { - arm_ccfsm_state += 2; - return \"\"; - } 
- return output_return_instruction (operands[0], TRUE, TRUE); - }" - [(set_attr "conds" "use") - (set_attr "length" "12") - (set_attr "type" "load1")] -) - ;; Generate a sequence of instructions to determine if the processor is ;; in 26-bit or 32-bit mode, and return the appropriate return address ;; mask. @@ -10684,20 +10689,27 @@ if (crtl->calls_eh_return) emit_insn (gen_prologue_use (gen_rtx_REG (Pmode, 2))); if (TARGET_THUMB1) + { thumb1_expand_epilogue (); + emit_jump_insn (gen_rtx_UNSPEC_VOLATILE (VOIDmode, + gen_rtvec (1, ret_rtx), VUNSPEC_EPILOGUE)); + DONE; + } else if (TARGET_THUMB2) { - thumb2_expand_epilogue (); + thumb2_expand_epilogue (false); DONE; } else if (USE_RETURN_INSN (FALSE)) - { - emit_jump_insn (gen_return ()); - DONE; - } - emit_jump_insn (gen_rtx_UNSPEC_VOLATILE (VOIDmode, - gen_rtvec (1, ret_rtx), VUNSPEC_EPILOGUE)); - DONE; + { + arm_expand_return (true); + DONE; + } + else if (TARGET_ARM) + { + arm_expand_epilogue (true); + DONE; + } " ) @@ -10713,31 +10725,23 @@ ;; to add an unspec of the link register to ensure that flow ;; does not think that it is unused by the sibcall branch that ;; will replace the standard function epilogue. -(define_insn "sibcall_epilogue" +(define_expand "sibcall_epilogue" [(parallel [(unspec:SI [(reg:SI LR_REGNUM)] UNSPEC_PROLOGUE_USE) (unspec_volatile [(return)] VUNSPEC_EPILOGUE)])] "TARGET_32BIT" - "* - if (use_return_insn (FALSE, next_nonnote_insn (insn))) - return output_return_instruction (const_true_rtx, FALSE, FALSE); - return arm_output_epilogue (next_nonnote_insn (insn)); " -;; Length is absolute worst case - [(set_attr "length" "44") - (set_attr "type" "block") - ;; We don't clobber the conditions, but the potential length of this - ;; operation is sufficient to make conditionalizing the sequence - ;; unlikely to be profitable. - (set_attr "conds" "clob")] + if (TARGET_ARM) + arm_expand_epilogue (false); + else if (TARGET_THUMB2) + thumb2_expand_epilogue (true); + DONE; + " ) (define_insn "*epilogue_insns" [(unspec_volatile [(return)] VUNSPEC_EPILOGUE)] - "TARGET_ARM || TARGET_THUMB1" + "TARGET_THUMB1" "* - if (TARGET_32BIT) - return arm_output_epilogue (NULL); - else /* TARGET_THUMB1 */ return thumb1_unexpanded_epilogue (); " ; Length is absolute worst case