@@ -28,7 +28,8 @@ extern int use_return_insn (int, rtx);
extern enum reg_class arm_regno_class (int);
extern void arm_load_pic_register (unsigned long);
extern int arm_volatile_func (void);
-extern const char *arm_output_epilogue (rtx);
+extern void arm_expand_epilogue (bool);
+extern void arm_expand_return (bool);
extern void arm_expand_prologue (void);
extern const char *arm_strip_name_encoding (const char *);
extern void arm_asm_output_labelref (FILE *, const char *);
@@ -181,7 +182,7 @@ extern const char *thumb1_unexpanded_epilogue (void);
extern void thumb1_expand_prologue (void);
extern void thumb1_expand_epilogue (void);
extern const char *thumb1_output_interwork (void);
-extern void thumb2_expand_epilogue (void);
+extern void thumb2_expand_epilogue (bool);
extern void thumb2_output_return (rtx);
extern void thumb2_expand_return (void);
#ifdef TREE_CODE
@@ -13500,86 +13500,6 @@ fp_const_from_val (REAL_VALUE_TYPE *r)
gcc_unreachable ();
}
-/* Output the operands of a LDM/STM instruction to STREAM.
- MASK is the ARM register set mask of which only bits 0-15 are important.
- REG is the base register, either the frame pointer or the stack pointer,
- INSTR is the possibly suffixed load or store instruction.
- RFE is nonzero if the instruction should also copy spsr to cpsr. */
-
-static void
-print_multi_reg (FILE *stream, const char *instr, unsigned reg,
- unsigned long mask, int rfe)
-{
- unsigned i;
- bool not_first = FALSE;
-
- gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
- fputc ('\t', stream);
- asm_fprintf (stream, instr, reg);
- fputc ('{', stream);
-
- for (i = 0; i <= LAST_ARM_REGNUM; i++)
- if (mask & (1 << i))
- {
- if (not_first)
- fprintf (stream, ", ");
-
- asm_fprintf (stream, "%r", i);
- not_first = TRUE;
- }
-
- if (rfe)
- fprintf (stream, "}^\n");
- else
- fprintf (stream, "}\n");
-}
-
-
-/* Output a FLDMD instruction to STREAM.
- BASE if the register containing the address.
- REG and COUNT specify the register range.
- Extra registers may be added to avoid hardware bugs.
-
- We output FLDMD even for ARMv5 VFP implementations. Although
- FLDMD is technically not supported until ARMv6, it is believed
- that all VFP implementations support its use in this context. */
-
-static void
-vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
-{
- int i;
-
- /* Workaround ARM10 VFPr1 bug. */
- if (count == 2 && !arm_arch6)
- {
- if (reg == 15)
- reg--;
- count++;
- }
-
- /* FLDMD may not load more than 16 doubleword registers at a time. Split the
- load into multiple parts if we have to handle more than 16 registers. */
- if (count > 16)
- {
- vfp_output_fldmd (stream, base, reg, 16);
- vfp_output_fldmd (stream, base, reg + 16, count - 16);
- return;
- }
-
- fputc ('\t', stream);
- asm_fprintf (stream, "fldmfdd\t%r!, {", base);
-
- for (i = reg; i < reg + count; i++)
- {
- if (i > reg)
- fputs (", ", stream);
- asm_fprintf (stream, "d%d", i);
- }
- fputs ("}\n", stream);
-
-}
-
-
/* Output the assembly for a store multiple. */
const char *
@@ -15282,10 +15202,7 @@ output_return_instruction (rtx operand, int really_return, int reverse)
{
char conditional[10];
char instr[100];
- unsigned reg;
- unsigned long live_regs_mask;
unsigned long func_type;
- arm_stack_offsets *offsets;
func_type = arm_current_func_type ();
@@ -15312,147 +15229,10 @@ output_return_instruction (rtx operand, int really_return, int reverse)
return "";
}
- gcc_assert (!cfun->calls_alloca || really_return);
-
sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
cfun->machine->return_used_this_function = 1;
- offsets = arm_get_frame_offsets ();
- live_regs_mask = offsets->saved_regs_mask;
-
- if (live_regs_mask)
- {
- const char * return_reg;
-
- /* If we do not have any special requirements for function exit
- (e.g. interworking) then we can load the return address
- directly into the PC. Otherwise we must load it into LR. */
- if (really_return
- && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
- return_reg = reg_names[PC_REGNUM];
- else
- return_reg = reg_names[LR_REGNUM];
-
- if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
- {
- /* There are three possible reasons for the IP register
- being saved. 1) a stack frame was created, in which case
- IP contains the old stack pointer, or 2) an ISR routine
- corrupted it, or 3) it was saved to align the stack on
- iWMMXt. In case 1, restore IP into SP, otherwise just
- restore IP. */
- if (frame_pointer_needed)
- {
- live_regs_mask &= ~ (1 << IP_REGNUM);
- live_regs_mask |= (1 << SP_REGNUM);
- }
- else
- gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
- }
-
- /* On some ARM architectures it is faster to use LDR rather than
- LDM to load a single register. On other architectures, the
- cost is the same. In 26 bit mode, or for exception handlers,
- we have to use LDM to load the PC so that the CPSR is also
- restored. */
- for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
- if (live_regs_mask == (1U << reg))
- break;
-
- if (reg <= LAST_ARM_REGNUM
- && (reg != LR_REGNUM
- || ! really_return
- || ! IS_INTERRUPT (func_type)))
- {
- sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
- (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
- }
- else
- {
- char *p;
- int first = 1;
-
- /* Generate the load multiple instruction to restore the
- registers. Note we can get here, even if
- frame_pointer_needed is true, but only if sp already
- points to the base of the saved core registers. */
- if (live_regs_mask & (1 << SP_REGNUM))
- {
- unsigned HOST_WIDE_INT stack_adjust;
-
- stack_adjust = offsets->outgoing_args - offsets->saved_regs;
- gcc_assert (stack_adjust == 0 || stack_adjust == 4);
-
- if (stack_adjust && arm_arch5 && TARGET_ARM)
- if (TARGET_UNIFIED_ASM)
- sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
- else
- sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
- else
- {
- /* If we can't use ldmib (SA110 bug),
- then try to pop r3 instead. */
- if (stack_adjust)
- live_regs_mask |= 1 << 3;
-
- if (TARGET_UNIFIED_ASM)
- sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
- else
- sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
- }
- }
- else
- if (TARGET_UNIFIED_ASM)
- sprintf (instr, "pop%s\t{", conditional);
- else
- sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
-
- p = instr + strlen (instr);
-
- for (reg = 0; reg <= SP_REGNUM; reg++)
- if (live_regs_mask & (1 << reg))
- {
- int l = strlen (reg_names[reg]);
-
- if (first)
- first = 0;
- else
- {
- memcpy (p, ", ", 2);
- p += 2;
- }
-
- memcpy (p, "%|", 2);
- memcpy (p + 2, reg_names[reg], l);
- p += l + 2;
- }
-
- if (live_regs_mask & (1 << LR_REGNUM))
- {
- sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
- /* If returning from an interrupt, restore the CPSR. */
- if (IS_INTERRUPT (func_type))
- strcat (p, "^");
- }
- else
- strcpy (p, "}");
- }
-
- output_asm_insn (instr, & operand);
-
- /* See if we need to generate an extra instruction to
- perform the actual function return. */
- if (really_return
- && func_type != ARM_FT_INTERWORKED
- && (live_regs_mask & (1 << LR_REGNUM)) != 0)
- {
- /* The return has already been handled
- by loading the LR into the PC. */
- really_return = 0;
- }
- }
-
if (really_return)
{
switch ((int) ARM_FUNC_TYPE (func_type))
@@ -15591,451 +15371,6 @@ arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
}
-const char *
-arm_output_epilogue (rtx sibling)
-{
- int reg;
- unsigned long saved_regs_mask;
- unsigned long func_type;
- /* Floats_offset is the offset from the "virtual" frame. In an APCS
- frame that is $fp + 4 for a non-variadic function. */
- int floats_offset = 0;
- rtx operands[3];
- FILE * f = asm_out_file;
- unsigned int lrm_count = 0;
- int really_return = (sibling == NULL);
- int start_reg;
- arm_stack_offsets *offsets;
-
- /* If we have already generated the return instruction
- then it is futile to generate anything else. */
- if (use_return_insn (FALSE, sibling) &&
- (cfun->machine->return_used_this_function != 0))
- return "";
-
- func_type = arm_current_func_type ();
-
- if (IS_NAKED (func_type))
- /* Naked functions don't have epilogues. */
- return "";
-
- if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
- {
- rtx op;
-
- /* A volatile function should never return. Call abort. */
- op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
- assemble_external_libcall (op);
- output_asm_insn ("bl\t%a0", &op);
-
- return "";
- }
-
- /* If we are throwing an exception, then we really must be doing a
- return, so we can't tail-call. */
- gcc_assert (!crtl->calls_eh_return || really_return);
-
- offsets = arm_get_frame_offsets ();
- saved_regs_mask = offsets->saved_regs_mask;
-
- if (TARGET_IWMMXT)
- lrm_count = bit_count (saved_regs_mask);
-
- floats_offset = offsets->saved_args;
- /* Compute how far away the floats will be. */
- for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
- if (saved_regs_mask & (1 << reg))
- floats_offset += 4;
-
- if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
- {
- /* This variable is for the Virtual Frame Pointer, not VFP regs. */
- int vfp_offset = offsets->frame;
-
- if (TARGET_FPA_EMU2)
- {
- for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
- if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
- {
- floats_offset += 12;
- asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
- reg, FP_REGNUM, floats_offset - vfp_offset);
- }
- }
- else
- {
- start_reg = LAST_FPA_REGNUM;
-
- for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
- {
- if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
- {
- floats_offset += 12;
-
- /* We can't unstack more than four registers at once. */
- if (start_reg - reg == 3)
- {
- asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
- reg, FP_REGNUM, floats_offset - vfp_offset);
- start_reg = reg - 1;
- }
- }
- else
- {
- if (reg != start_reg)
- asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
- reg + 1, start_reg - reg,
- FP_REGNUM, floats_offset - vfp_offset);
- start_reg = reg - 1;
- }
- }
-
- /* Just in case the last register checked also needs unstacking. */
- if (reg != start_reg)
- asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
- reg + 1, start_reg - reg,
- FP_REGNUM, floats_offset - vfp_offset);
- }
-
- if (TARGET_HARD_FLOAT && TARGET_VFP)
- {
- int saved_size;
-
- /* The fldmd insns do not have base+offset addressing
- modes, so we use IP to hold the address. */
- saved_size = arm_get_vfp_saved_size ();
-
- if (saved_size > 0)
- {
- floats_offset += saved_size;
- asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
- FP_REGNUM, floats_offset - vfp_offset);
- }
- start_reg = FIRST_VFP_REGNUM;
- for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
- {
- if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
- && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
- {
- if (start_reg != reg)
- vfp_output_fldmd (f, IP_REGNUM,
- (start_reg - FIRST_VFP_REGNUM) / 2,
- (reg - start_reg) / 2);
- start_reg = reg + 2;
- }
- }
- if (start_reg != reg)
- vfp_output_fldmd (f, IP_REGNUM,
- (start_reg - FIRST_VFP_REGNUM) / 2,
- (reg - start_reg) / 2);
- }
-
- if (TARGET_IWMMXT)
- {
- /* The frame pointer is guaranteed to be non-double-word aligned.
- This is because it is set to (old_stack_pointer - 4) and the
- old_stack_pointer was double word aligned. Thus the offset to
- the iWMMXt registers to be loaded must also be non-double-word
- sized, so that the resultant address *is* double-word aligned.
- We can ignore floats_offset since that was already included in
- the live_regs_mask. */
- lrm_count += (lrm_count % 2 ? 2 : 1);
-
- for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
- if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
- {
- asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
- reg, FP_REGNUM, lrm_count * 4);
- lrm_count += 2;
- }
- }
-
- /* saved_regs_mask should contain the IP, which at the time of stack
- frame generation actually contains the old stack pointer. So a
- quick way to unwind the stack is just pop the IP register directly
- into the stack pointer. */
- gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
- saved_regs_mask &= ~ (1 << IP_REGNUM);
- saved_regs_mask |= (1 << SP_REGNUM);
-
- /* There are two registers left in saved_regs_mask - LR and PC. We
- only need to restore the LR register (the return address), but to
- save time we can load it directly into the PC, unless we need a
- special function exit sequence, or we are not really returning. */
- if (really_return
- && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
- && !crtl->calls_eh_return)
- /* Delete the LR from the register mask, so that the LR on
- the stack is loaded into the PC in the register mask. */
- saved_regs_mask &= ~ (1 << LR_REGNUM);
- else
- saved_regs_mask &= ~ (1 << PC_REGNUM);
-
- /* We must use SP as the base register, because SP is one of the
- registers being restored. If an interrupt or page fault
- happens in the ldm instruction, the SP might or might not
- have been restored. That would be bad, as then SP will no
- longer indicate the safe area of stack, and we can get stack
- corruption. Using SP as the base register means that it will
- be reset correctly to the original value, should an interrupt
- occur. If the stack pointer already points at the right
- place, then omit the subtraction. */
- if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
- || cfun->calls_alloca)
- asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
- 4 * bit_count (saved_regs_mask));
- print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
-
- if (IS_INTERRUPT (func_type))
- /* Interrupt handlers will have pushed the
- IP onto the stack, so restore it now. */
- print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
- }
- else
- {
- /* This branch is executed for ARM mode (non-apcs frames) and
- Thumb-2 mode. Frame layout is essentially the same for those
- cases, except that in ARM mode frame pointer points to the
- first saved register, while in Thumb-2 mode the frame pointer points
- to the last saved register.
-
- It is possible to make frame pointer point to last saved
- register in both cases, and remove some conditionals below.
- That means that fp setup in prologue would be just "mov fp, sp"
- and sp restore in epilogue would be just "mov sp, fp", whereas
- now we have to use add/sub in those cases. However, the value
- of that would be marginal, as both mov and add/sub are 32-bit
- in ARM mode, and it would require extra conditionals
- in arm_expand_prologue to distingish ARM-apcs-frame case
- (where frame pointer is required to point at first register)
- and ARM-non-apcs-frame. Therefore, such change is postponed
- until real need arise. */
- unsigned HOST_WIDE_INT amount;
- int rfe;
- /* Restore stack pointer if necessary. */
- if (TARGET_ARM && frame_pointer_needed)
- {
- operands[0] = stack_pointer_rtx;
- operands[1] = hard_frame_pointer_rtx;
-
- operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
- output_add_immediate (operands);
- }
- else
- {
- if (frame_pointer_needed)
- {
- /* For Thumb-2 restore sp from the frame pointer.
- Operand restrictions mean we have to incrememnt FP, then copy
- to SP. */
- amount = offsets->locals_base - offsets->saved_regs;
- operands[0] = hard_frame_pointer_rtx;
- }
- else
- {
- unsigned long count;
- operands[0] = stack_pointer_rtx;
- amount = offsets->outgoing_args - offsets->saved_regs;
- /* pop call clobbered registers if it avoids a
- separate stack adjustment. */
- count = offsets->saved_regs - offsets->saved_args;
- if (optimize_size
- && count != 0
- && !crtl->calls_eh_return
- && bit_count(saved_regs_mask) * 4 == count
- && !IS_INTERRUPT (func_type)
- && !IS_STACKALIGN (func_type)
- && !crtl->tail_call_emit)
- {
- unsigned long mask;
- /* Preserve return values, of any size. */
- mask = (1 << ((arm_size_return_regs() + 3) / 4)) - 1;
- mask ^= 0xf;
- mask &= ~saved_regs_mask;
- reg = 0;
- while (bit_count (mask) * 4 > amount)
- {
- while ((mask & (1 << reg)) == 0)
- reg++;
- mask &= ~(1 << reg);
- }
- if (bit_count (mask) * 4 == amount) {
- amount = 0;
- saved_regs_mask |= mask;
- }
- }
- }
-
- if (amount)
- {
- operands[1] = operands[0];
- operands[2] = GEN_INT (amount);
- output_add_immediate (operands);
- }
- if (frame_pointer_needed)
- asm_fprintf (f, "\tmov\t%r, %r\n",
- SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
- }
-
- if (TARGET_FPA_EMU2)
- {
- for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
- if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
- asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
- reg, SP_REGNUM);
- }
- else
- {
- start_reg = FIRST_FPA_REGNUM;
-
- for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
- {
- if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
- {
- if (reg - start_reg == 3)
- {
- asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
- start_reg, SP_REGNUM);
- start_reg = reg + 1;
- }
- }
- else
- {
- if (reg != start_reg)
- asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
- start_reg, reg - start_reg,
- SP_REGNUM);
-
- start_reg = reg + 1;
- }
- }
-
- /* Just in case the last register checked also needs unstacking. */
- if (reg != start_reg)
- asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
- start_reg, reg - start_reg, SP_REGNUM);
- }
-
- if (TARGET_HARD_FLOAT && TARGET_VFP)
- {
- int end_reg = LAST_VFP_REGNUM + 1;
-
- /* Scan the registers in reverse order. We need to match
- any groupings made in the prologue and generate matching
- pop operations. */
- for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
- {
- if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
- && (!df_regs_ever_live_p (reg + 1)
- || call_used_regs[reg + 1]))
- {
- if (end_reg > reg + 2)
- vfp_output_fldmd (f, SP_REGNUM,
- (reg + 2 - FIRST_VFP_REGNUM) / 2,
- (end_reg - (reg + 2)) / 2);
- end_reg = reg;
- }
- }
- if (end_reg > reg + 2)
- vfp_output_fldmd (f, SP_REGNUM, 0,
- (end_reg - (reg + 2)) / 2);
- }
-
- if (TARGET_IWMMXT)
- for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
- if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
- asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
-
- /* If we can, restore the LR into the PC. */
- if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
- && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
- && !IS_STACKALIGN (func_type)
- && really_return
- && crtl->args.pretend_args_size == 0
- && saved_regs_mask & (1 << LR_REGNUM)
- && !crtl->calls_eh_return)
- {
- saved_regs_mask &= ~ (1 << LR_REGNUM);
- saved_regs_mask |= (1 << PC_REGNUM);
- rfe = IS_INTERRUPT (func_type);
- }
- else
- rfe = 0;
-
- /* Load the registers off the stack. If we only have one register
- to load use the LDR instruction - it is faster. For Thumb-2
- always use pop and the assembler will pick the best instruction.*/
- if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
- && !IS_INTERRUPT(func_type))
- {
- asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
- }
- else if (saved_regs_mask)
- {
- if (saved_regs_mask & (1 << SP_REGNUM))
- /* Note - write back to the stack register is not enabled
- (i.e. "ldmfd sp!..."). We know that the stack pointer is
- in the list of registers and if we add writeback the
- instruction becomes UNPREDICTABLE. */
- print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
- rfe);
- else if (TARGET_ARM)
- print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
- rfe);
- else
- print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
- }
-
- if (crtl->args.pretend_args_size)
- {
- /* Unwind the pre-pushed regs. */
- operands[0] = operands[1] = stack_pointer_rtx;
- operands[2] = GEN_INT (crtl->args.pretend_args_size);
- output_add_immediate (operands);
- }
- }
-
- /* We may have already restored PC directly from the stack. */
- if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
- return "";
-
- /* Stack adjustment for exception handler. */
- if (crtl->calls_eh_return)
- asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
- ARM_EH_STACKADJ_REGNUM);
-
- /* Generate the return instruction. */
- switch ((int) ARM_FUNC_TYPE (func_type))
- {
- case ARM_FT_ISR:
- case ARM_FT_FIQ:
- asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
- break;
-
- case ARM_FT_EXCEPTION:
- asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
- break;
-
- case ARM_FT_INTERWORKED:
- asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
- break;
-
- default:
- if (IS_STACKALIGN (func_type))
- {
- /* See comment in arm_expand_prologue. */
- asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
- }
- if (arm_arch5 || arm_arch4t)
- asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
- else
- asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
- break;
- }
-
- return "";
-}
-
static void
arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
@@ -16228,13 +15563,9 @@ emit_multi_reg_push (unsigned long mask)
Unfortunately, since this insn does not reflect very well the actual
semantics of the operation, we need to annotate the insn for the benefit
- of DWARF2 frame unwind information.
-
- There's no reason why this couldn't be used for Thumb-1 or ARM, in theory,
- but currently the pattern that matches this in the MD file is only enabled
- for Thumb-2. */
+ of DWARF2 frame unwind information. */
static void
-thumb2_emit_multi_reg_pop (unsigned long saved_regs_mask, bool really_return)
+arm_emit_multi_reg_pop (unsigned long saved_regs_mask, bool really_return)
{
int num_regs = 0;
int i, j;
@@ -16303,7 +15634,7 @@ thumb2_emit_multi_reg_pop (unsigned long saved_regs_mask, bool really_return)
semantics of the operation, we need to annotate the insn for the benefit
of DWARF2 frame unwind information. */
static void
-thumb2_emit_vfp_multi_reg_pop (int first_reg, int num_regs)
+arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
{
int i, j;
rtx par;
@@ -16314,8 +15645,8 @@ thumb2_emit_vfp_multi_reg_pop (int first_reg, int num_regs)
if (num_regs > 16)
{
- thumb2_emit_vfp_multi_reg_pop (first_reg, 16);
- thumb2_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16);
+ arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
+ arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
return;
}
@@ -16326,8 +15657,8 @@ thumb2_emit_vfp_multi_reg_pop (int first_reg, int num_regs)
/* Increment the stack pointer, based on there being
num_regs 8-byte registers to restore. */
tmp = gen_rtx_SET (VOIDmode,
- stack_pointer_rtx,
- plus_constant (stack_pointer_rtx, 8 * num_regs));
+ base_reg,
+ plus_constant (base_reg, 8 * num_regs));
RTX_FRAME_RELATED_P (tmp) = 1;
XVECEXP (par, 0, 0) = tmp;
@@ -16340,8 +15671,7 @@ thumb2_emit_vfp_multi_reg_pop (int first_reg, int num_regs)
reg,
gen_frame_mem
(DFmode,
- plus_constant (stack_pointer_rtx,
- 8 * j)));
+ plus_constant (base_reg, 8 * j)));
RTX_FRAME_RELATED_P (tmp) = 1;
XVECEXP (par, 0, j + 1) = tmp;
@@ -22510,6 +21840,139 @@ thumb1_expand_epilogue (void)
/* Generate pattern *pop_multiple_with_stack_update_and_return if single
POP instruction can be generated. LR should be replaced by PC. All
the checks required are already done by USE_RETURN_INSN (). Hence,
+ all we need to do here is generate multi-reg pop and let return pattern
+ handle other instructions to be generated, if any. */
+void
+arm_expand_return (bool really_return)
+{
+  /* Pop the saved core registers as RTL.  If REALLY_RETURN is false, reload
+     the return address into LR, not PC, and emit no trailing return.  */
+  int i, return_reg;
+  unsigned long saved_regs_mask, func_type;
+  arm_stack_offsets *offsets;
+  unsigned HOST_WIDE_INT stack_adjust;
+  bool lr_saved, return_in_pop;
+
+  func_type = arm_current_func_type ();
+
+  /* No registers are restored in these cases; emit only the return.  */
+  if (IS_NAKED (func_type)
+      || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
+    {
+      emit_jump_insn (ret_rtx);
+      return;
+    }
+
+  gcc_assert (!cfun->calls_alloca || really_return);
+
+  offsets = arm_get_frame_offsets ();
+  saved_regs_mask = offsets->saved_regs_mask;
+  lr_saved = (saved_regs_mask & (1 << LR_REGNUM)) != 0;
+
+  if (saved_regs_mask)
+    {
+      /* If we do not have any special requirements for function exit (e.g.
+         interworking), then we can load the return address directly into
+         the PC.  Otherwise we must load it into LR.  */
+      if (really_return
+          && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK)
+          && lr_saved)
+        return_reg = PC_REGNUM;
+      else
+        return_reg = LR_REGNUM;
+
+      if (saved_regs_mask & (1 << IP_REGNUM))
+        {
+          /* There are three possible reasons for the IP register being saved.
+             1) a stack frame was created, in which case IP contains the old
+                stack pointer, or
+             2) an ISR routine corrupted it, or
+             3) it was saved to align the stack on iWMMXt.
+             In case 1, restore IP into SP, otherwise just restore IP.  */
+          if (frame_pointer_needed)
+            {
+              saved_regs_mask &= ~ (1 << IP_REGNUM);
+              saved_regs_mask |= (1 << SP_REGNUM);
+            }
+          else
+            gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
+        }
+
+      /* The restore itself returns when PC is in the mask or when the
+         saved LR is loaded straight into PC.  */
+      return_in_pop = ((saved_regs_mask & (1 << PC_REGNUM)) != 0
+                       || return_reg == PC_REGNUM);
+
+      /* On some ARM architectures it is faster to use LDR rather than LDM
+         to load a single register.  So identify single-register-restore
+         case.  */
+      for (i = 0; i <= LAST_ARM_REGNUM; i++)
+        if (saved_regs_mask == (1UL << i))
+          break;
+
+      if (i <= LAST_ARM_REGNUM
+          && (i != LR_REGNUM
+              || ! really_return
+              || ! IS_INTERRUPT (func_type)))
+        {
+          rtx addr, reg, set;
+
+          reg = gen_rtx_REG (SImode, (i == LR_REGNUM) ? return_reg : i);
+          /* No post-increment when SP itself is the register loaded.  */
+          if (saved_regs_mask & (1 << SP_REGNUM))
+            addr = gen_rtx_MEM (SImode, stack_pointer_rtx);
+          else
+            addr = gen_rtx_MEM (SImode,
+                                gen_rtx_POST_INC (SImode, stack_pointer_rtx));
+          set_mem_alias_set (addr, get_frame_alias_set ());
+          /* SET expressions are built with VOIDmode, as elsewhere here.  */
+          set = gen_rtx_SET (VOIDmode, reg, addr);
+          RTX_FRAME_RELATED_P (set) = 1;
+          if (return_in_pop)
+            {
+              rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+              XVECEXP (par, 0, 0) = ret_rtx;
+              XVECEXP (par, 0, 1) = set;
+              emit_jump_insn (par);
+            }
+          else
+            emit_insn (set);
+        }
+      else
+        {
+          unsigned long pop_mask;
+
+          if (saved_regs_mask & (1 << SP_REGNUM))
+            {
+              stack_adjust = offsets->outgoing_args - offsets->saved_regs;
+              gcc_assert (stack_adjust == 0 || stack_adjust == 4);
+              if (stack_adjust && !(arm_arch5 && TARGET_ARM))
+                saved_regs_mask |= 1 << 3;
+            }
+
+          /* Substitute the return register for LR only if LR was actually
+             saved; otherwise the pop would load a slot that was never
+             pushed.  */
+          pop_mask = saved_regs_mask;
+          if (lr_saved)
+            pop_mask = (pop_mask & ~(1UL << LR_REGNUM)) | (1UL << return_reg);
+          arm_emit_multi_reg_pop (pop_mask, return_in_pop);
+        }
+
+      /* Loading the saved LR into PC has already performed the return.  */
+      if (really_return && func_type != ARM_FT_INTERWORKED && lr_saved)
+        really_return = false;
+    }
+
+  /* output_return_instruction () will take care of actual instruction to be
+     emitted to return from function.  */
+  if (really_return)
+    emit_jump_insn (ret_rtx);
+}
+
+/* Generate pattern *pop_multiple_with_stack_update_and_return if single
+ POP instruction can be generated. LR should be replaced by PC. All
+ the checks required are already done by USE_RETURN_INSN (). Hence,
all we really need to check here is if single register is to be
returned, or multiple register return. */
void
@@ -22544,7 +22007,7 @@ thumb2_expand_return (void)
{
saved_regs_mask &= ~ (1 << LR_REGNUM);
saved_regs_mask |= (1 << PC_REGNUM);
- thumb2_emit_multi_reg_pop (saved_regs_mask, true);
+ arm_emit_multi_reg_pop (saved_regs_mask, true);
}
}
else
@@ -22553,13 +22016,405 @@ thumb2_expand_return (void)
}
}
+/* Generate RTL to represent ARM epilogue.  REALLY_RETURN is false when
+   expanding a sibcall epilogue and true for a normal function epilogue.  */
+void
+arm_expand_epilogue (bool really_return)
+{
+  unsigned long func_type;
+  unsigned long saved_regs_mask;
+  int num_regs = 0;
+  int i;
+  int amount;
+  int floats_from_frame = 0;
+  arm_stack_offsets *offsets;
+
+  func_type = arm_current_func_type ();
+
+  /* Naked functions don't have epilogue.  Hence, generate return pattern, and
+     let output_return_instruction take care of instruction emission if any.  */
+  if (IS_NAKED (func_type)
+      || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
+    {
+      emit_jump_insn (ret_rtx);
+      return;
+    }
+
+  gcc_assert (!crtl->calls_eh_return || really_return);
+
+  /* Get frame offsets for ARM.  */
+  offsets = arm_get_frame_offsets ();
+  saved_regs_mask = offsets->saved_regs_mask;
+
+  /* Find offset of floating point register from frame pointer.
+     The initialization is done in this way to take care of frame pointer and
+     static-chain register, if stored.  */
+  floats_from_frame = offsets->saved_regs - offsets->frame;
+
+  for (i = 0; i <= LAST_ARM_REGNUM; i++)
+    if (saved_regs_mask & (1 << i))
+      {
+        num_regs++;
+        floats_from_frame += 4;
+      }
+
+  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
+    {
+      if (TARGET_FPA_EMU2)
+        {
+          for (i = LAST_FPA_REGNUM; i >= FIRST_FPA_REGNUM; i--)
+            if (df_regs_ever_live_p (i) && !call_used_regs[i])
+              {
+                rtx addr;
+                floats_from_frame += 12;
+                addr = gen_rtx_MEM (XFmode,
+                                    gen_rtx_PLUS (SImode,
+                                                  hard_frame_pointer_rtx,
+                                                  GEN_INT (- floats_from_frame)));
+                set_mem_alias_set (addr, get_frame_alias_set ());
+                emit_insn (gen_rtx_SET (VOIDmode,
+                                        gen_rtx_REG (XFmode, i),
+                                        addr));
+              }
+        }
+      else
+        {
+          int idx = 0;
+          rtx load_seq[4];
+
+          for (i = LAST_FPA_REGNUM; i >= FIRST_FPA_REGNUM; i--)
+            {
+              floats_from_frame += 12;
+
+              if (idx == 4)
+                {
+                  emit_insn (gen_rtx_PARALLEL (VOIDmode,
+                                               gen_rtvec_v (idx, load_seq)));
+                  idx = 0;
+                }
+
+              if (df_regs_ever_live_p (i) && !call_used_regs[i])
+                {
+                  load_seq[idx] = gen_rtx_SET (VOIDmode,
+                                               gen_rtx_REG (XFmode, i),
+                                               gen_frame_mem (XFmode,
+                                                 plus_constant (hard_frame_pointer_rtx,
+                                                                - floats_from_frame)));
+                  idx++;
+                }
+              else
+                {
+                  if (idx)
+                    {
+                      /* Create parallel and emit.  */
+                      emit_insn (gen_rtx_PARALLEL (VOIDmode,
+                                                   gen_rtvec_v (idx, load_seq)));
+                      idx = 0;
+                    }
+                }
+            }
+
+          if (idx)
+            emit_insn (gen_rtx_PARALLEL (VOIDmode,
+                                         gen_rtvec_v (idx, load_seq)));
+        }
+
+      if (TARGET_HARD_FLOAT && TARGET_VFP)
+        {
+          int start_reg;
+          /* The offset is from IP_REGNUM.  */
+          emit_insn (gen_addsi3 (gen_rtx_REG (SImode, IP_REGNUM),
+                                 hard_frame_pointer_rtx,
+                                 GEN_INT (- floats_from_frame)));
+
+          /* Generate VFP register multi-pop.  */
+          start_reg = FIRST_VFP_REGNUM;
+
+          for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
+            /* Look for a case where a reg does not need restoring.  */
+            if ((!df_regs_ever_live_p (i) || call_used_regs[i])
+                && (!df_regs_ever_live_p (i + 1)
+                    || call_used_regs[i + 1]))
+              {
+                if (start_reg != i)
+                  arm_emit_vfp_multi_reg_pop (start_reg,
+                                              (i - start_reg) / 2,
+                                              gen_rtx_REG (SImode,
+                                                           IP_REGNUM));
+                start_reg = i + 2;  /* Skip the pair that needs no restore.  */
+              }
+
+          /* Restore the remaining regs that we have discovered (or possibly
+             even all of them, if the conditional in the for loop never
+             fired).  */
+          if (start_reg != i)
+            arm_emit_vfp_multi_reg_pop (start_reg,
+                                        (i - start_reg) / 2,
+                                        gen_rtx_REG (SImode, IP_REGNUM));
+        }
+
+      if (TARGET_IWMMXT)
+        {
+          /* The frame pointer is guaranteed to be non-double-word aligned, as
+             it is set to double-word-aligned old_stack_pointer - 4.  */
+          int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
+          for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
+            if (df_regs_ever_live_p (i) && !call_used_regs[i])
+              {
+                rtx addr = gen_frame_mem (V2SImode,
+                                          plus_constant (hard_frame_pointer_rtx,
+                                                         - lrm_count * 4));
+                emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
+                lrm_count += 2;
+              }
+        }
+
+      /* saved_regs_mask should contain the IP which contains old stack pointer
+         at the time of activation creation.  To unwind stack quickly, pop IP
+         in SP.  */
+      gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
+      saved_regs_mask &= ~ (1 << IP_REGNUM);
+      saved_regs_mask |= (1 << SP_REGNUM);
+
+      if (really_return
+          && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
+          && !crtl->calls_eh_return)
+        saved_regs_mask &= ~ (1 << LR_REGNUM);
+      else
+        saved_regs_mask &= ~ (1 << PC_REGNUM);
+
+      if (offsets->outgoing_args != (1 + num_regs)
+          || cfun->calls_alloca)
+        /* Unwind the stack till saved registers.  */
+        emit_insn (gen_addsi3 (stack_pointer_rtx,
+                               hard_frame_pointer_rtx,
+                               GEN_INT (- 4 * num_regs)));
+
+      arm_emit_multi_reg_pop (saved_regs_mask, really_return);
+
+      if (IS_INTERRUPT (func_type))
+        {
+          rtx addr = gen_rtx_MEM (SImode,
+                                  gen_rtx_POST_INC (SImode,
+                                                    stack_pointer_rtx));
+          set_mem_alias_set (addr, get_frame_alias_set ());
+          emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
+        }
+
+      if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
+        return;
+
+      if (crtl->calls_eh_return)
+        emit_insn (gen_addsi3 (stack_pointer_rtx,
+                               stack_pointer_rtx,
+                               gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
+
+      emit_jump_insn (ret_rtx);
+    }
+  else
+    {
+      if (frame_pointer_needed) /* Handles case for TARGET_ARM only.  */
+        {
+          /* In ARM mode, frame pointer points to first saved register.
+             Restore stack pointer to last saved register.  */
+          emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
+                                 hard_frame_pointer_rtx,
+                                 GEN_INT (offsets->frame - offsets->saved_regs)));
+          emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
+          /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
+             deleted.  */
+          emit_insn (gen_prologue_use (stack_pointer_rtx));
+        }
+      else
+        {
+          /* Pop off outgoing args and local frame to adjust stack pointer to
+             last saved register.  */
+          amount = offsets->outgoing_args - offsets->saved_regs;
+          if (amount)
+            {
+              emit_insn (gen_addsi3 (stack_pointer_rtx,
+                                     stack_pointer_rtx,
+                                     GEN_INT (amount)));
+              /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
+                 not deleted.  */
+              emit_insn (gen_prologue_use (stack_pointer_rtx));
+            }
+        }
+
+      if (TARGET_FPA_EMU2)
+        {
+          for (i = FIRST_FPA_REGNUM; i <= LAST_FPA_REGNUM; i++)
+            if (df_regs_ever_live_p (i) && !call_used_regs[i])
+              {
+                /* Generate memory reference with write-back to SP.  */
+                rtx addr = gen_rtx_MEM (XFmode,
+                                        gen_rtx_POST_INC (SImode,
+                                                          stack_pointer_rtx));
+                set_mem_alias_set (addr, get_frame_alias_set ());
+                emit_insn (gen_movxf (gen_rtx_REG (XFmode, i), addr));
+              }
+        }
+      else
+        {
+          int idx = 0;
+          rtx load_seq[5];
+
+          for (i = FIRST_FPA_REGNUM; i <= LAST_FPA_REGNUM; i++)
+            {
+              if (idx == 4)
+                {
+                  load_seq[0] = gen_rtx_SET (VOIDmode,
+                                             stack_pointer_rtx,
+                                             plus_constant (stack_pointer_rtx,
+                                                            12 * idx));
+                  emit_insn (gen_rtx_PARALLEL (VOIDmode,
+                                               gen_rtvec_v (idx + 1, load_seq)));
+                  idx = 0;
+                }
+
+              if (df_regs_ever_live_p (i) && !call_used_regs[i])
+                {
+                  load_seq[idx + 1] = gen_rtx_SET (VOIDmode,
+                                                   gen_rtx_REG (XFmode, i),
+                                                   gen_frame_mem (XFmode,
+                                                     plus_constant (stack_pointer_rtx,
+                                                                    12 * idx)));
+                  idx++;
+                }
+              else
+                {
+                  if (idx)
+                    {
+                      /* Create parallel and emit.  */
+                      load_seq[0] = gen_rtx_SET (VOIDmode,
+                                                 stack_pointer_rtx,
+                                                 plus_constant (stack_pointer_rtx,
+                                                                12 * idx));
+                      emit_insn (gen_rtx_PARALLEL (VOIDmode,
+                                                   gen_rtvec_v (idx + 1,
+                                                                load_seq)));
+                      idx = 0;
+                    }
+                }
+            }
+
+          if (idx)
+            {
+              load_seq[0] = gen_rtx_SET (VOIDmode,
+                                         stack_pointer_rtx,
+                                         plus_constant (stack_pointer_rtx,
+                                                        12 * idx));
+              emit_insn (gen_rtx_PARALLEL (VOIDmode,
+                                           gen_rtvec_v (idx + 1, load_seq)));
+              idx = 0;
+            }
+        }
+
+      if (TARGET_HARD_FLOAT && TARGET_VFP)
+        {
+          /* Generate VFP register multi-pop.  */
+          int end_reg = LAST_VFP_REGNUM + 1;
+
+          /* Scan the registers in reverse order.  We need to match
+             any groupings made in the prologue and generate matching
+             fldmdd operations.  The need to match groups is because,
+             unlike pop, fldmdd can only do consecutive regs.  */
+          for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
+            /* Look for a case where a reg does not need restoring.  */
+            if ((!df_regs_ever_live_p (i) || call_used_regs[i])
+                && (!df_regs_ever_live_p (i + 1)
+                    || call_used_regs[i + 1]))
+              {
+                /* Restore the regs discovered so far (from reg+2 to
+                   end_reg).  */
+                if (end_reg > i + 2)
+                  arm_emit_vfp_multi_reg_pop (i + 2,
+                                              (end_reg - (i + 2)) / 2,
+                                              stack_pointer_rtx);
+                end_reg = i;
+              }
+
+          /* Restore the remaining regs that we have discovered (or possibly
+             even all of them, if the conditional in the for loop never
+             fired).  */
+          if (end_reg > i + 2)
+            arm_emit_vfp_multi_reg_pop (i + 2,
+                                        (end_reg - (i + 2)) / 2,
+                                        stack_pointer_rtx);
+        }
+
+      if (TARGET_IWMMXT)
+        for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
+          if (df_regs_ever_live_p (i) && !call_used_regs[i])
+            {
+              rtx addr = gen_rtx_MEM (V2SImode,
+                                      gen_rtx_POST_INC (SImode,
+                                                        stack_pointer_rtx));
+              set_mem_alias_set (addr, get_frame_alias_set ());
+              emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
+            }
+
+      if (saved_regs_mask)
+        {
+          bool return_in_pc = false;
+
+          if (num_regs == 1)
+            {
+              for (i = 0; i <= LAST_ARM_REGNUM; i++)
+                if (saved_regs_mask & (1 << i))
+                  {
+                    rtx addr = gen_rtx_MEM (SImode,
+                                            gen_rtx_POST_INC (SImode,
+                                                              stack_pointer_rtx));
+                    set_mem_alias_set (addr, get_frame_alias_set ());
+                    emit_insn (gen_movsi (gen_rtx_REG (SImode, i), addr));
+                  }
+            }
+          else
+            {
+              if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
+                  && TARGET_ARM
+                  && !IS_STACKALIGN (func_type)
+                  && really_return
+                  && crtl->args.pretend_args_size == 0
+                  && saved_regs_mask & (1 << LR_REGNUM)
+                  && !crtl->calls_eh_return)
+                {
+                  saved_regs_mask &= ~ (1 << LR_REGNUM);
+                  saved_regs_mask |= (1 << PC_REGNUM);
+                  return_in_pc = true;
+                }
+
+              arm_emit_multi_reg_pop (saved_regs_mask, return_in_pc);
+              if (return_in_pc)
+                return;
+            }
+        }
+
+      if (crtl->args.pretend_args_size)
+        emit_insn (gen_addsi3 (stack_pointer_rtx,
+                               stack_pointer_rtx,
+                               GEN_INT (crtl->args.pretend_args_size)));
+
+      if (!really_return)
+        return;
+
+      if (crtl->calls_eh_return)
+        emit_insn (gen_addsi3 (stack_pointer_rtx,
+                               stack_pointer_rtx,
+                               gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
+
+      emit_jump_insn (ret_rtx);
+    }
+}
+
+
/* Generate RTL to represent a Thumb-2 epilogue.
Note that this RTL does not include the
Return insn, which is created separately and
handled in thumb2_output_return. */
void
-thumb2_expand_epilogue (void)
+thumb2_expand_epilogue (bool is_sibling)
{
HOST_WIDE_INT amount;
int reg;
@@ -22628,20 +22483,23 @@ thumb2_expand_epilogue (void)
{
/* Restore the regs discovered so far (from reg+2 to end_reg). */
if (end_reg > reg + 2)
- thumb2_emit_vfp_multi_reg_pop (reg + 2,
- (end_reg - (reg + 2)) / 2);
+ arm_emit_vfp_multi_reg_pop (reg + 2,
+ (end_reg - (reg + 2)) / 2,
+ stack_pointer_rtx);
end_reg = reg;
}
/* Restore the remaining regs that we have discovered (or possibly
even all of them, if the conditional in the for loop never fired). */
if (end_reg > reg + 2)
- thumb2_emit_vfp_multi_reg_pop (reg + 2, (end_reg - (reg + 2)) / 2);
+ arm_emit_vfp_multi_reg_pop (reg + 2,
+ (end_reg - (reg + 2)) / 2,
+ stack_pointer_rtx);
}
/* iWMMXt is not supported when Thumb-2 in use. If it were, we would
want to be restoring the appropriate iWMMXt regs here, in a similar
- way to arm_output_epilogue. */
+ way to arm_expand_epilogue. */
/* If there are registers to restore, make it happen. */
if (saved_regs_mask)
@@ -22667,6 +22525,7 @@ thumb2_expand_epilogue (void)
return-address' instruction. Instead, pop LR in PC. */
if (ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
&& !IS_STACKALIGN (func_type)
+ && !is_sibling
&& crtl->args.pretend_args_size == 0
&& saved_regs_mask & (1 << LR_REGNUM)
&& !crtl->calls_eh_return)
@@ -22676,7 +22535,7 @@ thumb2_expand_epilogue (void)
really_return = true;
}
- thumb2_emit_multi_reg_pop (saved_regs_mask, really_return);
+ arm_emit_multi_reg_pop (saved_regs_mask, really_return);
if (really_return == true)
return;
}
@@ -22688,6 +22547,9 @@ thumb2_expand_epilogue (void)
stack_pointer_rtx,
GEN_INT (crtl->args.pretend_args_size)));
+ if (is_sibling)
+ return;
+
/* Stack adjustment for exception handler. */
if (crtl->calls_eh_return)
emit_insn (gen_addsi3 (stack_pointer_rtx,
@@ -6682,26 +6682,43 @@
(plus:SI (match_dup 1)
(match_operand:SI 2 "const_int_operand" "I")))
])]
- "TARGET_THUMB2"
+ "TARGET_32BIT"
"*
{
int i;
char pattern[100];
int num_saves = XVECLEN (operands[0], 0);
- if (REGNO (operands[1]) == SP_REGNUM)
+ if (TARGET_THUMB2)
{
- strcpy (pattern, \"pop\\t{\");
+ if (REGNO (operands[1]) == SP_REGNUM)
+ {
+ strcpy (pattern, \"pop%?\\t{\");
+ }
+ else
+ {
+ strcpy (pattern, \"ldm%(ia%)\\t\");
+ strcat (pattern, reg_names[REGNO (operands[1])]);
+ strcat (pattern, \"!, {\");
+ }
}
else
{
- strcpy (pattern, \"ldm%(ia%)\\t\");
+ strcpy (pattern, \"ldm%(fd%)\\t\");
strcat (pattern, reg_names[REGNO (operands[1])]);
- strcat (pattern, \"!, {\");
+
+ for (i = 2; i < num_saves; i++)
+ if (REGNO (XEXP (XVECEXP (operands[0], 0, i), 0)) == SP_REGNUM)
+ break;
+
+ if (i != num_saves)
+ strcat (pattern, \", {\");
+ else
+ strcat (pattern, \"!, {\");
}
strcat (pattern,
- reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, 1), 0))]);
+ reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, 1), 0))]);
/* Skip over the first element and the one we just generated. */
for (i = 2; i < (num_saves); i++)
@@ -6717,7 +6734,8 @@
return \"\";
}
"
- [(set_attr "type" "load4")]
+ [(set_attr "type" "load4")
+ (set_attr "predicable" "yes")]
)
;; Pop with return (as used in epilogue RTL)
@@ -6733,14 +6751,31 @@
(plus:SI (match_dup 1)
(match_operand:SI 2 "const_int_operand" "I")))
])]
- "TARGET_THUMB2"
+ "TARGET_32BIT"
"*
{
int i;
char pattern[100];
int num_saves = XVECLEN (operands[0], 0);
- strcpy (pattern, \"pop\\t{\");
+ if (TARGET_THUMB2)
+ {
+ strcpy (pattern, \"pop%?\\t{\");
+ }
+ else
+ {
+ strcpy (pattern, \"ldm%(fd%)\\t\");
+ strcat (pattern, reg_names[REGNO (operands[1])]);
+ for (i = 3; i < num_saves; i++)
+ if (REGNO (XEXP (XVECEXP (operands[0], 0, i), 0)) == SP_REGNUM)
+ break;
+
+ if (i != num_saves)
+ strcat (pattern, \", {\");
+ else
+ strcat (pattern, \"!, {\");
+ }
+
strcat (pattern,
reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, 2), 0))]);
@@ -6753,19 +6788,24 @@
}
strcat (pattern, \"}\");
+
+ if (IS_INTERRUPT (arm_current_func_type ()))
+ strcat (pattern, \"^\");
+
output_asm_insn (pattern, operands);
return \"\";
}
"
- [(set_attr "type" "load4")]
+ [(set_attr "type" "load4")
+ (set_attr "predicable" "yes")]
)
-(define_insn "*thumb2_ldr_with_return"
+(define_insn "*ldr_with_return"
[(return)
(set (reg:SI PC_REGNUM)
(mem:SI (post_inc:SI (match_operand:SI 0 "s_register_operand" "+k"))))]
- "TARGET_THUMB2"
+ "TARGET_32BIT"
"ldr%?\t%|pc, [%0], #4"
[(set_attr "type" "load1")
(set_attr "predicable" "yes")]
@@ -6773,12 +6813,12 @@
(define_insn "*vfp_pop_multiple_with_stack_update"
[(match_parallel 0 "load_multiple_operation_fp"
- [(set (match_operand:SI 1 "s_register_operand" "+k")
+ [(set (match_operand:SI 1 "s_register_operand" "+rk")
(plus:SI (match_dup 1)
(match_operand:SI 2 "const_int_operand" "I")))
(set (match_operand:DF 3 "arm_hard_register_operand" "")
(mem:DF (match_dup 1)))])]
- "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP"
+ "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"*
{
int num_regs = XVECLEN (operands[0], 0);
@@ -6811,7 +6851,9 @@
return \"\";
}
"
- [(set_attr "type" "load4")]
+ [(set_attr "type" "load4")
+ (set_attr "conds" "unconditional")
+ (set_attr "predicable" "no")]
)
(define_expand "store_multiple"
@@ -8642,13 +8684,18 @@
thumb2_expand_return ();
DONE;
}
+ else if (TARGET_ARM)
+ {
+ arm_expand_return (true);
+ DONE;
+ }
}
")
;; Often the return insn will be the same as loading from memory, so set attr
(define_insn "*arm_return"
[(return)]
- "TARGET_ARM && USE_RETURN_INSN (FALSE)"
+ "TARGET_ARM"
"*
{
if (arm_ccfsm_state == 2)
@@ -8658,53 +8705,11 @@
}
return output_return_instruction (const_true_rtx, TRUE, FALSE);
}"
- [(set_attr "type" "load1")
- (set_attr "length" "12")
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")
(set_attr "predicable" "yes")]
)
-(define_insn "*cond_return"
- [(set (pc)
- (if_then_else (match_operator 0 "arm_comparison_operator"
- [(match_operand 1 "cc_register" "") (const_int 0)])
- (return)
- (pc)))]
- "TARGET_ARM && USE_RETURN_INSN (TRUE)"
- "*
- {
- if (arm_ccfsm_state == 2)
- {
- arm_ccfsm_state += 2;
- return \"\";
- }
- return output_return_instruction (operands[0], TRUE, FALSE);
- }"
- [(set_attr "conds" "use")
- (set_attr "length" "12")
- (set_attr "type" "load1")]
-)
-
-(define_insn "*cond_return_inverted"
- [(set (pc)
- (if_then_else (match_operator 0 "arm_comparison_operator"
- [(match_operand 1 "cc_register" "") (const_int 0)])
- (pc)
- (return)))]
- "TARGET_ARM && USE_RETURN_INSN (TRUE)"
- "*
- {
- if (arm_ccfsm_state == 2)
- {
- arm_ccfsm_state += 2;
- return \"\";
- }
- return output_return_instruction (operands[0], TRUE, TRUE);
- }"
- [(set_attr "conds" "use")
- (set_attr "length" "12")
- (set_attr "type" "load1")]
-)
-
;; Generate a sequence of instructions to determine if the processor is
;; in 26-bit or 32-bit mode, and return the appropriate return address
;; mask.
@@ -10684,20 +10689,27 @@
if (crtl->calls_eh_return)
emit_insn (gen_prologue_use (gen_rtx_REG (Pmode, 2)));
if (TARGET_THUMB1)
+ {
thumb1_expand_epilogue ();
+ emit_jump_insn (gen_rtx_UNSPEC_VOLATILE (VOIDmode,
+ gen_rtvec (1, ret_rtx), VUNSPEC_EPILOGUE));
+ DONE;
+ }
else if (TARGET_THUMB2)
{
- thumb2_expand_epilogue ();
+ thumb2_expand_epilogue (false);
DONE;
}
else if (USE_RETURN_INSN (FALSE))
- {
- emit_jump_insn (gen_return ());
- DONE;
- }
- emit_jump_insn (gen_rtx_UNSPEC_VOLATILE (VOIDmode,
- gen_rtvec (1, ret_rtx), VUNSPEC_EPILOGUE));
- DONE;
+ {
+ arm_expand_return (true);
+ DONE;
+ }
+ else if (TARGET_ARM)
+ {
+ arm_expand_epilogue (true);
+ DONE;
+ }
"
)
@@ -10713,31 +10725,23 @@
;; to add an unspec of the link register to ensure that flow
;; does not think that it is unused by the sibcall branch that
;; will replace the standard function epilogue.
-(define_insn "sibcall_epilogue"
+(define_expand "sibcall_epilogue"
[(parallel [(unspec:SI [(reg:SI LR_REGNUM)] UNSPEC_PROLOGUE_USE)
(unspec_volatile [(return)] VUNSPEC_EPILOGUE)])]
"TARGET_32BIT"
- "*
- if (use_return_insn (FALSE, next_nonnote_insn (insn)))
- return output_return_instruction (const_true_rtx, FALSE, FALSE);
- return arm_output_epilogue (next_nonnote_insn (insn));
"
-;; Length is absolute worst case
- [(set_attr "length" "44")
- (set_attr "type" "block")
- ;; We don't clobber the conditions, but the potential length of this
- ;; operation is sufficient to make conditionalizing the sequence
- ;; unlikely to be profitable.
- (set_attr "conds" "clob")]
+ if (TARGET_ARM)
+ arm_expand_epilogue (false);
+ else if (TARGET_THUMB2)
+ thumb2_expand_epilogue (true);
+ DONE;
+ "
)
(define_insn "*epilogue_insns"
[(unspec_volatile [(return)] VUNSPEC_EPILOGUE)]
- "TARGET_ARM || TARGET_THUMB1"
+ "TARGET_THUMB1"
"*
- if (TARGET_32BIT)
- return arm_output_epilogue (NULL);
- else /* TARGET_THUMB1 */
return thumb1_unexpanded_epilogue ();
"
; Length is absolute worst case