From patchwork Mon Nov 7 17:24:16 2011
From: Sameera Deshpande
X-Patchwork-Id: 124147
Subject: Re: [RFA/ARM][Patch 01/02]: Thumb2 epilogue in RTL
To: Paul Brook
Cc: Ramana Radhakrishnan, gcc-patches@gcc.gnu.org, nickc@redhat.com,
 Richard Earnshaw
In-Reply-To: <201111070956.15291.paul@codesourcery.com>
References: <4e83484c.03c7640a.2591.10bdSMTPIN_ADDED@mx.google.com>
 <1320659204.30897.85.camel@e102549-lin.cambridge.arm.com>
 <201111070956.15291.paul@codesourcery.com>
Date: Mon, 07 Nov 2011 17:24:16 +0000
Message-ID: <1320686656.30897.105.camel@e102549-lin.cambridge.arm.com>

On Mon, 2011-11-07 at 09:56 +0000, Paul Brook wrote:
> > The array REGISTER_NAMES in aout.h uses S0, S2, ... names for double
> > registers. Is there any way to use OVERLAPPING_REGISTER_NAMES? If that
> > can be done, I can eliminate the table here.
>
> You should be using %P.
>

Paul,

Thanks for your comment. Please find the reworked patch attached. The
patch has been tested with check-gcc and shows no regressions.

- Thanks and regards,
  Sameera D.

diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 23a29c6..2c38883 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -65,6 +65,7 @@ extern int thumb1_legitimate_address_p (enum machine_mode, rtx, int);
 extern int arm_const_double_rtx (rtx);
 extern int neg_const_double_rtx_ok_for_fpa (rtx);
 extern int vfp3_const_double_rtx (rtx);
+extern bool load_multiple_operation_p (rtx, bool, enum machine_mode, bool);
 extern int neon_immediate_valid_for_move (rtx, enum machine_mode, rtx *, int *);
 extern int neon_immediate_valid_for_logic (rtx, enum machine_mode, int, rtx *, int *);
@@ -176,10 +177,13 @@ extern int arm_float_words_big_endian (void);
 
 /* Thumb functions.  */
 extern void arm_init_expanders (void);
-extern const char *thumb_unexpanded_epilogue (void);
+extern const char *thumb1_unexpanded_epilogue (void);
 extern void thumb1_expand_prologue (void);
 extern void thumb1_expand_epilogue (void);
 extern const char *thumb1_output_interwork (void);
+extern void thumb2_expand_epilogue (void);
+extern void thumb2_output_return (rtx);
+extern void thumb2_expand_return (void);
 #ifdef TREE_CODE
 extern int is_called_in_ARM_mode (tree);
 #endif
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index e07c8c3..ec87892 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -8906,6 +8906,137 @@ neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
 #undef CHECK
 }
 
+/* Return true if OP is a valid load multiple operation for mode MODE.
+   CONSECUTIVE is true if the registers in the operation must form a
+   consecutive sequence in the register bank.  RETURN_PC is true if the
+   value is to be loaded into the PC.  */
+bool
+load_multiple_operation_p (rtx op, bool consecutive, enum machine_mode mode,
+                           bool return_pc)
+{
+  HOST_WIDE_INT count = XVECLEN (op, 0);
+  unsigned dest_regno, first_dest_regno;
+  rtx src_addr;
+  HOST_WIDE_INT i = 1, base = 0;
+  HOST_WIDE_INT offset = 0;
+  rtx elt;
+  bool addr_reg_loaded = false;
+  bool update = false;
+  int reg_increment, regs_per_val;
+  int offset_adj;
+
+  /* If DFmode, we must be asking for consecutive,
+     since fldmdd can only do consecutive regs.  */
+  gcc_assert ((mode != DFmode) || consecutive);
+
+  /* Set up the increments and the regs per val based on the mode.  */
+  reg_increment = GET_MODE_SIZE (mode);
+  regs_per_val = mode == DFmode ? 2 : 1;
+  offset_adj = return_pc ? 1 : 0;
+
+  if (count <= 1
+      || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
+      || !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj))))
+    return false;
+
+  /* Check to see if this might be a write-back.  */
+  if (GET_CODE (SET_SRC (elt = XVECEXP (op, 0, offset_adj))) == PLUS)
+    {
+      i++;
+      base = 1;
+      update = true;
+
+      /* The offset adjustment must be the number of registers being
+         popped times the size of a single register.  */
+      if (!REG_P (SET_DEST (elt))
+          || !REG_P (XEXP (SET_SRC (elt), 0))
+          || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
+          || INTVAL (XEXP (SET_SRC (elt), 1)) !=
+             ((count - 1 - offset_adj) * reg_increment))
+        return false;
+    }
+
+  i = i + offset_adj;
+  base = base + offset_adj;
+  /* Perform a quick check so we don't blow up below.  */
+  if (GET_CODE (XVECEXP (op, 0, i - 1)) != SET
+      || !REG_P (SET_DEST (XVECEXP (op, 0, i - 1)))
+      || !MEM_P (SET_SRC (XVECEXP (op, 0, i - 1))))
+    return false;
+
+  /* If only one reg is being loaded, success depends on the mode:
+     FLDMDD can do just one reg, LDM must do at least two.  */
+  if (count <= i)
+    return mode == DFmode;
+
+  first_dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, i - 1)));
+  dest_regno = first_dest_regno;
+
+  src_addr = XEXP (SET_SRC (XVECEXP (op, 0, i - 1)), 0);
+
+  if (GET_CODE (src_addr) == PLUS)
+    {
+      if (!CONST_INT_P (XEXP (src_addr, 1)))
+        return false;
+      offset = INTVAL (XEXP (src_addr, 1));
+      src_addr = XEXP (src_addr, 0);
+    }
+
+  if (!REG_P (src_addr))
+    return false;
+
+  /* The pattern we are trying to match here is:
+     [(SET (R_d0) (MEM (PLUS (src_addr) (offset))))
+      (SET (R_d1) (MEM (PLUS (src_addr) (offset + reg_increment))))
+       :
+       :
+      (SET (R_dn) (MEM (PLUS (src_addr) (offset + n * reg_increment))))
+     ]
+     where:
+     1.  If offset is 0, the first insn should be
+         (SET (R_d0) (MEM (src_addr))).
+     2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
+     3.  If consecutive is TRUE, then for the kth register being loaded,
+         REGNO (R_dk) = REGNO (R_d0) + k.  */
+  for (; i < count; i++)
+    {
+      elt = XVECEXP (op, 0, i);
+
+      if (GET_CODE (elt) != SET
+          || !REG_P (SET_DEST (elt))
+          || GET_MODE (SET_DEST (elt)) != mode
+          || (consecutive
+              && (REGNO (SET_DEST (elt))
+                  != (unsigned int) (first_dest_regno
+                                     + regs_per_val * (i - base))))
+          || REGNO (SET_DEST (elt)) <= dest_regno
+          || !MEM_P (SET_SRC (elt))
+          || GET_MODE (SET_SRC (elt)) != mode
+          || ((GET_CODE (XEXP (SET_SRC (elt), 0)) != PLUS
+               || !rtx_equal_p (XEXP (XEXP (SET_SRC (elt), 0), 0), src_addr)
+               || !CONST_INT_P (XEXP (XEXP (SET_SRC (elt), 0), 1))
+               || INTVAL (XEXP (XEXP (SET_SRC (elt), 0), 1)) !=
+                  (offset + (i - base) * reg_increment))
+              && (!REG_P (XEXP (SET_SRC (elt), 0))
+                  || offset + (i - base) * reg_increment != 0)))
+        return false;
+
+      dest_regno = REGNO (SET_DEST (elt));
+      if (dest_regno == REGNO (src_addr))
+        addr_reg_loaded = true;
+    }
+
+  if (update && addr_reg_loaded)
+    return false;
+
+  /* For Thumb-1, the address register is always modified - either by
+     write-back or by an explicit load.  If the pattern does not describe
+     an update, it must be because the address register is in the list of
+     loaded registers.  */
+  if (TARGET_THUMB1)
+    return update || addr_reg_loaded;
+
+  return true;
+}
+
 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
    VMVN) immediate.  Write back width per element to *ELEMENTWIDTH (or zero for
    float elements), and a modified constant (whatever should be output for a
@@ -16092,6 +16223,137 @@ emit_multi_reg_push (unsigned long mask)
   return par;
 }
 
+/* Generate and emit an insn pattern that we will recognize as a pop_multi.
+   SAVED_REGS_MASK shows which registers need to be restored.
+
+   Unfortunately, since this insn does not reflect the actual semantics of
+   the operation very well, we need to annotate the insn for the benefit
+   of DWARF2 frame unwind information.
+
+   There's no reason why this couldn't be used for Thumb-1 or ARM, in
+   theory, but currently the pattern that matches this in the MD file is
+   only enabled for Thumb-2.  */
+static void
+thumb2_emit_multi_reg_pop (unsigned long saved_regs_mask, bool really_return)
+{
+  int num_regs = 0;
+  int i, j;
+  rtx par;
+  rtx dwarf = NULL_RTX;
+  rtx tmp, reg;
+  int offset_adj = really_return ? 1 : 0;
+
+  for (i = 0; i <= LAST_ARM_REGNUM; i++)
+    if (saved_regs_mask & (1 << i))
+      num_regs++;
+
+  gcc_assert (num_regs && num_regs <= 16);
+
+  /* The parallel needs to hold num_regs SETs
+     and one SET for the stack update.  */
+  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1 + offset_adj));
+
+  if (really_return)
+    {
+      tmp = ret_rtx;
+      XVECEXP (par, 0, 0) = tmp;
+    }
+
+  /* Increment the stack pointer, based on there being
+     num_regs 4-byte registers to restore.  */
+  tmp = gen_rtx_SET (VOIDmode,
+                     stack_pointer_rtx,
+                     plus_constant (stack_pointer_rtx, 4 * num_regs));
+  RTX_FRAME_RELATED_P (tmp) = 1;
+  XVECEXP (par, 0, offset_adj) = tmp;
+
+  /* Now restore every reg, which may include PC.  */
+  for (j = 0, i = 0; j < num_regs; i++)
+    if (saved_regs_mask & (1 << i))
+      {
+        reg = gen_rtx_REG (SImode, i);
+        tmp = gen_rtx_SET (VOIDmode,
+                           reg,
+                           gen_frame_mem
+                           (SImode,
+                            plus_constant (stack_pointer_rtx, 4 * j)));
+        RTX_FRAME_RELATED_P (tmp) = 1;
+        XVECEXP (par, 0, j + 1 + offset_adj) = tmp;
+
+        /* We need to maintain a sequence for the DWARF info too.  As the
+           DWARF info should not include the PC, skip it here.  */
+        if (i != PC_REGNUM)
+          dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
+
+        j++;
+      }
+
+  if (really_return)
+    par = emit_jump_insn (par);
+  else
+    par = emit_insn (par);
+
+  REG_NOTES (par) = dwarf;
+}
+
+/* Generate and emit an insn pattern that we will recognize as a pop_multi
+   of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
+
+   Unfortunately, since this insn does not reflect the actual semantics of
+   the operation very well, we need to annotate the insn for the benefit
+   of DWARF2 frame unwind information.  */
+static void
+thumb2_emit_vfp_multi_reg_pop (int first_reg, int num_regs)
+{
+  int i, j;
+  rtx par;
+  rtx dwarf = NULL_RTX;
+  rtx tmp, reg;
+
+  gcc_assert (num_regs && num_regs <= 32);
+
+  if (num_regs > 16)
+    {
+      thumb2_emit_vfp_multi_reg_pop (first_reg, 16);
+      thumb2_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16);
+      return;
+    }
+
+  /* The parallel needs to hold num_regs SETs
+     and one SET for the stack update.  */
+  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
+
+  /* Increment the stack pointer, based on there being
+     num_regs 8-byte registers to restore.  */
+  tmp = gen_rtx_SET (VOIDmode,
+                     stack_pointer_rtx,
+                     plus_constant (stack_pointer_rtx, 8 * num_regs));
+  RTX_FRAME_RELATED_P (tmp) = 1;
+  XVECEXP (par, 0, 0) = tmp;
+
+  /* Now show every reg that will be restored, using a SET for each.  */
+  for (j = 0, i = first_reg; j < num_regs; i += 2)
+    {
+      reg = gen_rtx_REG (DFmode, i);
+
+      tmp = gen_rtx_SET (VOIDmode,
+                         reg,
+                         gen_frame_mem
+                         (DFmode,
+                          plus_constant (stack_pointer_rtx, 8 * j)));
+      RTX_FRAME_RELATED_P (tmp) = 1;
+      XVECEXP (par, 0, j + 1) = tmp;
+
+      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
+
+      j++;
+    }
+
+  par = emit_insn (par);
+  REG_NOTES (par) = dwarf;
+}
+
 /* Calculate the size of the return value that is passed in registers.  */
 static unsigned
 arm_size_return_regs (void)
@@ -21622,7 +21884,7 @@ thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
 
 /* The bits which aren't usefully expanded as rtl.  */
 const char *
-thumb_unexpanded_epilogue (void)
+thumb1_unexpanded_epilogue (void)
 {
   arm_stack_offsets *offsets;
   int regno;
@@ -22191,7 +22453,6 @@ thumb1_expand_prologue (void)
       cfun->machine->lr_save_eliminated = 0;
     }
 
-
 void
 thumb1_expand_epilogue (void)
 {
@@ -22246,6 +22507,242 @@ thumb1_expand_epilogue (void)
   emit_use (gen_rtx_REG (SImode, LR_REGNUM));
 }
 
+/* Generate the pattern *pop_multiple_with_stack_update_and_return when a
+   single POP instruction suffices; LR is replaced by PC in the register
+   mask.  All the required checks have already been done by
+   USE_RETURN_INSN (), so all we really need to decide here is whether a
+   single register or multiple registers are being popped.  */
+void
+thumb2_expand_return (void)
+{
+  int i, num_regs;
+  unsigned long saved_regs_mask;
+  arm_stack_offsets *offsets;
+
+  offsets = arm_get_frame_offsets ();
+  saved_regs_mask = offsets->saved_regs_mask;
+
+  for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
+    if (saved_regs_mask & (1 << i))
+      num_regs++;
+
+  if (saved_regs_mask)
+    {
+      if (num_regs == 1)
+        {
+          rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+          rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
+          rtx addr = gen_rtx_MEM (SImode,
+                                  gen_rtx_POST_INC (SImode,
+                                                    stack_pointer_rtx));
+          set_mem_alias_set (addr, get_frame_alias_set ());
+          XVECEXP (par, 0, 0) = ret_rtx;
+          XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
+          RTX_FRAME_RELATED_P (par) = 1;
+          emit_jump_insn (par);
+        }
+      else
+        {
+          saved_regs_mask &= ~(1 << LR_REGNUM);
+          saved_regs_mask |= (1 << PC_REGNUM);
+          thumb2_emit_multi_reg_pop (saved_regs_mask, true);
+        }
+    }
+  else
+    {
+      emit_jump_insn (ret_rtx);
+    }
+}
+
+/* Generate RTL to represent a Thumb-2 epilogue.
+
+   Note that this RTL does not include the return insn, which is created
+   separately and handled in thumb2_output_return.  */
+void
+thumb2_expand_epilogue (void)
+{
+  HOST_WIDE_INT amount;
+  int reg;
+  unsigned long saved_regs_mask;
+  unsigned long func_type;
+  int i;
+  arm_stack_offsets *offsets;
+  int num_regs = 0;
+  bool really_return = false;
+
+  func_type = arm_current_func_type ();
+
+  /* Naked functions don't have epilogues.  */
+  if (IS_NAKED (func_type)
+      || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
+    {
+      emit_jump_insn (ret_rtx);
+      return;
+    }
+
+  /* At the end of the code of a function, the stack pointer will
+     be pointing at the outgoing args area, so we first need to
+     get it to point at the saved_regs area.  */
+
+  /* Determine how much to add to the stack pointer.  */
+  offsets = arm_get_frame_offsets ();
+  saved_regs_mask = offsets->saved_regs_mask;
+
+  for (i = 0; i <= LAST_ARM_REGNUM; i++)
+    if (saved_regs_mask & (1 << i))
+      num_regs++;
+
+  /* In Thumb-2 mode, the frame pointer points to the last saved
+     register.  */
+  amount = offsets->outgoing_args - offsets->saved_regs;
+
+  if (frame_pointer_needed)
+    {
+      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
+      amount = offsets->locals_base - offsets->saved_regs;
+    }
+
+  gcc_assert (amount >= 0);
+  if (amount)
+    emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+                           GEN_INT (amount)));
+
+  /* Emit a USE (stack_pointer_rtx), so that
+     the stack adjustment will not be deleted.  */
+  emit_insn (gen_prologue_use (stack_pointer_rtx));
+
+  /* Now handle any VFP restoration.  */
+  if (TARGET_HARD_FLOAT && TARGET_VFP)
+    {
+      int end_reg = LAST_VFP_REGNUM + 1;
+
+      /* Scan the registers in reverse order.  We need to match
+         any groupings made in the prologue and generate matching
+         fldmdd operations.  The need to match groups is because,
+         unlike pop, fldmdd can only do consecutive regs.  */
+      for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
+        /* Look for a case where a reg does not need restoring.  */
+        if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
+            && (!df_regs_ever_live_p (reg + 1)
+                || call_used_regs[reg + 1]))
+          {
+            /* Restore the regs discovered so far (from reg+2 to
+               end_reg).  */
+            if (end_reg > reg + 2)
+              thumb2_emit_vfp_multi_reg_pop (reg + 2,
+                                             (end_reg - (reg + 2)) / 2);
+            end_reg = reg;
+          }
+
+      /* Restore the remaining regs that we have discovered (or possibly
+         even all of them, if the conditional in the for loop never
+         fired).  */
+      if (end_reg > reg + 2)
+        thumb2_emit_vfp_multi_reg_pop (reg + 2, (end_reg - (reg + 2)) / 2);
+    }
+
+  /* iWMMXt is not supported when Thumb-2 is in use.  If it were, we would
+     want to be restoring the appropriate iWMMXt regs here, in a similar
+     way to arm_output_epilogue.  */
+
+  /* If there are registers to restore, make it happen.  */
+  if (saved_regs_mask)
+    {
+      /* It's illegal to do a pop for only one reg, so generate an ldr.  */
+      if (num_regs == 1)
+        {
+          for (i = 0; i <= LAST_ARM_REGNUM; i++)
+            if (saved_regs_mask & (1 << i))
+              {
+                rtx addr = gen_rtx_MEM (SImode,
+                                        gen_rtx_POST_INC (SImode,
+                                                          stack_pointer_rtx));
+                set_mem_alias_set (addr, get_frame_alias_set ());
+                emit_insn (gen_movsi (gen_rtx_REG (SImode, i), addr));
+              }
+        }
+      /* Two or more regs warrant the use of a multi-reg pop.  */
+      else
+        {
+          /* If the multi-pop is the last instruction, don't generate a
+             `branch to return-address' instruction.  Instead, pop the
+             saved LR value straight into the PC.  */
+          if (ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
+              && !IS_STACKALIGN (func_type)
+              && crtl->args.pretend_args_size == 0
+              && saved_regs_mask & (1 << LR_REGNUM)
+              && !crtl->calls_eh_return)
+            {
+              saved_regs_mask &= ~(1 << LR_REGNUM);
+              saved_regs_mask |= (1 << PC_REGNUM);
+              really_return = true;
+            }
+
+          thumb2_emit_multi_reg_pop (saved_regs_mask, really_return);
+          if (really_return)
+            return;
+        }
+    }
+
+  /* Unwind the pre-pushed regs.  */
+  if (crtl->args.pretend_args_size)
+    emit_insn (gen_addsi3 (stack_pointer_rtx,
+                           stack_pointer_rtx,
+                           GEN_INT (crtl->args.pretend_args_size)));
+
+  /* Stack adjustment for exception handler.  */
+  if (crtl->calls_eh_return)
+    emit_insn (gen_addsi3 (stack_pointer_rtx,
+                           stack_pointer_rtx,
+                           gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
+
+  emit_jump_insn (ret_rtx);
+}
+
+/* Generate the appropriate instruction to return for Thumb-2.
+   OPERAND holds a condition, which must be passed to output_asm_insn.  */
+void
+thumb2_output_return (rtx operand)
+{
+  char instr[100];
+  unsigned long func_type;
+
+  func_type = arm_current_func_type ();
+
+  if (IS_NAKED (func_type))
+    /* Do nothing for a naked function.  */
+    return;
+
+  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
+    {
+      rtx op;
+
+      /* A volatile function should never return.  Call abort.  */
+      op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
+      assemble_external_libcall (op);
+      output_asm_insn ("bl\t%a0", &op);
+
+      return;
+    }
+
+  switch ((int) ARM_FUNC_TYPE (func_type))
+    {
+    case ARM_FT_ISR:
+    case ARM_FT_FIQ:
+      sprintf (instr, "subs\t%%|pc, %%|lr, #4");
+      break;
+
+    case ARM_FT_EXCEPTION:
+      sprintf (instr, "movs\t%%|pc, %%|lr");
+      break;
+
+    default:
+      sprintf (instr, "bx\t%%|lr");
+      break;
+    }
+
+  output_asm_insn (instr, &operand);
+}
+
 /* Implementation of insn prologue_thumb1_interwork.  This is the
    first "instruction" of a function called in ARM mode.  Swap to thumb
    mode.  */
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index a78ba88..6cfb2da 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -6669,6 +6669,141 @@
 				   FALSE, operands[1], &offset);
 })
 
+;; Pop (as used in epilogue RTL)
+;;
+;; This should really be in thumb2.md, but it needs to live above
+;; the ldmsi patterns, so that it matches before them.
+;; Furthermore, there is no reason why it could not be extended
+;; to support Thumb-1 and ARM at a later date (whereupon it would
+;; fully deserve its spot in this file).
+(define_insn "*pop_multiple_with_stack_update" + [(match_parallel 0 "load_multiple_operation" + [(set (match_operand:SI 1 "s_register_operand" "+rk") + (plus:SI (match_dup 1) + (match_operand:SI 2 "const_int_operand" "I"))) + ])] + "TARGET_THUMB2" + "* + { + int i; + char pattern[100]; + int num_saves = XVECLEN (operands[0], 0); + + if (REGNO (operands[1]) == SP_REGNUM) + { + strcpy (pattern, \"pop\\t{\"); + } + else + { + strcpy (pattern, \"ldm%(ia%)\\t\"); + strcat (pattern, reg_names[REGNO (operands[1])]); + strcat (pattern, \"!, {\"); + } + + strcat (pattern, + reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, 1), 0))]); + + /* Skip over the first element and the one we just generated. */ + for (i = 2; i < (num_saves); i++) + { + strcat (pattern, \", %|\"); + strcat (pattern, + reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]); + } + + strcat (pattern, \"}\"); + output_asm_insn (pattern, operands); + + return \"\"; + } + " + [(set_attr "type" "load4")] +) + +;; Pop with return (as used in epilogue RTL) +;; +;; This instruction is generated when the registers are popped at end of +;; epilogue. Here, instead of popping the value in LR and then generating +;; jump to LR, value is popped in PC. Hence, the pattern is combined with +;; (return). +(define_insn "*pop_multiple_with_stack_update_and_return" + [(match_parallel 0 "load_multiple_operation_return" + [(return) + (set (match_operand:SI 1 "s_register_operand" "+k") + (plus:SI (match_dup 1) + (match_operand:SI 2 "const_int_operand" "I"))) + ])] + "TARGET_THUMB2" + "* + { + int i; + char pattern[100]; + int num_saves = XVECLEN (operands[0], 0); + + strcpy (pattern, \"pop\\t{\"); + strcat (pattern, + reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, 2), 0))]); + + /* Skip over the first two elements and the one we just generated. 
+    for (i = 3; i < num_saves; i++)
+      {
+        strcat (pattern, \", %|\");
+        strcat (pattern,
+                reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
+      }
+
+    strcat (pattern, \"}\");
+    output_asm_insn (pattern, operands);
+
+    return \"\";
+  }
+  "
+  [(set_attr "type" "load4")]
+)
+
+(define_insn "*thumb2_ldr_with_return"
+  [(return)
+   (set (reg:SI PC_REGNUM)
+        (mem:SI (post_inc:SI (match_operand:SI 0 "s_register_operand" "+k"))))]
+  "TARGET_THUMB2"
+  "ldr%?\t%|pc, [%0], #4"
+  [(set_attr "type" "load1")
+   (set_attr "predicable" "yes")]
+)
+
+(define_insn "*vfp_pop_multiple_with_stack_update"
+  [(match_parallel 0 "load_multiple_operation_fp"
+    [(set (match_operand:SI 1 "s_register_operand" "+k")
+          (plus:SI (match_dup 1)
+                   (match_operand:SI 2 "const_int_operand" "I")))
+     (set (match_operand:DF 3 "arm_hard_register_operand" "")
+          (mem:DF (match_dup 1)))])]
+  "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP"
+  "*
+  {
+    int num_regs = XVECLEN (operands[0], 0);
+    char pattern[100];
+    rtx op_list[2];
+
+    strcpy (pattern, \"fldmfdd\\t\");
+    strcat (pattern,
+            reg_names[REGNO (SET_DEST (XVECEXP (operands[0], 0, 0)))]);
+    strcat (pattern, \"!, {\");
+    op_list[0] = XEXP (XVECEXP (operands[0], 0, 1), 0);
+    strcat (pattern, \"%P0\");
+    if ((num_regs - 1) > 1)
+      {
+        strcat (pattern, \"-%P1\");
+        op_list[1] = XEXP (XVECEXP (operands[0], 0, num_regs - 1), 0);
+      }
+
+    strcat (pattern, \"}\");
+    output_asm_insn (pattern, op_list);
+    return \"\";
+  }
+  "
+  [(set_attr "type" "load4")]
+)
+
 (define_expand "store_multiple"
   [(match_par_dup 3 [(set (match_operand:SI 0 "" "")
                           (match_operand:SI 1 "" ""))
@@ -8486,8 +8621,19 @@
 
 (define_expand "return"
   [(return)]
-  "TARGET_32BIT && USE_RETURN_INSN (FALSE)"
-  "")
+  "(TARGET_ARM || (TARGET_THUMB2
+    && ARM_FUNC_TYPE (arm_current_func_type ()) == ARM_FT_NORMAL
+    && !IS_STACKALIGN (arm_current_func_type ())))
+   && USE_RETURN_INSN (FALSE)"
+  "
+  {
+    if (TARGET_THUMB2)
+      {
+        thumb2_expand_return ();
+        DONE;
+      }
+  }
+  ")
 
 ;; Often the return insn will be the same as loading from memory, so set attr
 (define_insn "*arm_return"
@@ -10529,6 +10675,11 @@
     emit_insn (gen_prologue_use (gen_rtx_REG (Pmode, 2)));
   if (TARGET_THUMB1)
     thumb1_expand_epilogue ();
+  else if (TARGET_THUMB2)
+    {
+      thumb2_expand_epilogue ();
+      DONE;
+    }
   else if (USE_RETURN_INSN (FALSE))
     {
       emit_jump_insn (gen_return ());
@@ -10572,12 +10723,12 @@
 
 (define_insn "*epilogue_insns"
   [(unspec_volatile [(return)] VUNSPEC_EPILOGUE)]
-  "TARGET_EITHER"
+  "TARGET_ARM || TARGET_THUMB1"
  "*
  if (TARGET_32BIT)
    return arm_output_epilogue (NULL);
  else /* TARGET_THUMB1 */
-    return thumb_unexpanded_epilogue ();
+    return thumb1_unexpanded_epilogue ();
  "
  ; Length is absolute worst case
  [(set_attr "length" "44")
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index 92eb004..7e2203d 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -369,84 +369,22 @@
 (define_special_predicate "load_multiple_operation"
   (match_code "parallel")
 {
-  HOST_WIDE_INT count = XVECLEN (op, 0);
-  unsigned dest_regno;
-  rtx src_addr;
-  HOST_WIDE_INT i = 1, base = 0;
-  HOST_WIDE_INT offset = 0;
-  rtx elt;
-  bool addr_reg_loaded = false;
-  bool update = false;
-
-  if (count <= 1
-      || GET_CODE (XVECEXP (op, 0, 0)) != SET
-      || !REG_P (SET_DEST (XVECEXP (op, 0, 0))))
-    return false;
-
-  /* Check to see if this might be a write-back.  */
-  if (GET_CODE (SET_SRC (elt = XVECEXP (op, 0, 0))) == PLUS)
-    {
-      i++;
-      base = 1;
-      update = true;
-
-      /* Now check it more carefully.  */
-      if (GET_CODE (SET_DEST (elt)) != REG
-          || GET_CODE (XEXP (SET_SRC (elt), 0)) != REG
-          || GET_CODE (XEXP (SET_SRC (elt), 1)) != CONST_INT
-          || INTVAL (XEXP (SET_SRC (elt), 1)) != (count - 1) * 4)
-        return false;
-    }
-
-  /* Perform a quick check so we don't blow up below.  */
-  if (count <= i
-      || GET_CODE (XVECEXP (op, 0, i - 1)) != SET
-      || GET_CODE (SET_DEST (XVECEXP (op, 0, i - 1))) != REG
-      || GET_CODE (SET_SRC (XVECEXP (op, 0, i - 1))) != MEM)
-    return false;
-
-  dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, i - 1)));
-  src_addr = XEXP (SET_SRC (XVECEXP (op, 0, i - 1)), 0);
-  if (GET_CODE (src_addr) == PLUS)
-    {
-      if (GET_CODE (XEXP (src_addr, 1)) != CONST_INT)
-        return false;
-      offset = INTVAL (XEXP (src_addr, 1));
-      src_addr = XEXP (src_addr, 0);
-    }
-  if (!REG_P (src_addr))
-    return false;
+  return load_multiple_operation_p (op, /*consecutive=*/false,
+                                    SImode, /*return_pc=*/false);
+})
 
-  for (; i < count; i++)
-    {
-      elt = XVECEXP (op, 0, i);
+(define_special_predicate "load_multiple_operation_return"
+  (match_code "parallel")
+{
+  return load_multiple_operation_p (op, /*consecutive=*/false,
+                                    SImode, /*return_pc=*/true);
+})
 
-      if (GET_CODE (elt) != SET
-          || GET_CODE (SET_DEST (elt)) != REG
-          || GET_MODE (SET_DEST (elt)) != SImode
-          || REGNO (SET_DEST (elt)) <= dest_regno
-          || GET_CODE (SET_SRC (elt)) != MEM
-          || GET_MODE (SET_SRC (elt)) != SImode
-          || ((GET_CODE (XEXP (SET_SRC (elt), 0)) != PLUS
-               || !rtx_equal_p (XEXP (XEXP (SET_SRC (elt), 0), 0), src_addr)
-               || GET_CODE (XEXP (XEXP (SET_SRC (elt), 0), 1)) != CONST_INT
-               || INTVAL (XEXP (XEXP (SET_SRC (elt), 0), 1)) != offset + (i - base) * 4)
-              && (!REG_P (XEXP (SET_SRC (elt), 0))
-                  || offset + (i - base) * 4 != 0)))
-        return false;
-      dest_regno = REGNO (SET_DEST (elt));
-      if (dest_regno == REGNO (src_addr))
-        addr_reg_loaded = true;
-    }
-  /* For Thumb, we only have updating instructions.  If the pattern does
-     not describe an update, it must be because the address register is
-     in the list of loaded registers - on the hardware, this has the effect
-     of overriding the update.  */
-  if (update && addr_reg_loaded)
-    return false;
-  if (TARGET_THUMB1)
-    return update || addr_reg_loaded;
-  return true;
+(define_special_predicate "load_multiple_operation_fp"
+  (match_code "parallel")
+{
+  return load_multiple_operation_p (op, /*consecutive=*/true,
+                                    DFmode, /*return_pc=*/false);
 })
 
 (define_special_predicate "store_multiple_operation"
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index 05585da..78f4e81 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -635,17 +635,18 @@
    (set_attr "length" "20")]
 )
 
-;; Note: this is not predicable, to avoid issues with linker-generated
-;; interworking stubs.
-(define_insn "*thumb2_return"
+(define_insn "*thumb2_rtl_epilogue_return"
   [(return)]
-  "TARGET_THUMB2 && USE_RETURN_INSN (FALSE)"
+  "(TARGET_THUMB2)"
   "*
   {
-    return output_return_instruction (const_true_rtx, TRUE, FALSE);
+    thumb2_output_return (const_true_rtx);
+    return \"\";
   }"
-  [(set_attr "type" "load1")
-   (set_attr "length" "12")]
+  [(set_attr "type" "branch")
+   (set_attr "length" "4")
+   (set_attr "predicable" "no")
+   (set_attr "conds" "unconditional")]
 )
 
 (define_insn_and_split "thumb2_eh_return"
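
As an illustration of the intended output (this example is not part of
the patch; the function and register set are hypothetical, and the real
mask comes from arm_get_frame_offsets ()->saved_regs_mask): for a
Thumb-2 function whose frame saves {r4, r5, lr}, thumb2_expand_epilogue
replaces LR with PC in saved_regs_mask, the resulting parallel matches
*pop_multiple_with_stack_update_and_return, and the whole epilogue
assembles to a single instruction:

        pop     {r4, r5, pc}

If only one core register were saved, thumb2_expand_return would instead
emit the (return) plus post-increment load parallel matched by
*thumb2_ldr_with_return:

        ldr     pc, [sp], #4

Any callee-saved VFP registers are restored beforehand through
*vfp_pop_multiple_with_stack_update, e.g. "fldmfdd sp!, {d8-d9}" for a
frame that saved d8 and d9.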