From patchwork Wed Jun 23 21:19:48 2010
X-Patchwork-Submitter: Maxim Kuvyrkov
X-Patchwork-Id: 56720
Message-ID: <4C227A74.1070201@codesourcery.com>
Date: Thu, 24 Jun 2010 01:19:48 +0400
From: Maxim Kuvyrkov
To: Richard Earnshaw
CC: gcc-patches
Subject: Wrap calculation of PIC address into a single instruction
References: <4C18F225.2040509@codesourcery.com> <4C227243.9080104@codesourcery.com>
In-Reply-To: <4C227243.9080104@codesourcery.com>

This patch enables optimizations, particularly GCSE, to handle calculation
of PIC addresses.  GCSE tracks only single instructions, so it can't handle
the two-instruction calculation of a PIC address.  With this patch,
calculations of PIC addresses are represented as single instructions,
allowing GCSE to eliminate all but the first address calculation for each
global variable.

Any comments?  OK to check in?
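For illustration, here is a hypothetical snippet (not taken from the patch
or from the testsuite) showing the kind of code this helps when compiled
with -fPIC:

/* Hypothetical example.  Under -fPIC, every access to 'status' needs the
   address of its GOT slot.  Before this patch that address was computed by
   two separate insns, which GCSE does not track as one expression; with the
   combined calculate_pic_address pattern, PRE should be able to keep only
   the first calculation and reuse it for the later access.  */
extern int status;

int
f (int x)
{
  if (x)
    status = x;   /* first PIC address calculation for 'status' */
  return status;  /* later access: the calculation should be reused */
}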
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 5671587..d846557 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -4897,17 +4897,13 @@ legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
   if (GET_CODE (orig) == SYMBOL_REF
       || GET_CODE (orig) == LABEL_REF)
     {
-      rtx pic_ref, address;
       rtx insn;
 
       if (reg == 0)
 	{
 	  gcc_assert (can_create_pseudo_p ());
 	  reg = gen_reg_rtx (Pmode);
-	  address = gen_reg_rtx (Pmode);
 	}
-      else
-	address = reg;
 
       /* VxWorks does not impose a fixed gap between segments; the run-time
 	 gap can be different from the object-file gap.  We therefore can't
@@ -4923,18 +4919,21 @@ legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
 	insn = arm_pic_static_addr (orig, reg);
       else
 	{
+	  rtx pat;
+	  rtx mem;
+
 	  /* If this function doesn't have a pic register, create one now.  */
 	  require_pic_register ();
 
-	  if (TARGET_32BIT)
-	    emit_insn (gen_pic_load_addr_32bit (address, orig));
-	  else /* TARGET_THUMB1 */
-	    emit_insn (gen_pic_load_addr_thumb1 (address, orig));
+	  pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
 
-	  pic_ref = gen_const_mem (Pmode,
-				   gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
-						 address));
-	  insn = emit_move_insn (reg, pic_ref);
+	  /* Make the MEM as close to a constant as possible.  */
+	  mem = SET_SRC (pat);
+	  gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
+	  MEM_READONLY_P (mem) = 1;
+	  MEM_NOTRAP_P (mem) = 1;
+
+	  insn = emit_insn (pat);
 	}
 
       /* Put a REG_EQUAL note on this insn, so that it can be optimized
@@ -5214,6 +5213,15 @@ pcrel_constant_p (rtx x)
   return FALSE;
 }
 
+/* Return true if X will surely end up in an index register after the first
+   splitting pass.  */
+static bool
+will_be_in_index_register (const_rtx x)
+{
+  /* arm.md: calculate_pic_address will split this into a register.  */
+  return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM;
+}
+
 /* Return nonzero if X is a valid ARM state address operand.  */
 int
 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
@@ -5271,8 +5279,9 @@ arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
       rtx xop1 = XEXP (x, 1);
 
       return ((arm_address_register_rtx_p (xop0, strict_p)
	       && GET_CODE(xop1) == CONST_INT
	       && arm_legitimate_index_p (mode, xop1, outer, strict_p))
+	       && ((GET_CODE(xop1) == CONST_INT
+		    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
+		   || (!strict_p && will_be_in_index_register (xop1))))
 	      || (arm_address_register_rtx_p (xop1, strict_p)
 		  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
     }
@@ -5358,7 +5367,8 @@ thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
       rtx xop1 = XEXP (x, 1);
 
       return ((arm_address_register_rtx_p (xop0, strict_p)
-	       && thumb2_legitimate_index_p (mode, xop1, strict_p))
+	       && (thumb2_legitimate_index_p (mode, xop1, strict_p)
+		   || (!strict_p && will_be_in_index_register (xop1))))
 	      || (arm_address_register_rtx_p (xop1, strict_p)
 		  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
     }
@@ -5661,7 +5671,8 @@ thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
 	   && XEXP (x, 0) != frame_pointer_rtx
 	   && XEXP (x, 1) != frame_pointer_rtx
 	   && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
-	   && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p))
+	   && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
+	       || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
     return 1;
 
   /* REG+const has 5-7 bit offset for non-SP registers.  */
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index b6cca49..534bfc7 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -5231,6 +5231,34 @@
 ;; we use an unspec.  The offset will be loaded from a constant pool entry,
 ;; since that is the only type of relocation we can use.
 
+;; Wrap calculation of the whole PIC address in a single pattern for the
+;; benefit of optimizers, particularly PRE and HOIST.  Calculation of
+;; a PIC address involves two loads from memory, so we want to CSE it
+;; as often as possible.
+;; This pattern will be split into one of the pic_load_addr_* patterns
+;; and a move after GCSE optimizations.
+;;
+;; Note: Update arm.c: legitimize_pic_address() when changing this pattern.
+(define_expand "calculate_pic_address"
+  [(set (match_operand:SI 0 "register_operand" "")
+	(mem:SI (plus:SI (match_operand:SI 1 "register_operand" "")
+			 (unspec:SI [(match_operand:SI 2 "" "")]
+				    UNSPEC_PIC_SYM))))]
+  "flag_pic"
+)
+
+;; Split calculate_pic_address into pic_load_addr_* and a move.
+(define_split
+  [(set (match_operand:SI 0 "register_operand" "")
+	(mem:SI (plus:SI (match_operand:SI 1 "register_operand" "")
+			 (unspec:SI [(match_operand:SI 2 "" "")]
+				    UNSPEC_PIC_SYM))))]
+  "flag_pic"
+  [(set (match_dup 3) (unspec:SI [(match_dup 2)] UNSPEC_PIC_SYM))
+   (set (match_dup 0) (mem:SI (plus:SI (match_dup 1) (match_dup 3))))]
+  "operands[3] = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0];"
+)
+
 ;; The rather odd constraints on the following are to force reload to leave
 ;; the insn alone, and to force the minipool generation pass to then move
 ;; the GOT symbol to memory.