From patchwork Tue Oct 4 17:32:35 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Uros Bizjak X-Patchwork-Id: 117671 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id 8C44BB6F7C for ; Wed, 5 Oct 2011 04:32:55 +1100 (EST) Received: (qmail 19466 invoked by alias); 4 Oct 2011 17:32:52 -0000 Received: (qmail 19450 invoked by uid 22791); 4 Oct 2011 17:32:50 -0000 X-SWARE-Spam-Status: No, hits=-2.3 required=5.0 tests=AWL, BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, FREEMAIL_FROM, RCVD_IN_DNSWL_LOW X-Spam-Check-By: sourceware.org Received: from mail-yx0-f175.google.com (HELO mail-yx0-f175.google.com) (209.85.213.175) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Tue, 04 Oct 2011 17:32:36 +0000 Received: by yxj17 with SMTP id 17so813451yxj.20 for ; Tue, 04 Oct 2011 10:32:35 -0700 (PDT) MIME-Version: 1.0 Received: by 10.150.56.29 with SMTP id e29mr1433680yba.61.1317749555373; Tue, 04 Oct 2011 10:32:35 -0700 (PDT) Received: by 10.147.116.13 with HTTP; Tue, 4 Oct 2011 10:32:35 -0700 (PDT) In-Reply-To: References: <20111003230055.GA27052@intel.com> Date: Tue, 4 Oct 2011 19:32:35 +0200 Message-ID: Subject: Re: PATCH: PR target/50603: [x32] Unnecessary lea From: Uros Bizjak To: "H.J. Lu" Cc: gcc-patches@gcc.gnu.org, Jakub Jelinek , Richard Henderson Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org On Tue, Oct 4, 2011 at 6:06 PM, H.J. Lu wrote: >> OTOH, x86_64 and i686 targets can also benefit from this change. If >> combine can't create more complex address (covered by lea), then it >> will simply propagate memory operand back into the add insn. 
It looks >> to me that we can't lose here, so: >> >>  /* Improve address combine.  */ >>  if (code == PLUS && MEM_P (src2)) >>    src2 = force_reg (mode, src2); >> >> Any opinions? >> > > It doesn't work with 64bit libstdc++: Yeah, yeah. ix86_output_mi_thunk has some ... issues. Please try attached patch that introduces ix86_emit_binop and uses it in a bunch of places. Uros. Index: i386-protos.h =================================================================== --- i386-protos.h (revision 179506) +++ i386-protos.h (working copy) @@ -94,6 +94,7 @@ extern bool ix86_lea_outperforms (rtx, unsigned in unsigned int, unsigned int); extern bool ix86_avoid_lea_for_add (rtx, rtx[]); extern bool ix86_avoid_lea_for_addr (rtx, rtx[]); +extern void ix86_emit_binop (enum rtx_code, enum machine_mode, rtx, rtx); extern void ix86_split_lea_for_addr (rtx[], enum machine_mode); extern bool ix86_lea_for_add_ok (rtx, rtx[]); extern bool ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high); Index: i386.c =================================================================== --- i386.c (revision 179506) +++ i386.c (working copy) @@ -15727,6 +15727,10 @@ ix86_fixup_binary_operands (enum rtx_code code, en if (MEM_P (src1) && !rtx_equal_p (dst, src1)) src1 = force_reg (mode, src1); + /* Improve address combine. */ + if (code == PLUS && MEM_P (src2)) + src2 = force_reg (mode, src2); + operands[1] = src1; operands[2] = src2; return dst; @@ -16470,6 +16474,20 @@ ix86_avoid_lea_for_addr (rtx insn, rtx operands[]) return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost); } +/* Emit x86 binary operand CODE in mode MODE, where the first operand + matches destination. RTX includes clobber of FLAGS_REG. 
*/ + +extern void ix86_emit_binop (enum rtx_code code, enum machine_mode mode, + rtx dst, rtx src) +{ + rtx op, clob; + + op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src)); + clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); + + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); +} + /* Split lea instructions into a sequence of instructions which are executed on ALU to avoid AGU stalls. It is assumed that it is allowed to clobber flags register @@ -16482,8 +16500,7 @@ ix86_split_lea_for_addr (rtx operands[], enum mach unsigned int regno1 = INVALID_REGNUM; unsigned int regno2 = INVALID_REGNUM; struct ix86_address parts; - rtx tmp, clob; - rtvec par; + rtx tmp; int ok, adds; ok = ix86_decompose_address (operands[1], &parts); @@ -16515,14 +16532,7 @@ ix86_split_lea_for_addr (rtx operands[], enum mach gcc_assert (regno2 != regno0); for (adds = parts.scale; adds > 0; adds--) - { - tmp = gen_rtx_PLUS (mode, operands[0], parts.index); - tmp = gen_rtx_SET (VOIDmode, operands[0], tmp); - clob = gen_rtx_CLOBBER (VOIDmode, - gen_rtx_REG (CCmode, FLAGS_REG)); - par = gen_rtvec (2, tmp, clob); - emit_insn (gen_rtx_PARALLEL (VOIDmode, par)); - } + ix86_emit_binop (PLUS, mode, operands[0], parts.index); } else { @@ -16531,30 +16541,14 @@ ix86_split_lea_for_addr (rtx operands[], enum mach emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index)); /* Use shift for scaling. 
*/ - tmp = gen_rtx_ASHIFT (mode, operands[0], - GEN_INT (exact_log2 (parts.scale))); - tmp = gen_rtx_SET (VOIDmode, operands[0], tmp); - clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); - par = gen_rtvec (2, tmp, clob); - emit_insn (gen_rtx_PARALLEL (VOIDmode, par)); + ix86_emit_binop (ASHIFT, mode, operands[0], + GEN_INT (exact_log2 (parts.scale))); if (parts.base) - { - tmp = gen_rtx_PLUS (mode, operands[0], parts.base); - tmp = gen_rtx_SET (VOIDmode, operands[0], tmp); - clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); - par = gen_rtvec (2, tmp, clob); - emit_insn (gen_rtx_PARALLEL (VOIDmode, par)); - } + ix86_emit_binop (PLUS, mode, operands[0], parts.base); if (parts.disp && parts.disp != const0_rtx) - { - tmp = gen_rtx_PLUS (mode, operands[0], parts.disp); - tmp = gen_rtx_SET (VOIDmode, operands[0], tmp); - clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); - par = gen_rtvec (2, tmp, clob); - emit_insn (gen_rtx_PARALLEL (VOIDmode, par)); - } + ix86_emit_binop (PLUS, mode, operands[0], parts.disp); } } else if (!parts.base && !parts.index) @@ -16565,41 +16559,32 @@ ix86_split_lea_for_addr (rtx operands[], enum mach else { if (!parts.base) - { - if (regno0 != regno2) - emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index)); - } + { + if (regno0 != regno2) + emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index)); + } else if (!parts.index) - { - if (regno0 != regno1) - emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base)); - } - else - { - if (regno0 == regno1) - tmp = gen_rtx_PLUS (mode, operands[0], parts.index); - else if (regno0 == regno2) - tmp = gen_rtx_PLUS (mode, operands[0], parts.base); - else - { + { + if (regno0 != regno1) emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base)); - tmp = gen_rtx_PLUS (mode, operands[0], parts.index); - } + } + else + { + if (regno0 == regno1) + tmp = parts.index; + else if (regno0 == regno2) + tmp = parts.base; + else + { + emit_insn 
(gen_rtx_SET (VOIDmode, operands[0], parts.base)); + tmp = parts.index; + } - tmp = gen_rtx_SET (VOIDmode, operands[0], tmp); - clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); - par = gen_rtvec (2, tmp, clob); - emit_insn (gen_rtx_PARALLEL (VOIDmode, par)); - } + ix86_emit_binop (PLUS, mode, operands[0], tmp); + } if (parts.disp && parts.disp != const0_rtx) - { - tmp = gen_rtx_PLUS (mode, operands[0], parts.disp); - tmp = gen_rtx_SET (VOIDmode, operands[0], tmp); - clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); - par = gen_rtvec (2, tmp, clob); - emit_insn (gen_rtx_PARALLEL (VOIDmode, par)); - } + ix86_emit_binop (PLUS, mode, operands[0], parts.disp); } } @@ -30940,7 +30925,7 @@ x86_output_mi_thunk (FILE *file, } } - emit_insn (ix86_gen_add3 (delta_dst, delta_dst, delta_rtx)); + ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx); } /* Adjust the this parameter by a value stored in the vtable. */ @@ -30983,7 +30968,7 @@ x86_output_mi_thunk (FILE *file, REGNO (this_reg)), vcall_mem)); else - emit_insn (ix86_gen_add3 (this_reg, this_reg, vcall_mem)); + ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem); } /* If necessary, drop THIS back to its stack slot. */