From patchwork Tue Jul 20 16:32:32 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Richard Henderson X-Patchwork-Id: 59344 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id 7F843B6EFF for ; Wed, 21 Jul 2010 02:32:59 +1000 (EST) Received: (qmail 3859 invoked by alias); 20 Jul 2010 16:32:55 -0000 Received: (qmail 3839 invoked by uid 22791); 20 Jul 2010 16:32:49 -0000 X-SWARE-Spam-Status: No, hits=-5.2 required=5.0 tests=AWL, BAYES_00, RCVD_IN_DNSWL_HI, SPF_HELO_PASS, TW_VS, T_RP_MATCHES_RCVD X-Spam-Check-By: sourceware.org Received: from mx1.redhat.com (HELO mx1.redhat.com) (209.132.183.28) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Tue, 20 Jul 2010 16:32:43 +0000 Received: from int-mx05.intmail.prod.int.phx2.redhat.com (int-mx05.intmail.prod.int.phx2.redhat.com [10.5.11.18]) by mx1.redhat.com (8.13.8/8.13.8) with ESMTP id o6KGWXMB019174 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=OK); Tue, 20 Jul 2010 12:32:34 -0400 Received: from anchor.twiddle.home (ovpn-113-74.phx2.redhat.com [10.3.113.74]) by int-mx05.intmail.prod.int.phx2.redhat.com (8.13.8/8.13.8) with ESMTP id o6KGWXoc028138; Tue, 20 Jul 2010 12:32:33 -0400 Message-ID: <4C45CFA0.9070404@redhat.com> Date: Tue, 20 Jul 2010 09:32:32 -0700 From: Richard Henderson User-Agent: Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.1.10) Gecko/20100621 Fedora/3.0.5-1.fc13 Thunderbird/3.0.5 MIME-Version: 1.0 To: "H.J. Lu" CC: Bernd Schmidt , GCC Patches , ubizjak@gmail.com Subject: Re: x86_64 varargs setup jump table References: <4C4035C3.9080305@codesourcery.com> <4C40A5BD.9080208@redhat.com> <4C40F005.3060507@codesourcery.com> <4C41BD52.5040905@codesourcery.com> <4C447222.7080500@redhat.com> <4C44C00F.3070201@redhat.com> In-Reply-To: <4C44C00F.3070201@redhat.com> X-IsSubscribed: yes Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org On 07/19/2010 02:13 PM, Richard Henderson wrote: > This bootstraps; regression test starting now. > > Obviously there's some patterns in i386.md that should be removed > along with this, were this patch to go in. A slightly different patch that passes regression testing. This also vanishes the patterns that should go. r~ diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index bb0b890..d9dc571 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -7062,11 +7062,8 @@ static void setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum) { rtx save_area, mem; - rtx label; - rtx tmp_reg; - rtx nsse_reg; alias_set_type set; - int i; + int i, max; /* GPR size of varargs save area. */ if (cfun->va_list_gpr_size) @@ -7087,10 +7084,11 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum) save_area = frame_pointer_rtx; set = get_varargs_alias_set (); - for (i = cum->regno; - i < X86_64_REGPARM_MAX - && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD; - i++) + max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD; + if (max > X86_64_REGPARM_MAX) + max = X86_64_REGPARM_MAX; + + for (i = cum->regno; i < max; i++) { mem = gen_rtx_MEM (Pmode, plus_constant (save_area, i * UNITS_PER_WORD)); @@ -7102,33 +7100,41 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum) if (ix86_varargs_fpr_size) { + enum machine_mode smode; + rtx label, test; + /* Now emit code to save SSE registers. The AX parameter contains number - of SSE parameter registers used to call this function. We use - sse_prologue_save insn template that produces computed jump across - SSE saves. We need some preparation work to get this working. */ + of SSE parameter registers used to call this function, though all we + actually check here is the zero/non-zero status. */ label = gen_label_rtx (); + test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx); + emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1), + label)); + + /* If we've determined that we're only loading scalars (and not + vector data) then we can store doubles instead. */ + if (crtl->stack_alignment_needed < 128) + smode = DFmode; + else + smode = V4SFmode; - nsse_reg = gen_reg_rtx (Pmode); - emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG))); - - /* Compute address of memory block we save into. We always use pointer - pointing 127 bytes after first byte to store - this is needed to keep - instruction size limited by 4 bytes (5 bytes for AVX) with one - byte displacement. */ - tmp_reg = gen_reg_rtx (Pmode); - emit_insn (gen_rtx_SET (VOIDmode, tmp_reg, - plus_constant (save_area, - ix86_varargs_gpr_size + 127))); - mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127)); - MEM_NOTRAP_P (mem) = 1; - set_mem_alias_set (mem, set); - set_mem_align (mem, 64); + max = cum->sse_regno + cfun->va_list_fpr_size / 16; + if (max > X86_64_SSE_REGPARM_MAX) + max = X86_64_SSE_REGPARM_MAX; - /* And finally do the dirty job! */ - emit_insn (gen_sse_prologue_save (mem, nsse_reg, - GEN_INT (cum->sse_regno), label, - gen_reg_rtx (Pmode))); + for (i = cum->sse_regno; i < max; ++i) + { + mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size); + mem = gen_rtx_MEM (smode, mem); + MEM_NOTRAP_P (mem) = 1; + set_mem_alias_set (mem, set); + set_mem_align (mem, GET_MODE_ALIGNMENT (smode)); + + emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i))); + } + + emit_label (label); } } diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 88b4029..6616da2 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -79,13 +79,11 @@ ;; Prologue support UNSPEC_STACK_ALLOC UNSPEC_SET_GOT - UNSPEC_SSE_PROLOGUE_SAVE UNSPEC_REG_SAVE UNSPEC_DEF_CFA UNSPEC_SET_RIP UNSPEC_SET_GOT_OFFSET UNSPEC_MEMORY_BLOCKAGE - UNSPEC_SSE_PROLOGUE_SAVE_LOW ;; TLS support UNSPEC_TP @@ -17825,179 +17823,6 @@ { return ASM_SHORT "0x0b0f"; } [(set_attr "length" "2")]) -(define_expand "sse_prologue_save" - [(parallel [(set (match_operand:BLK 0 "" "") - (unspec:BLK [(reg:DI XMM0_REG) - (reg:DI XMM1_REG) - (reg:DI XMM2_REG) - (reg:DI XMM3_REG) - (reg:DI XMM4_REG) - (reg:DI XMM5_REG) - (reg:DI XMM6_REG) - (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE)) - (clobber (reg:CC FLAGS_REG)) - (clobber (match_operand:DI 1 "register_operand" "")) - (use (match_operand:DI 2 "immediate_operand" "")) - (use (label_ref:DI (match_operand 3 "" ""))) - (clobber (match_operand:DI 4 "register_operand" "")) - (use (match_dup 1))])] - "TARGET_64BIT" - "") - -;; Pre-reload version of prologue save. Until after prologue generation we don't know -;; what the size of save instruction will be. -;; Operand 0+operand 6 is the memory save area -;; Operand 1 is number of registers to save (will get overwritten to operand 5) -;; Operand 2 is number of non-vaargs SSE arguments -;; Operand 3 is label starting the save block -;; Operand 4 is used for temporary computation of jump address -(define_insn "*sse_prologue_save_insn1" - [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R") - (match_operand:DI 6 "const_int_operand" "n"))) - (unspec:BLK [(reg:DI XMM0_REG) - (reg:DI XMM1_REG) - (reg:DI XMM2_REG) - (reg:DI XMM3_REG) - (reg:DI XMM4_REG) - (reg:DI XMM5_REG) - (reg:DI XMM6_REG) - (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE)) - (clobber (reg:CC FLAGS_REG)) - (clobber (match_operand:DI 1 "register_operand" "=r")) - (use (match_operand:DI 2 "const_int_operand" "i")) - (use (label_ref:DI (match_operand 3 "" "X"))) - (clobber (match_operand:DI 4 "register_operand" "=&r")) - (use (match_operand:DI 5 "register_operand" "1"))] - "TARGET_64BIT - && INTVAL (operands[6]) + X86_64_SSE_REGPARM_MAX * 16 - 16 < 128 - && INTVAL (operands[6]) + INTVAL (operands[2]) * 16 >= -128" - "#" - [(set_attr "type" "other") - (set_attr "memory" "store") - (set_attr "mode" "DI")]) - -;; We know size of save instruction; expand the computation of jump address -;; in the jumptable. -(define_split - [(parallel [(set (match_operand:BLK 0 "" "") - (unspec:BLK [(reg:DI XMM0_REG) - (reg:DI XMM1_REG) - (reg:DI XMM2_REG) - (reg:DI XMM3_REG) - (reg:DI XMM4_REG) - (reg:DI XMM5_REG) - (reg:DI XMM6_REG) - (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE)) - (clobber (reg:CC FLAGS_REG)) - (clobber (match_operand:DI 1 "register_operand" "")) - (use (match_operand:DI 2 "const_int_operand" "")) - (use (match_operand 3 "" "")) - (clobber (match_operand:DI 4 "register_operand" "")) - (use (match_operand:DI 5 "register_operand" ""))])] - "reload_completed" - [(parallel [(set (match_dup 0) - (unspec:BLK [(reg:DI XMM0_REG) - (reg:DI XMM1_REG) - (reg:DI XMM2_REG) - (reg:DI XMM3_REG) - (reg:DI XMM4_REG) - (reg:DI XMM5_REG) - (reg:DI XMM6_REG) - (reg:DI XMM7_REG)] - UNSPEC_SSE_PROLOGUE_SAVE_LOW)) - (use (match_dup 1)) - (use (match_dup 2)) - (use (match_dup 3)) - (use (match_dup 5))])] -{ - /* Movaps is 4 bytes, AVX and movsd is 5 bytes. */ - int size = 4 + (TARGET_AVX || crtl->stack_alignment_needed < 128); - - /* Compute address to jump to: - label - eax*size + nnamed_sse_arguments*size. */ - if (size == 5) - emit_insn (gen_rtx_SET (VOIDmode, operands[4], - gen_rtx_PLUS - (Pmode, - gen_rtx_MULT (Pmode, operands[1], - GEN_INT (4)), - operands[1]))); - else if (size == 4) - emit_insn (gen_rtx_SET (VOIDmode, operands[4], - gen_rtx_MULT (Pmode, operands[1], - GEN_INT (4)))); - else - gcc_unreachable (); - if (INTVAL (operands[2])) - emit_move_insn - (operands[1], - gen_rtx_CONST (DImode, - gen_rtx_PLUS (DImode, - operands[3], - GEN_INT (INTVAL (operands[2]) - * size)))); - else - emit_move_insn (operands[1], operands[3]); - emit_insn (gen_subdi3 (operands[1], operands[1], operands[4])); - operands[5] = GEN_INT (size); -}) - -(define_insn "sse_prologue_save_insn" - [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R") - (match_operand:DI 4 "const_int_operand" "n"))) - (unspec:BLK [(reg:DI XMM0_REG) - (reg:DI XMM1_REG) - (reg:DI XMM2_REG) - (reg:DI XMM3_REG) - (reg:DI XMM4_REG) - (reg:DI XMM5_REG) - (reg:DI XMM6_REG) - (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE_LOW)) - (use (match_operand:DI 1 "register_operand" "r")) - (use (match_operand:DI 2 "const_int_operand" "i")) - (use (label_ref:DI (match_operand 3 "" "X"))) - (use (match_operand:DI 5 "const_int_operand" "i"))] - "TARGET_64BIT - && INTVAL (operands[4]) + X86_64_SSE_REGPARM_MAX * 16 - 16 < 128 - && INTVAL (operands[4]) + INTVAL (operands[2]) * 16 >= -128" -{ - int i; - operands[0] = gen_rtx_MEM (Pmode, - gen_rtx_PLUS (Pmode, operands[0], operands[4])); - /* VEX instruction with a REX prefix will #UD. */ - if (TARGET_AVX && GET_CODE (XEXP (operands[0], 0)) != PLUS) - gcc_unreachable (); - - output_asm_insn ("jmp\t%A1", operands); - for (i = X86_64_SSE_REGPARM_MAX - 1; i >= INTVAL (operands[2]); i--) - { - operands[4] = adjust_address (operands[0], DImode, i*16); - operands[5] = gen_rtx_REG (TImode, SSE_REGNO (i)); - PUT_MODE (operands[4], TImode); - if (GET_CODE (XEXP (operands[0], 0)) != PLUS) - output_asm_insn ("rex", operands); - if (crtl->stack_alignment_needed < 128) - output_asm_insn ("%vmovsd\t{%5, %4|%4, %5}", operands); - else - output_asm_insn ("%vmovaps\t{%5, %4|%4, %5}", operands); - } - targetm.asm_out.internal_label (asm_out_file, "L", - CODE_LABEL_NUMBER (operands[3])); - return ""; -} - [(set_attr "type" "other") - (set_attr "length_immediate" "0") - (set_attr "length_address" "0") - ;; 2 bytes for jump and opernds[4] bytes for each save. - (set (attr "length") - (plus (const_int 2) - (mult (symbol_ref ("INTVAL (operands[5])")) - (symbol_ref ("X86_64_SSE_REGPARM_MAX - INTVAL (operands[2])"))))) - (set_attr "memory" "store") - (set_attr "modrm" "0") - (set_attr "prefix" "maybe_vex") - (set_attr "mode" "DI")]) - (define_expand "prefetch" [(prefetch (match_operand 0 "address_operand" "") (match_operand:SI 1 "const_int_operand" "")