From patchwork Thu Sep 2 09:09:31 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Uros Bizjak X-Patchwork-Id: 63455 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id 0F447B7170 for ; Thu, 2 Sep 2010 19:10:18 +1000 (EST) Received: (qmail 6872 invoked by alias); 2 Sep 2010 09:10:12 -0000 Received: (qmail 6019 invoked by uid 22791); 2 Sep 2010 09:09:48 -0000 X-SWARE-Spam-Status: No, hits=-1.8 required=5.0 tests=AWL, BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, FREEMAIL_FROM, RCVD_IN_DNSWL_NONE, TW_ZJ, T_TO_NO_BRKTS_FREEMAIL X-Spam-Check-By: sourceware.org Received: from mail-qy0-f182.google.com (HELO mail-qy0-f182.google.com) (209.85.216.182) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Thu, 02 Sep 2010 09:09:34 +0000 Received: by qyk4 with SMTP id 4so352426qyk.20 for ; Thu, 02 Sep 2010 02:09:32 -0700 (PDT) MIME-Version: 1.0 Received: by 10.224.2.134 with SMTP id 6mr6178297qaj.237.1283418571929; Thu, 02 Sep 2010 02:09:31 -0700 (PDT) Received: by 10.229.28.200 with HTTP; Thu, 2 Sep 2010 02:09:31 -0700 (PDT) Date: Thu, 2 Sep 2010 11:09:31 +0200 Message-ID: Subject: [PATCH, i386]: Macroize remaining peephole2 patterns From: Uros Bizjak To: gcc-patches@gcc.gnu.org Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Hello! 2010-09-02 Uros Bizjak * config/i386/i386.md (nonmemory_operand): New mode attribute. (push memory peephole2): Macroize peepholes using SWI mode iterator. (move immediate to memory peephole2): Macroize peepholes using SWI124 mode iterator. (non-pairable NOT peephole2): Macroize peepholes using SWI124 mode iterator. 
(simple lea add peephole2): Macroize peepholes using SWI48 mode iterator. (simple lea mult peephole2): Ditto. (imul by 3,5,9 to lea peephole2): Ditto. (mov $-1, reg peephole2): Macroize peepholes using SWI248 mode iterator. (imul $32bit_imm,mem,reg peephole2): Ditto. (imul $8/16bit_imm,regmem,reg peephole2): Ditto. Bootstrapped and regression tested on x86_64-pc-linux-gnu {,-m32}. Committed to mainline SVN. Uros. Index: config/i386/i386.md =================================================================== --- config/i386/i386.md (revision 163761) +++ config/i386/i386.md (working copy) @@ -858,6 +858,13 @@ (SI "immediate_operand") (DI "x86_64_immediate_operand")]) +;; Nonmemory operand predicate for integer modes. +(define_mode_attr nonmemory_operand + [(QI "nonmemory_operand") + (HI "nonmemory_operand") + (SI "nonmemory_operand") + (DI "x86_64_nonmemory_operand")]) + ;; Operand predicate for shifts. (define_mode_attr shift_operand [(QI "nonimmediate_operand") @@ -1659,8 +1666,7 @@ "TARGET_64BIT && !symbolic_operand (operands[1], DImode) && !x86_64_immediate_operand (operands[1], DImode)" [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (match_dup 2))] - "") + (set (match_dup 0) (match_dup 2))]) ;; We need to define this as both peepholer and splitter for case ;; peephole2 pass is not run. @@ -2027,8 +2033,7 @@ "TARGET_64BIT && !symbolic_operand (operands[1], DImode) && !x86_64_immediate_operand (operands[1], DImode)" [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (match_dup 2))] - "") + (set (match_dup 0) (match_dup 2))]) ;; We need to define this as both peepholer and splitter for case ;; peephole2 pass is not run. @@ -4598,8 +4603,7 @@ "TARGET_SHORTEN_X87_SSE && !(TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ()) && peep2_reg_dead_p (2, operands[0])" - [(set (match_dup 2) (fix:SSEMODEI24 (match_dup 1)))] - "") + [(set (match_dup 2) (fix:SSEMODEI24 (match_dup 1)))]) ;; Avoid vector decoded forms of the instruction. 
(define_peephole2 @@ -4608,8 +4612,7 @@ (fix:SSEMODEI24 (match_operand:DF 1 "memory_operand" "")))] "TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ()" [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))] - "") + (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))]) (define_peephole2 [(match_scratch:SF 2 "x") @@ -4617,8 +4620,7 @@ (fix:SSEMODEI24 (match_operand:SF 1 "memory_operand" "")))] "TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ()" [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))] - "") + (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))]) (define_insn_and_split "fix_trunc_fisttp_i387_1" [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") @@ -15829,8 +15831,7 @@ (use (match_dup 3)) (clobber (match_dup 0)) (clobber (match_dup 1)) - (clobber (match_dup 2))])] - "") + (clobber (match_dup 2))])]) ;; ...and this one handles cmpstrn*_1. (define_peephole2 @@ -15865,10 +15866,7 @@ (use (reg:CC FLAGS_REG)) (clobber (match_dup 0)) (clobber (match_dup 1)) - (clobber (match_dup 2))])] - "") - - + (clobber (match_dup 2))])]) ;; Conditional move instructions. @@ -16184,11 +16182,13 @@ ;; The % modifier is not operational anymore in peephole2's, so we have to ;; swap the operands manually in the case of addition and multiplication. 
"if (COMMUTATIVE_ARITH_P (operands[2])) - operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]), - operands[0], operands[1]); + operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), + GET_MODE (operands[2]), + operands[0], operands[1]); else - operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]), - operands[1], operands[0]);") + operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), + GET_MODE (operands[2]), + operands[1], operands[0]);") ;; Conditional addition patterns (define_expand "addcc" @@ -16198,7 +16198,6 @@ (match_operand:SWI 3 "const_int_operand" "")] "" "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;") - ;; Misc patterns (?) @@ -16529,26 +16528,15 @@ ;; Don't push memory operands (define_peephole2 - [(set (match_operand:SI 0 "push_operand" "") - (match_operand:SI 1 "memory_operand" "")) - (match_scratch:SI 2 "r")] + [(set (match_operand:SWI 0 "push_operand" "") + (match_operand:SWI 1 "memory_operand" "")) + (match_scratch:SWI 2 "")] "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (match_dup 2))] - "") + (set (match_dup 0) (match_dup 2))]) -(define_peephole2 - [(set (match_operand:DI 0 "push_operand" "") - (match_operand:DI 1 "memory_operand" "")) - (match_scratch:DI 2 "r")] - "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY - && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" - [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (match_dup 2))] - "") - -;; We need to handle SFmode only, because DFmode and XFmode is split to +;; We need to handle SFmode only, because DFmode and XFmode are split to ;; SImode pushes. 
(define_peephole2 [(set (match_operand:SF 0 "push_operand" "") @@ -16557,106 +16545,34 @@ "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (match_dup 2))] - "") + (set (match_dup 0) (match_dup 2))]) -(define_peephole2 - [(set (match_operand:HI 0 "push_operand" "") - (match_operand:HI 1 "memory_operand" "")) - (match_scratch:HI 2 "r")] - "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY - && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" - [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (match_dup 2))] - "") - -(define_peephole2 - [(set (match_operand:QI 0 "push_operand" "") - (match_operand:QI 1 "memory_operand" "")) - (match_scratch:QI 2 "q")] - "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY - && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" - [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (match_dup 2))] - "") - ;; Don't move an immediate directly to memory when the instruction ;; gets too big. (define_peephole2 - [(match_scratch:SI 1 "r") - (set (match_operand:SI 0 "memory_operand" "") + [(match_scratch:SWI124 1 "<r>") + (set (match_operand:SWI124 0 "memory_operand" "") (const_int 0))] "optimize_insn_for_speed_p () - && ! TARGET_USE_MOV0 + && !TARGET_USE_MOV0 && TARGET_SPLIT_LONG_MOVES && get_attr_length (insn) >= ix86_cur_cost ()->large_insn && peep2_regno_dead_p (0, FLAGS_REG)" - [(parallel [(set (match_dup 1) (const_int 0)) - (clobber (reg:CC FLAGS_REG))]) - (set (match_dup 0) (match_dup 1))] - "") - -(define_peephole2 - [(match_scratch:HI 1 "r") - (set (match_operand:HI 0 "memory_operand" "") - (const_int 0))] - "optimize_insn_for_speed_p () - && ! 
TARGET_USE_MOV0 - && TARGET_SPLIT_LONG_MOVES - && get_attr_length (insn) >= ix86_cur_cost ()->large_insn - && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 2) (const_int 0)) (clobber (reg:CC FLAGS_REG))]) (set (match_dup 0) (match_dup 1))] "operands[2] = gen_lowpart (SImode, operands[1]);") (define_peephole2 - [(match_scratch:QI 1 "q") - (set (match_operand:QI 0 "memory_operand" "") - (const_int 0))] + [(match_scratch:SWI124 2 "<r>") + (set (match_operand:SWI124 0 "memory_operand" "") + (match_operand:SWI124 1 "immediate_operand" ""))] "optimize_insn_for_speed_p () - && ! TARGET_USE_MOV0 && TARGET_SPLIT_LONG_MOVES - && get_attr_length (insn) >= ix86_cur_cost ()->large_insn - && peep2_regno_dead_p (0, FLAGS_REG)" - [(parallel [(set (match_dup 2) (const_int 0)) - (clobber (reg:CC FLAGS_REG))]) - (set (match_dup 0) (match_dup 1))] - "operands[2] = gen_lowpart (SImode, operands[1]);") - -(define_peephole2 - [(match_scratch:SI 2 "r") - (set (match_operand:SI 0 "memory_operand" "") - (match_operand:SI 1 "immediate_operand" ""))] - "optimize_insn_for_speed_p () - && TARGET_SPLIT_LONG_MOVES && get_attr_length (insn) >= ix86_cur_cost ()->large_insn" [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (match_dup 2))] - "") + (set (match_dup 0) (match_dup 2))]) -(define_peephole2 - [(match_scratch:HI 2 "r") - (set (match_operand:HI 0 "memory_operand" "") - (match_operand:HI 1 "immediate_operand" ""))] - "optimize_insn_for_speed_p () - && TARGET_SPLIT_LONG_MOVES - && get_attr_length (insn) >= ix86_cur_cost ()->large_insn" - [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (match_dup 2))] - "") - -(define_peephole2 - [(match_scratch:QI 2 "q") - (set (match_operand:QI 0 "memory_operand" "") - (match_operand:QI 1 "immediate_operand" ""))] - "optimize_insn_for_speed_p () - && TARGET_SPLIT_LONG_MOVES - && get_attr_length (insn) >= ix86_cur_cost ()->large_insn" - [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (match_dup 2))] - "") - ;; Don't compare 
memory with zero, load and use a test instead. (define_peephole2 [(set (match_operand 0 "flags_reg_operand" "") @@ -16666,8 +16582,7 @@ (match_scratch:SI 3 "r")] "optimize_insn_for_speed_p () && ix86_match_ccmode (insn, CCNOmode)" [(set (match_dup 3) (match_dup 2)) - (set (match_dup 0) (match_op_dup 1 [(match_dup 3) (const_int 0)]))] - "") + (set (match_dup 0) (match_op_dup 1 [(match_dup 3) (const_int 0)]))]) ;; NOT is not pairable on Pentium, while XOR is, but one byte longer. ;; Don't split NOTs with a displacement operand, because resulting XOR @@ -16681,47 +16596,19 @@ ;; lifetime information then. (define_peephole2 - [(set (match_operand:SI 0 "nonimmediate_operand" "") - (not:SI (match_operand:SI 1 "nonimmediate_operand" "")))] + [(set (match_operand:SWI124 0 "nonimmediate_operand" "") + (not:SWI124 (match_operand:SWI124 1 "nonimmediate_operand" "")))] "optimize_insn_for_speed_p () && ((TARGET_NOT_UNPAIRABLE - && (!MEM_P (operands[0]) - || !memory_displacement_operand (operands[0], SImode))) - || (TARGET_NOT_VECTORMODE && long_memory_operand (operands[0], SImode))) + && (!MEM_P (operands[0]) + || !memory_displacement_operand (operands[0], <MODE>mode))) + || (TARGET_NOT_VECTORMODE + && long_memory_operand (operands[0], <MODE>mode))) && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 0) - (xor:SI (match_dup 1) (const_int -1))) - (clobber (reg:CC FLAGS_REG))])] - "") + (xor:SWI124 (match_dup 1) (const_int -1))) + (clobber (reg:CC FLAGS_REG))])]) -(define_peephole2 - [(set (match_operand:HI 0 "nonimmediate_operand" "") - (not:HI (match_operand:HI 1 "nonimmediate_operand" "")))] - "optimize_insn_for_speed_p () - && ((TARGET_NOT_UNPAIRABLE - && (!MEM_P (operands[0]) - || !memory_displacement_operand (operands[0], HImode))) - || (TARGET_NOT_VECTORMODE && long_memory_operand (operands[0], HImode))) - && peep2_regno_dead_p (0, FLAGS_REG)" - [(parallel [(set (match_dup 0) - (xor:HI (match_dup 1) (const_int -1))) - (clobber (reg:CC FLAGS_REG))])] - "") - 
-(define_peephole2 - [(set (match_operand:QI 0 "nonimmediate_operand" "") - (not:QI (match_operand:QI 1 "nonimmediate_operand" "")))] - "optimize_insn_for_speed_p () - && ((TARGET_NOT_UNPAIRABLE - && (!MEM_P (operands[0]) - || !memory_displacement_operand (operands[0], QImode))) - || (TARGET_NOT_VECTORMODE && long_memory_operand (operands[0], QImode))) - && peep2_regno_dead_p (0, FLAGS_REG)" - [(parallel [(set (match_dup 0) - (xor:QI (match_dup 1) (const_int -1))) - (clobber (reg:CC FLAGS_REG))])] - "") - ;; Non pairable "test imm, reg" instructions can be translated to ;; "and imm, reg" if reg dies. The "and" form is also shorter (one ;; byte opcode instead of two, have a short form for byte operands), @@ -16744,8 +16631,7 @@ (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3)) (const_int 0)])) (set (match_dup 2) - (and:SI (match_dup 2) (match_dup 3)))])] - "") + (and:SI (match_dup 2) (match_dup 3)))])]) ;; We don't need to handle HImode case, because it will be promoted to SImode ;; on ! TARGET_PARTIAL_REG_STALL @@ -16765,8 +16651,7 @@ (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3)) (const_int 0)])) (set (match_dup 2) - (and:QI (match_dup 2) (match_dup 3)))])] - "") + (and:QI (match_dup 2) (match_dup 3)))])]) (define_peephole2 [(set (match_operand 0 "flags_reg_operand" "") @@ -16799,8 +16684,7 @@ (match_dup 2) (const_int 8) (const_int 8)) - (match_dup 3)))])] - "") + (match_dup 3)))])]) ;; Don't do logical operations with memory inputs. (define_peephole2 @@ -16814,8 +16698,7 @@ [(set (match_dup 2) (match_dup 1)) (parallel [(set (match_dup 0) (match_op_dup 3 [(match_dup 0) (match_dup 2)])) - (clobber (reg:CC FLAGS_REG))])] - "") + (clobber (reg:CC FLAGS_REG))])]) (define_peephole2 [(match_scratch:SI 2 "r") @@ -16828,8 +16711,7 @@ [(set (match_dup 2) (match_dup 1)) (parallel [(set (match_dup 0) (match_op_dup 3 [(match_dup 2) (match_dup 0)])) - (clobber (reg:CC FLAGS_REG))])] - "") + (clobber (reg:CC FLAGS_REG))])]) ;; Prefer Load+RegOp to Mov+MemOp. 
Watch out for cases when the memory address ;; refers to the destination of the load! @@ -16863,8 +16745,7 @@ || (SSE_REG_P (operands[0]) && SSE_REG_P (operands[1])))" [(set (match_dup 0) (match_dup 2)) (set (match_dup 0) - (match_op_dup 3 [(match_dup 0) (match_dup 1)]))] - "") + (match_op_dup 3 [(match_dup 0) (match_dup 1)]))]) ; Don't do logical operations with memory outputs ; @@ -16886,8 +16767,7 @@ (parallel [(set (match_dup 2) (match_op_dup 3 [(match_dup 2) (match_dup 1)])) (clobber (reg:CC FLAGS_REG))]) - (set (match_dup 0) (match_dup 2))] - "") + (set (match_dup 0) (match_dup 2))]) (define_peephole2 [(match_scratch:SI 2 "r") @@ -16903,8 +16783,7 @@ (parallel [(set (match_dup 2) (match_op_dup 3 [(match_dup 1) (match_dup 2)])) (clobber (reg:CC FLAGS_REG))]) - (set (match_dup 0) (match_dup 2))] - "") + (set (match_dup 0) (match_dup 2))]) ;; Attempt to always use XOR for zeroing registers. (define_peephole2 @@ -16916,9 +16795,7 @@ && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 0) (const_int 0)) (clobber (reg:CC FLAGS_REG))])] -{ - operands[0] = gen_lowpart (word_mode, operands[0]); -}) + "operands[0] = gen_lowpart (word_mode, operands[0]);") (define_peephole2 [(set (strict_low_part (match_operand 0 "register_operand" "")) @@ -16930,74 +16807,57 @@ [(parallel [(set (strict_low_part (match_dup 0)) (const_int 0)) (clobber (reg:CC FLAGS_REG))])]) -;; For HI and SI modes, or $-1,reg is smaller than mov $-1,reg. +;; For HI, SI and DI modes, or $-1,reg is smaller than mov $-1,reg. 
(define_peephole2 - [(set (match_operand 0 "register_operand" "") + [(set (match_operand:SWI248 0 "register_operand" "") (const_int -1))] - "(GET_MODE (operands[0]) == HImode - || GET_MODE (operands[0]) == SImode - || (GET_MODE (operands[0]) == DImode && TARGET_64BIT)) - && (optimize_insn_for_size_p () || TARGET_MOVE_M1_VIA_OR) + "(optimize_insn_for_size_p () || TARGET_MOVE_M1_VIA_OR) && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 0) (const_int -1)) (clobber (reg:CC FLAGS_REG))])] - "operands[0] = gen_lowpart (GET_MODE (operands[0]) == DImode ? DImode : SImode, - operands[0]);") +{ + if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (SImode)) + operands[0] = gen_lowpart (SImode, operands[0]); +}) -;; Attempt to convert simple leas to adds. These can be created by -;; move expanders. +;; Attempt to convert simple lea to add/shift. +;; These can be created by move expanders. + (define_peephole2 - [(set (match_operand:SI 0 "register_operand" "") - (plus:SI (match_dup 0) - (match_operand:SI 1 "nonmemory_operand" "")))] + [(set (match_operand:SWI48 0 "register_operand" "") + (plus:SWI48 (match_dup 0) + (match_operand:SWI48 1 "" "")))] "peep2_regno_dead_p (0, FLAGS_REG)" - [(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 1))) - (clobber (reg:CC FLAGS_REG))])] - "") + [(parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 1))) + (clobber (reg:CC FLAGS_REG))])]) (define_peephole2 [(set (match_operand:SI 0 "register_operand" "") (subreg:SI (plus:DI (match_operand:DI 1 "register_operand" "") (match_operand:DI 2 "nonmemory_operand" "")) 0))] - "peep2_regno_dead_p (0, FLAGS_REG) && REGNO (operands[0]) == REGNO (operands[1])" + "TARGET_64BIT + && peep2_regno_dead_p (0, FLAGS_REG) + && REGNO (operands[0]) == REGNO (operands[1])" [(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 2))) (clobber (reg:CC FLAGS_REG))])] "operands[2] = gen_lowpart (SImode, operands[2]);") (define_peephole2 - [(set (match_operand:DI 0 
"register_operand" "") - (plus:DI (match_dup 0) - (match_operand:DI 1 "x86_64_general_operand" "")))] - "peep2_regno_dead_p (0, FLAGS_REG)" - [(parallel [(set (match_dup 0) (plus:DI (match_dup 0) (match_dup 1))) - (clobber (reg:CC FLAGS_REG))])] - "") - -(define_peephole2 - [(set (match_operand:SI 0 "register_operand" "") - (mult:SI (match_dup 0) - (match_operand:SI 1 "const_int_operand" "")))] + [(set (match_operand:SWI48 0 "register_operand" "") + (mult:SWI48 (match_dup 0) + (match_operand:SWI48 1 "const_int_operand" "")))] "exact_log2 (INTVAL (operands[1])) >= 0 && peep2_regno_dead_p (0, FLAGS_REG)" - [(parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2))) + [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 2))) (clobber (reg:CC FLAGS_REG))])] "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1])));") (define_peephole2 - [(set (match_operand:DI 0 "register_operand" "") - (mult:DI (match_dup 0) - (match_operand:DI 1 "const_int_operand" "")))] - "exact_log2 (INTVAL (operands[1])) >= 0 - && peep2_regno_dead_p (0, FLAGS_REG)" - [(parallel [(set (match_dup 0) (ashift:DI (match_dup 0) (match_dup 2))) - (clobber (reg:CC FLAGS_REG))])] - "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1])));") - -(define_peephole2 [(set (match_operand:SI 0 "register_operand" "") (subreg:SI (mult:DI (match_operand:DI 1 "register_operand" "") (match_operand:DI 2 "const_int_operand" "")) 0))] - "exact_log2 (INTVAL (operands[2])) >= 0 + "TARGET_64BIT + && exact_log2 (INTVAL (operands[2])) >= 0 && REGNO (operands[0]) == REGNO (operands[1]) && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2))) @@ -17171,23 +17031,23 @@ ;; Convert imul by three, five and nine into lea (define_peephole2 [(parallel - [(set (match_operand:SI 0 "register_operand" "") - (mult:SI (match_operand:SI 1 "register_operand" "") - (match_operand:SI 2 "const_int_operand" ""))) + [(set (match_operand:SWI48 0 "register_operand" 
"") + (mult:SWI48 (match_operand:SWI48 1 "register_operand" "") + (match_operand:SWI48 2 "const_int_operand" ""))) (clobber (reg:CC FLAGS_REG))])] "INTVAL (operands[2]) == 3 || INTVAL (operands[2]) == 5 || INTVAL (operands[2]) == 9" [(set (match_dup 0) - (plus:SI (mult:SI (match_dup 1) (match_dup 2)) - (match_dup 1)))] + (plus:SWI48 (mult:SWI48 (match_dup 1) (match_dup 2)) + (match_dup 1)))] "operands[2] = GEN_INT (INTVAL (operands[2]) - 1);") (define_peephole2 [(parallel - [(set (match_operand:SI 0 "register_operand" "") - (mult:SI (match_operand:SI 1 "nonimmediate_operand" "") - (match_operand:SI 2 "const_int_operand" ""))) + [(set (match_operand:SWI48 0 "register_operand" "") + (mult:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "") + (match_operand:SWI48 2 "const_int_operand" ""))) (clobber (reg:CC FLAGS_REG))])] "optimize_insn_for_speed_p () && (INTVAL (operands[2]) == 3 @@ -17195,136 +17055,60 @@ || INTVAL (operands[2]) == 9)" [(set (match_dup 0) (match_dup 1)) (set (match_dup 0) - (plus:SI (mult:SI (match_dup 0) (match_dup 2)) - (match_dup 0)))] + (plus:SWI48 (mult:SWI48 (match_dup 0) (match_dup 2)) + (match_dup 0)))] "operands[2] = GEN_INT (INTVAL (operands[2]) - 1);") -(define_peephole2 - [(parallel - [(set (match_operand:DI 0 "register_operand" "") - (mult:DI (match_operand:DI 1 "register_operand" "") - (match_operand:DI 2 "const_int_operand" ""))) - (clobber (reg:CC FLAGS_REG))])] - "TARGET_64BIT - && (INTVAL (operands[2]) == 3 - || INTVAL (operands[2]) == 5 - || INTVAL (operands[2]) == 9)" - [(set (match_dup 0) - (plus:DI (mult:DI (match_dup 1) (match_dup 2)) - (match_dup 1)))] - "operands[2] = GEN_INT (INTVAL (operands[2]) - 1);") - -(define_peephole2 - [(parallel - [(set (match_operand:DI 0 "register_operand" "") - (mult:DI (match_operand:DI 1 "nonimmediate_operand" "") - (match_operand:DI 2 "const_int_operand" ""))) - (clobber (reg:CC FLAGS_REG))])] - "TARGET_64BIT - && optimize_insn_for_speed_p () - && (INTVAL (operands[2]) == 3 - || INTVAL 
(operands[2]) == 5 - || INTVAL (operands[2]) == 9)" - [(set (match_dup 0) (match_dup 1)) - (set (match_dup 0) - (plus:DI (mult:DI (match_dup 0) (match_dup 2)) - (match_dup 0)))] - "operands[2] = GEN_INT (INTVAL (operands[2]) - 1);") - -;; Imul $32bit_imm, mem, reg is vector decoded, while +;; imul $32bit_imm, mem, reg is vector decoded, while ;; imul $32bit_imm, reg, reg is direct decoded. (define_peephole2 - [(match_scratch:DI 3 "r") - (parallel [(set (match_operand:DI 0 "register_operand" "") - (mult:DI (match_operand:DI 1 "memory_operand" "") - (match_operand:DI 2 "immediate_operand" ""))) + [(match_scratch:SWI48 3 "r") + (parallel [(set (match_operand:SWI48 0 "register_operand" "") + (mult:SWI48 (match_operand:SWI48 1 "memory_operand" "") + (match_operand:SWI48 2 "immediate_operand" ""))) (clobber (reg:CC FLAGS_REG))])] "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p () && !satisfies_constraint_K (operands[2])" [(set (match_dup 3) (match_dup 1)) - (parallel [(set (match_dup 0) (mult:DI (match_dup 3) (match_dup 2))) - (clobber (reg:CC FLAGS_REG))])] - "") + (parallel [(set (match_dup 0) (mult:SWI48 (match_dup 3) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])]) (define_peephole2 [(match_scratch:SI 3 "r") - (parallel [(set (match_operand:SI 0 "register_operand" "") - (mult:SI (match_operand:SI 1 "memory_operand" "") - (match_operand:SI 2 "immediate_operand" ""))) - (clobber (reg:CC FLAGS_REG))])] - "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p () - && !satisfies_constraint_K (operands[2])" - [(set (match_dup 3) (match_dup 1)) - (parallel [(set (match_dup 0) (mult:SI (match_dup 3) (match_dup 2))) - (clobber (reg:CC FLAGS_REG))])] - "") - -(define_peephole2 - [(match_scratch:SI 3 "r") (parallel [(set (match_operand:DI 0 "register_operand" "") (zero_extend:DI (mult:SI (match_operand:SI 1 "memory_operand" "") (match_operand:SI 2 "immediate_operand" "")))) (clobber (reg:CC FLAGS_REG))])] - "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p 
() + "TARGET_64BIT + && TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p () && !satisfies_constraint_K (operands[2])" [(set (match_dup 3) (match_dup 1)) (parallel [(set (match_dup 0) (zero_extend:DI (mult:SI (match_dup 3) (match_dup 2)))) - (clobber (reg:CC FLAGS_REG))])] - "") + (clobber (reg:CC FLAGS_REG))])]) ;; imul $8/16bit_imm, regmem, reg is vector decoded. ;; Convert it into imul reg, reg ;; It would be better to force assembler to encode instruction using long ;; immediate, but there is apparently no way to do so. (define_peephole2 - [(parallel [(set (match_operand:DI 0 "register_operand" "") - (mult:DI (match_operand:DI 1 "nonimmediate_operand" "") - (match_operand:DI 2 "const_int_operand" ""))) + [(parallel [(set (match_operand:SWI248 0 "register_operand" "") + (mult:SWI248 + (match_operand:SWI248 1 "nonimmediate_operand" "") + (match_operand:SWI248 2 "const_int_operand" ""))) (clobber (reg:CC FLAGS_REG))]) - (match_scratch:DI 3 "r")] + (match_scratch:SWI248 3 "r")] "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p () && satisfies_constraint_K (operands[2])" [(set (match_dup 3) (match_dup 2)) - (parallel [(set (match_dup 0) (mult:DI (match_dup 0) (match_dup 3))) + (parallel [(set (match_dup 0) (mult:SWI248 (match_dup 0) (match_dup 3))) (clobber (reg:CC FLAGS_REG))])] { if (!rtx_equal_p (operands[0], operands[1])) emit_move_insn (operands[0], operands[1]); }) -(define_peephole2 - [(parallel [(set (match_operand:SI 0 "register_operand" "") - (mult:SI (match_operand:SI 1 "nonimmediate_operand" "") - (match_operand:SI 2 "const_int_operand" ""))) - (clobber (reg:CC FLAGS_REG))]) - (match_scratch:SI 3 "r")] - "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p () - && satisfies_constraint_K (operands[2])" - [(set (match_dup 3) (match_dup 2)) - (parallel [(set (match_dup 0) (mult:SI (match_dup 0) (match_dup 3))) - (clobber (reg:CC FLAGS_REG))])] -{ - if (!rtx_equal_p (operands[0], operands[1])) - emit_move_insn (operands[0], operands[1]); -}) - 
-(define_peephole2 - [(parallel [(set (match_operand:HI 0 "register_operand" "") - (mult:HI (match_operand:HI 1 "nonimmediate_operand" "") - (match_operand:HI 2 "immediate_operand" ""))) - (clobber (reg:CC FLAGS_REG))]) - (match_scratch:HI 3 "r")] - "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p ()" - [(set (match_dup 3) (match_dup 2)) - (parallel [(set (match_dup 0) (mult:HI (match_dup 0) (match_dup 3))) - (clobber (reg:CC FLAGS_REG))])] -{ - if (!rtx_equal_p (operands[0], operands[1])) - emit_move_insn (operands[0], operands[1]); -}) - ;; After splitting up read-modify operations, array accesses with memory ;; operands might end up in form: ;; sall $2, %eax