From patchwork Tue Jun 22 18:27:50 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: PATCH: PR target/44588: Very inefficient 8bit mod/div Date: Tue, 22 Jun 2010 08:27:50 -0000 From: "H.J. Lu" X-Patchwork-Id: 56560 Message-Id: To: Uros Bizjak Cc: gcc-patches@gcc.gnu.org On Tue, Jun 22, 2010 at 11:05 AM, Uros Bizjak wrote: > On Mon, 2010-06-21 at 12:33 -0700, H.J. Lu wrote: >> Hi, >> >> This patch adds 8bit divmov pattern for x86. X86 8bit divide >> instructions return result in AX with >> >> AL <- Quotient >> AH <- Remainder >> >> This patch models it and properly extends quotient. Tested >> on Intel64 with -m64 and -m32.  There are no regressions. >> OK for trunk? >> >> BTW, there is only one divb used in subreg_get_info in >> gcc compilers. The old code is >> >>         movzbl  mode_size(%r13), %edi >>         movzbl  mode_size(%r14), %esi >>         xorl    %edx, %edx >>         movl    %edi, %eax >>         divw    %si >>         testw   %dx, %dx >>         jne     .L1194 >> >> The new one is >> >>         movzbl  mode_size(%r13), %edi >>         movl    %edi, %eax >>         divb    mode_size(%r14) >>         movzbl  %ah, %eax >>         testb   %al, %al >>         jne     .L1194 >> > > Hm, something is not combined correctly, I'd say "testb %ah, %ah" is > optimal in the second case. > Here is another update adjusted for mov pattern changes in i386.md. 8bit result is stored in AL <- Quotient AH <- Remainder If we use AX for quotient in 8bit divmod pattern, we have to make sure that AX is valid for quotient. We have to extend AL with UNSPEC since AH isn't the part of quotient,. Instead, I use AL for quotient and use UNSPEC_MOVQI_EXTZH to extract remainder from AL. Quotient access can be optimized very nicely. If remainder is used, we may have an extract move for UNSPEC_MOVQI_EXTZH. I think this is a reasonable comprise. diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index ab90d73..f268e90 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -104,6 +104,7 @@ UNSPEC_REP UNSPEC_LD_MPIC ; load_macho_picbase UNSPEC_TRUNC_NOOP + UNSPEC_MOVQI_EXTZH ;; For SSE/MMX support: UNSPEC_FIX_NOTRUNC @@ -760,6 +761,11 @@ ;; Used in signed and unsigned divisions. (define_code_iterator any_div [div udiv]) +(define_code_attr any_div_code [(div "DIV") (udiv "UDIV")]) +(define_code_attr extend_code + [(div "SIGN_EXTEND") (udiv "ZERO_EXTEND")]) +(define_code_attr extract_code + [(div "SIGN_EXTRACT") (udiv "ZERO_EXTRACT")]) ;; Instruction prefix for signed and unsigned operations. (define_code_attr sgnprefix [(sign_extend "i") (zero_extend "") @@ -2052,6 +2058,59 @@ (const_string "orig"))) (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,DI,DI,DI,TI,TI,DI,DI,DI,DI,DI,DI")]) +;; Used to extract remainder from AH by 8bit divmod. Use unspec so +;; that we can extract it from AL. +(define_insn "*movqi_extzh" + [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?R") + (unspec:QI [(match_operand 1 "register_operand" "Q,Q")] + UNSPEC_MOVQI_EXTZH))] + "!TARGET_64BIT" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + return "movz{bl|x}\t{%h1, %k0|%k0, %h1}"; + default: + return "mov{b}\t{%h1, %0|%0, %h1}"; + } +} + [(set (attr "type") + (if_then_else (and (match_operand:QI 0 "register_operand" "") + (ior (not (match_operand:QI 0 "q_regs_operand" "")) + (ne (symbol_ref "TARGET_MOVX") + (const_int 0)))) + (const_string "imovx") + (const_string "imov"))) + (set (attr "mode") + (if_then_else (eq_attr "type" "imovx") + (const_string "SI") + (const_string "QI")))]) + +(define_insn "*movqi_extzh_rex64" + [(set (match_operand:QI 0 "register_operand" "=Q,?R") + (unspec:QI [(match_operand 1 "register_operand" "Q,Q")] + UNSPEC_MOVQI_EXTZH))] + "TARGET_64BIT" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + return "movz{bl|x}\t{%h1, %k0|%k0, %h1}"; + default: + return "mov{b}\t{%h1, %0|%0, %h1}"; + } +} + [(set (attr "type") + (if_then_else (ior (not (match_operand:QI 0 "q_regs_operand" "")) + (ne (symbol_ref "TARGET_MOVX") + (const_int 0))) + (const_string "imovx") + (const_string "imov"))) + (set (attr "mode") + (if_then_else (eq_attr "type" "imovx") + (const_string "SI") + (const_string "QI")))]) + ;; Convert impossible stores of immediate to existing instructions. ;; First try to get scratch register and go through it. In case this ;; fails, move by 32bit parts. @@ -7305,17 +7364,6 @@ ;; Divide instructions -(define_insn "divqi3" - [(set (match_operand:QI 0 "register_operand" "=a") - (any_div:QI - (match_operand:HI 1 "register_operand" "0") - (match_operand:QI 2 "nonimmediate_operand" "qm"))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_QIMODE_MATH" - "div{b}\t%2" - [(set_attr "type" "idiv") - (set_attr "mode" "QI")]) - ;; The patterns that match these are at the end of this file. (define_expand "divxf3" @@ -7352,6 +7400,58 @@ ;; Divmod instructions. +(define_expand "divmodqi4" + [(parallel [(set (match_operand:QI 0 "register_operand" "") + (any_div:QI + (match_operand:QI 1 "register_operand" "") + (match_operand:QI 2 "nonimmediate_operand" ""))) + (set (match_operand:QI 3 "register_operand" "") + (mod:QI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_QIMODE_MATH" +{ + rtx div, clobber, set; + rtx operand1; + + operand1 = gen_reg_rtx (HImode); + clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); + + /* Properly extend operands[1] to HImode. */ + set = gen_rtx_SET (VOIDmode, operand1, + gen_rtx_ (HImode, operands[1])); + if ( == SIGN_EXTEND) + emit_insn (set); + else + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber))); + + /* Generate 8bit divide. Result is in AX. */ + div = gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_ (QImode, operand1, + operands[2])); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, div, clobber))); + + /* Extract remainder from AH. Use UNSPEC_MOVQI_EXTZH to extract it + from AL. */ + operand1 = gen_rtx_UNSPEC (QImode, gen_rtvec (1, operands[0]), + UNSPEC_MOVQI_EXTZH); + emit_move_insn (operands[3], operand1); + DONE; +}) + +;; Divide AX by r/m8, with result stored in +;; AL <- Quotient +;; AH <- Remainder +(define_insn "*divqi3" + [(set (match_operand:QI 0 "register_operand" "=a") + (any_div:QI + (match_operand:HI 1 "register_operand" "0") + (match_operand:QI 2 "nonimmediate_operand" "qm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_QIMODE_MATH" + "div{b}\t%2" + [(set_attr "type" "idiv") + (set_attr "mode" "QI")]) + (define_expand "divmod4" [(parallel [(set (match_operand:SWIM248 0 "register_operand" "") (div:SWIM248 --- /dev/null 2010-06-16 10:11:06.602750711 -0700 +++ gcc/gcc/testsuite/gcc.target/i386/umod-1.c 2010-06-21 12:01:25.705950180 -0700 @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mtune=atom" } */ + +unsigned char +foo (unsigned char x, unsigned char y) +{ + return x % y; +} + +/* { dg-final { scan-assembler-times "divb" 1 } } */ +/* { dg-final { scan-assembler-not "divw" } } */ --- /dev/null 2010-06-16 10:11:06.602750711 -0700 +++ gcc/gcc/testsuite/gcc.target/i386/umod-2.c 2010-06-21 12:01:17.857932744 -0700 @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mtune=atom" } */ + +extern unsigned char z; + +unsigned char +foo (unsigned char x, unsigned char y) +{ + z = x/y; + return x % y; +} + +/* { dg-final { scan-assembler-times "divb" 1 } } */ +/* { dg-final { scan-assembler-not "divw" } } */ --- /dev/null 2010-06-16 10:11:06.602750711 -0700 +++ gcc/gcc/testsuite/gcc.target/i386/umod-3.c 2010-06-21 12:02:36.809962702 -0700 @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mtune=atom" } */ + +extern void abort (void); +extern void exit (int); + +unsigned char cx = 7; + +int +main () +{ + unsigned char cy; + + cy = cx / 6; if (cy != 1) abort (); + cy = cx % 6; if (cy != 1) abort (); + + exit(0); +} + +/* { dg-final { scan-assembler-times "divb" 1 } } */ +/* { dg-final { scan-assembler-not "divw" } } */