Patchwork PATCH: PR target/44588: Very inefficient 8bit mod/div

login
register
mail settings
Submitter H.J. Lu
Date June 21, 2010, 7:33 p.m.
Message ID <20100621193321.GA13780@intel.com>
Download mbox | patch
Permalink /patch/56340/
State New
Headers show

Comments

H.J. Lu - June 21, 2010, 7:33 p.m.
Hi,

This patch adds 8bit divmov pattern for x86. X86 8bit divide
instructions return result in AX with

AL <- Quotient
AH <- Remainder

This patch models it and properly extends quotient. Tested
on Intel64 with -m64 and -m32.  There are no regressions.
OK for trunk?

BTW, there is only one divb used in subreg_get_info in
gcc compilers. The old code is

        movzbl  mode_size(%r13), %edi
        movzbl  mode_size(%r14), %esi
        xorl    %edx, %edx
        movl    %edi, %eax
        divw    %si
        testw   %dx, %dx
        jne     .L1194

The new one is

        movzbl  mode_size(%r13), %edi
        movl    %edi, %eax
        divb    mode_size(%r14)
        movzbl  %ah, %eax
        testb   %al, %al
        jne     .L1194

Thanks.


H.J.
---
gcc/

2010-06-21  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/44588
	* config/i386/i386.md (UNSPEC_MOVQI_EXTZV): New.
	(any_div_code): Likewise.
	(extend_code): Likewise.
	(extract_code): Likewise.
	(*movqi_extzv_3): Likewise.
	(*movqi_extzv_3_rex64): Likewise.
	(*movqi_extzv): Likewise.
	(<u>divmodqi4): Likewise.
	(*<u>divqi3): Likewise.
	(<u>divqi3): Remvoved.

gcc/testsuite/

2010-06-21  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/44588
	* gcc.target/i386/umod-1.c: New.
	* gcc.target/i386/umod-2.c: Likewise.
	* gcc.target/i386/umod-3.c: Likewise.
Uros Bizjak - June 22, 2010, 6:05 p.m.
On Mon, 2010-06-21 at 12:33 -0700, H.J. Lu wrote:
> Hi,
> 
> This patch adds 8bit divmov pattern for x86. X86 8bit divide
> instructions return result in AX with
> 
> AL <- Quotient
> AH <- Remainder
> 
> This patch models it and properly extends quotient. Tested
> on Intel64 with -m64 and -m32.  There are no regressions.
> OK for trunk?
> 
> BTW, there is only one divb used in subreg_get_info in
> gcc compilers. The old code is
> 
>         movzbl  mode_size(%r13), %edi
>         movzbl  mode_size(%r14), %esi
>         xorl    %edx, %edx
>         movl    %edi, %eax
>         divw    %si
>         testw   %dx, %dx
>         jne     .L1194
> 
> The new one is
> 
>         movzbl  mode_size(%r13), %edi
>         movl    %edi, %eax
>         divb    mode_size(%r14)
>         movzbl  %ah, %eax
>         testb   %al, %al
>         jne     .L1194
> 

Hm, something is not combined correctly, I'd say "testb %ah, %ah" is
optimal in the second case.

Uros.

Patch

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 777f8e7..2b5cdc0 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -104,6 +104,7 @@ 
   UNSPEC_REP
   UNSPEC_LD_MPIC	; load_macho_picbase
   UNSPEC_TRUNC_NOOP
+  UNSPEC_MOVQI_EXTZV
 
   ;; For SSE/MMX support:
   UNSPEC_FIX_NOTRUNC
@@ -760,6 +761,11 @@ 
 
 ;; Used in signed and unsigned divisions.
 (define_code_iterator any_div [div udiv])
+(define_code_attr any_div_code [(div "DIV") (udiv "UDIV")])
+(define_code_attr extend_code
+  [(div "SIGN_EXTEND") (udiv "ZERO_EXTEND")])
+(define_code_attr extract_code
+  [(div "SIGN_EXTRACT") (udiv "ZERO_EXTRACT")])
 
 ;; Instruction prefix for signed and unsigned operations.
 (define_code_attr sgnprefix [(sign_extend "i") (zero_extend "")
@@ -2295,6 +2301,86 @@ 
 	(const_string "SI")
 	(const_string "QI")))])
 
+(define_insn "*movqi_extzv_3"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?R")
+	(zero_extract:QI (match_operand 1 "ext_register_operand" "Q,Q")
+			 (const_int 8)
+			 (const_int 8)))]
+  "!TARGET_64BIT"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";
+    default:
+      return "mov{b}\t{%h1, %0|%0, %h1}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (and (match_operand:QI 0 "register_operand" "")
+			(ior (not (match_operand:QI 0 "q_regs_operand" ""))
+			     (ne (symbol_ref "TARGET_MOVX")
+				 (const_int 0))))
+	(const_string "imovx")
+	(const_string "imov")))
+   (set (attr "mode")
+     (if_then_else (eq_attr "type" "imovx")
+	(const_string "SI")
+	(const_string "QI")))])
+
+(define_insn "*movqi_extzv_3_rex64"
+  [(set (match_operand:QI 0 "register_operand" "=Q,?R")
+	(zero_extract:QI (match_operand 1 "ext_register_operand" "Q,Q")
+			 (const_int 8)
+			 (const_int 8)))]
+  "TARGET_64BIT"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";
+    default:
+      return "mov{b}\t{%h1, %0|%0, %h1}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (ior (not (match_operand:QI 0 "q_regs_operand" ""))
+			(ne (symbol_ref "TARGET_MOVX")
+			    (const_int 0)))
+	(const_string "imovx")
+	(const_string "imov")))
+   (set (attr "mode")
+     (if_then_else (eq_attr "type" "imovx")
+	(const_string "SI")
+	(const_string "QI")))])
+
+;; Use unspec so that it won't be removed unless the result is unused.
+(define_insn "*movqi_extzv"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?R")
+	(unspec:QI [(match_operand 1 "ext_register_operand" "Q,Q")]
+	  UNSPEC_MOVQI_EXTZV))]
+  ""
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      return "movz{bl|x}\t{%b1, %k0|%k0, %b1}";
+    default:
+      return "mov{b}\t{%b1, %0|%0, %b1}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (and (match_operand:QI 0 "register_operand" "")
+			(ior (not (match_operand:QI 0 "q_regs_operand" ""))
+			     (ne (symbol_ref "TARGET_MOVX")
+				 (const_int 0))))
+	(const_string "imovx")
+	(const_string "imov")))
+   (set (attr "mode")
+     (if_then_else (eq_attr "type" "imovx")
+	(const_string "SI")
+	(const_string "QI")))])
+
 (define_insn "movsi_insv_1"
   [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
 			 (const_int 8)
@@ -7556,17 +7642,6 @@ 
 
 ;; Divide instructions
 
-(define_insn "<u>divqi3"
-  [(set (match_operand:QI 0 "register_operand" "=a")
-	(any_div:QI
-	  (match_operand:HI 1 "register_operand" "0")
-	  (match_operand:QI 2 "nonimmediate_operand" "qm")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_QIMODE_MATH"
-  "<sgnprefix>div{b}\t%2"
-  [(set_attr "type" "idiv")
-   (set_attr "mode" "QI")])
-
 ;; The patterns that match these are at the end of this file.
 
 (define_expand "divxf3"
@@ -7603,6 +7678,63 @@ 
 
 ;; Divmod instructions.
 
+(define_expand "<u>divmodqi4"
+  [(parallel [(set (match_operand:QI 0 "register_operand" "")
+		   (any_div:QI
+		     (match_operand:QI 1 "register_operand" "")
+		     (match_operand:QI 2 "nonimmediate_operand" "")))
+	      (set (match_operand:QI 3 "register_operand" "")
+		   (mod:QI (match_dup 1) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_QIMODE_MATH"
+{
+  rtx div, clobber, set;
+  rtx operand0, operand1;
+  
+  operand0 = gen_reg_rtx (HImode);
+  operand1 = gen_reg_rtx (HImode);
+  clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+
+  /* Properly extend operands[1] to HImode.  */
+  set = gen_rtx_SET (VOIDmode, operand1,
+		     gen_rtx_<extend_code> (HImode, operands[1]));
+  if (<extend_code> == SIGN_EXTEND)
+    emit_insn (set);
+  else
+    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
+
+  /* Generate 8bit divide.  Result is in AX.  */
+  div = gen_rtx_SET (VOIDmode, operand0,
+		     gen_rtx_<any_div_code> (HImode, operand1,
+					     operands[2]));
+  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, div, clobber)));
+
+  /* Extract remainder from AH.  */
+  operand1 = gen_rtx_<extract_code> (QImode, operand0,
+				     GEN_INT (8), GEN_INT (8));
+  emit_move_insn (operands[3], operand1);
+
+  /* Extract quotient from AL.  It can't be accessed as AX.  */
+  operand0 = gen_rtx_UNSPEC (QImode, gen_rtvec (1, operand0), 
+			     UNSPEC_MOVQI_EXTZV);
+  emit_move_insn (operands[0], operand0);
+  DONE;
+})
+
+;; Divide AX by r/m8, with result stored in
+;; AL <- Quotient
+;; AH <- Remainder
+(define_insn "*<u>divqi3"
+  [(set (match_operand:HI 0 "register_operand" "=a")
+	(any_div:HI
+	  (match_operand:HI 1 "register_operand" "0")
+	  (match_operand:QI 2 "nonimmediate_operand" "qm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_QIMODE_MATH"
+  "<sgnprefix>div{b}\t%2"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "QI")])
+
 (define_expand "divmod<mode>4"
   [(parallel [(set (match_operand:SWIM248 0 "register_operand" "")
 		   (div:SWIM248
--- /dev/null	2010-06-16 10:11:06.602750711 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/umod-1.c	2010-06-21 12:01:25.705950180 -0700
@@ -0,0 +1,11 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=atom" } */
+
+unsigned char
+foo (unsigned char x, unsigned char y)
+{
+  return x % y;
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-not "divw" } } */
--- /dev/null	2010-06-16 10:11:06.602750711 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/umod-2.c	2010-06-21 12:01:17.857932744 -0700
@@ -0,0 +1,14 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=atom" } */
+
+extern unsigned char z;
+
+unsigned char
+foo (unsigned char x, unsigned char y)
+{
+  z = x/y;
+  return x % y;
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-not "divw" } } */
--- /dev/null	2010-06-16 10:11:06.602750711 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/umod-3.c	2010-06-21 12:02:36.809962702 -0700
@@ -0,0 +1,21 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=atom" } */
+
+extern void abort (void);
+extern void exit (int);
+
+unsigned char cx = 7;
+
+int
+main ()
+{
+  unsigned char cy;
+  
+  cy = cx / 6; if (cy != 1) abort ();
+  cy = cx % 6; if (cy != 1) abort ();
+
+  exit(0);
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-not "divw" } } */