Patchwork PATCH: PR target/44588: Very inefficient 8bit mod/div

login
register
mail settings
Submitter H.J. Lu
Date June 22, 2010, 4:41 p.m.
Message ID <AANLkTikfrrQdVLuV1HNUcnIjyDzOUS0j2rYSHB-F178F@mail.gmail.com>
Download mbox | patch
Permalink /patch/56528/
State New
Headers show

Comments

H.J. Lu - June 22, 2010, 4:41 p.m.
On Mon, Jun 21, 2010 at 12:33 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
> Hi,
>
> This patch adds 8bit divmov pattern for x86. X86 8bit divide
> instructions return result in AX with
>
> AL <- Quotient
> AH <- Remainder
>
> This patch models it and properly extends quotient. Tested
> on Intel64 with -m64 and -m32.  There are no regressions.
> OK for trunk?
>
> BTW, there is only one divb used in subreg_get_info in
> gcc compilers. The old code is
>
>        movzbl  mode_size(%r13), %edi
>        movzbl  mode_size(%r14), %esi
>        xorl    %edx, %edx
>        movl    %edi, %eax
>        divw    %si
>        testw   %dx, %dx
>        jne     .L1194
>
> The new one is
>
>        movzbl  mode_size(%r13), %edi
>        movl    %edi, %eax
>        divb    mode_size(%r14)
>        movzbl  %ah, %eax
>        testb   %al, %al
>        jne     .L1194
>
> Thanks.
>
>
> H.J.
> ---
> gcc/
>
> 2010-06-21  H.J. Lu  <hongjiu.lu@intel.com>
>
>        PR target/44588
>        * config/i386/i386.md (UNSPEC_MOVQI_EXTZV): New.
>        (any_div_code): Likewise.
>        (extend_code): Likewise.
>        (extract_code): Likewise.
>        (*movqi_extzv_3): Likewise.
>        (*movqi_extzv_3_rex64): Likewise.
>        (*movqi_extzv): Likewise.
>        (<u>divmodqi4): Likewise.
>        (*<u>divqi3): Likewise.
>        (<u>divqi3): Remvoved.
>
> gcc/testsuite/
>
> 2010-06-21  H.J. Lu  <hongjiu.lu@intel.com>
>
>        PR target/44588
>        * gcc.target/i386/umod-1.c: New.
>        * gcc.target/i386/umod-2.c: Likewise.
>        * gcc.target/i386/umod-3.c: Likewise.
>

Here is the updated patch.  It uses UNSPEC to extract
remainder in AH from AL. If we only need quotient, it
will be in AL.  Now we generate:

[hjl@gnu-6 divb]$ cat umod-2.c
extern unsigned char z;

unsigned char
foo (unsigned char x, unsigned char y)
{
  z = x/y;
   return x % y;
}
[hjl@gnu-6 divb]$ make umod-2.s
/export/build/gnu/gcc/build-x86_64-linux/gcc/xgcc
-B/export/build/gnu/gcc/build-x86_64-linux/gcc/ -Os -S umod-2.c
[hjl@gnu-6 divb]$ cat umod-2.s
	.file	"umod-2.c"
	.text
.globl foo
	.type	foo, @function
foo:
.LFB0:
	.cfi_startproc
	movzbl	%dil, %eax
	divb	%sil
	movb	%al, z(%rip)
	movzbl	%ah, %eax
	ret

OK for trunk?

Thanks.

Patch

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 777f8e7..2960324 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -104,6 +104,7 @@ 
   UNSPEC_REP
   UNSPEC_LD_MPIC	; load_macho_picbase
   UNSPEC_TRUNC_NOOP
+  UNSPEC_MOVQI_EXTZH
 
   ;; For SSE/MMX support:
   UNSPEC_FIX_NOTRUNC
@@ -760,6 +761,11 @@ 
 
 ;; Used in signed and unsigned divisions.
 (define_code_iterator any_div [div udiv])
+(define_code_attr any_div_code [(div "DIV") (udiv "UDIV")])
+(define_code_attr extend_code
+  [(div "SIGN_EXTEND") (udiv "ZERO_EXTEND")])
+(define_code_attr extract_code
+  [(div "SIGN_EXTRACT") (udiv "ZERO_EXTRACT")])
 
 ;; Instruction prefix for signed and unsigned operations.
 (define_code_attr sgnprefix [(sign_extend "i") (zero_extend "")
@@ -2295,6 +2301,59 @@ 
 	(const_string "SI")
 	(const_string "QI")))])
 
+;; Used to extract remainder from AH by 8bit divmod.  Use unspec so
+;; that we can extract it from AL.
+(define_insn "*movqi_extzh"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?R")
+	(unspec:QI [(match_operand 1 "register_operand" "Q,Q")]
+		   UNSPEC_MOVQI_EXTZH))]
+  "!TARGET_64BIT"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";
+    default:
+      return "mov{b}\t{%h1, %0|%0, %h1}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (and (match_operand:QI 0 "register_operand" "")
+			(ior (not (match_operand:QI 0 "q_regs_operand" ""))
+			     (ne (symbol_ref "TARGET_MOVX")
+				 (const_int 0))))
+	(const_string "imovx")
+	(const_string "imov")))
+   (set (attr "mode")
+     (if_then_else (eq_attr "type" "imovx")
+	(const_string "SI")
+	(const_string "QI")))])
+
+(define_insn "*movqi_extzh_rex64"
+  [(set (match_operand:QI 0 "register_operand" "=Q,?R")
+	(unspec:QI [(match_operand 1 "register_operand" "Q,Q")]
+		   UNSPEC_MOVQI_EXTZH))]
+  "TARGET_64BIT"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";
+    default:
+      return "mov{b}\t{%h1, %0|%0, %h1}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (ior (not (match_operand:QI 0 "q_regs_operand" ""))
+			(ne (symbol_ref "TARGET_MOVX")
+			    (const_int 0)))
+	(const_string "imovx")
+	(const_string "imov")))
+   (set (attr "mode")
+     (if_then_else (eq_attr "type" "imovx")
+	(const_string "SI")
+	(const_string "QI")))])
+
 (define_insn "movsi_insv_1"
   [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
 			 (const_int 8)
@@ -7556,17 +7615,6 @@ 
 
 ;; Divide instructions
 
-(define_insn "<u>divqi3"
-  [(set (match_operand:QI 0 "register_operand" "=a")
-	(any_div:QI
-	  (match_operand:HI 1 "register_operand" "0")
-	  (match_operand:QI 2 "nonimmediate_operand" "qm")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_QIMODE_MATH"
-  "<sgnprefix>div{b}\t%2"
-  [(set_attr "type" "idiv")
-   (set_attr "mode" "QI")])
-
 ;; The patterns that match these are at the end of this file.
 
 (define_expand "divxf3"
@@ -7603,6 +7651,58 @@ 
 
 ;; Divmod instructions.
 
+(define_expand "<u>divmodqi4"
+  [(parallel [(set (match_operand:QI 0 "register_operand" "")
+		   (any_div:QI
+		     (match_operand:QI 1 "register_operand" "")
+		     (match_operand:QI 2 "nonimmediate_operand" "")))
+	      (set (match_operand:QI 3 "register_operand" "")
+		   (mod:QI (match_dup 1) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_QIMODE_MATH"
+{
+  rtx div, clobber, set;
+  rtx operand1;
+  
+  operand1 = gen_reg_rtx (HImode);
+  clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+
+  /* Properly extend operands[1] to HImode.  */
+  set = gen_rtx_SET (VOIDmode, operand1,
+		     gen_rtx_<extend_code> (HImode, operands[1]));
+  if (<extend_code> == SIGN_EXTEND)
+    emit_insn (set);
+  else
+    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
+
+  /* Generate 8bit divide.  Result is in AX.  */
+  div = gen_rtx_SET (VOIDmode, operands[0],
+		     gen_rtx_<any_div_code> (QImode, operand1,
+					     operands[2]));
+  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, div, clobber)));
+
+  /* Extract remainder from AH.  Use UNSPEC_MOVQI_EXTZH to extract it
+     from AL.  */
+  operand1 = gen_rtx_UNSPEC (QImode, gen_rtvec (1, operands[0]), 
+			     UNSPEC_MOVQI_EXTZH);
+  emit_move_insn (operands[3], operand1);
+  DONE;
+})
+
+;; Divide AX by r/m8, with result stored in
+;; AL <- Quotient
+;; AH <- Remainder
+(define_insn "*<u>divqi3"
+  [(set (match_operand:QI 0 "register_operand" "=a")
+	(any_div:QI
+	  (match_operand:HI 1 "register_operand" "0")
+	  (match_operand:QI 2 "nonimmediate_operand" "qm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_QIMODE_MATH"
+  "<sgnprefix>div{b}\t%2"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "QI")])
+
 (define_expand "divmod<mode>4"
   [(parallel [(set (match_operand:SWIM248 0 "register_operand" "")
 		   (div:SWIM248
--- /dev/null	2010-06-16 10:11:06.602750711 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/umod-1.c	2010-06-21 12:01:25.705950180 -0700
@@ -0,0 +1,11 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=atom" } */
+
+unsigned char
+foo (unsigned char x, unsigned char y)
+{
+  return x % y;
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-not "divw" } } */
--- /dev/null	2010-06-16 10:11:06.602750711 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/umod-2.c	2010-06-21 12:01:17.857932744 -0700
@@ -0,0 +1,14 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=atom" } */
+
+extern unsigned char z;
+
+unsigned char
+foo (unsigned char x, unsigned char y)
+{
+  z = x/y;
+  return x % y;
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-not "divw" } } */
--- /dev/null	2010-06-16 10:11:06.602750711 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/umod-3.c	2010-06-21 12:02:36.809962702 -0700
@@ -0,0 +1,21 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=atom" } */
+
+extern void abort (void);
+extern void exit (int);
+
+unsigned char cx = 7;
+
+int
+main ()
+{
+  unsigned char cy;
+  
+  cy = cx / 6; if (cy != 1) abort ();
+  cy = cx % 6; if (cy != 1) abort ();
+
+  exit(0);
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-not "divw" } } */