Patchwork PATCH: PR target/44588: Very inefficient 8bit mod/div

login
register
mail settings
Submitter H.J. Lu
Date June 23, 2010, 9:22 p.m.
Message ID <AANLkTikscW1MhNkymw9o24JxT5xDieWTuRES9R2Cn9k2@mail.gmail.com>
Download mbox | patch
Permalink /patch/56722/
State New
Headers show

Comments

H.J. Lu - June 23, 2010, 9:22 p.m.
On Wed, Jun 23, 2010 at 12:50 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Wed, Jun 23, 2010 at 12:36 PM, Paolo Bonzini <bonzini@gnu.org> wrote:
>> On 06/23/2010 09:35 PM, H.J. Lu wrote:
>>>
>>> On Wed, Jun 23, 2010 at 12:16 PM, Paolo Bonzini<bonzini@gnu.org>  wrote:
>>>>
>>>> On 06/23/2010 08:24 PM, H.J. Lu wrote:
>>>>>
>>>>>  [(set (match_operand:HI 0 "register_operand" "=a")
>>>>>         (div:HI
>>>>>           (match_operand:HI 1 "register_operand" "0")
>>>>>           (match_operand:QI 2 "nonimmediate_operand" "qm")))
>>>>>    (clobber (reg:CC FLAGS_REG))]
>>>>
>>>> Maybe this:
>>>>
>>>> [(set (match_operand:QI 0 "register_operand" "=a")
>>>>   (subreg:QI
>>>>    (div:HI
>>>>     (match_operand:HI 1 "register_operand" "0")
>>>>     (match_operand:QI 2 "nonimmediate_operand" "qm")) 0))
>>>>  (clobber (reg:CC FLAGS_REG))]
>>>>
>>>
>>> It doesn't make a big difference since only lower 8bit
>>> is set by it.
>>
>> For full divmod you have to shift mod and ior of course.  I understood that
>> you were talking of *<u>divqi3.
>>
>
> I can't do shift/ior on QI output from *<u>divqi3. I have
>
> ;; Divide AX by r/m8, with result stored in
> ;; AL <- Quotient
> ;; AH <- Remainder
> (define_insn "*<u>divqi3"
>  [(set (match_operand:QI 0 "register_operand" "=a")
>        (any_div:QI
>          (match_operand:HI 1 "register_operand" "0")
>          (match_operand:QI 2 "nonimmediate_operand" "qm")))
>   (clobber (reg:CC FLAGS_REG))]
>  "TARGET_QIMODE_MATH"
>  "<sgnprefix>div{b}\t%2"
>  [(set_attr "type" "idiv")
>   (set_attr "mode" "QI")])
>
> and use
>
> ;; Used to extract remainder from AH by 8bit divmod.  Use unspec so
> ;; that we can extract it from AL.
> (define_insn "*movqi_extzh"
>  [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?R")
>        (unspec:QI [(match_operand 1 "register_operand" "Q,Q")]
>                   UNSPEC_MOVQI_EXTZH))]
>
> to extract it from AL
>

Here is a different approach.  It uses UNSPEC for 8bit divmod.
The generated code is the same.

Patch

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index ab90d73..1a31109 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -104,6 +104,8 @@ 
   UNSPEC_REP
   UNSPEC_LD_MPIC	; load_macho_picbase
   UNSPEC_TRUNC_NOOP
+  UNSPEC_DIVQI
+  UNSPEC_UDIVQI
 
   ;; For SSE/MMX support:
   UNSPEC_FIX_NOTRUNC
@@ -760,6 +762,10 @@ 
 
 ;; Used in signed and unsigned divisions.
 (define_code_iterator any_div [div udiv])
+(define_code_attr extend_code
+  [(div "SIGN_EXTEND") (udiv "ZERO_EXTEND")])
+(define_code_attr extract_code
+  [(div "SIGN_EXTRACT") (udiv "ZERO_EXTRACT")])
 
 ;; Instruction prefix for signed and unsigned operations.
 (define_code_attr sgnprefix [(sign_extend "i") (zero_extend "")
@@ -2563,6 +2569,59 @@ 
 	(const_string "SI")
 	(const_string "QI")))])
 
+(define_insn "*movqi_extzv_3"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?R")
+	(zero_extract:QI (match_operand 1 "ext_register_operand" "Q,Q")
+			 (const_int 8)
+			 (const_int 8)))]
+  "!TARGET_64BIT"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";
+    default:
+      return "mov{b}\t{%h1, %0|%0, %h1}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (and (match_operand:QI 0 "register_operand" "")
+			(ior (not (match_operand:QI 0 "q_regs_operand" ""))
+			     (ne (symbol_ref "TARGET_MOVX")
+				 (const_int 0))))
+	(const_string "imovx")
+	(const_string "imov")))
+   (set (attr "mode")
+     (if_then_else (eq_attr "type" "imovx")
+	(const_string "SI")
+	(const_string "QI")))])
+
+(define_insn "*movqi_extzv_3_rex64"
+  [(set (match_operand:QI 0 "register_operand" "=Q,?R")
+	(zero_extract:QI (match_operand 1 "ext_register_operand" "Q,Q")
+			 (const_int 8)
+			 (const_int 8)))]
+  "TARGET_64BIT"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOVX:
+      return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";
+    default:
+      return "mov{b}\t{%h1, %0|%0, %h1}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (ior (not (match_operand:QI 0 "q_regs_operand" ""))
+			(ne (symbol_ref "TARGET_MOVX")
+			    (const_int 0)))
+	(const_string "imovx")
+	(const_string "imov")))
+   (set (attr "mode")
+     (if_then_else (eq_attr "type" "imovx")
+	(const_string "SI")
+	(const_string "QI")))])
+
 (define_expand "mov<mode>_insv_1"
   [(set (zero_extract:SWI48 (match_operand 0 "ext_register_operand" "")
 			    (const_int 8)
@@ -7305,17 +7364,6 @@ 
 
 ;; Divide instructions
 
-(define_insn "<u>divqi3"
-  [(set (match_operand:QI 0 "register_operand" "=a")
-	(any_div:QI
-	  (match_operand:HI 1 "register_operand" "0")
-	  (match_operand:QI 2 "nonimmediate_operand" "qm")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_QIMODE_MATH"
-  "<sgnprefix>div{b}\t%2"
-  [(set_attr "type" "idiv")
-   (set_attr "mode" "QI")])
-
 ;; The patterns that match these are at the end of this file.
 
 (define_expand "divxf3"
@@ -7352,6 +7400,76 @@ 
 
 ;; Divmod instructions.
 
+(define_expand "<u>divmodqi4"
+  [(parallel [(set (match_operand:QI 0 "register_operand" "")
+		   (any_div:QI
+		     (match_operand:QI 1 "register_operand" "")
+		     (match_operand:QI 2 "nonimmediate_operand" "")))
+	      (set (match_operand:QI 3 "register_operand" "")
+		   (mod:QI (match_dup 1) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_QIMODE_MATH"
+{
+  rtx div, clobber, set;
+  rtx operand0, operand1;
+  
+  operand0 = gen_reg_rtx (HImode);
+  operand1 = gen_reg_rtx (HImode);
+  clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+
+  /* Properly extend operands[1] to HImode.  */
+  set = gen_rtx_SET (VOIDmode, operand1,
+		     gen_rtx_<extend_code> (HImode, operands[1]));
+  if (<extend_code> == SIGN_EXTEND)
+    emit_insn (set);
+  else
+    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
+
+  /* Generate 8bit divide.  Result is in AX.  */
+  div = gen_rtx_UNSPEC (HImode, gen_rtvec (2, operand1, operands[2]),
+			(<extend_code> == SIGN_EXTEND
+			 ? UNSPEC_DIVQI : UNSPEC_UDIVQI));
+  div = gen_rtx_SET (VOIDmode, operand0, div);
+  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, div, clobber)));
+
+  /* Extract remainder from AH.  */
+  operand1 = gen_rtx_<extract_code> (QImode, operand0,
+				     GEN_INT (8), GEN_INT (8));
+  emit_move_insn (operands[3], operand1);
+
+  /* Extract quotient from AL.  */
+  operand1 = simplify_gen_subreg (QImode, operand0, HImode, 0);
+  emit_move_insn (operands[0], operand1);
+  DONE;
+})
+
+;; Divide AX by r/m8, with result stored in
+;; AL <- Quotient
+;; AH <- Remainder
+(define_insn "*divqi3"
+  [(set (match_operand:HI 0 "register_operand" "=a")
+	(unspec:HI
+	  [(match_operand:HI 1 "register_operand" "0")
+	   (match_operand:QI 2 "nonimmediate_operand" "qm")]
+	  UNSPEC_DIVQI))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_QIMODE_MATH"
+  "idiv{b}\t%2"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "QI")])
+
+(define_insn "*udivqi3"
+  [(set (match_operand:HI 0 "register_operand" "=a")
+	(unspec:HI
+	  [(match_operand:HI 1 "register_operand" "0")
+	   (match_operand:QI 2 "nonimmediate_operand" "qm")]
+	  UNSPEC_UDIVQI))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_QIMODE_MATH"
+  "div{b}\t%2"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "QI")])
+
 (define_expand "divmod<mode>4"
   [(parallel [(set (match_operand:SWIM248 0 "register_operand" "")
 		   (div:SWIM248
--- /dev/null	2010-06-16 10:11:06.602750711 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/umod-1.c	2010-06-21 12:01:25.705950180 -0700
@@ -0,0 +1,11 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=atom" } */
+
+unsigned char
+foo (unsigned char x, unsigned char y)
+{
+  return x % y;
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-not "divw" } } */
--- /dev/null	2010-06-16 10:11:06.602750711 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/umod-2.c	2010-06-21 12:01:17.857932744 -0700
@@ -0,0 +1,14 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=atom" } */
+
+extern unsigned char z;
+
+unsigned char
+foo (unsigned char x, unsigned char y)
+{
+  z = x/y;
+  return x % y;
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-not "divw" } } */
--- /dev/null	2010-06-16 10:11:06.602750711 -0700
+++ gcc/gcc/testsuite/gcc.target/i386/umod-3.c	2010-06-21 12:02:36.809962702 -0700
@@ -0,0 +1,21 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=atom" } */
+
+extern void abort (void);
+extern void exit (int);
+
+unsigned char cx = 7;
+
+int
+main ()
+{
+  unsigned char cy;
+  
+  cy = cx / 6; if (cy != 1) abort ();
+  cy = cx % 6; if (cy != 1) abort ();
+
+  exit(0);
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-not "divw" } } */