Patchwork RFC: PATCH: Add -m8bit-idiv for x86

login
register
mail settings
Submitter H.J. Lu
Date Sept. 16, 2010, 12:40 a.m.
Message ID <AANLkTi=1xtZ9+H+V8Oy6Hqchvtk8=V0GDSBq0vjpkEGF@mail.gmail.com>
Download mbox | patch
Permalink /patch/64930/
State New
Headers show

Comments

H.J. Lu - Sept. 16, 2010, 12:40 a.m.
On Wed, Sep 15, 2010 at 4:08 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Thu, Sep 16, 2010 at 12:37 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>
>>>>>>>> This patch generates 2 idivbs since the optimization is done at RTL
>>>>>>>> expansion. Is there a way to delay this until later when 2 idivls are
>>>>>>>> optimized into 1 idivl and before IRA since this optimization needs
>>>>>>>> a scratch register.
>>>>>>>
>>>>>>> Splitter with && can_create_pseudo_p () split constraint will limit
>>>>>>> splits to pre-regalloc passes, or ...
>>>>>>
>>>>>> try_split doesn't allow any insn of the result matches the original pattern
>>>>>> to avoid infinite loop.
>>>>>
>>>>> So, switch the places of div and mod RTXes in the parallel and provide
>>>>> another divl_1 insn pattern that matches this new parallel.
>>>>>
>>>>
>>>> Here is the updated patch.  I added 2 splitters for each divmod pattern.
>>>> It splits 32bit divmod into
>>>>
>>>> if (dividend and divisor are in [0-255])
>>>>  use 8bit unsigned integer divide
>>>> else
>>>>  use 32bit integer divide
>>>>
>>>> before IRA. It works quite well.  OK for trunk if there are no regressions
>>>> on Linux./ia32 and Linux/x86-64?
>
> Some comments on new code below...
>
>> +extern void ix86_split_idivmod (bool, enum machine_mode, rtx[]);
>
> Can you please add bool as the last argument (this is the way all
> other functions are defined).
>
>> +;; Math-dependant single word integer modes without QImode and HImode.
>> +(define_mode_iterator SWIM48 [SI (DI "TARGET_64BIT")])
>
> This is the same as SWI48, please use SWI48 instead (SI and DI modes
> are not dependant on TARGET_{QI,HI}MODE_MATH).
>
>> +(define_expand "testdi_ccno_1"
>> +  [(set (reg:CCNO FLAGS_REG)
>> +     (compare:CCNO
>> +       (and:DI (match_operand:DI 0 "nonimmediate_operand" "")
>> +               (match_operand:DI 1 "nonmemory_operand" ""))
>> +       (const_int 0)))]
>> +  "TARGET_64BIT"
>> +  "")
>
> Uh, no. There is no DImode immediate operand. You should use
> "x86_64_szext_general_operand" predicate with "&& !(MEM_P
> (operands[0]) && MEM_P (operands[1]))" insn predicate, to match
> *testdi_1 insn.
>
> OK with these changes, but please wait a day or two for eventual
> comments from other people involved in this discussion.

This is the patch I am going to commit in a day or 2.

Thanks.

Patch

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 900b424..d466fa1 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -129,6 +129,7 @@  extern void ix86_split_ashr (rtx *, rtx, enum machine_mode);
 extern void ix86_split_lshr (rtx *, rtx, enum machine_mode);
 extern rtx ix86_find_base_term (rtx);
 extern bool ix86_check_movabs (rtx, int);
+extern void ix86_split_idivmod (enum machine_mode, rtx[], bool);
 
 extern rtx assign_386_stack_local (enum machine_mode, enum ix86_stack_slot);
 extern int ix86_attr_length_immediate_default (rtx, int);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 19d6387..9315a42 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -1985,6 +1985,7 @@  static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
 static void ix86_add_new_builtins (int);
 static rtx ix86_expand_vec_perm_builtin (tree);
 static tree ix86_canonical_va_list_type (tree);
+static void predict_jump (int);
 
 enum ix86_function_specific_strings
 {
@@ -2629,6 +2630,7 @@  ix86_target_string (int isa, int flags, const char *arch, const char *tune,
     { "-msseregparm",			MASK_SSEREGPARM },
     { "-mstack-arg-probe",		MASK_STACK_PROBE },
     { "-mtls-direct-seg-refs",		MASK_TLS_DIRECT_SEG_REFS },
+    { "-m8bit-idiv",			MASK_USE_8BIT_IDIV },
   };
 
   const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
@@ -14651,6 +14653,107 @@  ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
     emit_move_insn (operands[0], dst);
 }
 
+/* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
+   divisor are within the the range [0-255].  */
+
+void
+ix86_split_idivmod (enum machine_mode mode, rtx operands[],
+		    bool signed_p)
+{
+  rtx end_label, qimode_label;
+  rtx insn, div, mod;
+  rtx scratch, tmp0, tmp1, tmp2;
+  rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
+  rtx (*gen_zero_extend) (rtx, rtx);
+  rtx (*gen_test_ccno_1) (rtx, rtx);
+
+  switch (mode)
+    {
+    case SImode:
+      gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
+      gen_test_ccno_1 = gen_testsi_ccno_1;
+      gen_zero_extend = gen_zero_extendqisi2;
+      break;
+    case DImode:
+      gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
+      gen_test_ccno_1 = gen_testdi_ccno_1;
+      gen_zero_extend = gen_zero_extendqidi2;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  end_label = gen_label_rtx ();
+  qimode_label = gen_label_rtx ();
+
+  scratch = gen_reg_rtx (mode);
+
+  /* Use 8bit unsigned divimod if dividend and divisor are within the
+     the range [0-255].  */
+  emit_move_insn (scratch, operands[2]);
+  scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
+				 scratch, 1, OPTAB_DIRECT);
+  emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
+  tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
+  tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
+  tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
+			       gen_rtx_LABEL_REF (VOIDmode, qimode_label),
+			       pc_rtx);
+  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
+  predict_jump (REG_BR_PROB_BASE * 50 / 100);
+  JUMP_LABEL (insn) = qimode_label;
+
+  /* Generate original signed/unsigned divimod.  */
+  div = gen_divmod4_1 (operands[0], operands[1],
+		       operands[2], operands[3]);
+  emit_insn (div);
+
+  /* Branch to the end.  */
+  emit_jump_insn (gen_jump (end_label));
+  emit_barrier ();
+
+  /* Generate 8bit unsigned divide.  */
+  emit_label (qimode_label);
+  /* Don't use operands[0] for result of 8bit divide since not all
+     registers support QImode ZERO_EXTRACT.  */
+  tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
+  tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
+  tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
+  emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
+
+  if (signed_p)
+    {
+      div = gen_rtx_DIV (SImode, operands[2], operands[3]);
+      mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
+    }
+  else
+    {
+      div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
+      mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
+    }
+
+  /* Extract remainder from AH.  */
+  tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
+  if (REG_P (operands[1]))
+    insn = emit_move_insn (operands[1], tmp1);
+  else
+    {
+      /* Need a new scratch register since the old one has result 
+	 of 8bit divide.  */
+      scratch = gen_reg_rtx (mode);
+      emit_move_insn (scratch, tmp1);
+      insn = emit_move_insn (operands[1], scratch);
+    }
+  set_unique_reg_note (insn, REG_EQUAL, mod);
+
+  /* Zero extend quotient from AL.  */
+  tmp1 = gen_lowpart (QImode, tmp0);
+  insn = emit_insn (gen_zero_extend (operands[0], tmp1));
+  set_unique_reg_note (insn, REG_EQUAL, div);
+
+  emit_label (end_label);
+}
+
 #define LEA_SEARCH_THRESHOLD 12
 
 /* Search backward for non-agu definition of register number REGNO1
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 45e82e0..a71cdd0 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -7341,6 +7341,59 @@ 
   [(set_attr "type" "multi")
    (set_attr "mode" "<MODE>")])
 
+;; Split with 8bit unsigned divide:
+;; 	if (dividend an divisor are in [0-255])
+;;	   use 8bit unsigned integer divide
+;;	 else
+;;	   use original integer divide
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand" "")
+	(div:SWI48 (match_operand:SWI48 2 "register_operand" "")
+		    (match_operand:SWI48 3 "nonimmediate_operand" "")))
+   (set (match_operand:SWI48 1 "register_operand" "")
+	(mod:SWI48 (match_dup 2) (match_dup 3)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_8BIT_IDIV
+   && TARGET_QIMODE_MATH
+   && can_create_pseudo_p ()
+   && !optimize_insn_for_size_p ()"
+  [(const_int 0)]
+  "ix86_split_idivmod (<MODE>mode, operands, true); DONE;")
+
+(define_insn_and_split "divmod<mode>4_1"
+  [(set (match_operand:SWI48 1 "register_operand" "=&d")
+	(mod:SWI48 (match_operand:SWI48 2 "register_operand" "0")
+		   (match_operand:SWI48 3 "nonimmediate_operand" "rm")))
+   (set (match_operand:SWI48 0 "register_operand" "=a")
+	(div:SWI48 (match_dup 2) (match_dup 3)))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "#"
+  "reload_completed"
+  [(parallel [(set (match_dup 1)
+		   (ashiftrt:SWI48 (match_dup 4) (match_dup 5)))
+	      (clobber (reg:CC FLAGS_REG))])
+   (parallel [(set (match_dup 0)
+	           (div:SWI48 (match_dup 2) (match_dup 3)))
+	      (set (match_dup 1)
+		   (mod:SWI48 (match_dup 2) (match_dup 3)))
+	      (use (match_dup 1))
+	      (clobber (reg:CC FLAGS_REG))])]
+{
+  operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);
+
+  if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
+    operands[4] = operands[2];
+  else
+    {
+      /* Avoid use of cltd in favor of a mov+shift.  */
+      emit_move_insn (operands[1], operands[2]);
+      operands[4] = operands[1];
+    }
+}
+  [(set_attr "type" "multi")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*divmod<mode>4_noext"
   [(set (match_operand:SWIM248 0 "register_operand" "=a")
 	(div:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
@@ -7386,6 +7439,46 @@ 
   [(set_attr "type" "multi")
    (set_attr "mode" "<MODE>")])
 
+;; Split with 8bit unsigned divide:
+;; 	if (dividend an divisor are in [0-255])
+;;	   use 8bit unsigned integer divide
+;;	 else
+;;	   use original integer divide
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand" "")
+	(udiv:SWI48 (match_operand:SWI48 2 "register_operand" "")
+		    (match_operand:SWI48 3 "nonimmediate_operand" "")))
+   (set (match_operand:SWI48 1 "register_operand" "")
+	(umod:SWI48 (match_dup 2) (match_dup 3)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_8BIT_IDIV
+   && TARGET_QIMODE_MATH
+   && can_create_pseudo_p ()
+   && !optimize_insn_for_size_p ()"
+  [(const_int 0)]
+  "ix86_split_idivmod (<MODE>mode, operands, false); DONE;")
+
+(define_insn_and_split "udivmod<mode>4_1"
+  [(set (match_operand:SWI48 1 "register_operand" "=&d")
+	(umod:SWI48 (match_operand:SWI48 2 "register_operand" "0")
+		    (match_operand:SWI48 3 "nonimmediate_operand" "rm")))
+   (set (match_operand:SWI48 0 "register_operand" "=a")
+	(udiv:SWI48 (match_dup 2) (match_dup 3)))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "#"
+  "reload_completed"
+  [(set (match_dup 1) (const_int 0))
+   (parallel [(set (match_dup 0)
+		   (udiv:SWI48 (match_dup 2) (match_dup 3)))
+	      (set (match_dup 1)
+		   (umod:SWI48 (match_dup 2) (match_dup 3)))
+	      (use (match_dup 1))
+	      (clobber (reg:CC FLAGS_REG))])]
+  ""
+  [(set_attr "type" "multi")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*udivmod<mode>4_noext"
   [(set (match_operand:SWIM248 0 "register_operand" "=a")
 	(udiv:SWIM248 (match_operand:SWIM248 2 "register_operand" "0")
@@ -7440,6 +7533,15 @@ 
   ""
   "")
 
+(define_expand "testdi_ccno_1"
+  [(set (reg:CCNO FLAGS_REG)
+	(compare:CCNO
+	  (and:DI (match_operand:DI 0 "nonimmediate_operand" "")
+		  (match_operand:DI 1 "x86_64_szext_general_operand" ""))
+	  (const_int 0)))]
+  "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "")
+
 (define_insn "*testdi_1"
   [(set (reg FLAGS_REG)
 	(compare
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 5790e76..aa78cdf 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -388,3 +388,7 @@  Support F16C built-in functions and code generation
 mfentry
 Target Report Var(flag_fentry) Init(-1)
 Emit profiling counter call at function entry before prologue.
+
+m8bit-idiv
+Target Report Mask(USE_8BIT_IDIV) Save
+Expand 32bit/64bit integer divide into 8bit unsigned integer divide with run-time check
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index b354382..08d929a 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -602,7 +602,7 @@  Objective-C and Objective-C++ Dialects}.
 -momit-leaf-frame-pointer  -mno-red-zone -mno-tls-direct-seg-refs @gol
 -mcmodel=@var{code-model} -mabi=@var{name} @gol
 -m32  -m64 -mlarge-data-threshold=@var{num} @gol
--msse2avx -mfentry}
+-msse2avx -mfentry -m8bit-idiv}
 
 @emph{IA-64 Options}
 @gccoptlist{-mbig-endian  -mlittle-endian  -mgnu-as  -mgnu-ld  -mno-pic @gol
@@ -12647,6 +12647,16 @@  If profiling is active @option{-pg} put the profiling
 counter call before prologue.
 Note: On x86 architectures the attribute @code{ms_hook_prologue}
 isn't possible at the moment for @option{-mfentry} and @option{-pg}.
+
+@item -m8bit-idiv
+@itemx -mno-8bit-idiv
+@opindex 8bit-idiv
+On some processors, like Intel Atom, 8bit unsigned integer divide is
+much faster than 32bit/64bit integer divide.  This option will generate a
+runt-time check.  If both dividend and divisor are within range of 0
+to 255, 8bit unsigned integer divide will be used instead of
+32bit/64bit integer divide.
+
 @end table
 
 These @samp{-m} switches are supported in addition to the above
diff --git a/gcc/testsuite/gcc.target/i386/divmod-1.c b/gcc/testsuite/gcc.target/i386/divmod-1.c
new file mode 100644
index 0000000..2769a21
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/divmod-1.c
@@ -0,0 +1,30 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+extern void abort (void);
+
+void
+__attribute__((noinline))
+test (int x, int y, int q, int r)
+{
+  if ((x / y) != q || (x % y) != r)
+    abort ();
+}
+
+int
+main ()
+{
+  test (7, 6, 1, 1);
+  test (-7, -6, 1, -1);
+  test (-7, 6, -1, -1);
+  test (7, -6, -1, 1);
+  test (255, 254, 1, 1);
+  test (256, 254, 1, 2);
+  test (256, 256, 1, 0);
+  test (254, 256, 0, 254);
+  test (254, 255, 0, 254);
+  test (254, 1, 254, 0);
+  test (255, 2, 127, 1);
+  test (1, 256, 0, 1);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/divmod-2.c b/gcc/testsuite/gcc.target/i386/divmod-2.c
new file mode 100644
index 0000000..0e73b27
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/divmod-2.c
@@ -0,0 +1,11 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+int
+foo (int x, int y)
+{
+   return x / y;
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-times "idivl" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/divmod-3.c b/gcc/testsuite/gcc.target/i386/divmod-3.c
new file mode 100644
index 0000000..4b84436
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/divmod-3.c
@@ -0,0 +1,11 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+int
+foo (int x, int y)
+{
+   return x % y;
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-times "idivl" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/divmod-4.c b/gcc/testsuite/gcc.target/i386/divmod-4.c
new file mode 100644
index 0000000..7124d7a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/divmod-4.c
@@ -0,0 +1,14 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+extern void abort (void);
+
+void
+test (int x, int y, int q, int r)
+{
+  if ((x / y) != q || (x % y) != r)
+    abort ();
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-times "idivl" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/divmod-4a.c b/gcc/testsuite/gcc.target/i386/divmod-4a.c
new file mode 100644
index 0000000..572b3df
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/divmod-4a.c
@@ -0,0 +1,14 @@ 
+/* { dg-do compile } */
+/* { dg-options "-Os -m8bit-idiv" } */
+
+extern void abort (void);
+
+void
+test (int x, int y, int q, int r)
+{
+  if ((x / y) != q || (x % y) != r)
+    abort ();
+}
+
+/* { dg-final { scan-assembler-not "divb" } } */
+/* { dg-final { scan-assembler-times "idivl" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/divmod-5.c b/gcc/testsuite/gcc.target/i386/divmod-5.c
new file mode 100644
index 0000000..8d179be
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/divmod-5.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+extern void foo (int, int, int, int, int, int);
+
+void
+bar (int x, int y)
+{
+  foo (0, 0, 0, 0, x / y, x % y);
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-times "idivl" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/divmod-6.c b/gcc/testsuite/gcc.target/i386/divmod-6.c
new file mode 100644
index 0000000..c79dba0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/divmod-6.c
@@ -0,0 +1,30 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+extern void abort (void);
+
+void
+__attribute__((noinline))
+test (long long x, long long y, long long q, long long r)
+{
+  if ((x / y) != q || (x % y) != r)
+    abort ();
+}
+
+int
+main ()
+{
+  test (7, 6, 1, 1);
+  test (-7, -6, 1, -1);
+  test (-7, 6, -1, -1);
+  test (7, -6, -1, 1);
+  test (255, 254, 1, 1);
+  test (256, 254, 1, 2);
+  test (256, 256, 1, 0);
+  test (254, 256, 0, 254);
+  test (254, 255, 0, 254);
+  test (254, 1, 254, 0);
+  test (255, 2, 127, 1);
+  test (1, 256, 0, 1);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/divmod-7.c b/gcc/testsuite/gcc.target/i386/divmod-7.c
new file mode 100644
index 0000000..20a4cd3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/divmod-7.c
@@ -0,0 +1,15 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+/* { dg-require-effective-target lp64 } */
+
+extern void abort (void);
+
+void
+test (long long x, long long y, long long q, long long r)
+{
+  if ((x / y) != q || (x % y) != r)
+    abort ();
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-times "idivq" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/divmod-8.c b/gcc/testsuite/gcc.target/i386/divmod-8.c
new file mode 100644
index 0000000..5192b98
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/divmod-8.c
@@ -0,0 +1,15 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+extern void foo (long long, long long, long long, long long,
+		 long long, long long);
+
+void
+bar (long long x, long long y)
+{
+  foo (0, 0, 0, 0, x / y, x % y);
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-times "idivq" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/udivmod-1.c b/gcc/testsuite/gcc.target/i386/udivmod-1.c
new file mode 100644
index 0000000..eebd843
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/udivmod-1.c
@@ -0,0 +1,31 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+extern void abort (void);
+
+void
+__attribute__((noinline))
+test (unsigned int x, unsigned int y, unsigned int q, unsigned int r)
+{
+  if ((x / y) != q || (x % y) != r)
+    abort ();
+}
+
+int
+main ()
+{
+  test (7, 6, 1, 1);
+  test (255, 254, 1, 1);
+  test (256, 254, 1, 2);
+  test (256, 256, 1, 0);
+  test (254, 256, 0, 254);
+  test (254, 255, 0, 254);
+  test (254, 1, 254, 0);
+  test (255, 2, 127, 1);
+  test (1, 256, 0, 1);
+  test (0x80000000, 0x7fffffff, 1, 1);
+  test (0x7fffffff, 0x80000000, 0, 0x7fffffff);
+  test (0x80000000, 0x80000003, 0, 0x80000000);
+  test (0xfffffffd, 0xfffffffe, 0, 0xfffffffd);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/udivmod-2.c b/gcc/testsuite/gcc.target/i386/udivmod-2.c
new file mode 100644
index 0000000..2bba8f3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/udivmod-2.c
@@ -0,0 +1,11 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+unsigned int
+foo (unsigned int x, unsigned int y)
+{
+   return x / y;
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-times "divl" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/udivmod-3.c b/gcc/testsuite/gcc.target/i386/udivmod-3.c
new file mode 100644
index 0000000..f2ac4e5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/udivmod-3.c
@@ -0,0 +1,11 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+unsigned int
+foo (unsigned int x, unsigned int y)
+{
+   return x % y;
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-times "divl" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/udivmod-4.c b/gcc/testsuite/gcc.target/i386/udivmod-4.c
new file mode 100644
index 0000000..14dd87c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/udivmod-4.c
@@ -0,0 +1,14 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+extern void abort (void);
+
+void
+test (unsigned int x, unsigned int y, unsigned int q, unsigned int r)
+{
+  if ((x / y) != q || (x % y) != r)
+    abort ();
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-times "divl" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/udivmod-4a.c b/gcc/testsuite/gcc.target/i386/udivmod-4a.c
new file mode 100644
index 0000000..f1ff389
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/udivmod-4a.c
@@ -0,0 +1,14 @@ 
+/* { dg-do compile } */
+/* { dg-options "-Os -m8bit-idiv" } */
+
+extern void abort (void);
+
+void
+test (unsigned int x, unsigned int y, unsigned int q, unsigned int r)
+{
+  if ((x / y) != q || (x % y) != r)
+    abort ();
+}
+
+/* { dg-final { scan-assembler-not "divb" } } */
+/* { dg-final { scan-assembler-times "divl" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/udivmod-5.c b/gcc/testsuite/gcc.target/i386/udivmod-5.c
new file mode 100644
index 0000000..7c31a0a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/udivmod-5.c
@@ -0,0 +1,14 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+extern void foo (unsigned int, unsigned int, unsigned int,
+		 unsigned int, unsigned int, unsigned int);
+
+void
+bar (unsigned int x, unsigned int y)
+{
+  foo (0, 0, 0, 0, x / y, x % y);
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-times "divl" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/udivmod-6.c b/gcc/testsuite/gcc.target/i386/udivmod-6.c
new file mode 100644
index 0000000..d774171
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/udivmod-6.c
@@ -0,0 +1,32 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+extern void abort (void);
+
+void
+__attribute__((noinline))
+test (unsigned long long x, unsigned long long y,
+      unsigned long long q, unsigned long long r)
+{
+  if ((x / y) != q || (x % y) != r)
+    abort ();
+}
+
+int
+main ()
+{
+  test (7, 6, 1, 1);
+  test (255, 254, 1, 1);
+  test (256, 254, 1, 2);
+  test (256, 256, 1, 0);
+  test (254, 256, 0, 254);
+  test (254, 255, 0, 254);
+  test (254, 1, 254, 0);
+  test (255, 2, 127, 1);
+  test (1, 256, 0, 1);
+  test (0x80000000, 0x7fffffff, 1, 1);
+  test (0x7fffffff, 0x80000000, 0, 0x7fffffff);
+  test (0x80000000, 0x80000003, 0, 0x80000000);
+  test (0xfffffffd, 0xfffffffe, 0, 0xfffffffd);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/udivmod-7.c b/gcc/testsuite/gcc.target/i386/udivmod-7.c
new file mode 100644
index 0000000..14a065f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/udivmod-7.c
@@ -0,0 +1,16 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+/* { dg-require-effective-target lp64 } */
+
+extern void abort (void);
+
+void
+test (unsigned long long x, unsigned long long y,
+      unsigned long long q, unsigned long long r)
+{
+  if ((x / y) != q || (x % y) != r)
+    abort ();
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-times "divq" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/udivmod-8.c b/gcc/testsuite/gcc.target/i386/udivmod-8.c
new file mode 100644
index 0000000..16459fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/udivmod-8.c
@@ -0,0 +1,16 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O2 -m8bit-idiv" } */
+
+extern void foo (unsigned long long, unsigned long long,
+		 unsigned long long, unsigned long long,
+		 unsigned long long, unsigned long long);
+
+void
+bar (unsigned long long x, unsigned long long y)
+{
+  foo (0, 0, 0, 0, x / y, x % y);
+}
+
+/* { dg-final { scan-assembler-times "divb" 1 } } */
+/* { dg-final { scan-assembler-times "divq" 1 } } */