Patchwork [testsuite,i386] BMI2 support for GCC

login
register
mail settings
Submitter Uros Bizjak
Date Aug. 20, 2011, 10:55 p.m.
Message ID <CAFULd4aWosYONxxfhLU901+8O6rNHPjOTuZnUw_6RY-hk6rORw@mail.gmail.com>
Download mbox | patch
Permalink /patch/110791/
State New
Headers show

Comments

Uros Bizjak - Aug. 20, 2011, 10:55 p.m.
On Sat, Aug 20, 2011 at 11:47 PM, Richard Henderson <rth@redhat.com> wrote:

>> - Yb register constraint is added to conditionally enable generation
>> of BMI alternatives in generic shift and rotate patterns. The BMI
>> variant is generated only if RA chooses it as the most profitable
>> alternative.
>
> We really should use the (relatively new) enabled attribute instead
> of adding more and more conditional register constraints.

Indeed. New version is attached - this one also implements imul
splitting to mulx.

2011-08-20  Uros Bizjak  <ubizjak@gmail.com>

	* config/i386/i386.md (type): Add imulx, ishiftx and rotatex.
	(length_immediate): Handle imulx, ishiftx and rotatex.
	(imm_disp): Ditto.
	(isa): Add bmi2.
	(enabled): Handle bmi2.
	(w): New mode attribute.

	(*mul<mode><dwi>3): Split from *<u>mul<mode><dwi>3.
	(*umul<mode><dwi>3): Ditto.  Add imulx BMI2 alternative.
	(bmi2_umul<mode><dwi>3_1): New insn pattern.
	(*umul<mode><dwi>3 splitter): New splitter to avoid flags dependency.

	(*bmi2_ashl<mode>3_1): New insn pattern.
	(*ashl<mode>3_1): Add ishiftx BMI2 alternative.
	(*ashl<mode>3_1 splitter): New splitter to avoid flags dependency.
	(*bmi2_ashlsi3_1_zext): New insn pattern.
	(*ashlsi3_1_zext): Add ishiftx BMI2 alternative.
	(*ashlsi3_1_zext splitter): New splitter to avoid flags dependency.

	(*bmi2_<shiftrt_insn><mode>3_1): New insn pattern.
	(*<shiftrt_insn><mode>3_1): Add ishiftx BMI2 alternative.
	(*<shiftrt_insn><mode>3_1 splitter): New splitter to avoid
	flags dependency.
	(*bmi2_<shiftrt_insn>si3_1_zext): New insn pattern.
	(*<shiftrt_insn>si3_1_zext): Add ishiftx BMI2 alternative.
	(*<shiftrt_insn>si3_1_zext splitter): New splitter to avoid
	flags dependency.

	(*bmi2_rorx<mode>3_1): New insn pattern.
	(*<rotate_insn><mode>3_1): Add rotatex BMI2 alternative.
	(*rotate<mode>3_1 splitter): New splitter to avoid flags dependency.
	(*rotatert<mode>3_1 splitter): Ditto.
	(*bmi2_rorxsi3_1_zext): New insn pattern.
	(*<rotate_insn>si3_1_zext): Add rotatex BMI2 alternative.
	(*rotatesi3_1_zext  splitter): New splitter to avoid flags dependency.
	(*rotatertsi3_1_zext splitter): Ditto.

	* config/i386/i386.c (print_reg): Handle 'M' and 'N' modifiers.
	(print_operand): Ditto.

Bootstrapped on x86_64-pc-linux-gnu, regression test in progress.

Uros.
Jakub Jelinek - Aug. 21, 2011, 10:20 a.m.
On Sun, Aug 21, 2011 at 12:55:41AM +0200, Uros Bizjak wrote:
>  (define_attr "enabled" ""
> -  (cond [(eq_attr "isa" "noavx") (symbol_ref "!TARGET_AVX")
> +  (cond [(eq_attr "isa" "bmi2") (symbol_ref "TARGET_BMI")

Shouldn't this be TARGET_BMI2 ?

> +  	 (eq_attr "isa" "noavx") (symbol_ref "!TARGET_AVX")
>  	 (eq_attr "isa" "avx") (symbol_ref "TARGET_AVX")
>  	]
>  	(const_int 1)))

	Jakub

Patch

Index: i386/i386.md
===================================================================
--- i386/i386.md	(revision 177925)
+++ i386/i386.md	(working copy)
@@ -50,6 +50,8 @@ 
 ;; t --  likewise, print the V8SFmode name of the register.
 ;; h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
 ;; y -- print "st(0)" instead of "st" as a register.
+;; M -- print the low register of a double word register pair.
+;; N -- print the high register of a double word register pair.
 ;; d -- print duplicated register operand for AVX instruction.
 ;; D -- print condition for SSE cmp instruction.
 ;; P -- if PIC, print an @PLT suffix.
@@ -377,7 +379,7 @@ 
 (define_attr "type"
   "other,multi,
    alu,alu1,negnot,imov,imovx,lea,
-   incdec,ishift,ishift1,rotate,rotate1,imul,idiv,
+   incdec,ishift,ishiftx,ishift1,rotate,rotatex,rotate1,imul,imulx,idiv,
    icmp,test,ibr,setcc,icmov,
    push,pop,call,callv,leave,
    str,bitmanip,
@@ -414,8 +416,8 @@ 
 	   (const_int 0)
 	 (eq_attr "unit" "i387,sse,mmx")
 	   (const_int 0)
-	 (eq_attr "type" "alu,alu1,negnot,imovx,ishift,rotate,ishift1,rotate1,
-			  imul,icmp,push,pop")
+	 (eq_attr "type" "alu,alu1,negnot,imovx,ishift,ishiftx,ishift1,
+			  rotate,rotatex,rotate1,imul,imulx,icmp,push,pop")
 	   (symbol_ref "ix86_attr_length_immediate_default (insn, true)")
 	 (eq_attr "type" "imov,test")
 	   (symbol_ref "ix86_attr_length_immediate_default (insn, false)")
@@ -675,7 +677,7 @@ 
 	      (and (match_operand 0 "memory_displacement_operand" "")
 		   (match_operand 1 "immediate_operand" "")))
 	   (const_string "true")
-	 (and (eq_attr "type" "alu,ishift,rotate,imul,idiv")
+	 (and (eq_attr "type" "alu,ishift,ishiftx,rotate,rotatex,imul,imulx,idiv")
 	      (and (match_operand 0 "memory_displacement_operand" "")
 		   (match_operand 2 "immediate_operand" "")))
 	   (const_string "true")
@@ -699,11 +701,12 @@ 
 (define_attr "movu" "0,1" (const_string "0"))
 
 ;; Used to control the "enabled" attribute on a per-instruction basis.
-(define_attr "isa" "base,noavx,avx"
+(define_attr "isa" "base,bmi2,noavx,avx"
   (const_string "base"))
 
 (define_attr "enabled" ""
-  (cond [(eq_attr "isa" "noavx") (symbol_ref "!TARGET_AVX")
+  (cond [(eq_attr "isa" "bmi2") (symbol_ref "TARGET_BMI")
+  	 (eq_attr "isa" "noavx") (symbol_ref "!TARGET_AVX")
 	 (eq_attr "isa" "avx") (symbol_ref "TARGET_AVX")
 	]
 	(const_int 1)))
@@ -947,6 +950,9 @@ 
 ;; Instruction suffix for REX 64bit operators.
 (define_mode_attr rex64suffix [(SI "") (DI "{q}")])
 
+;; Register prefix for word mode.
+(define_mode_attr w [(SI "k") (DI "q")])
+
 ;; This mode iterator allows :P to be used for patterns that operate on
 ;; pointer-sized quantities.  Exactly one of the two alternatives will match.
 (define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
@@ -6849,16 +6855,71 @@ 
 	      (clobber (reg:CC FLAGS_REG))])]
   "TARGET_QIMODE_MATH")
 
-(define_insn "*<u>mul<mode><dwi>3_1"
+(define_insn "*bmi2_umul<mode><dwi>3_1"
+  [(set (match_operand:<DWI> 0 "register_operand" "=r")
+	(mult:<DWI>
+	  (zero_extend:<DWI>
+	    (match_operand:DWIH 1 "nonimmediate_operand" "%d"))
+	  (zero_extend:<DWI>
+	    (match_operand:DWIH 2 "nonimmediate_operand" "rm"))))]
+  "TARGET_BMI
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "mulx\t{%2, %M0, %N0|%N0, %M0, %2}"
+  [(set_attr "type" "imulx")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*umul<mode><dwi>3_1"
+  [(set (match_operand:<DWI> 0 "register_operand" "=A,r")
+	(mult:<DWI>
+	  (zero_extend:<DWI>
+	    (match_operand:DWIH 1 "nonimmediate_operand" "%0,d"))
+	  (zero_extend:<DWI>
+	    (match_operand:DWIH 2 "nonimmediate_operand" "rm,rm"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+   imul{<imodesuffix>}\t%2
+   #"
+  [(set_attr "isa" "base,bmi2")
+   (set_attr "type" "imul,imulx")
+   (set_attr "length_immediate" "0,*")
+   (set (attr "athlon_decode")
+	(cond [(eq_attr "alternative" "0")
+		 (if_then_else (eq_attr "cpu" "athlon")
+		   (const_string "vector")
+		   (const_string "double"))]
+	      (const_string "*")))
+   (set_attr "amdfam10_decode" "double,*")
+   (set_attr "bdver1_decode" "direct,*")
+   (set_attr "prefix" "orig,vex")
+   (set_attr "mode" "<MODE>")])
+
+;; Convert umul to umulx pattern to avoid flags dependency.
+(define_split
+ [(set (match_operand:<DWI> 0 "register_operand" "")
+       (mult:<DWI>
+	 (zero_extend:<DWI>
+	   (match_operand:DWIH 1 "nonimmediate_operand" ""))
+	 (zero_extend:<DWI>
+	   (match_operand:DWIH 2 "nonimmediate_operand" ""))))
+  (clobber (reg:CC FLAGS_REG))]
+ "TARGET_BMI && reload_completed"
+ [(set (match_dup 0)
+       (mult:<DWI>
+	 (zero_extend:<DWI> (match_dup 1))
+         (zero_extend:<DWI> (match_dup 2))))])
+
+(define_insn "*mul<mode><dwi>3_1"
   [(set (match_operand:<DWI> 0 "register_operand" "=A")
 	(mult:<DWI>
-	  (any_extend:<DWI>
+	  (sign_extend:<DWI>
 	    (match_operand:DWIH 1 "nonimmediate_operand" "%0"))
-	  (any_extend:<DWI>
+	  (sign_extend:<DWI>
 	    (match_operand:DWIH 2 "nonimmediate_operand" "rm"))))
    (clobber (reg:CC FLAGS_REG))]
   "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
-  "<sgnprefix>mul{<imodesuffix>}\t%2"
+  "imul{<imodesuffix>}\t%2"
   [(set_attr "type" "imul")
    (set_attr "length_immediate" "0")
    (set (attr "athlon_decode")
@@ -9056,16 +9117,26 @@ 
   [(set_attr "type" "ishift")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*bmi2_ashl<mode>3_1"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+		      (match_operand:QI 2 "register_operand" "r")))]
+  "TARGET_BMI"
+  "salx\t{%<w>2, %1, %0|%0, %1, %<w>2}"
+  [(set_attr "type" "ishiftx")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*ashl<mode>3_1"
-  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
-	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l")
-		      (match_operand:QI 2 "nonmemory_operand" "c<S>,M")))
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
+	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm")
+		      (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)"
 {
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
+    case TYPE_ISHIFTX:
       return "#";
 
     case TYPE_ALU:
@@ -9081,9 +9152,12 @@ 
 	return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
-  [(set (attr "type")
+  [(set_attr "isa" "base,base,bmi2")
+   (set (attr "type")
      (cond [(eq_attr "alternative" "1")
 	      (const_string "lea")
+	    (eq_attr "alternative" "2")
+	      (const_string "ishiftx")
             (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
 		          (const_int 0))
 		      (match_operand 0 "register_operand" ""))
@@ -9102,17 +9176,38 @@ 
        (const_string "*")))
    (set_attr "mode" "<MODE>")])
 
+;; Convert shift to the shiftx pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand" "")
+	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "")
+		      (match_operand:QI 2 "register_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI && reload_completed"
+  [(set (match_dup 0)
+	(ashift:SWI48 (match_dup 1) (match_dup 2)))])
+
+(define_insn "*bmi2_ashlsi3_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+		     (match_operand:QI 2 "register_operand" "r"))))]
+  "TARGET_64BIT && TARGET_BMI"
+  "salx\t{%k2, %1, %k0|%k0, %1, %k2}"
+  [(set_attr "type" "ishiftx")
+   (set_attr "mode" "SI")])
+
 (define_insn "*ashlsi3_1_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r")
 	(zero_extend:DI
-	  (ashift:SI (match_operand:SI 1 "register_operand" "0,l")
-		     (match_operand:QI 2 "nonmemory_operand" "cI,M"))))
+	  (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l,rm")
+		     (match_operand:QI 2 "nonmemory_operand" "cI,M,r"))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
 {
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
+    case TYPE_ISHIFTX:
       return "#";
 
     case TYPE_ALU:
@@ -9127,9 +9222,12 @@ 
 	return "sal{l}\t{%2, %k0|%k0, %2}";
     }
 }
-  [(set (attr "type")
+  [(set_attr "isa" "base,base,bmi2")
+   (set (attr "type")
      (cond [(eq_attr "alternative" "1")
 	      (const_string "lea")
+	    (eq_attr "alternative" "2")
+	      (const_string "ishiftx")
             (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
 		     (const_int 0))
 		 (match_operand 2 "const1_operand" ""))
@@ -9147,6 +9245,17 @@ 
        (const_string "*")))
    (set_attr "mode" "SI")])
 
+;; Convert shift to the shiftx pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(zero_extend:DI
+	  (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "")
+		     (match_operand:QI 2 "register_operand" ""))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_BMI && reload_completed"
+  [(set (match_dup 0)
+  	(zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))])
+
 (define_insn "*ashlhi3_1"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
 	(ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0")
@@ -9763,20 +9872,38 @@ 
   DONE;
 })
 
+(define_insn "*bmi2_<shiftrt_insn><mode>3_1"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+			   (match_operand:QI 2 "register_operand" "r")))]
+  "TARGET_BMI"
+  "<shiftrt>x\t{%<w>2, %1, %0|%0, %1, %<w>2}"
+  [(set_attr "type" "ishiftx")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*<shiftrt_insn><mode>3_1"
-  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
-	(any_shiftrt:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")
-			 (match_operand:QI 2 "nonmemory_operand" "c<S>")))
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
+	(any_shiftrt:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>,r")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
 {
-  if (operands[2] == const1_rtx
-      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-    return "<shiftrt>{<imodesuffix>}\t%0";
-  else
-    return "<shiftrt>{<imodesuffix>}\t{%2, %0|%0, %2}";
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ISHIFTX:
+      return "#";
+
+    default:
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "<shiftrt>{<imodesuffix>}\t%0";
+      else
+	return "<shiftrt>{<imodesuffix>}\t{%2, %0|%0, %2}";
+    }
 }
-  [(set_attr "type" "ishift")
+  [(set_attr "isa" "base,bmi2")
+   (set_attr "type" "ishift,ishiftx")
    (set (attr "length_immediate")
      (if_then_else
        (and (match_operand 2 "const1_operand" "")
@@ -9786,19 +9913,82 @@ 
        (const_string "*")))
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*<shiftrt_insn>si3_1_zext"
+;; Convert shift to the shiftx pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand" "")
+	(any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "")
+			   (match_operand:QI 2 "register_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI && reload_completed"
+  [(set (match_dup 0)
+	(any_shiftrt:SWI48 (match_dup 1) (match_dup 2)))])
+
+(define_insn "*bmi2_<shiftrt_insn>si3_1_zext"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(zero_extend:DI
-	  (any_shiftrt:SI (match_operand:SI 1 "register_operand" "0")
-			  (match_operand:QI 2 "nonmemory_operand" "cI"))))
+	  (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+			  (match_operand:QI 2 "register_operand" "r"))))]
+  "TARGET_64BIT && TARGET_BMI"
+  "<shiftrt>x\t{%k2, %1, %k0|%k0, %1, %k2}"
+  [(set_attr "type" "ishiftx")
+   (set_attr "mode" "SI")])
+
+(define_insn "*<shiftrt_insn>si3_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(zero_extend:DI
+	  (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
+			  (match_operand:QI 2 "nonmemory_operand" "cI,r"))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
 {
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ISHIFTX:
+      return "#";
+
+    default:
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "<shiftrt>{l}\t%k0";
+      else
+	return "<shiftrt>{l}\t{%2, %k0|%k0, %2}";
+    }
+}
+  [(set_attr "isa" "base,bmi2")
+   (set_attr "type" "ishift,ishiftx")
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (match_operand 2 "const1_operand" "")
+	    (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+		(const_int 0)))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "SI")])
+
+;; Convert shift to the shiftx pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(zero_extend:DI
+	  (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "")
+			  (match_operand:QI 2 "register_operand" ""))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_BMI && reload_completed"
+  [(set (match_dup 0)
+  	(zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))])
+
+(define_insn "*<shiftrt_insn><mode>3_1"
+  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m")
+	(any_shiftrt:SWI12
+	  (match_operand:SWI12 1 "nonimmediate_operand" "0")
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+{
   if (operands[2] == const1_rtx
       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-    return "<shiftrt>{l}\t%k0";
+    return "<shiftrt>{<imodesuffix>}\t%0";
   else
-    return "<shiftrt>{l}\t{%2, %k0|%k0, %2}";
+    return "<shiftrt>{<imodesuffix>}\t{%2, %0|%0, %2}";
 }
   [(set_attr "type" "ishift")
    (set (attr "length_immediate")
@@ -9808,7 +9998,7 @@ 
 		(const_int 0)))
        (const_string "0")
        (const_string "*")))
-   (set_attr "mode" "SI")])
+   (set_attr "mode" "<MODE>")])
 
 (define_insn "*<shiftrt_insn>qi3_1_slp"
   [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
@@ -10060,42 +10250,151 @@ 
   split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]);
 })
 
+(define_insn "*bmi2_rorx<mode>3_1"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(rotatert:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+			(match_operand:QI 2 "immediate_operand" "<S>")))]
+  "TARGET_BMI"
+  "rorx\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "rotatex")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*<rotate_insn><mode>3_1"
-  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
-	(any_rotate:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")
-			(match_operand:QI 2 "nonmemory_operand" "c<S>")))
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
+	(any_rotate:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>,<S>")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
 {
-  if (operands[2] == const1_rtx
-      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-    return "<rotate>{<imodesuffix>}\t%0";
-  else
-    return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ROTATEX:
+      return "#";
+
+    default:
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "<rotate>{<imodesuffix>}\t%0";
+      else
+	return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
+    }
 }
-  [(set_attr "type" "rotate")
+  [(set_attr "isa" "base,bmi2")
+   (set_attr "type" "rotate,rotatex")
    (set (attr "length_immediate")
      (if_then_else
-       (and (match_operand 2 "const1_operand" "")
-	    (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
-		(const_int 0)))
+       (and (eq_attr "type" "rotate")
+	    (and (match_operand 2 "const1_operand" "")
+		 (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+		     (const_int 0))))
        (const_string "0")
        (const_string "*")))
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*<rotate_insn>si3_1_zext"
+;; Convert rotate to the rotatex pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand" "")
+	(rotate:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "")
+		      (match_operand:QI 2 "immediate_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI && reload_completed"
+  [(set (match_dup 0)
+	(rotatert:SWI48 (match_dup 1) (match_dup 2)))]
+{
+  operands[2]
+    = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - INTVAL (operands[2]));
+})
+
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand" "")
+	(rotatert:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "")
+			(match_operand:QI 2 "immediate_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI && reload_completed"
+  [(set (match_dup 0)
+	(rotatert:SWI48 (match_dup 1) (match_dup 2)))])
+
+(define_insn "*bmi2_rorxsi3_1_zext"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(zero_extend:DI
-	  (any_rotate:SI (match_operand:SI 1 "register_operand" "0")
-			 (match_operand:QI 2 "nonmemory_operand" "cI"))))
+	  (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+		       (match_operand:QI 2 "immediate_operand" "I"))))]
+  "TARGET_64BIT && TARGET_BMI"
+  "rorx\t{%2, %1, %k0|%k0, %1, %2}"
+  [(set_attr "type" "rotatex")
+   (set_attr "mode" "SI")])
+
+(define_insn "*<rotate_insn>si3_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(zero_extend:DI
+	  (any_rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
+			 (match_operand:QI 2 "nonmemory_operand" "cI,I"))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
 {
-    if (operands[2] == const1_rtx
-	&& (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-    return "<rotate>{l}\t%k0";
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ROTATEX:
+      return "#";
+
+    default:
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "<rotate>{l}\t%k0";
+      else
+	return "<rotate>{l}\t{%2, %k0|%k0, %2}";
+    }
+}
+  [(set_attr "isa" "base,bmi2")
+   (set_attr "type" "rotate,rotatex")
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (eq_attr "type" "rotate")
+	    (and (match_operand 2 "const1_operand" "")
+		 (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+		     (const_int 0))))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "SI")])
+
+;; Convert rotate to the rotatex pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(zero_extend:DI
+	  (rotate:SI (match_operand:SI 1 "nonimmediate_operand" "")
+		     (match_operand:QI 2 "immediate_operand" ""))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_BMI && reload_completed"
+  [(set (match_dup 0)
+  	(zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))]
+{
+  operands[2]
+    = GEN_INT (GET_MODE_BITSIZE (SImode) - INTVAL (operands[2]));
+})
+
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(zero_extend:DI
+	  (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "")
+		       (match_operand:QI 2 "immediate_operand" ""))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_BMI && reload_completed"
+  [(set (match_dup 0)
+  	(zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))])
+
+(define_insn "*<rotate_insn><mode>3_1"
+  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m")
+	(any_rotate:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0")
+			  (match_operand:QI 2 "nonmemory_operand" "c<S>")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+{
+  if (operands[2] == const1_rtx
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+    return "<rotate>{<imodesuffix>}\t%0";
   else
-    return "<rotate>{l}\t{%2, %k0|%k0, %2}";
+    return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
 }
   [(set_attr "type" "rotate")
    (set (attr "length_immediate")
@@ -10105,7 +10404,7 @@ 
 		(const_int 0)))
        (const_string "0")
        (const_string "*")))
-   (set_attr "mode" "SI")])
+   (set_attr "mode" "<MODE>")])
 
 (define_insn "*<rotate_insn>qi3_1_slp"
   [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
Index: i386/i386.c
===================================================================
--- i386/i386.c	(revision 177928)
+++ i386/i386.c	(working copy)
@@ -13285,6 +13285,8 @@  put_condition_code (enum rtx_code code, enum machi
    If CODE is 't', pretend the mode is V8SFmode.
    If CODE is 'h', pretend the reg is the 'high' byte register.
    If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
+   If CODE is 'M', print the low register of a double word register pair.
+   If CODE is 'N', print the high register of a double word register pair.
    If CODE is 'd', duplicate the operand for AVX instruction.
  */
 
@@ -13327,6 +13329,18 @@  print_reg (rtx x, int code, FILE *file)
     code = 16;
   else if (code == 't')
     code = 32;
+  else if (code == 'M')
+    {
+      gcc_assert (GET_MODE (x) == GET_MODE_WIDER_MODE (word_mode));
+      x = gen_lowpart (word_mode, x);
+      code = GET_MODE_SIZE (word_mode);
+    }
+  else if (code == 'N')
+    {
+      gcc_assert (GET_MODE (x) == GET_MODE_WIDER_MODE (word_mode));
+      x = gen_highpart (word_mode, x);
+      code = GET_MODE_SIZE (word_mode);
+    }
   else
     code = GET_MODE_SIZE (GET_MODE (x));
 
@@ -13472,6 +13486,8 @@  get_some_local_dynamic_name (void)
    t --  likewise, print the V8SFmode name of the register.
    h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
    y -- print "st(0)" instead of "st" as a register.
+   M -- print the low register of a double word register pair.
+   N -- print the high register of a double word register pair.
    d -- print duplicated register operand for AVX instruction.
    D -- print condition for SSE cmp instruction.
    P -- if PIC, print an @PLT suffix.
@@ -13678,6 +13694,8 @@  ix86_print_operand (FILE *file, rtx x, int code)
 	case 'h':
 	case 't':
 	case 'y':
+	case 'M':
+	case 'N':
 	case 'x':
 	case 'X':
 	case 'P':