diff mbox

[i386] : Improve zero_extend patterns

Message ID CAFULd4Yaq9kD7BVD3bR7XLBX6F7TPeS-Pz1oyARF8VRXXd_adw@mail.gmail.com
State New
Headers show

Commit Message

Uros Bizjak March 4, 2012, 6:52 p.m. UTC
Hello!

Attached patch improves zero_extend patterns by:
- removing the flags reg clobber from the zero_extendsidi patterns for 32bit
targets. Everything, including "movl $0, mem", can be split without
using a flags reg clobber.
- removing the intermediate *zero_extend*2_movzbl_and patterns. We no longer
need to remove any fake clobbers in the !TARGET_ZERO_EXTEND_WITH_AND
case.
- adding o,0 and x,x alternatives. The matching-memory case can be split
into a store of 0 to the high part on both 64bit and 32bit targets,
and movd also zero-extends in the xmm->xmm case.
- truly splitting "and" RTXes into zero_extend RTXes when appropriate
(but only in the !TARGET_ZERO_EXTEND_WITH_AND case), again removing
unneeded flags reg clobbers
- fixing TARGET_ZERO_EXTEND_WITH_AND peephole2

2012-03-04  Uros Bizjak  <ubizjak@gmail.com>

	* config/i386/constraints.md (Ya): New internal constraint.
	* config/i386/i386.md (zero_extendsidi2): Remove expansion.
	(*zero_extendsidi2_rex64): Add x,x alternative.
	(*zero_extendsidi2): Ditto.  Add o,0 alternative.
	Remove flags reg clobber.  Adjust corresponding splits.
	(zero_extend<mode>si2): Macroize expander from zero_extendhisi2 and
	zero_extendqisi2 expanders using SWI12 mode iterator.
	(zero_extend<mode>si2_and): Macroize insn from
	zero_extendhisi2_and and zero_extendqisi2_and.  Merge corresponding
	splitters.
	(*zero_extend<mode>si2): Macroize insn from
	*zero_extendhisi2_movzwl and *zero_extendqisi2_movzbl.
	(*zero_extend*2_movzbl_and): Remove insn patterns.
	(zero_extendqihi2_and): Merge corresponding splitter.
	(*zero_extendqihi2): Rename from *zero_extendqihi2_movzbl.
	(*zero_extend*2_movzbl_and): Remove insn patterns.
	(*anddi_1): Split TYPE_IMOVX instructions.
	(*andsi_1): Use Ya for alternative 2.  Split TYPE_IMOVX instructions.
	(*andhi_1): Ditto.
	(and->zext splitter): Add splitter pattern.
	(zero extend with andsi3 splitter): Adjust zero_extend pattern.

The patch was tested on x86_64-pc-linux-gnu {,-m32} and committed to mainline SVN.

Uros.
diff mbox

Patch

Index: config/i386/constraints.md
===================================================================
--- config/i386/constraints.md	(revision 184886)
+++ config/i386/constraints.md	(working copy)
@@ -89,6 +89,7 @@ 
 ;;  z	First SSE register.
 ;;  i	SSE2 inter-unit moves enabled
 ;;  m	MMX inter-unit moves enabled
+;;  a	Integer register when zero extensions with AND are disabled
 ;;  p	Integer register when TARGET_PARTIAL_REG_STALL is disabled
 ;;  d	Integer register when integer DFmode moves are enabled
 ;;  x	Integer register when integer XFmode moves are enabled
@@ -108,6 +109,11 @@ 
  "TARGET_PARTIAL_REG_STALL ? NO_REGS : GENERAL_REGS"
  "@internal Any integer register when TARGET_PARTIAL_REG_STALL is disabled.")
 
+(define_register_constraint "Ya"
+ "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)
+  ? NO_REGS : GENERAL_REGS"
+ "@internal Any integer register when zero extensions with AND are disabled.")
+
 (define_register_constraint "Yd"
  "(TARGET_64BIT
    || (TARGET_INTEGER_DFMODE_MOVES && optimize_function_for_speed_p (cfun)))
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 184886)
+++ config/i386/i386.md	(working copy)
@@ -3371,20 +3371,14 @@ 
 
 (define_expand "zero_extendsidi2"
   [(set (match_operand:DI 0 "nonimmediate_operand" "")
-	(zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "")))]
-  ""
-{
-  if (!TARGET_64BIT)
-    {
-      emit_insn (gen_zero_extendsidi2_1 (operands[0], operands[1]));
-      DONE;
-    }
-})
+	(zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "")))])
 
 (define_insn "*zero_extendsidi2_rex64"
-  [(set (match_operand:DI 0 "nonimmediate_operand"  "=r,o,?*Ym,?*y,?*Yi,*x")
+  [(set (match_operand:DI 0 "nonimmediate_operand"
+			"=r,o,?*Ym,?*y,?*Yi,!*x")
 	(zero_extend:DI
-	 (match_operand:SI 1 "nonimmediate_operand" "rm,0,r   ,m  ,r   ,m")))]
+	 (match_operand:SI 1 "nonimmediate_operand"
+	        	"rm,0,r   ,m  ,r   ,m*x")))]
   "TARGET_64BIT"
   "@
    mov{l}\t{%1, %k0|%k0, %1}
@@ -3393,24 +3387,17 @@ 
    movd\t{%1, %0|%0, %1}
    %vmovd\t{%1, %0|%0, %1}
    %vmovd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "imovx,imov,mmxmov,mmxmov,ssemov,ssemov")
+  [(set_attr "isa" "*,*,*,*,*,sse2")
+   (set_attr "type" "imovx,multi,mmxmov,mmxmov,ssemov,ssemov")
    (set_attr "prefix" "orig,*,orig,orig,maybe_vex,maybe_vex")
    (set_attr "prefix_0f" "0,*,*,*,*,*")
-   (set_attr "mode" "SI,DI,DI,DI,TI,TI")])
+   (set_attr "mode" "SI,SI,DI,DI,TI,TI")])
 
-(define_split
-  [(set (match_operand:DI 0 "memory_operand" "")
-     	(zero_extend:DI (match_dup 0)))]
-  "TARGET_64BIT"
-  [(set (match_dup 4) (const_int 0))]
-  "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
-
-;; %%% Kill me once multi-word ops are sane.
-(define_insn "zero_extendsidi2_1"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,?r,?o,?*Ym,?*y,?*Yi,*x")
-	(zero_extend:DI
-	 (match_operand:SI 1 "nonimmediate_operand" "0,rm,r ,r   ,m  ,r   ,m")))
-   (clobber (reg:CC FLAGS_REG))]
+(define_insn "*zero_extendsidi2"
+  [(set (match_operand:DI 0 "nonimmediate_operand"
+			"=ro,?r,?o,?*Ym,?*y,?*Yi,!*x")
+	(zero_extend:DI (match_operand:SI 1 "nonimmediate_operand"
+			"0  ,rm,r ,r   ,m  ,r   ,m*x")))]
   "!TARGET_64BIT"
   "@
    #
@@ -3426,19 +3413,26 @@ 
    (set_attr "mode" "SI,SI,SI,DI,DI,TI,TI")])
 
 (define_split
+  [(set (match_operand:DI 0 "memory_operand" "")
+     	(zero_extend:DI (match_operand:SI 1 "memory_operand" "")))]
+  "reload_completed"
+  [(set (match_dup 4) (const_int 0))]
+  "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
+
+(define_split
   [(set (match_operand:DI 0 "register_operand" "")
-	(zero_extend:DI (match_operand:SI 1 "register_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
+	(zero_extend:DI (match_operand:SI 1 "register_operand" "")))]
   "!TARGET_64BIT && reload_completed
+   && !(MMX_REG_P (operands[0]) || SSE_REG_P (operands[0]))
    && true_regnum (operands[0]) == true_regnum (operands[1])"
   [(set (match_dup 4) (const_int 0))]
   "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
 
 (define_split
   [(set (match_operand:DI 0 "nonimmediate_operand" "")
-	(zero_extend:DI (match_operand:SI 1 "general_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
+	(zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "")))]
   "!TARGET_64BIT && reload_completed
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))
    && !(MMX_REG_P (operands[0]) || SSE_REG_P (operands[0]))"
   [(set (match_dup 3) (match_dup 1))
    (set (match_dup 4) (const_int 0))]
@@ -3453,112 +3447,100 @@ 
   [(set_attr "type" "imovx")
    (set_attr "mode" "SI")])
 
-(define_expand "zero_extendhisi2"
+(define_expand "zero_extend<mode>si2"
   [(set (match_operand:SI 0 "register_operand" "")
-	(zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "")))]
+	(zero_extend:SI (match_operand:SWI12 1 "nonimmediate_operand" "")))]
   ""
 {
   if (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))
     {
-      operands[1] = force_reg (HImode, operands[1]);
-      emit_insn (gen_zero_extendhisi2_and (operands[0], operands[1]));
+      operands[1] = force_reg (<MODE>mode, operands[1]);
+      emit_insn (gen_zero_extend<mode>si2_and (operands[0], operands[1]));
       DONE;
     }
 })
 
-(define_insn_and_split "zero_extendhisi2_and"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-	(zero_extend:SI (match_operand:HI 1 "register_operand" "0")))
+(define_insn_and_split "zero_extend<mode>si2_and"
+  [(set (match_operand:SI 0 "register_operand" "=r,?&<r>")
+	(zero_extend:SI
+	  (match_operand:SWI12 1 "nonimmediate_operand" "0,<r>m")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
   "#"
   "&& reload_completed"
-  [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 65535)))
+  [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 2)))
 	      (clobber (reg:CC FLAGS_REG))])]
-  ""
+{
+  if (true_regnum (operands[0]) != true_regnum (operands[1]))
+    {
+      ix86_expand_clear (operands[0]);
+
+      gcc_assert (!TARGET_PARTIAL_REG_STALL);
+      emit_insn (gen_movstrict<mode>
+		  (gen_lowpart (<MODE>mode, operands[0]), operands[1]));
+      DONE;
+    }
+
+  operands[2] = GEN_INT (GET_MODE_MASK (<MODE>mode));
+}
   [(set_attr "type" "alu1")
    (set_attr "mode" "SI")])
 
-(define_insn "*zero_extendhisi2_movzwl"
+(define_insn "*zero_extend<mode>si2"
   [(set (match_operand:SI 0 "register_operand" "=r")
-	(zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "rm")))]
-  "!TARGET_ZERO_EXTEND_WITH_AND
-   || optimize_function_for_size_p (cfun)"
-  "movz{wl|x}\t{%1, %0|%0, %1}"
+	(zero_extend:SI
+	  (match_operand:SWI12 1 "nonimmediate_operand" "<r>m")))]
+  "!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))"
+  "movz{<imodesuffix>l|x}\t{%1, %0|%0, %1}"
   [(set_attr "type" "imovx")
    (set_attr "mode" "SI")])
 
-(define_expand "zero_extendqi<mode>2"
-  [(parallel
-    [(set (match_operand:SWI24 0 "register_operand" "")
-	  (zero_extend:SWI24 (match_operand:QI 1 "nonimmediate_operand" "")))
-     (clobber (reg:CC FLAGS_REG))])])
+(define_expand "zero_extendqihi2"
+  [(set (match_operand:HI 0 "register_operand" "")
+	(zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))]
+  ""
+{
+  if (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))
+    {
+      operands[1] = force_reg (QImode, operands[1]);
+      emit_insn (gen_zero_extendqihi2_and (operands[0], operands[1]));
+      DONE;
+    }
+})
 
-(define_insn "*zero_extendqi<mode>2_and"
-  [(set (match_operand:SWI24 0 "register_operand" "=r,?&q")
-	(zero_extend:SWI24 (match_operand:QI 1 "nonimmediate_operand" "0,qm")))
+(define_insn_and_split "zero_extendqihi2_and"
+  [(set (match_operand:HI 0 "register_operand" "=r,?&q")
+	(zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "0,qm")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
   "#"
-  [(set_attr "type" "alu1")
-   (set_attr "mode" "<MODE>")])
-
-;; When source and destination does not overlap, clear destination
-;; first and then do the movb
-(define_split
-  [(set (match_operand:SWI24 0 "register_operand" "")
-	(zero_extend:SWI24 (match_operand:QI 1 "nonimmediate_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "reload_completed
-   && (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))
-   && ANY_QI_REG_P (operands[0])
-   && (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1]))
-   && !reg_overlap_mentioned_p (operands[0], operands[1])"
-  [(set (strict_low_part (match_dup 2)) (match_dup 1))]
+  "&& reload_completed"
+  [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 255)))
+	      (clobber (reg:CC FLAGS_REG))])]
 {
-  operands[2] = gen_lowpart (QImode, operands[0]);
-  ix86_expand_clear (operands[0]);
-})
+  if (true_regnum (operands[0]) != true_regnum (operands[1]))
+    {
+      ix86_expand_clear (operands[0]);
 
-(define_insn "*zero_extendqi<mode>2_movzbl_and"
-  [(set (match_operand:SWI24 0 "register_operand" "=r,r")
-	(zero_extend:SWI24 (match_operand:QI 1 "nonimmediate_operand" "qm,0")))
-   (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun)"
-  "#"
-  [(set_attr "type" "imovx,alu1")
-   (set_attr "mode" "<MODE>")])
+      gcc_assert (!TARGET_PARTIAL_REG_STALL);
+      emit_insn (gen_movstrictqi
+		  (gen_lowpart (QImode, operands[0]), operands[1]));
+      DONE;
+    }
 
-;; For the movzbl case strip only the clobber
-(define_split
-  [(set (match_operand:SWI24 0 "register_operand" "")
-	(zero_extend:SWI24 (match_operand:QI 1 "nonimmediate_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "reload_completed
-   && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))
-   && (!REG_P (operands[1]) || ANY_QI_REG_P (operands[1]))"
-  [(set (match_dup 0)
-	(zero_extend:SWI24 (match_dup 1)))])
+  operands[0] = gen_lowpart (SImode, operands[0]);
+}
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "SI")])
 
 ; zero extend to SImode to avoid partial register stalls
-(define_insn "*zero_extendqi<mode>2_movzbl"
-  [(set (match_operand:SWI24 0 "register_operand" "=r")
-	(zero_extend:SWI24 (match_operand:QI 1 "nonimmediate_operand" "qm")))]
-  "reload_completed
-   && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))"
+(define_insn "*zero_extendqihi2"
+  [(set (match_operand:HI 0 "register_operand" "=r")
+	(zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm")))]
+  "!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))"
   "movz{bl|x}\t{%1, %k0|%k0, %1}"
   [(set_attr "type" "imovx")
    (set_attr "mode" "SI")])
-
-;; Rest is handled by single and.
-(define_split
-  [(set (match_operand:SWI24 0 "register_operand" "")
-	(zero_extend:SWI24 (match_operand:QI 1 "register_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "reload_completed
-   && true_regnum (operands[0]) == true_regnum (operands[1])"
-  [(parallel [(set (match_dup 0) (and:SWI24 (match_dup 0) (const_int 255)))
-	      (clobber (reg:CC FLAGS_REG))])])
 
 ;; Sign extension instructions
 
@@ -7674,29 +7656,8 @@ 
   switch (get_attr_type (insn))
     {
     case TYPE_IMOVX:
-      {
-	enum machine_mode mode;
+      return "#";
 
-	gcc_assert (CONST_INT_P (operands[2]));
-	if (INTVAL (operands[2]) == (HOST_WIDE_INT) 0xffffffff)
-	  mode = SImode;
-	else if (INTVAL (operands[2]) == 0xffff)
-	  mode = HImode;
-	else
-	  {
-	    gcc_assert (INTVAL (operands[2]) == 0xff);
-	    mode = QImode;
-	  }
-
-	operands[1] = gen_lowpart (mode, operands[1]);
-	if (mode == SImode)
-	  return "mov{l}\t{%1, %k0|%k0, %1}";
-	else if (mode == HImode)
-	  return "movz{wl|x}\t{%1, %k0|%k0, %1}";
-	else
-	  return "movz{bl|x}\t{%1, %k0|%k0, %1}";
-      }
-
     default:
       gcc_assert (rtx_equal_p (operands[0], operands[1]));
       if (get_attr_mode (insn) == MODE_SI)
@@ -7717,7 +7678,7 @@ 
    (set_attr "mode" "SI,DI,DI,SI")])
 
 (define_insn "*andsi_1"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,r")
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,Ya")
 	(and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,qm")
 		(match_operand:SI 2 "x86_64_general_operand" "re,rm,L")))
    (clobber (reg:CC FLAGS_REG))]
@@ -7726,25 +7687,8 @@ 
   switch (get_attr_type (insn))
     {
     case TYPE_IMOVX:
-      {
-	enum machine_mode mode;
+      return "#";
 
-	gcc_assert (CONST_INT_P (operands[2]));
-        if (INTVAL (operands[2]) == 0xffff)
-	  mode = HImode;
-	else
-	  {
-	    gcc_assert (INTVAL (operands[2]) == 0xff);
-	    mode = QImode;
-	  }
-
-	operands[1] = gen_lowpart (mode, operands[1]);
-	if (mode == HImode)
-	  return "movz{wl|x}\t{%1, %0|%0, %1}";
-	else
-	  return "movz{bl|x}\t{%1, %0|%0, %1}";
-      }
-
     default:
       gcc_assert (rtx_equal_p (operands[0], operands[1]));
       return "and{l}\t{%2, %0|%0, %2}";
@@ -7774,7 +7718,7 @@ 
    (set_attr "mode" "SI")])
 
 (define_insn "*andhi_1"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r")
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,Ya")
 	(and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,qm")
 		(match_operand:HI 2 "general_operand" "rn,rm,L")))
    (clobber (reg:CC FLAGS_REG))]
@@ -7783,13 +7727,10 @@ 
   switch (get_attr_type (insn))
     {
     case TYPE_IMOVX:
-      gcc_assert (CONST_INT_P (operands[2]));
-      gcc_assert (INTVAL (operands[2]) == 0xff);
-      return "movz{bl|x}\t{%b1, %k0|%k0, %b1}";
+      return "#";
 
     default:
       gcc_assert (rtx_equal_p (operands[0], operands[1]));
-
       return "and{w}\t{%2, %0|%0, %2}";
     }
 }
@@ -7829,6 +7770,44 @@ 
    (set_attr "mode" "QI")])
 
 (define_split
+  [(set (match_operand:SWI248 0 "register_operand" "")
+	(and:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "")
+		    (match_operand:SWI248 2 "const_int_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed
+   && true_regnum (operands[0]) != true_regnum (operands[1])"
+  [(const_int 0)]
+{
+  enum machine_mode mode;
+
+  if (INTVAL (operands[2]) == (HOST_WIDE_INT) 0xffffffff)
+    mode = SImode;
+  else if (INTVAL (operands[2]) == 0xffff)
+    mode = HImode;
+  else
+    {
+      gcc_assert (INTVAL (operands[2]) == 0xff);
+      mode = QImode;
+    }
+
+  operands[1] = gen_lowpart (mode, operands[1]);
+
+  if (mode == SImode)
+    emit_insn (gen_zero_extendsidi2 (operands[0], operands[1]));
+  else
+    {
+      rtx (*insn) (rtx, rtx);
+
+      /* Zero extend to SImode to avoid partial register stalls.  */
+      operands[0] = gen_lowpart (SImode, operands[0]);
+
+      insn = (mode == HImode) ? gen_zero_extendhisi2 : gen_zero_extendqisi2;
+      emit_insn (insn (operands[0], operands[1]));
+    }
+  DONE;
+})
+
+(define_split
   [(set (match_operand 0 "register_operand" "")
 	(and (match_dup 0)
 	     (const_int -65536)))
@@ -11175,18 +11154,17 @@ 
   ix86_expand_clear (operands[3]);
 })
 
-;; Similar, but match zero_extendhisi2_and, which adds a clobber.
+;; Similar, but match zero extend with andsi3.
 
 (define_peephole2
   [(set (reg FLAGS_REG) (match_operand 0 "" ""))
    (set (match_operand:QI 1 "register_operand" "")
 	(match_operator:QI 2 "ix86_comparison_operator"
 	  [(reg FLAGS_REG) (const_int 0)]))
-   (parallel [(set (match_operand 3 "q_regs_operand" "")
-		   (zero_extend (match_dup 1)))
+   (parallel [(set (match_operand:SI 3 "q_regs_operand" "")
+		   (and:SI (match_dup 3) (const_int 255)))
 	      (clobber (reg:CC FLAGS_REG))])]
-  "(peep2_reg_dead_p (3, operands[1])
-    || operands_match_p (operands[1], operands[3]))
+  "REGNO (operands[1]) == REGNO (operands[3])
    && ! reg_overlap_mentioned_p (operands[3], operands[0])"
   [(set (match_dup 4) (match_dup 0))
    (set (strict_low_part (match_dup 5))