Patchwork [i386] : Avoid movaps size optimizations for TARGET_AVX

login
register
mail settings
Submitter Uros Bizjak
Date May 10, 2012, 11:25 p.m.
Message ID <CAFULd4bh_pmM7no6unjPOU1gTG2wiK8isjY5Shc59U_rswySWA@mail.gmail.com>
Download mbox | patch
Permalink /patch/158410/
State New
Headers show

Comments

Uros Bizjak - May 10, 2012, 11:25 p.m.
Hello!

There is no point in emitting vmovaps instead of vmovapd or vmovdqa,
since these instructions have the same size. The attached patch fixes
this oversight for TARGET_AVX.

2012-05-11  Uros Bizjak  <ubizjak@gmail.com>

	* config/i386/i386.md (*movti_internal_rex64): Avoid MOVAPS size
	optimization for TARGET_AVX.
	(*movti_internal_sse): Ditto.
	(*movdi_internal_rex64): Handle TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL.
	(*movdi_internal): Ditto.
	(*movsi_internal): Ditto.
	(*movtf_internal): Avoid MOVAPS size optimization for TARGET_AVX.
	(*movdf_internal_rex64): Ditto.
	(*movdf_internal): Ditto.
	(*movsf_internal): Ditto.
	* config/i386/sse.md (mov<mode>): Handle TARGET_SSE_LOAD0_BY_PXOR.

Tested on x86_64-pc-linux-gnu {,-m32}, committed to mainline SVN.

Uros.

Patch

Index: i386.md
===================================================================
--- i386.md	(revision 187372)
+++ i386.md	(working copy)
@@ -1890,12 +1890,15 @@ 
    (set (attr "mode")
    	(cond [(eq_attr "alternative" "0,1")
 		 (const_string "DI")
-	       (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
-		    (match_test "optimize_function_for_size_p (cfun)"))
+	       (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
 		 (const_string "V4SF")
 	       (and (eq_attr "alternative" "4")
 		    (match_test "TARGET_SSE_TYPELESS_STORES"))
 		 (const_string "V4SF")
+	       (match_test "TARGET_AVX")
+		 (const_string "TI")
+	       (match_test "optimize_function_for_size_p (cfun)")
+		 (const_string "V4SF")
 	       ]
 	       (const_string "TI")))])
 
@@ -1943,13 +1946,15 @@ 
   [(set_attr "type" "sselog1,ssemov,ssemov")
    (set_attr "prefix" "maybe_vex")
    (set (attr "mode")
-	(cond [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
-		    (match_test "optimize_function_for_size_p (cfun)"))
+	(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
 		 (const_string "V4SF")
 	       (and (eq_attr "alternative" "2")
 		    (match_test "TARGET_SSE_TYPELESS_STORES"))
 		 (const_string "V4SF")
-	       (not (match_test "TARGET_SSE2"))
+	       (match_test "TARGET_AVX")
+		 (const_string "TI")
+	       (ior (not (match_test "TARGET_SSE2"))
+		    (match_test "optimize_function_for_size_p (cfun)"))
 		 (const_string "V4SF")
 	      ]
 	      (const_string "TI")))])
@@ -1970,8 +1975,11 @@ 
 	return "movdq2q\t{%1, %0|%0, %1}";
 
     case TYPE_SSEMOV:
-      if (get_attr_mode (insn) == MODE_TI)
+      if (get_attr_mode (insn) == MODE_V4SF)
+	return "%vmovaps\t{%1, %0|%0, %1}";
+      else if (get_attr_mode (insn) == MODE_TI)
 	return "%vmovdqa\t{%1, %0|%0, %1}";
+
       /* Handle broken assemblers that require movd instead of movq.  */
       if (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1]))
 	return "%vmovd\t{%1, %0|%0, %1}";
@@ -2048,7 +2056,20 @@ 
      (if_then_else (eq_attr "alternative" "10,11,12,13,14,15")
        (const_string "maybe_vex")
        (const_string "orig")))
-   (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,DI,DI,TI,DI,TI,DI,DI,DI,DI,DI")])
+   (set (attr "mode")
+   	(cond [(eq_attr "alternative" "0,4")
+		  (const_string "SI")
+	       (eq_attr "alternative" "10,12")
+		  (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+			   (const_string "V4SF")
+			 (match_test "TARGET_AVX")
+			   (const_string "TI")
+			 (match_test "optimize_function_for_size_p (cfun)")
+			   (const_string "V4SF")
+			]
+			(const_string "TI"))
+	      ]
+	      (const_string "DI")))])
 
 ;; Reload patterns to support multi-word load/store
 ;; with non-offsetable address.
@@ -2142,7 +2163,7 @@ 
 	case MODE_DI:
 	   return "%vmovq\t{%1, %0|%0, %1}";
 	case MODE_V4SF:
-	  return "movaps\t{%1, %0|%0, %1}";
+	  return "%vmovaps\t{%1, %0|%0, %1}";
 	case MODE_V2SF:
 	  return "movlps\t{%1, %0|%0, %1}";
 	default:
@@ -2189,7 +2210,22 @@ 
      (if_then_else (eq_attr "alternative" "5,6,7,8")
        (const_string "maybe_vex")
        (const_string "orig")))
-   (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI,TI,DI,V4SF,V2SF,V4SF,V2SF,DI,DI")])
+   (set (attr "mode")
+   	(cond [(eq_attr "alternative" "9,11")
+		  (const_string "V4SF")
+	       (eq_attr "alternative" "10,12")
+		  (const_string "V2SF")
+	       (eq_attr "alternative" "5,7")
+		  (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+			   (const_string "V4SF")
+			 (match_test "TARGET_AVX")
+			   (const_string "TI")
+			 (match_test "optimize_function_for_size_p (cfun)")
+			   (const_string "V4SF")
+			]
+			(const_string "TI"))
+	      ]
+	      (const_string "DI")))])
 
 (define_split
   [(set (match_operand:DI 0 "nonimmediate_operand")
@@ -2271,10 +2307,15 @@ 
      (cond [(eq_attr "alternative" "2,3")
 	      (const_string "DI")
 	    (eq_attr "alternative" "6,7")
-	      (if_then_else
-	        (not (match_test "TARGET_SSE2"))
-	        (const_string "V4SF")
-	        (const_string "TI"))
+	      (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+		       (const_string "V4SF")
+		     (match_test "TARGET_AVX")
+		       (const_string "TI")
+		     (ior (not (match_test "TARGET_SSE2"))
+		     	  (match_test "optimize_function_for_size_p (cfun)"))
+		       (const_string "V4SF")
+		    ]
+		    (const_string "TI"))
 	    (and (eq_attr "alternative" "8,9,10,11")
 	         (not (match_test "TARGET_SSE2")))
 	      (const_string "SF")
@@ -2881,12 +2922,15 @@ 
    (set (attr "mode")
         (cond [(eq_attr "alternative" "3,4")
 		 (const_string "DI")
-	       (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
-		    (match_test "optimize_function_for_size_p (cfun)"))
+	       (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
 		 (const_string "V4SF")
 	       (and (eq_attr "alternative" "2")
 		    (match_test "TARGET_SSE_TYPELESS_STORES"))
 		 (const_string "V4SF")
+	       (match_test "TARGET_AVX")
+		 (const_string "TI")
+	       (match_test "optimize_function_for_size_p (cfun)")
+		 (const_string "V4SF")
 	       ]
 	       (const_string "TI")))])
 
@@ -3030,9 +3074,11 @@ 
 	       (eq_attr "alternative" "3,4,5,6,11,12")
 		 (const_string "DI")
 
-	       /* xorps is one byte shorter.  */
+	       /* xorps is one byte shorter for !TARGET_AVX.  */
 	       (eq_attr "alternative" "7")
-		 (cond [(match_test "optimize_function_for_size_p (cfun)")
+		 (cond [(match_test "TARGET_AVX")
+			  (const_string "V2DF")
+			(match_test "optimize_function_for_size_p (cfun)")
 			  (const_string "V4SF")
 			(match_test "TARGET_SSE_LOAD0_BY_PXOR")
 			  (const_string "TI")
@@ -3043,14 +3089,16 @@ 
 		  whole SSE registers use APD move to break dependency
 		  chains, otherwise use short move to avoid extra work.
 
-		  movaps encodes one byte shorter.  */
+		  movaps encodes one byte shorter for !TARGET_AVX.  */
 	       (eq_attr "alternative" "8")
-		 (cond
-		   [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
-			 (match_test "optimize_function_for_size_p (cfun)"))
-		      (const_string "V4SF")
-		    (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
-		      (const_string "V2DF")
+		 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+			  (const_string "V4SF")
+			(match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+			  (const_string "V2DF")
+			(match_test "TARGET_AVX")
+			  (const_string "DF")
+			(match_test "optimize_function_for_size_p (cfun)")
+			  (const_string "V4SF")
 		   ]
 		   (const_string "DF"))
 	       /* For architectures resolving dependencies on register
@@ -3165,9 +3213,11 @@ 
 		   (const_string "V4SF")
 		   (const_string "V2SF"))
 
-	       /* xorps is one byte shorter.  */
+	       /* xorps is one byte shorter for !TARGET_AVX.  */
 	       (eq_attr "alternative" "5,9")
-		 (cond [(match_test "optimize_function_for_size_p (cfun)")
+		 (cond [(match_test "TARGET_AVX")
+			  (const_string "V2DF")
+			(match_test "optimize_function_for_size_p (cfun)")
 			  (const_string "V4SF")
 			(match_test "TARGET_SSE_LOAD0_BY_PXOR")
 			  (const_string "TI")
@@ -3178,16 +3228,19 @@ 
 		  whole SSE registers use APD move to break dependency
 		  chains, otherwise use short move to avoid extra work.
 
-		  movaps encodes one byte shorter.  */
+		  movaps encodes one byte shorter for !TARGET_AVX.  */
 	       (eq_attr "alternative" "6,10")
-		 (cond
-		   [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
-			 (match_test "optimize_function_for_size_p (cfun)"))
-		      (const_string "V4SF")
-		    (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
-		      (const_string "V2DF")
+		 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+			  (const_string "V4SF")
+			(match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+			  (const_string "V2DF")
+			(match_test "TARGET_AVX")
+			  (const_string "DF")
+			(match_test "optimize_function_for_size_p (cfun)")
+			  (const_string "V4SF")
 		   ]
 		   (const_string "DF"))
+
 	       /* For architectures resolving dependencies on register
 		  parts we may avoid extra work to zero out upper part
 		  of register.  */
@@ -3277,12 +3330,16 @@ 
         (cond [(eq_attr "alternative" "3,4,9,10")
 		 (const_string "SI")
 	       (eq_attr "alternative" "5")
-		 (if_then_else
-		   (and (and (match_test "TARGET_SSE_LOAD0_BY_PXOR")
-			     (match_test "TARGET_SSE2"))
-			(not (match_test "optimize_function_for_size_p (cfun)")))
-		   (const_string "TI")
-		   (const_string "V4SF"))
+		 (cond [(match_test "TARGET_AVX")
+			  (const_string "V4SF")
+ 			(ior (not (match_test "TARGET_SSE2"))
+ 			     (match_test "optimize_function_for_size_p (cfun)"))
+ 		 	  (const_string "V4SF")
+			(match_test "TARGET_SSE_LOAD0_BY_PXOR")
+			  (const_string "TI")
+		       ]
+		       (const_string "V4SF"))
+
 	       /* For architectures resolving dependencies on
 		  whole SSE registers use APS move to break dependency
 		  chains, otherwise use short move to avoid extra work.
Index: sse.md
===================================================================
--- sse.md	(revision 187372)
+++ sse.md	(working copy)
@@ -491,6 +491,9 @@ 
 	       (ior (not (match_test "TARGET_SSE2"))
 		    (match_test "optimize_function_for_size_p (cfun)"))
 		 (const_string "V4SF")
+	       (and (eq_attr "alternative" "0")
+		    (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
+		 (const_string "TI")
 	      ]
 	      (const_string "<sseinsnmode>")))])