Patchwork [i386] Avoid LCP stalls (issue5975045)

login
register
mail settings
Submitter Teresa Johnson
Date March 30, 2012, 3:03 p.m.
Message ID <20120330150340.4EA5F615CC@tjsboxrox.mtv.corp.google.com>
Download mbox | patch
Permalink /patch/149667/
State New
Headers show

Comments

Teresa Johnson - March 30, 2012, 3:03 p.m.
Minor update to patch to remove unnecessary check in new movhi_imm_internal
define_insn.

Retested successfully.

Teresa

2012-03-29   Teresa Johnson  <tejohnson@google.com>

        * config/i386/i386.h (ix86_tune_indices): Add
        X86_TUNE_LCP_STALL.
        * config/i386/i386.md (movhi_internal): Split to
        movhi_internal and movhi_imm_internal.
        * config/i386/i386.c (initial_ix86_tune_features): Initialize
        X86_TUNE_LCP_STALL entry.



--
This patch is available for review at http://codereview.appspot.com/5975045
Richard Henderson - March 30, 2012, 3:11 p.m.
On 03/30/2012 11:03 AM, Teresa Johnson wrote:
> +(define_insn "*movhi_imm_internal"
> +  [(set (match_operand:HI 0 "memory_operand" "=m")
> +        (match_operand:HI 1 "immediate_operand" "n"))]
> +  "!TARGET_LCP_STALL"
> +{
> +  return "mov{w}\t{%1, %0|%0, %1}";
> +}
> +  [(set (attr "type") (const_string "imov"))
> +   (set (attr "mode") (const_string "HI"))])
> +
>  (define_insn "*movhi_internal"
>    [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m")
> -	(match_operand:HI 1 "general_operand" "r,rn,rm,rn"))]
> +	(match_operand:HI 1 "general_operand" "r,rn,rm,r"))]
>    "!(MEM_P (operands[0]) && MEM_P (operands[1]))"

For reload to work correctly, all alternatives must remain part of the same pattern.
This issue should be handled with the ISA and ENABLED attributes.


r~
Richard Henderson - March 30, 2012, 3:19 p.m.
On 03/30/2012 11:11 AM, Richard Henderson wrote:
> On 03/30/2012 11:03 AM, Teresa Johnson wrote:
>> +(define_insn "*movhi_imm_internal"
>> +  [(set (match_operand:HI 0 "memory_operand" "=m")
>> +        (match_operand:HI 1 "immediate_operand" "n"))]
>> +  "!TARGET_LCP_STALL"
>> +{
>> +  return "mov{w}\t{%1, %0|%0, %1}";
>> +}
>> +  [(set (attr "type") (const_string "imov"))
>> +   (set (attr "mode") (const_string "HI"))])
>> +
>>  (define_insn "*movhi_internal"
>>    [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m")
>> -	(match_operand:HI 1 "general_operand" "r,rn,rm,rn"))]
>> +	(match_operand:HI 1 "general_operand" "r,rn,rm,r"))]
>>    "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
> 
> For reload to work correctly, all alternatives must remain part of the same pattern.
> This issue should be handled with the ISA and ENABLED attributes.

I'll also ask if this should better be handled with a peephole2.

While movw $1234,(%eax) might be expensive, is it so expensive that we
*must* force the use of a free register?  Might it be better only to
split the insn in two if and only if a free register exists?

That can easily be done with a peephole2 pattern...


r~

Patch

Index: config/i386/i386.h
===================================================================
--- config/i386/i386.h	(revision 185920)
+++ config/i386/i386.h	(working copy)
@@ -262,6 +262,7 @@  enum ix86_tune_indices {
   X86_TUNE_MOVX,
   X86_TUNE_PARTIAL_REG_STALL,
   X86_TUNE_PARTIAL_FLAG_REG_STALL,
+  X86_TUNE_LCP_STALL,
   X86_TUNE_USE_HIMODE_FIOP,
   X86_TUNE_USE_SIMODE_FIOP,
   X86_TUNE_USE_MOV0,
@@ -340,6 +341,8 @@  extern unsigned char ix86_tune_features[X86_TUNE_L
 #define TARGET_PARTIAL_REG_STALL ix86_tune_features[X86_TUNE_PARTIAL_REG_STALL]
 #define TARGET_PARTIAL_FLAG_REG_STALL \
 	ix86_tune_features[X86_TUNE_PARTIAL_FLAG_REG_STALL]
+#define TARGET_LCP_STALL \
+	ix86_tune_features[X86_TUNE_LCP_STALL]
 #define TARGET_USE_HIMODE_FIOP	ix86_tune_features[X86_TUNE_USE_HIMODE_FIOP]
 #define TARGET_USE_SIMODE_FIOP	ix86_tune_features[X86_TUNE_USE_SIMODE_FIOP]
 #define TARGET_USE_MOV0		ix86_tune_features[X86_TUNE_USE_MOV0]
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 185920)
+++ config/i386/i386.md	(working copy)
@@ -2262,9 +2262,19 @@ 
 	   ]
 	   (const_string "SI")))])
 
+(define_insn "*movhi_imm_internal"
+  [(set (match_operand:HI 0 "memory_operand" "=m")
+        (match_operand:HI 1 "immediate_operand" "n"))]
+  "!TARGET_LCP_STALL"
+{
+  return "mov{w}\t{%1, %0|%0, %1}";
+}
+  [(set (attr "type") (const_string "imov"))
+   (set (attr "mode") (const_string "HI"))])
+
 (define_insn "*movhi_internal"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m")
-	(match_operand:HI 1 "general_operand" "r,rn,rm,rn"))]
+	(match_operand:HI 1 "general_operand" "r,rn,rm,r"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
   switch (get_attr_type (insn))
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 185920)
+++ config/i386/i386.c	(working copy)
@@ -1964,6 +1964,11 @@  static unsigned int initial_ix86_tune_features[X86
   /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
   m_CORE2I7 | m_GENERIC,
 
+  /* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall
+   * on 16-bit immediate moves into memory on Core2 and Corei7,
+   * which may also affect AMD implementations.  */
+  m_CORE2I7 | m_GENERIC | m_AMD_MULTIPLE,
+
   /* X86_TUNE_USE_HIMODE_FIOP */
   m_386 | m_486 | m_K6_GEODE,