Message ID | 20120330150340.4EA5F615CC@tjsboxrox.mtv.corp.google.com |
---|---|
State | New |
Headers | show |
On 03/30/2012 11:03 AM, Teresa Johnson wrote: > +(define_insn "*movhi_imm_internal" > + [(set (match_operand:HI 0 "memory_operand" "=m") > + (match_operand:HI 1 "immediate_operand" "n"))] > + "!TARGET_LCP_STALL" > +{ > + return "mov{w}\t{%1, %0|%0, %1}"; > +} > + [(set (attr "type") (const_string "imov")) > + (set (attr "mode") (const_string "HI"))]) > + > (define_insn "*movhi_internal" > [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m") > - (match_operand:HI 1 "general_operand" "r,rn,rm,rn"))] > + (match_operand:HI 1 "general_operand" "r,rn,rm,r"))] > "!(MEM_P (operands[0]) && MEM_P (operands[1]))" For reload to work correctly, all alternatives must remain part of the same pattern. This issue should be handled with the ISA and ENABLED attributes. r~
On 03/30/2012 11:11 AM, Richard Henderson wrote: > On 03/30/2012 11:03 AM, Teresa Johnson wrote: >> +(define_insn "*movhi_imm_internal" >> + [(set (match_operand:HI 0 "memory_operand" "=m") >> + (match_operand:HI 1 "immediate_operand" "n"))] >> + "!TARGET_LCP_STALL" >> +{ >> + return "mov{w}\t{%1, %0|%0, %1}"; >> +} >> + [(set (attr "type") (const_string "imov")) >> + (set (attr "mode") (const_string "HI"))]) >> + >> (define_insn "*movhi_internal" >> [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m") >> - (match_operand:HI 1 "general_operand" "r,rn,rm,rn"))] >> + (match_operand:HI 1 "general_operand" "r,rn,rm,r"))] >> "!(MEM_P (operands[0]) && MEM_P (operands[1]))" > > For reload to work correctly, all alternatives must remain part of the same pattern. > This issue should be handled with the ISA and ENABLED attributes. I'll also ask if this should better be handled with a peephole2. While movw $1234,(%eax) might be expensive, is it so expensive that we *must* force the use of a free register? Might it be better only to split the insn in two if and only if a free register exists? That can easily be done with a peephole2 pattern... r~
Index: config/i386/i386.h =================================================================== --- config/i386/i386.h (revision 185920) +++ config/i386/i386.h (working copy) @@ -262,6 +262,7 @@ enum ix86_tune_indices { X86_TUNE_MOVX, X86_TUNE_PARTIAL_REG_STALL, X86_TUNE_PARTIAL_FLAG_REG_STALL, + X86_TUNE_LCP_STALL, X86_TUNE_USE_HIMODE_FIOP, X86_TUNE_USE_SIMODE_FIOP, X86_TUNE_USE_MOV0, @@ -340,6 +341,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_L #define TARGET_PARTIAL_REG_STALL ix86_tune_features[X86_TUNE_PARTIAL_REG_STALL] #define TARGET_PARTIAL_FLAG_REG_STALL \ ix86_tune_features[X86_TUNE_PARTIAL_FLAG_REG_STALL] +#define TARGET_LCP_STALL \ + ix86_tune_features[X86_TUNE_LCP_STALL] #define TARGET_USE_HIMODE_FIOP ix86_tune_features[X86_TUNE_USE_HIMODE_FIOP] #define TARGET_USE_SIMODE_FIOP ix86_tune_features[X86_TUNE_USE_SIMODE_FIOP] #define TARGET_USE_MOV0 ix86_tune_features[X86_TUNE_USE_MOV0] Index: config/i386/i386.md =================================================================== --- config/i386/i386.md (revision 185920) +++ config/i386/i386.md (working copy) @@ -2262,9 +2262,19 @@ ] (const_string "SI")))]) +(define_insn "*movhi_imm_internal" + [(set (match_operand:HI 0 "memory_operand" "=m") + (match_operand:HI 1 "immediate_operand" "n"))] + "!TARGET_LCP_STALL" +{ + return "mov{w}\t{%1, %0|%0, %1}"; +} + [(set (attr "type") (const_string "imov")) + (set (attr "mode") (const_string "HI"))]) + (define_insn "*movhi_internal" [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m") - (match_operand:HI 1 "general_operand" "r,rn,rm,rn"))] + (match_operand:HI 1 "general_operand" "r,rn,rm,r"))] "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) Index: config/i386/i386.c =================================================================== --- config/i386/i386.c (revision 185920) +++ config/i386/i386.c (working copy) @@ -1964,6 +1964,11 @@ static unsigned int initial_ix86_tune_features[X86 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */ m_CORE2I7 | m_GENERIC, + /* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall + * on 16-bit immediate moves into memory on Core2 and Corei7, + * which may also affect AMD implementations. */ + m_CORE2I7 | m_GENERIC | m_AMD_MULTIPLE, + /* X86_TUNE_USE_HIMODE_FIOP */ m_386 | m_486 | m_K6_GEODE,