Message ID | CAAe5K+V-+-snSMNw0jGAiRkRsqfUMFV-Nd_2LnmqkzAtwP94mg@mail.gmail.com |
---|---|
State | New |
Headers | show |
On Thu, Aug 23, 2012 at 2:01 PM, Teresa Johnson <tejohnson@google.com> wrote: > This patch is for google branches only. > > This is the patch I plan to apply after reverting an earlier set of > patches ported over from trunk that addressed the same problem but > were causing some performance regressions. This is a narrower fix > I had originally prepared before finding the trunk patches. > > This addresses only the specific problem of the and*i_1 patterns preferring > to match an earlier set of patterns "r,0,Z" when the source and dest registers > are the same, thus preferring to keep the instruction as an "andw" instead > of using a zero-extending move when the constant is 0xff. We now will > prefer to convert to a zero-extending move when the constant is 0xff and > the machine has LCP stalls. > > Google ref b/6615073. > > Tested with crosstool. Ok for google-4_7? > > 2012-08-23 Teresa Johnson <tejohnson@google.com> > > * config/i386/i386.md (anddi_1): Add new r,qm,Lh pattern > first to catch an and with 0xff that would be turned into > an andw on machines with length-changing prefix stalls, > and instead convert it into a zero-extending move. > (andsi_1, andhi_1): Ditto. > * config/i386/constraints.md (La): Rename from "L". > (Lh): New constraint to match possible LCP stalling ands. > * config/i386/i386.c (ix86_binary_operator_ok): Update > for rename of "L" constraint. > > Index: config/i386/i386.md > =================================================================== > --- config/i386/i386.md (revision 188251) > +++ config/i386/i386.md (working copy) > @@ -7697,10 +7697,10 @@ > "ix86_expand_binary_operator (AND, <MODE>mode, operands); DONE;") > > (define_insn "*anddi_1" > - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r") > + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm,r,r") > (and:DI > - (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm") > - (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm,L"))) > + (match_operand:DI 1 "nonimmediate_operand" "%qm,0,0,0,qm") > + (match_operand:DI 2 "x86_64_szext_general_operand" "Lh,Z,re,rm,La"))) > (clobber (reg:CC FLAGS_REG))] > "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands)" > { > @@ -7738,8 +7738,8 @@ > return "and{q}\t{%2, %0|%0, %2}"; > } > } Why is there a need to split L into Lh and La? Removing redundant 'and' seems beneficial. > - [(set_attr "type" "alu,alu,alu,imovx") > - (set_attr "length_immediate" "*,*,*,0") > + [(set_attr "type" "imovx,alu,alu,alu,imovx") > + (set_attr "length_immediate" "0,*,*,*,0") > (set (attr "prefix_rex") > (if_then_else > (and (eq_attr "type" "imovx") > @@ -7747,12 +7747,12 @@ > (match_operand 1 "ext_QIreg_operand" ""))) > (const_string "1") > (const_string "*"))) > - (set_attr "mode" "SI,DI,DI,SI")]) > + (set_attr "mode" "SI,SI,DI,DI,SI")]) > > (define_insn "*andsi_1" > - [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,r") > - (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,qm") > - (match_operand:SI 2 "x86_64_general_operand" "re,rm,L"))) > + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r,r") > + (and:SI (match_operand:SI 1 "nonimmediate_operand" "%qm,0,0,qm") > + (match_operand:SI 2 "x86_64_general_operand" "Lh,re,rm,La"))) > (clobber (reg:CC FLAGS_REG))] > "ix86_binary_operator_ok (AND, SImode, operands)" > { > @@ -7783,7 +7783,7 @@ > return "and{l}\t{%2, %0|%0, %2}"; > } > } > - [(set_attr "type" "alu,alu,imovx") > + [(set_attr "type" "imovx,alu,alu,imovx") > (set (attr "prefix_rex") > (if_then_else > (and (eq_attr "type" "imovx") > @@ -7791,7 +7791,7 @@ > (match_operand 1 "ext_QIreg_operand" ""))) > (const_string "1") > (const_string "*"))) > - (set_attr "length_immediate" "*,*,0") > + (set_attr "length_immediate" "0,*,*,0") > (set_attr "mode" "SI")]) > > ;; See comment for addsi_1_zext why we do use nonimmediate_operand > @@ -7807,9 +7807,9 @@ > (set_attr "mode" "SI")]) > > (define_insn "*andhi_1" > - [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r") > - (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,qm") > - (match_operand:HI 2 "general_operand" "rn,rm,L"))) > + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,rm,r,r") > + (and:HI (match_operand:HI 1 "nonimmediate_operand" "%qm,0,0,qm") > + (match_operand:HI 2 "general_operand" "Lh,rn,rm,L"))) > (clobber (reg:CC FLAGS_REG))] > "ix86_binary_operator_ok (AND, HImode, operands)" > { > @@ -7826,15 +7826,15 @@ > return "and{w}\t{%2, %0|%0, %2}"; > } > } > - [(set_attr "type" "alu,alu,imovx") > - (set_attr "length_immediate" "*,*,0") > + [(set_attr "type" "imovx,alu,alu,imovx") > + (set_attr "length_immediate" "0,*,*,0") > (set (attr "prefix_rex") > (if_then_else > (and (eq_attr "type" "imovx") > (match_operand 1 "ext_QIreg_operand" "")) > (const_string "1") > (const_string "*"))) > - (set_attr "mode" "HI,HI,SI")]) > + (set_attr "mode" "SI,HI,HI,SI")]) > > ;; %%% Potential partial reg stall on alternative 2. What to do? > (define_insn "*andqi_1" > Index: config/i386/constraints.md > =================================================================== > --- config/i386/constraints.md (revision 184516) > +++ config/i386/constraints.md (working copy) > @@ -148,13 +148,19 @@ > (and (match_code "const_int") > (match_test "IN_RANGE (ival, -128, 127)"))) > > -(define_constraint "L" > +(define_constraint "La" > "@code{0xFF}, @code{0xFFFF} or @code{0xFFFFFFFF} > for AND as a zero-extending move." > (and (match_code "const_int") > (match_test "ival == 0xff || ival == 0xffff > || ival == (HOST_WIDE_INT) 0xffffffff"))) > > +(define_constraint "Lh" > + "@code{0xFF} for AND as a zero-extending move when there are LCP stalls." > + (and (match_code "const_int") > + (match_test "ival == 0xff") > + (match_test "TARGET_LCP_STALL"))) This is necessary but not sufficient to generate LCP. For instance, not in adddi and addsi instructions. thanks, David > + > (define_constraint "M" > "0, 1, 2, or 3 (shifts for the @code{lea} instruction)." > (and (match_code "const_int") > Index: config/i386/i386.c > =================================================================== > --- config/i386/i386.c (revision 188251) > +++ config/i386/i386.c (working copy) > @@ -16331,7 +16331,7 @@ ix86_binary_operator_ok (enum rtx_code code, enum > && (mode == HImode > || mode == SImode > || (TARGET_64BIT && mode == DImode)) > - && satisfies_constraint_L (src2)); > + && satisfies_constraint_La (src2)); > > return true; > } > > > -- > Teresa Johnson | Software Engineer | tejohnson@google.com | 408-460-2413
On Thu, Aug 23, 2012 at 3:33 PM, Xinliang David Li <davidxl@google.com> wrote: > On Thu, Aug 23, 2012 at 2:01 PM, Teresa Johnson <tejohnson@google.com> wrote: >> This patch is for google branches only. >> >> This is the patch I plan to apply after reverting an earlier set of >> patches ported over from trunk that addressed the same problem but >> were causing some performance regressions. This is a narrower fix >> I had originally prepared before finding the trunk patches. >> >> This addresses only the specific problem of the and*i_1 patterns preferring >> to match an earlier set of patterns "r,0,Z" when the source and dest registers >> are the same, thus preferring to keep the instruction as an "andw" instead >> of using a zero-extending move when the constant is 0xff. We now will >> prefer to convert to a zero-extending move when the constant is 0xff and >> the machine has LCP stalls. >> >> Google ref b/6615073. >> >> Tested with crosstool. Ok for google-4_7? >> >> 2012-08-23 Teresa Johnson <tejohnson@google.com> >> >> * config/i386/i386.md (anddi_1): Add new r,qm,Lh pattern >> first to catch an and with 0xff that would be turned into >> an andw on machines with length-changing prefix stalls, >> and instead convert it into a zero-extending move. >> (andsi_1, andhi_1): Ditto. >> * config/i386/constraints.md (La): Rename from "L". >> (Lh): New constraint to match possible LCP stalling ands. >> * config/i386/i386.c (ix86_binary_operator_ok): Update >> for rename of "L" constraint. >> >> Index: config/i386/i386.md >> =================================================================== >> --- config/i386/i386.md (revision 188251) >> +++ config/i386/i386.md (working copy) >> @@ -7697,10 +7697,10 @@ >> "ix86_expand_binary_operator (AND, <MODE>mode, operands); DONE;") >> >> (define_insn "*anddi_1" >> - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r") >> + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm,r,r") >> (and:DI >> - (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm") >> - (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm,L"))) >> + (match_operand:DI 1 "nonimmediate_operand" "%qm,0,0,0,qm") >> + (match_operand:DI 2 "x86_64_szext_general_operand" "Lh,Z,re,rm,La"))) >> (clobber (reg:CC FLAGS_REG))] >> "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands)" >> { >> @@ -7738,8 +7738,8 @@ >> return "and{q}\t{%2, %0|%0, %2}"; >> } >> } > > Why is there a need to split L into Lh and La? Removing redundant > 'and' seems beneficial. I first did move the L condition first, which did fix one benchmark's performance issue but I got better performance overall by splitting it (so an "and" with 0xffff or 0xffffffffff might stay an and rather than becoming a movz variant). I'm not sure why using an "and" instead of converting it to a movz in these non-LCP situations matters though. > > >> - [(set_attr "type" "alu,alu,alu,imovx") >> - (set_attr "length_immediate" "*,*,*,0") >> + [(set_attr "type" "imovx,alu,alu,alu,imovx") >> + (set_attr "length_immediate" "0,*,*,*,0") >> (set (attr "prefix_rex") >> (if_then_else >> (and (eq_attr "type" "imovx") >> @@ -7747,12 +7747,12 @@ >> (match_operand 1 "ext_QIreg_operand" ""))) >> (const_string "1") >> (const_string "*"))) >> - (set_attr "mode" "SI,DI,DI,SI")]) >> + (set_attr "mode" "SI,SI,DI,DI,SI")]) >> >> (define_insn "*andsi_1" >> - [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,r") >> - (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,qm") >> - (match_operand:SI 2 "x86_64_general_operand" "re,rm,L"))) >> + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r,r") >> + (and:SI (match_operand:SI 1 "nonimmediate_operand" "%qm,0,0,qm") >> + (match_operand:SI 2 "x86_64_general_operand" "Lh,re,rm,La"))) >> (clobber (reg:CC FLAGS_REG))] >> "ix86_binary_operator_ok (AND, SImode, operands)" >> { >> @@ -7783,7 +7783,7 @@ >> return "and{l}\t{%2, %0|%0, %2}"; >> } >> } >> - [(set_attr "type" "alu,alu,imovx") >> + [(set_attr "type" "imovx,alu,alu,imovx") >> (set (attr "prefix_rex") >> (if_then_else >> (and (eq_attr "type" "imovx") >> @@ -7791,7 +7791,7 @@ >> (match_operand 1 "ext_QIreg_operand" ""))) >> (const_string "1") >> (const_string "*"))) >> - (set_attr "length_immediate" "*,*,0") >> + (set_attr "length_immediate" "0,*,*,0") >> (set_attr "mode" "SI")]) >> >> ;; See comment for addsi_1_zext why we do use nonimmediate_operand >> @@ -7807,9 +7807,9 @@ >> (set_attr "mode" "SI")]) >> >> (define_insn "*andhi_1" >> - [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r") >> - (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,qm") >> - (match_operand:HI 2 "general_operand" "rn,rm,L"))) >> + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,rm,r,r") >> + (and:HI (match_operand:HI 1 "nonimmediate_operand" "%qm,0,0,qm") >> + (match_operand:HI 2 "general_operand" "Lh,rn,rm,L"))) >> (clobber (reg:CC FLAGS_REG))] >> "ix86_binary_operator_ok (AND, HImode, operands)" >> { >> @@ -7826,15 +7826,15 @@ >> return "and{w}\t{%2, %0|%0, %2}"; >> } >> } >> - [(set_attr "type" "alu,alu,imovx") >> - (set_attr "length_immediate" "*,*,0") >> + [(set_attr "type" "imovx,alu,alu,imovx") >> + (set_attr "length_immediate" "0,*,*,0") >> (set (attr "prefix_rex") >> (if_then_else >> (and (eq_attr "type" "imovx") >> (match_operand 1 "ext_QIreg_operand" "")) >> (const_string "1") >> (const_string "*"))) >> - (set_attr "mode" "HI,HI,SI")]) >> + (set_attr "mode" "SI,HI,HI,SI")]) >> >> ;; %%% Potential partial reg stall on alternative 2. What to do? >> (define_insn "*andqi_1" >> Index: config/i386/constraints.md >> =================================================================== >> --- config/i386/constraints.md (revision 184516) >> +++ config/i386/constraints.md (working copy) >> @@ -148,13 +148,19 @@ >> (and (match_code "const_int") >> (match_test "IN_RANGE (ival, -128, 127)"))) >> >> -(define_constraint "L" >> +(define_constraint "La" >> "@code{0xFF}, @code{0xFFFF} or @code{0xFFFFFFFF} >> for AND as a zero-extending move." >> (and (match_code "const_int") >> (match_test "ival == 0xff || ival == 0xffff >> || ival == (HOST_WIDE_INT) 0xffffffff"))) >> >> +(define_constraint "Lh" >> + "@code{0xFF} for AND as a zero-extending move when there are LCP stalls." >> + (and (match_code "const_int") >> + (match_test "ival == 0xff") >> + (match_test "TARGET_LCP_STALL"))) > > > This is necessary but not sufficient to generate LCP. For instance, > not in adddi and addsi instructions. I saw cases where andsi or anddi with 0xff was converted to an andw in the end (possibly after some combining?). Teresa > > > thanks, > > David > >> + >> (define_constraint "M" >> "0, 1, 2, or 3 (shifts for the @code{lea} instruction)." >> (and (match_code "const_int") >> Index: config/i386/i386.c >> =================================================================== >> --- config/i386/i386.c (revision 188251) >> +++ config/i386/i386.c (working copy) >> @@ -16331,7 +16331,7 @@ ix86_binary_operator_ok (enum rtx_code code, enum >> && (mode == HImode >> || mode == SImode >> || (TARGET_64BIT && mode == DImode)) >> - && satisfies_constraint_L (src2)); >> + && satisfies_constraint_La (src2)); >> >> return true; >> } >> >> >> -- >> Teresa Johnson | Software Engineer | tejohnson@google.com | 408-460-2413
Ok for now but some follow up may be needed. thanks, David On Thu, Aug 23, 2012 at 4:21 PM, Teresa Johnson <tejohnson@google.com> wrote: > On Thu, Aug 23, 2012 at 3:33 PM, Xinliang David Li <davidxl@google.com> wrote: >> On Thu, Aug 23, 2012 at 2:01 PM, Teresa Johnson <tejohnson@google.com> wrote: >>> This patch is for google branches only. >>> >>> This is the patch I plan to apply after reverting an earlier set of >>> patches ported over from trunk that addressed the same problem but >>> were causing some performance regressions. This is a narrower fix >>> I had originally prepared before finding the trunk patches. >>> >>> This addresses only the specific problem of the and*i_1 patterns preferring >>> to match an earlier set of patterns "r,0,Z" when the source and dest registers >>> are the same, thus preferring to keep the instruction as an "andw" instead >>> of using a zero-extending move when the constant is 0xff. We now will >>> prefer to convert to a zero-extending move when the constant is 0xff and >>> the machine has LCP stalls. >>> >>> Google ref b/6615073. >>> >>> Tested with crosstool. Ok for google-4_7? >>> >>> 2012-08-23 Teresa Johnson <tejohnson@google.com> >>> >>> * config/i386/i386.md (anddi_1): Add new r,qm,Lh pattern >>> first to catch an and with 0xff that would be turned into >>> an andw on machines with length-changing prefix stalls, >>> and instead convert it into a zero-extending move. >>> (andsi_1, andhi_1): Ditto. >>> * config/i386/constraints.md (La): Rename from "L". >>> (Lh): New constraint to match possible LCP stalling ands. >>> * config/i386/i386.c (ix86_binary_operator_ok): Update >>> for rename of "L" constraint. >>> >>> Index: config/i386/i386.md >>> =================================================================== >>> --- config/i386/i386.md (revision 188251) >>> +++ config/i386/i386.md (working copy) >>> @@ -7697,10 +7697,10 @@ >>> "ix86_expand_binary_operator (AND, <MODE>mode, operands); DONE;") >>> >>> (define_insn "*anddi_1" >>> - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r") >>> + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm,r,r") >>> (and:DI >>> - (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm") >>> - (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm,L"))) >>> + (match_operand:DI 1 "nonimmediate_operand" "%qm,0,0,0,qm") >>> + (match_operand:DI 2 "x86_64_szext_general_operand" "Lh,Z,re,rm,La"))) >>> (clobber (reg:CC FLAGS_REG))] >>> "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands)" >>> { >>> @@ -7738,8 +7738,8 @@ >>> return "and{q}\t{%2, %0|%0, %2}"; >>> } >>> } >> >> Why is there a need to split L into Lh and La? Removing redundant >> 'and' seems beneficial. > > I first did move the L condition first, which did fix one benchmark's > performance issue but I got better performance overall by splitting it > (so an "and" with 0xffff or 0xffffffffff might stay an and rather than > becoming a movz variant). I'm not sure why using an "and" instead of > converting it to a movz in these non-LCP situations matters though. > >> >> >>> - [(set_attr "type" "alu,alu,alu,imovx") >>> - (set_attr "length_immediate" "*,*,*,0") >>> + [(set_attr "type" "imovx,alu,alu,alu,imovx") >>> + (set_attr "length_immediate" "0,*,*,*,0") >>> (set (attr "prefix_rex") >>> (if_then_else >>> (and (eq_attr "type" "imovx") >>> @@ -7747,12 +7747,12 @@ >>> (match_operand 1 "ext_QIreg_operand" ""))) >>> (const_string "1") >>> (const_string "*"))) >>> - (set_attr "mode" "SI,DI,DI,SI")]) >>> + (set_attr "mode" "SI,SI,DI,DI,SI")]) >>> >>> (define_insn "*andsi_1" >>> - [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,r") >>> - (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,qm") >>> - (match_operand:SI 2 "x86_64_general_operand" "re,rm,L"))) >>> + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r,r") >>> + (and:SI (match_operand:SI 1 "nonimmediate_operand" "%qm,0,0,qm") >>> + (match_operand:SI 2 "x86_64_general_operand" "Lh,re,rm,La"))) >>> (clobber (reg:CC FLAGS_REG))] >>> "ix86_binary_operator_ok (AND, SImode, operands)" >>> { >>> @@ -7783,7 +7783,7 @@ >>> return "and{l}\t{%2, %0|%0, %2}"; >>> } >>> } >>> - [(set_attr "type" "alu,alu,imovx") >>> + [(set_attr "type" "imovx,alu,alu,imovx") >>> (set (attr "prefix_rex") >>> (if_then_else >>> (and (eq_attr "type" "imovx") >>> @@ -7791,7 +7791,7 @@ >>> (match_operand 1 "ext_QIreg_operand" ""))) >>> (const_string "1") >>> (const_string "*"))) >>> - (set_attr "length_immediate" "*,*,0") >>> + (set_attr "length_immediate" "0,*,*,0") >>> (set_attr "mode" "SI")]) >>> >>> ;; See comment for addsi_1_zext why we do use nonimmediate_operand >>> @@ -7807,9 +7807,9 @@ >>> (set_attr "mode" "SI")]) >>> >>> (define_insn "*andhi_1" >>> - [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r") >>> - (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,qm") >>> - (match_operand:HI 2 "general_operand" "rn,rm,L"))) >>> + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,rm,r,r") >>> + (and:HI (match_operand:HI 1 "nonimmediate_operand" "%qm,0,0,qm") >>> + (match_operand:HI 2 "general_operand" "Lh,rn,rm,L"))) >>> (clobber (reg:CC FLAGS_REG))] >>> "ix86_binary_operator_ok (AND, HImode, operands)" >>> { >>> @@ -7826,15 +7826,15 @@ >>> return "and{w}\t{%2, %0|%0, %2}"; >>> } >>> } >>> - [(set_attr "type" "alu,alu,imovx") >>> - (set_attr "length_immediate" "*,*,0") >>> + [(set_attr "type" "imovx,alu,alu,imovx") >>> + (set_attr "length_immediate" "0,*,*,0") >>> (set (attr "prefix_rex") >>> (if_then_else >>> (and (eq_attr "type" "imovx") >>> (match_operand 1 "ext_QIreg_operand" "")) >>> (const_string "1") >>> (const_string "*"))) >>> - (set_attr "mode" "HI,HI,SI")]) >>> + (set_attr "mode" "SI,HI,HI,SI")]) >>> >>> ;; %%% Potential partial reg stall on alternative 2. What to do? >>> (define_insn "*andqi_1" >>> Index: config/i386/constraints.md >>> =================================================================== >>> --- config/i386/constraints.md (revision 184516) >>> +++ config/i386/constraints.md (working copy) >>> @@ -148,13 +148,19 @@ >>> (and (match_code "const_int") >>> (match_test "IN_RANGE (ival, -128, 127)"))) >>> >>> -(define_constraint "L" >>> +(define_constraint "La" >>> "@code{0xFF}, @code{0xFFFF} or @code{0xFFFFFFFF} >>> for AND as a zero-extending move." >>> (and (match_code "const_int") >>> (match_test "ival == 0xff || ival == 0xffff >>> || ival == (HOST_WIDE_INT) 0xffffffff"))) >>> >>> +(define_constraint "Lh" >>> + "@code{0xFF} for AND as a zero-extending move when there are LCP stalls." >>> + (and (match_code "const_int") >>> + (match_test "ival == 0xff") >>> + (match_test "TARGET_LCP_STALL"))) >> >> >> This is necessary but not sufficient to generate LCP. For instance, >> not in adddi and addsi instructions. > > I saw cases where andsi or anddi with 0xff was converted to an andw in > the end (possibly after some combining?). > > Teresa > >> >> >> thanks, >> >> David >> >>> + >>> (define_constraint "M" >>> "0, 1, 2, or 3 (shifts for the @code{lea} instruction)." >>> (and (match_code "const_int") >>> Index: config/i386/i386.c >>> =================================================================== >>> --- config/i386/i386.c (revision 188251) >>> +++ config/i386/i386.c (working copy) >>> @@ -16331,7 +16331,7 @@ ix86_binary_operator_ok (enum rtx_code code, enum >>> && (mode == HImode >>> || mode == SImode >>> || (TARGET_64BIT && mode == DImode)) >>> - && satisfies_constraint_L (src2)); >>> + && satisfies_constraint_La (src2)); >>> >>> return true; >>> } >>> >>> >>> -- >>> Teresa Johnson | Software Engineer | tejohnson@google.com | 408-460-2413 > > > > -- > Teresa Johnson | Software Engineer | tejohnson@google.com | 408-460-2413
Index: config/i386/i386.md =================================================================== --- config/i386/i386.md (revision 188251) +++ config/i386/i386.md (working copy) @@ -7697,10 +7697,10 @@ "ix86_expand_binary_operator (AND, <MODE>mode, operands); DONE;") (define_insn "*anddi_1" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r") + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm,r,r") (and:DI - (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm") - (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm,L"))) + (match_operand:DI 1 "nonimmediate_operand" "%qm,0,0,0,qm") + (match_operand:DI 2 "x86_64_szext_general_operand" "Lh,Z,re,rm,La"))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands)" { @@ -7738,8 +7738,8 @@ return "and{q}\t{%2, %0|%0, %2}"; } } - [(set_attr "type" "alu,alu,alu,imovx") - (set_attr "length_immediate" "*,*,*,0") + [(set_attr "type" "imovx,alu,alu,alu,imovx") + (set_attr "length_immediate" "0,*,*,*,0") (set (attr "prefix_rex") (if_then_else (and (eq_attr "type" "imovx") @@ -7747,12 +7747,12 @@ (match_operand 1 "ext_QIreg_operand" ""))) (const_string "1") (const_string "*"))) - (set_attr "mode" "SI,DI,DI,SI")]) + (set_attr "mode" "SI,SI,DI,DI,SI")]) (define_insn "*andsi_1" - [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,r") - (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,qm") - (match_operand:SI 2 "x86_64_general_operand" "re,rm,L"))) + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r,r") + (and:SI (match_operand:SI 1 "nonimmediate_operand" "%qm,0,0,qm") + (match_operand:SI 2 "x86_64_general_operand" "Lh,re,rm,La"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (AND, SImode, operands)" { @@ -7783,7 +7783,7 @@ return "and{l}\t{%2, %0|%0, %2}"; } } - [(set_attr "type" "alu,alu,imovx") + [(set_attr "type" "imovx,alu,alu,imovx") (set (attr "prefix_rex") (if_then_else (and (eq_attr "type" "imovx") @@ -7791,7 +7791,7 @@ (match_operand 1 "ext_QIreg_operand" ""))) (const_string "1") (const_string "*"))) - (set_attr "length_immediate" "*,*,0") + (set_attr "length_immediate" "0,*,*,0") (set_attr "mode" "SI")]) ;; See comment for addsi_1_zext why we do use nonimmediate_operand @@ -7807,9 +7807,9 @@ (set_attr "mode" "SI")]) (define_insn "*andhi_1" - [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r") - (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,qm") - (match_operand:HI 2 "general_operand" "rn,rm,L"))) + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,rm,r,r") + (and:HI (match_operand:HI 1 "nonimmediate_operand" "%qm,0,0,qm") + (match_operand:HI 2 "general_operand" "Lh,rn,rm,L"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (AND, HImode, operands)" { @@ -7826,15 +7826,15 @@ return "and{w}\t{%2, %0|%0, %2}"; } } - [(set_attr "type" "alu,alu,imovx") - (set_attr "length_immediate" "*,*,0") + [(set_attr "type" "imovx,alu,alu,imovx") + (set_attr "length_immediate" "0,*,*,0") (set (attr "prefix_rex") (if_then_else (and (eq_attr "type" "imovx") (match_operand 1 "ext_QIreg_operand" "")) (const_string "1") (const_string "*"))) - (set_attr "mode" "HI,HI,SI")]) + (set_attr "mode" "SI,HI,HI,SI")]) ;; %%% Potential partial reg stall on alternative 2. What to do? (define_insn "*andqi_1" Index: config/i386/constraints.md =================================================================== --- config/i386/constraints.md (revision 184516) +++ config/i386/constraints.md (working copy) @@ -148,13 +148,19 @@ (and (match_code "const_int") (match_test "IN_RANGE (ival, -128, 127)"))) -(define_constraint "L" +(define_constraint "La" "@code{0xFF}, @code{0xFFFF} or @code{0xFFFFFFFF} for AND as a zero-extending move." (and (match_code "const_int") (match_test "ival == 0xff || ival == 0xffff || ival == (HOST_WIDE_INT) 0xffffffff"))) +(define_constraint "Lh" + "@code{0xFF} for AND as a zero-extending move when there are LCP stalls." + (and (match_code "const_int") + (match_test "ival == 0xff") + (match_test "TARGET_LCP_STALL"))) + (define_constraint "M" "0, 1, 2, or 3 (shifts for the @code{lea} instruction)." (and (match_code "const_int") Index: config/i386/i386.c =================================================================== --- config/i386/i386.c (revision 188251) +++ config/i386/i386.c (working copy) @@ -16331,7 +16331,7 @@ ix86_binary_operator_ok (enum rtx_code code, enum && (mode == HImode || mode == SImode || (TARGET_64BIT && mode == DImode)) - && satisfies_constraint_L (src2)); + && satisfies_constraint_La (src2)); return true; }