Patchwork [google/4_7] New fix to avoid LCP stalling andw with 0xff

login
register
mail settings
Submitter Teresa Johnson
Date Aug. 23, 2012, 9:01 p.m.
Message ID <CAAe5K+V-+-snSMNw0jGAiRkRsqfUMFV-Nd_2LnmqkzAtwP94mg@mail.gmail.com>
Download mbox | patch
Permalink /patch/179721/
State New
Headers show

Comments

Teresa Johnson - Aug. 23, 2012, 9:01 p.m.
This patch is for google branches only.

This is the patch I plan to apply after reverting an earlier set of
patches ported over from trunk that addressed the same problem but
were causing some performance regressions. This is a narrower fix
I had originally prepared before finding the trunk patches.

This addresses only the specific problem of the and*i_1 patterns preferring
to match an earlier set of patterns "r,0,Z" when the source and dest registers
are the same, thus preferring to keep the instruction as an "andw" instead
of using a zero-extending move when the constant is 0xff. We now will
prefer to convert to a zero-extending move when the constant is 0xff and
the machine has LCP stalls.

Google ref b/6615073.

Tested with crosstool. Ok for google-4_7?

2012-08-23  Teresa Johnson  <tejohnson@google.com>

        * config/i386/i386.md (anddi_1): Add new r,qm,Lh pattern
        first to catch an and with 0xff that would be turned into
        an andw on machines with length-changing prefix stalls,
        and instead convert it into a zero-extending move.
        (andsi_1, andhi_1): Ditto.
        * config/i386/constraints.md (La): Rename from "L".
        (Lh): New constraint to match possible LCP stalling ands.
        * config/i386/i386.c (ix86_binary_operator_ok): Update
        for rename of "L" constraint.
Xinliang David Li - Aug. 23, 2012, 10:33 p.m.
On Thu, Aug 23, 2012 at 2:01 PM, Teresa Johnson <tejohnson@google.com> wrote:
> This patch is for google branches only.
>
> This is the patch I plan to apply after reverting an earlier set of
> patches ported over from trunk that addressed the same problem but
> were causing some performance regressions. This is a narrower fix
> I had originally prepared before finding the trunk patches.
>
> This addresses only the specific problem of the and*i_1 patterns preferring
> to match an earlier set of patterns "r,0,Z" when the source and dest registers
> are the same, thus preferring to keep the instruction as an "andw" instead
> of using a zero-extending move when the constant is 0xff. We now will
> prefer to convert to a zero-extending move when the constant is 0xff and
> the machine has LCP stalls.
>
> Google ref b/6615073.
>
> Tested with crosstool. Ok for google-4_7?
>
> 2012-08-23  Teresa Johnson  <tejohnson@google.com>
>
>         * config/i386/i386.md (anddi_1): Add new r,qm,Lh pattern
>         first to catch an and with 0xff that would be turned into
>         an andw on machines with length-changing prefix stalls,
>         and instead convert it into a zero-extending move.
>         (andsi_1, andhi_1): Ditto.
>         * config/i386/constraints.md (La): Rename from "L".
>         (Lh): New constraint to match possible LCP stalling ands.
>         * config/i386/i386.c (ix86_binary_operator_ok): Update
>         for rename of "L" constraint.
>
> Index: config/i386/i386.md
> ===================================================================
> --- config/i386/i386.md (revision 188251)
> +++ config/i386/i386.md (working copy)
> @@ -7697,10 +7697,10 @@
>    "ix86_expand_binary_operator (AND, <MODE>mode, operands); DONE;")
>
>  (define_insn "*anddi_1"
> -  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r")
> +  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm,r,r")
>         (and:DI
> -        (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm")
> -        (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm,L")))
> +        (match_operand:DI 1 "nonimmediate_operand" "%qm,0,0,0,qm")
> +        (match_operand:DI 2 "x86_64_szext_general_operand" "Lh,Z,re,rm,La")))
>     (clobber (reg:CC FLAGS_REG))]
>    "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands)"
>  {
> @@ -7738,8 +7738,8 @@
>         return "and{q}\t{%2, %0|%0, %2}";
>      }
>  }

Why is there a need to split L into Lh and La? Removing redundant
'and' seems beneficial.


> -  [(set_attr "type" "alu,alu,alu,imovx")
> -   (set_attr "length_immediate" "*,*,*,0")
> +  [(set_attr "type" "imovx,alu,alu,alu,imovx")
> +   (set_attr "length_immediate" "0,*,*,*,0")
>     (set (attr "prefix_rex")
>       (if_then_else
>         (and (eq_attr "type" "imovx")
> @@ -7747,12 +7747,12 @@
>                  (match_operand 1 "ext_QIreg_operand" "")))
>         (const_string "1")
>         (const_string "*")))
> -   (set_attr "mode" "SI,DI,DI,SI")])
> +   (set_attr "mode" "SI,SI,DI,DI,SI")])
>
>  (define_insn "*andsi_1"
> -  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,r")
> -       (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,qm")
> -               (match_operand:SI 2 "x86_64_general_operand" "re,rm,L")))
> +  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r,r")
> +       (and:SI (match_operand:SI 1 "nonimmediate_operand" "%qm,0,0,qm")
> +               (match_operand:SI 2 "x86_64_general_operand" "Lh,re,rm,La")))
>     (clobber (reg:CC FLAGS_REG))]
>    "ix86_binary_operator_ok (AND, SImode, operands)"
>  {
> @@ -7783,7 +7783,7 @@
>        return "and{l}\t{%2, %0|%0, %2}";
>      }
>  }
> -  [(set_attr "type" "alu,alu,imovx")
> +  [(set_attr "type" "imovx,alu,alu,imovx")
>     (set (attr "prefix_rex")
>       (if_then_else
>         (and (eq_attr "type" "imovx")
> @@ -7791,7 +7791,7 @@
>                  (match_operand 1 "ext_QIreg_operand" "")))
>         (const_string "1")
>         (const_string "*")))
> -   (set_attr "length_immediate" "*,*,0")
> +   (set_attr "length_immediate" "0,*,*,0")
>     (set_attr "mode" "SI")])
>
>  ;; See comment for addsi_1_zext why we do use nonimmediate_operand
> @@ -7807,9 +7807,9 @@
>     (set_attr "mode" "SI")])
>
>  (define_insn "*andhi_1"
> -  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r")
> -       (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,qm")
> -               (match_operand:HI 2 "general_operand" "rn,rm,L")))
> +  [(set (match_operand:HI 0 "nonimmediate_operand" "=r,rm,r,r")
> +       (and:HI (match_operand:HI 1 "nonimmediate_operand" "%qm,0,0,qm")
> +               (match_operand:HI 2 "general_operand" "Lh,rn,rm,L")))
>     (clobber (reg:CC FLAGS_REG))]
>    "ix86_binary_operator_ok (AND, HImode, operands)"
>  {
> @@ -7826,15 +7826,15 @@
>        return "and{w}\t{%2, %0|%0, %2}";
>      }
>  }
> -  [(set_attr "type" "alu,alu,imovx")
> -   (set_attr "length_immediate" "*,*,0")
> +  [(set_attr "type" "imovx,alu,alu,imovx")
> +   (set_attr "length_immediate" "0,*,*,0")
>     (set (attr "prefix_rex")
>       (if_then_else
>         (and (eq_attr "type" "imovx")
>             (match_operand 1 "ext_QIreg_operand" ""))
>         (const_string "1")
>         (const_string "*")))
> -   (set_attr "mode" "HI,HI,SI")])
> +   (set_attr "mode" "SI,HI,HI,SI")])
>
>  ;; %%% Potential partial reg stall on alternative 2.  What to do?
>  (define_insn "*andqi_1"
> Index: config/i386/constraints.md
> ===================================================================
> --- config/i386/constraints.md  (revision 184516)
> +++ config/i386/constraints.md  (working copy)
> @@ -148,13 +148,19 @@
>    (and (match_code "const_int")
>         (match_test "IN_RANGE (ival, -128, 127)")))
>
> -(define_constraint "L"
> +(define_constraint "La"
>    "@code{0xFF}, @code{0xFFFF} or @code{0xFFFFFFFF}
>     for AND as a zero-extending move."
>    (and (match_code "const_int")
>         (match_test "ival == 0xff || ival == 0xffff
>                     || ival == (HOST_WIDE_INT) 0xffffffff")))
>
> +(define_constraint "Lh"
> +  "@code{0xFF} for AND as a zero-extending move when there are LCP stalls."
> +  (and (match_code "const_int")
> +       (match_test "ival == 0xff")
> +       (match_test "TARGET_LCP_STALL")))


This is necessary but not sufficient to generate LCP. For instance,
not in adddi and addsi instructions.


thanks,

David

> +
>  (define_constraint "M"
>    "0, 1, 2, or 3 (shifts for the @code{lea} instruction)."
>    (and (match_code "const_int")
> Index: config/i386/i386.c
> ===================================================================
> --- config/i386/i386.c  (revision 188251)
> +++ config/i386/i386.c  (working copy)
> @@ -16331,7 +16331,7 @@ ix86_binary_operator_ok (enum rtx_code code, enum
>             && (mode == HImode
>                 || mode == SImode
>                 || (TARGET_64BIT && mode == DImode))
> -           && satisfies_constraint_L (src2));
> +           && satisfies_constraint_La (src2));
>
>    return true;
>  }
>
>
> --
> Teresa Johnson | Software Engineer | tejohnson@google.com | 408-460-2413
Teresa Johnson - Aug. 23, 2012, 11:21 p.m.
On Thu, Aug 23, 2012 at 3:33 PM, Xinliang David Li <davidxl@google.com> wrote:
> On Thu, Aug 23, 2012 at 2:01 PM, Teresa Johnson <tejohnson@google.com> wrote:
>> This patch is for google branches only.
>>
>> This is the patch I plan to apply after reverting an earlier set of
>> patches ported over from trunk that addressed the same problem but
>> were causing some performance regressions. This is a narrower fix
>> I had originally prepared before finding the trunk patches.
>>
>> This addresses only the specific problem of the and*i_1 patterns preferring
>> to match an earlier set of patterns "r,0,Z" when the source and dest registers
>> are the same, thus preferring to keep the instruction as an "andw" instead
>> of using a zero-extending move when the constant is 0xff. We now will
>> prefer to convert to a zero-extending move when the constant is 0xff and
>> the machine has LCP stalls.
>>
>> Google ref b/6615073.
>>
>> Tested with crosstool. Ok for google-4_7?
>>
>> 2012-08-23  Teresa Johnson  <tejohnson@google.com>
>>
>>         * config/i386/i386.md (anddi_1): Add new r,qm,Lh pattern
>>         first to catch an and with 0xff that would be turned into
>>         an andw on machines with length-changing prefix stalls,
>>         and instead convert it into a zero-extending move.
>>         (andsi_1, andhi_1): Ditto.
>>         * config/i386/constraints.md (La): Rename from "L".
>>         (Lh): New constraint to match possible LCP stalling ands.
>>         * config/i386/i386.c (ix86_binary_operator_ok): Update
>>         for rename of "L" constraint.
>>
>> Index: config/i386/i386.md
>> ===================================================================
>> --- config/i386/i386.md (revision 188251)
>> +++ config/i386/i386.md (working copy)
>> @@ -7697,10 +7697,10 @@
>>    "ix86_expand_binary_operator (AND, <MODE>mode, operands); DONE;")
>>
>>  (define_insn "*anddi_1"
>> -  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r")
>> +  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm,r,r")
>>         (and:DI
>> -        (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm")
>> -        (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm,L")))
>> +        (match_operand:DI 1 "nonimmediate_operand" "%qm,0,0,0,qm")
>> +        (match_operand:DI 2 "x86_64_szext_general_operand" "Lh,Z,re,rm,La")))
>>     (clobber (reg:CC FLAGS_REG))]
>>    "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands)"
>>  {
>> @@ -7738,8 +7738,8 @@
>>         return "and{q}\t{%2, %0|%0, %2}";
>>      }
>>  }
>
> Why is there a need to split L into Lh and La? Removing redundant
> 'and' seems beneficial.

I first did move the L condition first, which did fix one benchmark's
performance issue but I got better performance overall by splitting it
(so an "and" with 0xffff or 0xffffffffff might stay an and rather than
becoming a movz variant). I'm not sure why using an "and" instead of
converting it to a movz in these non-LCP situations matters though.

>
>
>> -  [(set_attr "type" "alu,alu,alu,imovx")
>> -   (set_attr "length_immediate" "*,*,*,0")
>> +  [(set_attr "type" "imovx,alu,alu,alu,imovx")
>> +   (set_attr "length_immediate" "0,*,*,*,0")
>>     (set (attr "prefix_rex")
>>       (if_then_else
>>         (and (eq_attr "type" "imovx")
>> @@ -7747,12 +7747,12 @@
>>                  (match_operand 1 "ext_QIreg_operand" "")))
>>         (const_string "1")
>>         (const_string "*")))
>> -   (set_attr "mode" "SI,DI,DI,SI")])
>> +   (set_attr "mode" "SI,SI,DI,DI,SI")])
>>
>>  (define_insn "*andsi_1"
>> -  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,r")
>> -       (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,qm")
>> -               (match_operand:SI 2 "x86_64_general_operand" "re,rm,L")))
>> +  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r,r")
>> +       (and:SI (match_operand:SI 1 "nonimmediate_operand" "%qm,0,0,qm")
>> +               (match_operand:SI 2 "x86_64_general_operand" "Lh,re,rm,La")))
>>     (clobber (reg:CC FLAGS_REG))]
>>    "ix86_binary_operator_ok (AND, SImode, operands)"
>>  {
>> @@ -7783,7 +7783,7 @@
>>        return "and{l}\t{%2, %0|%0, %2}";
>>      }
>>  }
>> -  [(set_attr "type" "alu,alu,imovx")
>> +  [(set_attr "type" "imovx,alu,alu,imovx")
>>     (set (attr "prefix_rex")
>>       (if_then_else
>>         (and (eq_attr "type" "imovx")
>> @@ -7791,7 +7791,7 @@
>>                  (match_operand 1 "ext_QIreg_operand" "")))
>>         (const_string "1")
>>         (const_string "*")))
>> -   (set_attr "length_immediate" "*,*,0")
>> +   (set_attr "length_immediate" "0,*,*,0")
>>     (set_attr "mode" "SI")])
>>
>>  ;; See comment for addsi_1_zext why we do use nonimmediate_operand
>> @@ -7807,9 +7807,9 @@
>>     (set_attr "mode" "SI")])
>>
>>  (define_insn "*andhi_1"
>> -  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r")
>> -       (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,qm")
>> -               (match_operand:HI 2 "general_operand" "rn,rm,L")))
>> +  [(set (match_operand:HI 0 "nonimmediate_operand" "=r,rm,r,r")
>> +       (and:HI (match_operand:HI 1 "nonimmediate_operand" "%qm,0,0,qm")
>> +               (match_operand:HI 2 "general_operand" "Lh,rn,rm,L")))
>>     (clobber (reg:CC FLAGS_REG))]
>>    "ix86_binary_operator_ok (AND, HImode, operands)"
>>  {
>> @@ -7826,15 +7826,15 @@
>>        return "and{w}\t{%2, %0|%0, %2}";
>>      }
>>  }
>> -  [(set_attr "type" "alu,alu,imovx")
>> -   (set_attr "length_immediate" "*,*,0")
>> +  [(set_attr "type" "imovx,alu,alu,imovx")
>> +   (set_attr "length_immediate" "0,*,*,0")
>>     (set (attr "prefix_rex")
>>       (if_then_else
>>         (and (eq_attr "type" "imovx")
>>             (match_operand 1 "ext_QIreg_operand" ""))
>>         (const_string "1")
>>         (const_string "*")))
>> -   (set_attr "mode" "HI,HI,SI")])
>> +   (set_attr "mode" "SI,HI,HI,SI")])
>>
>>  ;; %%% Potential partial reg stall on alternative 2.  What to do?
>>  (define_insn "*andqi_1"
>> Index: config/i386/constraints.md
>> ===================================================================
>> --- config/i386/constraints.md  (revision 184516)
>> +++ config/i386/constraints.md  (working copy)
>> @@ -148,13 +148,19 @@
>>    (and (match_code "const_int")
>>         (match_test "IN_RANGE (ival, -128, 127)")))
>>
>> -(define_constraint "L"
>> +(define_constraint "La"
>>    "@code{0xFF}, @code{0xFFFF} or @code{0xFFFFFFFF}
>>     for AND as a zero-extending move."
>>    (and (match_code "const_int")
>>         (match_test "ival == 0xff || ival == 0xffff
>>                     || ival == (HOST_WIDE_INT) 0xffffffff")))
>>
>> +(define_constraint "Lh"
>> +  "@code{0xFF} for AND as a zero-extending move when there are LCP stalls."
>> +  (and (match_code "const_int")
>> +       (match_test "ival == 0xff")
>> +       (match_test "TARGET_LCP_STALL")))
>
>
> This is necessary but not sufficient to generate LCP. For instance,
> not in adddi and addsi instructions.

I saw cases where andsi or anddi with 0xff was converted to an andw in
the end (possibly after some combining?).

Teresa

>
>
> thanks,
>
> David
>
>> +
>>  (define_constraint "M"
>>    "0, 1, 2, or 3 (shifts for the @code{lea} instruction)."
>>    (and (match_code "const_int")
>> Index: config/i386/i386.c
>> ===================================================================
>> --- config/i386/i386.c  (revision 188251)
>> +++ config/i386/i386.c  (working copy)
>> @@ -16331,7 +16331,7 @@ ix86_binary_operator_ok (enum rtx_code code, enum
>>             && (mode == HImode
>>                 || mode == SImode
>>                 || (TARGET_64BIT && mode == DImode))
>> -           && satisfies_constraint_L (src2));
>> +           && satisfies_constraint_La (src2));
>>
>>    return true;
>>  }
>>
>>
>> --
>> Teresa Johnson | Software Engineer | tejohnson@google.com | 408-460-2413
Xinliang David Li - Aug. 23, 2012, 11:35 p.m.
Ok for now but some follow up may be needed.

thanks,

David

On Thu, Aug 23, 2012 at 4:21 PM, Teresa Johnson <tejohnson@google.com> wrote:
> On Thu, Aug 23, 2012 at 3:33 PM, Xinliang David Li <davidxl@google.com> wrote:
>> On Thu, Aug 23, 2012 at 2:01 PM, Teresa Johnson <tejohnson@google.com> wrote:
>>> This patch is for google branches only.
>>>
>>> This is the patch I plan to apply after reverting an earlier set of
>>> patches ported over from trunk that addressed the same problem but
>>> were causing some performance regressions. This is a narrower fix
>>> I had originally prepared before finding the trunk patches.
>>>
>>> This addresses only the specific problem of the and*i_1 patterns preferring
>>> to match an earlier set of patterns "r,0,Z" when the source and dest registers
>>> are the same, thus preferring to keep the instruction as an "andw" instead
>>> of using a zero-extending move when the constant is 0xff. We now will
>>> prefer to convert to a zero-extending move when the constant is 0xff and
>>> the machine has LCP stalls.
>>>
>>> Google ref b/6615073.
>>>
>>> Tested with crosstool. Ok for google-4_7?
>>>
>>> 2012-08-23  Teresa Johnson  <tejohnson@google.com>
>>>
>>>         * config/i386/i386.md (anddi_1): Add new r,qm,Lh pattern
>>>         first to catch an and with 0xff that would be turned into
>>>         an andw on machines with length-changing prefix stalls,
>>>         and instead convert it into a zero-extending move.
>>>         (andsi_1, andhi_1): Ditto.
>>>         * config/i386/constraints.md (La): Rename from "L".
>>>         (Lh): New constraint to match possible LCP stalling ands.
>>>         * config/i386/i386.c (ix86_binary_operator_ok): Update
>>>         for rename of "L" constraint.
>>>
>>> Index: config/i386/i386.md
>>> ===================================================================
>>> --- config/i386/i386.md (revision 188251)
>>> +++ config/i386/i386.md (working copy)
>>> @@ -7697,10 +7697,10 @@
>>>    "ix86_expand_binary_operator (AND, <MODE>mode, operands); DONE;")
>>>
>>>  (define_insn "*anddi_1"
>>> -  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r")
>>> +  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm,r,r")
>>>         (and:DI
>>> -        (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm")
>>> -        (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm,L")))
>>> +        (match_operand:DI 1 "nonimmediate_operand" "%qm,0,0,0,qm")
>>> +        (match_operand:DI 2 "x86_64_szext_general_operand" "Lh,Z,re,rm,La")))
>>>     (clobber (reg:CC FLAGS_REG))]
>>>    "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands)"
>>>  {
>>> @@ -7738,8 +7738,8 @@
>>>         return "and{q}\t{%2, %0|%0, %2}";
>>>      }
>>>  }
>>
>> Why is there a need to split L into Lh and La? Removing redundant
>> 'and' seems beneficial.
>
> I first did move the L condition first, which did fix one benchmark's
> performance issue but I got better performance overall by splitting it
> (so an "and" with 0xffff or 0xffffffffff might stay an and rather than
> becoming a movz variant). I'm not sure why using an "and" instead of
> converting it to a movz in these non-LCP situations matters though.
>
>>
>>
>>> -  [(set_attr "type" "alu,alu,alu,imovx")
>>> -   (set_attr "length_immediate" "*,*,*,0")
>>> +  [(set_attr "type" "imovx,alu,alu,alu,imovx")
>>> +   (set_attr "length_immediate" "0,*,*,*,0")
>>>     (set (attr "prefix_rex")
>>>       (if_then_else
>>>         (and (eq_attr "type" "imovx")
>>> @@ -7747,12 +7747,12 @@
>>>                  (match_operand 1 "ext_QIreg_operand" "")))
>>>         (const_string "1")
>>>         (const_string "*")))
>>> -   (set_attr "mode" "SI,DI,DI,SI")])
>>> +   (set_attr "mode" "SI,SI,DI,DI,SI")])
>>>
>>>  (define_insn "*andsi_1"
>>> -  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,r")
>>> -       (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,qm")
>>> -               (match_operand:SI 2 "x86_64_general_operand" "re,rm,L")))
>>> +  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r,r")
>>> +       (and:SI (match_operand:SI 1 "nonimmediate_operand" "%qm,0,0,qm")
>>> +               (match_operand:SI 2 "x86_64_general_operand" "Lh,re,rm,La")))
>>>     (clobber (reg:CC FLAGS_REG))]
>>>    "ix86_binary_operator_ok (AND, SImode, operands)"
>>>  {
>>> @@ -7783,7 +7783,7 @@
>>>        return "and{l}\t{%2, %0|%0, %2}";
>>>      }
>>>  }
>>> -  [(set_attr "type" "alu,alu,imovx")
>>> +  [(set_attr "type" "imovx,alu,alu,imovx")
>>>     (set (attr "prefix_rex")
>>>       (if_then_else
>>>         (and (eq_attr "type" "imovx")
>>> @@ -7791,7 +7791,7 @@
>>>                  (match_operand 1 "ext_QIreg_operand" "")))
>>>         (const_string "1")
>>>         (const_string "*")))
>>> -   (set_attr "length_immediate" "*,*,0")
>>> +   (set_attr "length_immediate" "0,*,*,0")
>>>     (set_attr "mode" "SI")])
>>>
>>>  ;; See comment for addsi_1_zext why we do use nonimmediate_operand
>>> @@ -7807,9 +7807,9 @@
>>>     (set_attr "mode" "SI")])
>>>
>>>  (define_insn "*andhi_1"
>>> -  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r")
>>> -       (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,qm")
>>> -               (match_operand:HI 2 "general_operand" "rn,rm,L")))
>>> +  [(set (match_operand:HI 0 "nonimmediate_operand" "=r,rm,r,r")
>>> +       (and:HI (match_operand:HI 1 "nonimmediate_operand" "%qm,0,0,qm")
>>> +               (match_operand:HI 2 "general_operand" "Lh,rn,rm,L")))
>>>     (clobber (reg:CC FLAGS_REG))]
>>>    "ix86_binary_operator_ok (AND, HImode, operands)"
>>>  {
>>> @@ -7826,15 +7826,15 @@
>>>        return "and{w}\t{%2, %0|%0, %2}";
>>>      }
>>>  }
>>> -  [(set_attr "type" "alu,alu,imovx")
>>> -   (set_attr "length_immediate" "*,*,0")
>>> +  [(set_attr "type" "imovx,alu,alu,imovx")
>>> +   (set_attr "length_immediate" "0,*,*,0")
>>>     (set (attr "prefix_rex")
>>>       (if_then_else
>>>         (and (eq_attr "type" "imovx")
>>>             (match_operand 1 "ext_QIreg_operand" ""))
>>>         (const_string "1")
>>>         (const_string "*")))
>>> -   (set_attr "mode" "HI,HI,SI")])
>>> +   (set_attr "mode" "SI,HI,HI,SI")])
>>>
>>>  ;; %%% Potential partial reg stall on alternative 2.  What to do?
>>>  (define_insn "*andqi_1"
>>> Index: config/i386/constraints.md
>>> ===================================================================
>>> --- config/i386/constraints.md  (revision 184516)
>>> +++ config/i386/constraints.md  (working copy)
>>> @@ -148,13 +148,19 @@
>>>    (and (match_code "const_int")
>>>         (match_test "IN_RANGE (ival, -128, 127)")))
>>>
>>> -(define_constraint "L"
>>> +(define_constraint "La"
>>>    "@code{0xFF}, @code{0xFFFF} or @code{0xFFFFFFFF}
>>>     for AND as a zero-extending move."
>>>    (and (match_code "const_int")
>>>         (match_test "ival == 0xff || ival == 0xffff
>>>                     || ival == (HOST_WIDE_INT) 0xffffffff")))
>>>
>>> +(define_constraint "Lh"
>>> +  "@code{0xFF} for AND as a zero-extending move when there are LCP stalls."
>>> +  (and (match_code "const_int")
>>> +       (match_test "ival == 0xff")
>>> +       (match_test "TARGET_LCP_STALL")))
>>
>>
>> This is necessary but not sufficient to generate LCP. For instance,
>> not in adddi and addsi instructions.
>
> I saw cases where andsi or anddi with 0xff was converted to an andw in
> the end (possibly after some combining?).
>
> Teresa
>
>>
>>
>> thanks,
>>
>> David
>>
>>> +
>>>  (define_constraint "M"
>>>    "0, 1, 2, or 3 (shifts for the @code{lea} instruction)."
>>>    (and (match_code "const_int")
>>> Index: config/i386/i386.c
>>> ===================================================================
>>> --- config/i386/i386.c  (revision 188251)
>>> +++ config/i386/i386.c  (working copy)
>>> @@ -16331,7 +16331,7 @@ ix86_binary_operator_ok (enum rtx_code code, enum
>>>             && (mode == HImode
>>>                 || mode == SImode
>>>                 || (TARGET_64BIT && mode == DImode))
>>> -           && satisfies_constraint_L (src2));
>>> +           && satisfies_constraint_La (src2));
>>>
>>>    return true;
>>>  }
>>>
>>>
>>> --
>>> Teresa Johnson | Software Engineer | tejohnson@google.com | 408-460-2413
>
>
>
> --
> Teresa Johnson | Software Engineer | tejohnson@google.com | 408-460-2413

Patch

Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md (revision 188251)
+++ config/i386/i386.md (working copy)
@@ -7697,10 +7697,10 @@ 
   "ix86_expand_binary_operator (AND, <MODE>mode, operands); DONE;")

 (define_insn "*anddi_1"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r")
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm,r,r")
        (and:DI
-        (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm")
-        (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm,L")))
+        (match_operand:DI 1 "nonimmediate_operand" "%qm,0,0,0,qm")
+        (match_operand:DI 2 "x86_64_szext_general_operand" "Lh,Z,re,rm,La")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands)"
 {
@@ -7738,8 +7738,8 @@ 
        return "and{q}\t{%2, %0|%0, %2}";
     }
 }
-  [(set_attr "type" "alu,alu,alu,imovx")
-   (set_attr "length_immediate" "*,*,*,0")
+  [(set_attr "type" "imovx,alu,alu,alu,imovx")
+   (set_attr "length_immediate" "0,*,*,*,0")
    (set (attr "prefix_rex")
      (if_then_else
        (and (eq_attr "type" "imovx")
@@ -7747,12 +7747,12 @@ 
                 (match_operand 1 "ext_QIreg_operand" "")))
        (const_string "1")
        (const_string "*")))
-   (set_attr "mode" "SI,DI,DI,SI")])
+   (set_attr "mode" "SI,SI,DI,DI,SI")])

 (define_insn "*andsi_1"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,r")
-       (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,qm")
-               (match_operand:SI 2 "x86_64_general_operand" "re,rm,L")))
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r,r")
+       (and:SI (match_operand:SI 1 "nonimmediate_operand" "%qm,0,0,qm")
+               (match_operand:SI 2 "x86_64_general_operand" "Lh,re,rm,La")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (AND, SImode, operands)"
 {
@@ -7783,7 +7783,7 @@ 
       return "and{l}\t{%2, %0|%0, %2}";
     }
 }
-  [(set_attr "type" "alu,alu,imovx")
+  [(set_attr "type" "imovx,alu,alu,imovx")
    (set (attr "prefix_rex")
      (if_then_else
        (and (eq_attr "type" "imovx")
@@ -7791,7 +7791,7 @@ 
                 (match_operand 1 "ext_QIreg_operand" "")))
        (const_string "1")
        (const_string "*")))
-   (set_attr "length_immediate" "*,*,0")
+   (set_attr "length_immediate" "0,*,*,0")
    (set_attr "mode" "SI")])

 ;; See comment for addsi_1_zext why we do use nonimmediate_operand
@@ -7807,9 +7807,9 @@ 
    (set_attr "mode" "SI")])

 (define_insn "*andhi_1"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r")
-       (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,qm")
-               (match_operand:HI 2 "general_operand" "rn,rm,L")))
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=r,rm,r,r")
+       (and:HI (match_operand:HI 1 "nonimmediate_operand" "%qm,0,0,qm")
+               (match_operand:HI 2 "general_operand" "Lh,rn,rm,L")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (AND, HImode, operands)"
 {
@@ -7826,15 +7826,15 @@ 
       return "and{w}\t{%2, %0|%0, %2}";
     }
 }
-  [(set_attr "type" "alu,alu,imovx")
-   (set_attr "length_immediate" "*,*,0")
+  [(set_attr "type" "imovx,alu,alu,imovx")
+   (set_attr "length_immediate" "0,*,*,0")
    (set (attr "prefix_rex")
      (if_then_else
        (and (eq_attr "type" "imovx")
            (match_operand 1 "ext_QIreg_operand" ""))
        (const_string "1")
        (const_string "*")))
-   (set_attr "mode" "HI,HI,SI")])
+   (set_attr "mode" "SI,HI,HI,SI")])

 ;; %%% Potential partial reg stall on alternative 2.  What to do?
 (define_insn "*andqi_1"
Index: config/i386/constraints.md
===================================================================
--- config/i386/constraints.md  (revision 184516)
+++ config/i386/constraints.md  (working copy)
@@ -148,13 +148,19 @@ 
   (and (match_code "const_int")
        (match_test "IN_RANGE (ival, -128, 127)")))

-(define_constraint "L"
+(define_constraint "La"
   "@code{0xFF}, @code{0xFFFF} or @code{0xFFFFFFFF}
    for AND as a zero-extending move."
   (and (match_code "const_int")
        (match_test "ival == 0xff || ival == 0xffff
                    || ival == (HOST_WIDE_INT) 0xffffffff")))

+(define_constraint "Lh"
+  "@code{0xFF} for AND as a zero-extending move when there are LCP stalls."
+  (and (match_code "const_int")
+       (match_test "ival == 0xff")
+       (match_test "TARGET_LCP_STALL")))
+
 (define_constraint "M"
   "0, 1, 2, or 3 (shifts for the @code{lea} instruction)."
   (and (match_code "const_int")
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c  (revision 188251)
+++ config/i386/i386.c  (working copy)
@@ -16331,7 +16331,7 @@  ix86_binary_operator_ok (enum rtx_code code, enum
            && (mode == HImode
                || mode == SImode
                || (TARGET_64BIT && mode == DImode))
-           && satisfies_constraint_L (src2));
+           && satisfies_constraint_La (src2));

   return true;
 }