diff mbox

Allow all 1s of integer as standard SSE constants

Message ID CAFULd4b3WPYGr=uL=1D7RmnrYBeviVt0=-ckgT-8zJJcae+m3A@mail.gmail.com
State New
Headers show

Commit Message

Uros Bizjak April 21, 2016, 10:18 a.m. UTC
On Thu, Apr 21, 2016 at 9:42 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Thu, Apr 21, 2016 at 9:37 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>> On Wed, Apr 20, 2016 at 9:53 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>> Since all 1s in TImode is standard SSE2 constants, all 1s in OImode is
>>> standard AVX2 constants and all 1s in XImode is standard AVX512F constants,
>>> pass mode to standard_sse_constant_p and standard_sse_constant_opcode
>>> to check if all 1s is available for target.
>>>
>>> Tested on Linux/x86-64.  OK for master?
>>
>> No.
>>
>> This patch should use "isa" attribute instead of adding even more
>> similar patterns. Also, please leave MEM_P checks, the rare C->m move
>> can be easily resolved by IRA.
>
> Actually, register_operand checks are indeed better, please disregard
> MEM_P recommendation.

So, something like attached untested RFC proto-patch, that lacks
wide-int handling.

Uros.

Comments

H.J. Lu April 21, 2016, 11:54 a.m. UTC | #1
On Thu, Apr 21, 2016 at 3:18 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Thu, Apr 21, 2016 at 9:42 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>> On Thu, Apr 21, 2016 at 9:37 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>> On Wed, Apr 20, 2016 at 9:53 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>>> Since all 1s in TImode is standard SSE2 constants, all 1s in OImode is
>>>> standard AVX2 constants and all 1s in XImode is standard AVX512F constants,
>>>> pass mode to standard_sse_constant_p and standard_sse_constant_opcode
>>>> to check if all 1s is available for target.
>>>>
>>>> Tested on Linux/x86-64.  OK for master?
>>>
>>> No.
>>>
>>> This patch should use "isa" attribute instead of adding even more
>>> similar patterns. Also, please leave MEM_P checks, the rare C->m move
>>> can be easily resolved by IRA.
>>
>> Actually, register_operand checks are indeed better, please disregard
>> MEM_P recommendation.
>
> So, something like attached untested RFC proto-patch, that lacks
> wide-int handling.
>
> Uros.

+
+  else if (CONST_INT_P (x))
+    {
+      if (INTVAL (X) == HOST_WIDE_INT_M1
+  && TARGET_SSE2)
+ return 2;
+    }
+  else if (CONST_WIDE_INT_P (x))
+    {
+      if (.... something involving wi::minus-one ....
+  && TARGET_AVX2)
+ return 2;
+      if (....
+  && TARGET_AVX512F)
+ return 2;
+    }
+  else if (vector_all_ones_operand (x, mode))

All 1s may not use winde_int.  It has VOIDmode.
The mode is passed by

@@ -18758,7 +18771,7 @@ ix86_expand_vector_move (machine_mode mode,
rtx operands[])
       && (CONSTANT_P (op1)
   || (SUBREG_P (op1)
       && CONSTANT_P (SUBREG_REG (op1))))
-      && !standard_sse_constant_p (op1))
+      && !standard_sse_constant_p (op1, mode))
     op1 = validize_mem (force_const_mem (mode, op1));

This is why I have

-standard_sse_constant_p (rtx x)
+standard_sse_constant_p (rtx x, machine_mode mode)
 {
-  machine_mode mode;
-
   if (!TARGET_SSE)
     return 0;

-  mode = GET_MODE (x);
-
+  if (mode == VOIDmode)
+    mode = GET_MODE (x);
+

since all 1s `x' may have VOIDmode when called from
ix86_expand_vector_move if mode isn't passed.
Uros Bizjak April 21, 2016, 12:15 p.m. UTC | #2
On Thu, Apr 21, 2016 at 1:54 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Thu, Apr 21, 2016 at 3:18 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>> On Thu, Apr 21, 2016 at 9:42 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>> On Thu, Apr 21, 2016 at 9:37 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>>> On Wed, Apr 20, 2016 at 9:53 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>>>> Since all 1s in TImode is standard SSE2 constants, all 1s in OImode is
>>>>> standard AVX2 constants and all 1s in XImode is standard AVX512F constants,
>>>>> pass mode to standard_sse_constant_p and standard_sse_constant_opcode
>>>>> to check if all 1s is available for target.
>>>>>
>>>>> Tested on Linux/x86-64.  OK for master?
>>>>
>>>> No.
>>>>
>>>> This patch should use "isa" attribute instead of adding even more
>>>> similar patterns. Also, please leave MEM_P checks, the rare C->m move
>>>> can be easily resolved by IRA.
>>>
>>> Actually, register_operand checks are indeed better, please disregard
>>> MEM_P recommendation.
>>
>> So, something like attached untested RFC proto-patch, that lacks
>> wide-int handling.
>>
>> Uros.
>
> +
> +  else if (CONST_INT_P (x))
> +    {
> +      if (INTVAL (X) == HOST_WIDE_INT_M1
> +  && TARGET_SSE2)
> + return 2;
> +    }
> +  else if (CONST_WIDE_INT_P (x))
> +    {
> +      if (.... something involving wi::minus-one ....
> +  && TARGET_AVX2)
> + return 2;
> +      if (....
> +  && TARGET_AVX512F)
> + return 2;
> +    }
> +  else if (vector_all_ones_operand (x, mode))
>
> All 1s may not use winde_int.  It has VOIDmode.
> The mode is passed by
>
> @@ -18758,7 +18771,7 @@ ix86_expand_vector_move (machine_mode mode,
> rtx operands[])
>        && (CONSTANT_P (op1)
>    || (SUBREG_P (op1)
>        && CONSTANT_P (SUBREG_REG (op1))))
> -      && !standard_sse_constant_p (op1))
> +      && !standard_sse_constant_p (op1, mode))
>      op1 = validize_mem (force_const_mem (mode, op1));
>
> This is why I have
>
> -standard_sse_constant_p (rtx x)
> +standard_sse_constant_p (rtx x, machine_mode mode)
>  {
> -  machine_mode mode;
> -
>    if (!TARGET_SSE)
>      return 0;
>
> -  mode = GET_MODE (x);
> -
> +  if (mode == VOIDmode)
> +    mode = GET_MODE (x);
> +
>
> since all 1s `x' may have VOIDmode when called from
> ix86_expand_vector_move if mode isn't passed.

We know, that const_int (-1) is allowed with TARGET_SSE2 and that
const_wide_int (-1) is allowed with TARGET_AVX2. Probably we don't
have to check AVX512F in standard_sse_constant_p, as it implies
TARGET_AVX2.

As said, it is the job of insn mode attributes to emit correct instruction.

Based on the above observations, mode checks for -1 are not needed in
standard_sse_constant_p.

Uros.
H.J. Lu April 21, 2016, 12:59 p.m. UTC | #3
On Thu, Apr 21, 2016 at 5:15 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Thu, Apr 21, 2016 at 1:54 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Thu, Apr 21, 2016 at 3:18 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>> On Thu, Apr 21, 2016 at 9:42 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>>> On Thu, Apr 21, 2016 at 9:37 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>>>> On Wed, Apr 20, 2016 at 9:53 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>>>>>> Since all 1s in TImode is standard SSE2 constants, all 1s in OImode is
>>>>>> standard AVX2 constants and all 1s in XImode is standard AVX512F constants,
>>>>>> pass mode to standard_sse_constant_p and standard_sse_constant_opcode
>>>>>> to check if all 1s is available for target.
>>>>>>
>>>>>> Tested on Linux/x86-64.  OK for master?
>>>>>
>>>>> No.
>>>>>
>>>>> This patch should use "isa" attribute instead of adding even more
>>>>> similar patterns. Also, please leave MEM_P checks, the rare C->m move
>>>>> can be easily resolved by IRA.
>>>>
>>>> Actually, register_operand checks are indeed better, please disregard
>>>> MEM_P recommendation.
>>>
>>> So, something like attached untested RFC proto-patch, that lacks
>>> wide-int handling.
>>>
>>> Uros.
>>
>> +
>> +  else if (CONST_INT_P (x))
>> +    {
>> +      if (INTVAL (X) == HOST_WIDE_INT_M1
>> +  && TARGET_SSE2)
>> + return 2;
>> +    }
>> +  else if (CONST_WIDE_INT_P (x))
>> +    {
>> +      if (.... something involving wi::minus-one ....
>> +  && TARGET_AVX2)
>> + return 2;
>> +      if (....
>> +  && TARGET_AVX512F)
>> + return 2;
>> +    }
>> +  else if (vector_all_ones_operand (x, mode))
>>
>> All 1s may not use winde_int.  It has VOIDmode.
>> The mode is passed by
>>
>> @@ -18758,7 +18771,7 @@ ix86_expand_vector_move (machine_mode mode,
>> rtx operands[])
>>        && (CONSTANT_P (op1)
>>    || (SUBREG_P (op1)
>>        && CONSTANT_P (SUBREG_REG (op1))))
>> -      && !standard_sse_constant_p (op1))
>> +      && !standard_sse_constant_p (op1, mode))
>>      op1 = validize_mem (force_const_mem (mode, op1));
>>
>> This is why I have
>>
>> -standard_sse_constant_p (rtx x)
>> +standard_sse_constant_p (rtx x, machine_mode mode)
>>  {
>> -  machine_mode mode;
>> -
>>    if (!TARGET_SSE)
>>      return 0;
>>
>> -  mode = GET_MODE (x);
>> -
>> +  if (mode == VOIDmode)
>> +    mode = GET_MODE (x);
>> +
>>
>> since all 1s `x' may have VOIDmode when called from
>> ix86_expand_vector_move if mode isn't passed.
>
> We know, that const_int (-1) is allowed with TARGET_SSE2 and that
> const_wide_int (-1) is allowed with TARGET_AVX2. Probably we don't
> have to check AVX512F in standard_sse_constant_p, as it implies
> TARGET_AVX2.
>
> As said, it is the job of insn mode attributes to emit correct instruction.
>
> Based on the above observations, mode checks for -1 are not needed in
> standard_sse_constant_p.

void
ix86_expand_vector_move (machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0], op1 = operands[1];
  /* Use GET_MODE_BITSIZE instead of GET_MODE_ALIGNMENT for IA MCU
     psABI since the biggest alignment is 4 byte for IA MCU psABI.  */
  unsigned int align = (TARGET_IAMCU
                        ? GET_MODE_BITSIZE (mode)
                        : GET_MODE_ALIGNMENT (mode));

  if (push_operand (op0, VOIDmode))
    op0 = emit_move_resolve_push (mode, op0);

  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if (can_create_pseudo_p ()
      && register_operand (op0, mode)
      && (CONSTANT_P (op1)
          || (SUBREG_P (op1)
              && CONSTANT_P (SUBREG_REG (op1))))
      && !standard_sse_constant_p (op1))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

What should it return for  op1 == (VOIDmode) -1 when
TARGET_AVX is true and TARGET_AVX2 is false for
mode == TImode and mode == OImode?

    op1 = validize_mem (force_const_mem (mode, op1));
Uros Bizjak April 21, 2016, 1:33 p.m. UTC | #4
On Thu, Apr 21, 2016 at 2:59 PM, H.J. Lu <hjl.tools@gmail.com> wrote:

>> We know, that const_int (-1) is allowed with TARGET_SSE2 and that
>> const_wide_int (-1) is allowed with TARGET_AVX2. Probably we don't
>> have to check AVX512F in standard_sse_constant_p, as it implies
>> TARGET_AVX2.
>>
>> As said, it is the job of insn mode attributes to emit correct instruction.
>>
>> Based on the above observations, mode checks for -1 are not needed in
>> standard_sse_constant_p.
>
> void
> ix86_expand_vector_move (machine_mode mode, rtx operands[])
> {
>   rtx op0 = operands[0], op1 = operands[1];
>   /* Use GET_MODE_BITSIZE instead of GET_MODE_ALIGNMENT for IA MCU
>      psABI since the biggest alignment is 4 byte for IA MCU psABI.  */
>   unsigned int align = (TARGET_IAMCU
>                         ? GET_MODE_BITSIZE (mode)
>                         : GET_MODE_ALIGNMENT (mode));
>
>   if (push_operand (op0, VOIDmode))
>     op0 = emit_move_resolve_push (mode, op0);
>
>   /* Force constants other than zero into memory.  We do not know how
>      the instructions used to build constants modify the upper 64 bits
>      of the register, once we have that information we may be able
>      to handle some of them more efficiently.  */
>   if (can_create_pseudo_p ()
>       && register_operand (op0, mode)
>       && (CONSTANT_P (op1)
>           || (SUBREG_P (op1)
>               && CONSTANT_P (SUBREG_REG (op1))))
>       && !standard_sse_constant_p (op1))
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>
> What should it return for  op1 == (VOIDmode) -1 when
> TARGET_AVX is true and TARGET_AVX2 is false for
> mode == TImode and mode == OImode?
>
>     op1 = validize_mem (force_const_mem (mode, op1));

Let me rethink and redesign this whole mess, so we will have
consistent predicates.

Uros.
H.J. Lu April 21, 2016, 1:43 p.m. UTC | #5
On Thu, Apr 21, 2016 at 6:33 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Thu, Apr 21, 2016 at 2:59 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>
>>> We know, that const_int (-1) is allowed with TARGET_SSE2 and that
>>> const_wide_int (-1) is allowed with TARGET_AVX2. Probably we don't
>>> have to check AVX512F in standard_sse_constant_p, as it implies
>>> TARGET_AVX2.
>>>
>>> As said, it is the job of insn mode attributes to emit correct instruction.
>>>
>>> Based on the above observations, mode checks for -1 are not needed in
>>> standard_sse_constant_p.
>>
>> void
>> ix86_expand_vector_move (machine_mode mode, rtx operands[])
>> {
>>   rtx op0 = operands[0], op1 = operands[1];
>>   /* Use GET_MODE_BITSIZE instead of GET_MODE_ALIGNMENT for IA MCU
>>      psABI since the biggest alignment is 4 byte for IA MCU psABI.  */
>>   unsigned int align = (TARGET_IAMCU
>>                         ? GET_MODE_BITSIZE (mode)
>>                         : GET_MODE_ALIGNMENT (mode));
>>
>>   if (push_operand (op0, VOIDmode))
>>     op0 = emit_move_resolve_push (mode, op0);
>>
>>   /* Force constants other than zero into memory.  We do not know how
>>      the instructions used to build constants modify the upper 64 bits
>>      of the register, once we have that information we may be able
>>      to handle some of them more efficiently.  */
>>   if (can_create_pseudo_p ()
>>       && register_operand (op0, mode)
>>       && (CONSTANT_P (op1)
>>           || (SUBREG_P (op1)
>>               && CONSTANT_P (SUBREG_REG (op1))))
>>       && !standard_sse_constant_p (op1))
>> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>>
>> What should it return for  op1 == (VOIDmode) -1 when
>> TARGET_AVX is true and TARGET_AVX2 is false for
>> mode == TImode and mode == OImode?
>>
>>     op1 = validize_mem (force_const_mem (mode, op1));
>
> Let me rethink and redesign this whole mess, so we will have
> consistent predicates.

The problem is because -1 has no mode.  We can't tell
if -1 is a valid SSE constant without mode.  That is my
change to standard_sse_constant_p and
ix86_expand_vector_move is for.   It is sufficient for
all my tests, including benchmark runs.
Uros Bizjak April 21, 2016, 1:48 p.m. UTC | #6
On Thu, Apr 21, 2016 at 3:43 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Thu, Apr 21, 2016 at 6:33 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>> On Thu, Apr 21, 2016 at 2:59 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>
>>>> We know, that const_int (-1) is allowed with TARGET_SSE2 and that
>>>> const_wide_int (-1) is allowed with TARGET_AVX2. Probably we don't
>>>> have to check AVX512F in standard_sse_constant_p, as it implies
>>>> TARGET_AVX2.
>>>>
>>>> As said, it is the job of insn mode attributes to emit correct instruction.
>>>>
>>>> Based on the above observations, mode checks for -1 are not needed in
>>>> standard_sse_constant_p.
>>>
>>> void
>>> ix86_expand_vector_move (machine_mode mode, rtx operands[])
>>> {
>>>   rtx op0 = operands[0], op1 = operands[1];
>>>   /* Use GET_MODE_BITSIZE instead of GET_MODE_ALIGNMENT for IA MCU
>>>      psABI since the biggest alignment is 4 byte for IA MCU psABI.  */
>>>   unsigned int align = (TARGET_IAMCU
>>>                         ? GET_MODE_BITSIZE (mode)
>>>                         : GET_MODE_ALIGNMENT (mode));
>>>
>>>   if (push_operand (op0, VOIDmode))
>>>     op0 = emit_move_resolve_push (mode, op0);
>>>
>>>   /* Force constants other than zero into memory.  We do not know how
>>>      the instructions used to build constants modify the upper 64 bits
>>>      of the register, once we have that information we may be able
>>>      to handle some of them more efficiently.  */
>>>   if (can_create_pseudo_p ()
>>>       && register_operand (op0, mode)
>>>       && (CONSTANT_P (op1)
>>>           || (SUBREG_P (op1)
>>>               && CONSTANT_P (SUBREG_REG (op1))))
>>>       && !standard_sse_constant_p (op1))
>>> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>>>
>>> What should it return for  op1 == (VOIDmode) -1 when
>>> TARGET_AVX is true and TARGET_AVX2 is false for
>>> mode == TImode and mode == OImode?
>>>
>>>     op1 = validize_mem (force_const_mem (mode, op1));
>>
>> Let me rethink and redesign this whole mess, so we will have
>> consistent predicates.
>
> The problem is because -1 has no mode.  We can't tell
> if -1 is a valid SSE constant without mode.  That is my
> change to standard_sse_constant_p and
> ix86_expand_vector_move is for.   It is sufficient for
> all my tests, including benchmark runs.

I'm not against mode checks, but IMO, we have to do these checks in
predicates, where we know operand mode.

Uros.
H.J. Lu April 21, 2016, 1:54 p.m. UTC | #7
On Thu, Apr 21, 2016 at 6:48 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Thu, Apr 21, 2016 at 3:43 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Thu, Apr 21, 2016 at 6:33 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>> On Thu, Apr 21, 2016 at 2:59 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>
>>>>> We know, that const_int (-1) is allowed with TARGET_SSE2 and that
>>>>> const_wide_int (-1) is allowed with TARGET_AVX2. Probably we don't
>>>>> have to check AVX512F in standard_sse_constant_p, as it implies
>>>>> TARGET_AVX2.
>>>>>
>>>>> As said, it is the job of insn mode attributes to emit correct instruction.
>>>>>
>>>>> Based on the above observations, mode checks for -1 are not needed in
>>>>> standard_sse_constant_p.
>>>>
>>>> void
>>>> ix86_expand_vector_move (machine_mode mode, rtx operands[])
>>>> {
>>>>   rtx op0 = operands[0], op1 = operands[1];
>>>>   /* Use GET_MODE_BITSIZE instead of GET_MODE_ALIGNMENT for IA MCU
>>>>      psABI since the biggest alignment is 4 byte for IA MCU psABI.  */
>>>>   unsigned int align = (TARGET_IAMCU
>>>>                         ? GET_MODE_BITSIZE (mode)
>>>>                         : GET_MODE_ALIGNMENT (mode));
>>>>
>>>>   if (push_operand (op0, VOIDmode))
>>>>     op0 = emit_move_resolve_push (mode, op0);
>>>>
>>>>   /* Force constants other than zero into memory.  We do not know how
>>>>      the instructions used to build constants modify the upper 64 bits
>>>>      of the register, once we have that information we may be able
>>>>      to handle some of them more efficiently.  */
>>>>   if (can_create_pseudo_p ()
>>>>       && register_operand (op0, mode)
>>>>       && (CONSTANT_P (op1)
>>>>           || (SUBREG_P (op1)
>>>>               && CONSTANT_P (SUBREG_REG (op1))))
>>>>       && !standard_sse_constant_p (op1))
>>>> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>>>>
>>>> What should it return for  op1 == (VOIDmode) -1 when
>>>> TARGET_AVX is true and TARGET_AVX2 is false for
>>>> mode == TImode and mode == OImode?
>>>>
>>>>     op1 = validize_mem (force_const_mem (mode, op1));
>>>
>>> Let me rethink and redesign this whole mess, so we will have
>>> consistent predicates.
>>
>> The problem is because -1 has no mode.  We can't tell
>> if -1 is a valid SSE constant without mode.  That is my
>> change to standard_sse_constant_p and
>> ix86_expand_vector_move is for.   It is sufficient for
>> all my tests, including benchmark runs.
>
> I'm not against mode checks, but IMO, we have to do these checks in
> predicates, where we know operand mode.

I tried and it doesn't work since the correct mode may not be always
available in predicates.  Yes, they pass mode.  But they just do

mode = GET_MODE (op);

which returns VOIDmode for -1.
Uros Bizjak April 21, 2016, 1:59 p.m. UTC | #8
On Thu, Apr 21, 2016 at 3:54 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Thu, Apr 21, 2016 at 6:48 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>> On Thu, Apr 21, 2016 at 3:43 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>> On Thu, Apr 21, 2016 at 6:33 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>>> On Thu, Apr 21, 2016 at 2:59 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>
>>>>>> We know, that const_int (-1) is allowed with TARGET_SSE2 and that
>>>>>> const_wide_int (-1) is allowed with TARGET_AVX2. Probably we don't
>>>>>> have to check AVX512F in standard_sse_constant_p, as it implies
>>>>>> TARGET_AVX2.
>>>>>>
>>>>>> As said, it is the job of insn mode attributes to emit correct instruction.
>>>>>>
>>>>>> Based on the above observations, mode checks for -1 are not needed in
>>>>>> standard_sse_constant_p.
>>>>>
>>>>> void
>>>>> ix86_expand_vector_move (machine_mode mode, rtx operands[])
>>>>> {
>>>>>   rtx op0 = operands[0], op1 = operands[1];
>>>>>   /* Use GET_MODE_BITSIZE instead of GET_MODE_ALIGNMENT for IA MCU
>>>>>      psABI since the biggest alignment is 4 byte for IA MCU psABI.  */
>>>>>   unsigned int align = (TARGET_IAMCU
>>>>>                         ? GET_MODE_BITSIZE (mode)
>>>>>                         : GET_MODE_ALIGNMENT (mode));
>>>>>
>>>>>   if (push_operand (op0, VOIDmode))
>>>>>     op0 = emit_move_resolve_push (mode, op0);
>>>>>
>>>>>   /* Force constants other than zero into memory.  We do not know how
>>>>>      the instructions used to build constants modify the upper 64 bits
>>>>>      of the register, once we have that information we may be able
>>>>>      to handle some of them more efficiently.  */
>>>>>   if (can_create_pseudo_p ()
>>>>>       && register_operand (op0, mode)
>>>>>       && (CONSTANT_P (op1)
>>>>>           || (SUBREG_P (op1)
>>>>>               && CONSTANT_P (SUBREG_REG (op1))))
>>>>>       && !standard_sse_constant_p (op1))
>>>>> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>>>>>
>>>>> What should it return for  op1 == (VOIDmode) -1 when
>>>>> TARGET_AVX is true and TARGET_AVX2 is false for
>>>>> mode == TImode and mode == OImode?
>>>>>
>>>>>     op1 = validize_mem (force_const_mem (mode, op1));
>>>>
>>>> Let me rethink and redesign this whole mess, so we will have
>>>> consistent predicates.
>>>
>>> The problem is because -1 has no mode.  We can't tell
>>> if -1 is a valid SSE constant without mode.  That is my
>>> change to standard_sse_constant_p and
>>> ix86_expand_vector_move is for.   It is sufficient for
>>> all my tests, including benchmark runs.
>>
>> I'm not against mode checks, but IMO, we have to do these checks in
>> predicates, where we know operand mode.
>
> I tried and it doesn't work since the correct mode may not be always
> available in predicates.  Yes, they pass mode.  But they just do
>
> mode = GET_MODE (op);
>
> which returns VOIDmode for -1.

Well, looking at generated gcc/insns-preds.c, the predicates do:

(mode == VOIDmode || GET_MODE (op) == mode).

They *check* and don't *assign* "mode" variable.

So, I see no problem checking "mode" variable (that gets the value
from the pattern) in the predicates.

Uros.
H.J. Lu April 21, 2016, 2:50 p.m. UTC | #9
On Thu, Apr 21, 2016 at 6:59 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Thu, Apr 21, 2016 at 3:54 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Thu, Apr 21, 2016 at 6:48 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>> On Thu, Apr 21, 2016 at 3:43 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>> On Thu, Apr 21, 2016 at 6:33 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>>>> On Thu, Apr 21, 2016 at 2:59 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>>>>
>>>>>>> We know, that const_int (-1) is allowed with TARGET_SSE2 and that
>>>>>>> const_wide_int (-1) is allowed with TARGET_AVX2. Probably we don't
>>>>>>> have to check AVX512F in standard_sse_constant_p, as it implies
>>>>>>> TARGET_AVX2.
>>>>>>>
>>>>>>> As said, it is the job of insn mode attributes to emit correct instruction.
>>>>>>>
>>>>>>> Based on the above observations, mode checks for -1 are not needed in
>>>>>>> standard_sse_constant_p.
>>>>>>
>>>>>> void
>>>>>> ix86_expand_vector_move (machine_mode mode, rtx operands[])
>>>>>> {
>>>>>>   rtx op0 = operands[0], op1 = operands[1];
>>>>>>   /* Use GET_MODE_BITSIZE instead of GET_MODE_ALIGNMENT for IA MCU
>>>>>>      psABI since the biggest alignment is 4 byte for IA MCU psABI.  */
>>>>>>   unsigned int align = (TARGET_IAMCU
>>>>>>                         ? GET_MODE_BITSIZE (mode)
>>>>>>                         : GET_MODE_ALIGNMENT (mode));
>>>>>>
>>>>>>   if (push_operand (op0, VOIDmode))
>>>>>>     op0 = emit_move_resolve_push (mode, op0);
>>>>>>
>>>>>>   /* Force constants other than zero into memory.  We do not know how
>>>>>>      the instructions used to build constants modify the upper 64 bits
>>>>>>      of the register, once we have that information we may be able
>>>>>>      to handle some of them more efficiently.  */
>>>>>>   if (can_create_pseudo_p ()
>>>>>>       && register_operand (op0, mode)
>>>>>>       && (CONSTANT_P (op1)
>>>>>>           || (SUBREG_P (op1)
>>>>>>               && CONSTANT_P (SUBREG_REG (op1))))
>>>>>>       && !standard_sse_constant_p (op1))
>>>>>> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>>>>>>
>>>>>> What should it return for  op1 == (VOIDmode) -1 when
>>>>>> TARGET_AVX is true and TARGET_AVX2 is false for
>>>>>> mode == TImode and mode == OImode?
>>>>>>
>>>>>>     op1 = validize_mem (force_const_mem (mode, op1));
>>>>>
>>>>> Let me rethink and redesign this whole mess, so we will have
>>>>> consistent predicates.
>>>>
>>>> The problem is because -1 has no mode.  We can't tell
>>>> if -1 is a valid SSE constant without mode.  That is my
>>>> change to standard_sse_constant_p and
>>>> ix86_expand_vector_move is for.   It is sufficient for
>>>> all my tests, including benchmark runs.
>>>
>>> I'm not against mode checks, but IMO, we have to do these checks in
>>> predicates, where we know operand mode.
>>
>> I tried and it doesn't work since the correct mode may not be always
>> available in predicates.  Yes, they pass mode.  But they just do
>>
>> mode = GET_MODE (op);
>>
>> which returns VOIDmode for -1.
>
> Well, looking at generated gcc/insns-preds.c, the predicates do:
>
> (mode == VOIDmode || GET_MODE (op) == mode).
>
> They *check* and don't *assign* "mode" variable.
>
> So, I see no problem checking "mode" variable (that gets the value
> from the pattern) in the predicates.

This is an incomplete list:

combine.c:   && ! push_operand (dest, GET_MODE (dest)))
expr.c:  if (push_operand (x, GET_MODE (x)))
expr.c:  && ! push_operand (x, GET_MODE (x))))
gcse.c:   && ! push_operand (dest, GET_MODE (dest)))
gcse.c:  if (general_operand (exp, GET_MODE (reg)))
ifcvt.c:  if (! general_operand (cmp_a, GET_MODE (cmp_a))
ifcvt.c:      || ! general_operand (cmp_b, GET_MODE (cmp_b)))
ifcvt.c:      else if (general_operand (b, GET_MODE (b)))
ifcvt.c:  if (! general_operand (a, GET_MODE (a)) || tmp_a)
ifcvt.c:  if (! general_operand (b, GET_MODE (b)) || tmp_b)
ira-costs.c:      if (address_operand (op, GET_MODE (op))
ira-costs.c:      && general_operand (SET_SRC (set), GET_MODE (SET_SRC (set))))
lower-subreg.c:      if (GET_MODE (op_operand) != word_mode
lower-subreg.c:      && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
lower-subreg.c:                                         GET_MODE (op_operand),
lra-constraints.c: if (simplify_operand_subreg (i, GET_MODE (old)) ||
op_change_p)
optabs.c:  create_output_operand (&ops[0], target, GET_MODE (target));
optabs.c:  create_input_operand (&ops[1], op0, GET_MODE (op0));
postreload-gcse.c:      if (! push_operand (dest, GET_MODE (dest)))
postreload-gcse.c:  && general_operand (src, GET_MODE (src))
postreload-gcse.c:  && general_operand (dest, GET_MODE (dest))
postreload-gcse.c:  && general_operand (src, GET_MODE (src))

IRA and LRA use GET_MODE and pass it to predicates.
Uros Bizjak April 21, 2016, 4:31 p.m. UTC | #10
On Thu, Apr 21, 2016 at 4:50 PM, H.J. Lu <hjl.tools@gmail.com> wrote:

>>> I tried and it doesn't work since the correct mode may not be always
>>> available in predicates.  Yes, they pass mode.  But they just do
>>>
>>> mode = GET_MODE (op);
>>>
>>> which returns VOIDmode for -1.
>>
>> Well, looking at generated gcc/insns-preds.c, the predicates do:
>>
>> (mode == VOIDmode || GET_MODE (op) == mode).
>>
>> They *check* and don't *assign* "mode" variable.
>>
>> So, I see no problem checking "mode" variable (that gets the value
>> from the pattern) in the predicates.
>
> This is an incomplete list:
>
> combine.c:   && ! push_operand (dest, GET_MODE (dest)))
> expr.c:  if (push_operand (x, GET_MODE (x)))
> expr.c:  && ! push_operand (x, GET_MODE (x))))
> gcse.c:   && ! push_operand (dest, GET_MODE (dest)))
> gcse.c:  if (general_operand (exp, GET_MODE (reg)))
> ifcvt.c:  if (! general_operand (cmp_a, GET_MODE (cmp_a))
> ifcvt.c:      || ! general_operand (cmp_b, GET_MODE (cmp_b)))
> ifcvt.c:      else if (general_operand (b, GET_MODE (b)))
> ifcvt.c:  if (! general_operand (a, GET_MODE (a)) || tmp_a)
> ifcvt.c:  if (! general_operand (b, GET_MODE (b)) || tmp_b)
> ira-costs.c:      if (address_operand (op, GET_MODE (op))
> ira-costs.c:      && general_operand (SET_SRC (set), GET_MODE (SET_SRC (set))))
> lower-subreg.c:      if (GET_MODE (op_operand) != word_mode
> lower-subreg.c:      && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
> lower-subreg.c:                                         GET_MODE (op_operand),
> lra-constraints.c: if (simplify_operand_subreg (i, GET_MODE (old)) ||
> op_change_p)
> optabs.c:  create_output_operand (&ops[0], target, GET_MODE (target));
> optabs.c:  create_input_operand (&ops[1], op0, GET_MODE (op0));
> postreload-gcse.c:      if (! push_operand (dest, GET_MODE (dest)))
> postreload-gcse.c:  && general_operand (src, GET_MODE (src))
> postreload-gcse.c:  && general_operand (dest, GET_MODE (dest))
> postreload-gcse.c:  && general_operand (src, GET_MODE (src))
>
> IRA and LRA use GET_MODE and pass it to predicates.

I don't know what are you trying to prove here ...

Uros.
H.J. Lu April 21, 2016, 4:37 p.m. UTC | #11
On Thu, Apr 21, 2016 at 9:31 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Thu, Apr 21, 2016 at 4:50 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>
>>>> I tried and it doesn't work since the correct mode may not be always
>>>> available in predicates.  Yes, they pass mode.  But they just do
>>>>
>>>> mode = GET_MODE (op);
>>>>
>>>> which returns VOIDmode for -1.
>>>
>>> Well, looking at generated gcc/insns-preds.c, the predicates do:
>>>
>>> (mode == VOIDmode || GET_MODE (op) == mode).
>>>
>>> They *check* and don't *assign* "mode" variable.
>>>
>>> So, I see no problem checking "mode" variable (that gets the value
>>> from the pattern) in the predicates.
>>
>> This is an incomplete list:
>>
>> combine.c:   && ! push_operand (dest, GET_MODE (dest)))
>> expr.c:  if (push_operand (x, GET_MODE (x)))
>> expr.c:  && ! push_operand (x, GET_MODE (x))))
>> gcse.c:   && ! push_operand (dest, GET_MODE (dest)))
>> gcse.c:  if (general_operand (exp, GET_MODE (reg)))
>> ifcvt.c:  if (! general_operand (cmp_a, GET_MODE (cmp_a))
>> ifcvt.c:      || ! general_operand (cmp_b, GET_MODE (cmp_b)))
>> ifcvt.c:      else if (general_operand (b, GET_MODE (b)))
>> ifcvt.c:  if (! general_operand (a, GET_MODE (a)) || tmp_a)
>> ifcvt.c:  if (! general_operand (b, GET_MODE (b)) || tmp_b)
>> ira-costs.c:      if (address_operand (op, GET_MODE (op))
>> ira-costs.c:      && general_operand (SET_SRC (set), GET_MODE (SET_SRC (set))))
>> lower-subreg.c:      if (GET_MODE (op_operand) != word_mode
>> lower-subreg.c:      && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
>> lower-subreg.c:                                         GET_MODE (op_operand),
>> lra-constraints.c: if (simplify_operand_subreg (i, GET_MODE (old)) ||
>> op_change_p)
>> optabs.c:  create_output_operand (&ops[0], target, GET_MODE (target));
>> optabs.c:  create_input_operand (&ops[1], op0, GET_MODE (op0));
>> postreload-gcse.c:      if (! push_operand (dest, GET_MODE (dest)))
>> postreload-gcse.c:  && general_operand (src, GET_MODE (src))
>> postreload-gcse.c:  && general_operand (dest, GET_MODE (dest))
>> postreload-gcse.c:  && general_operand (src, GET_MODE (src))
>>
>> IRA and LRA use GET_MODE and pass it to predicates.
>
> I don't know what are you trying to prove here ...

The "mode" argument passed to predates can't be used to determine if
-1 is a valid SSE constant.
diff mbox

Patch

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 0687701..572f5bf 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -10777,7 +10777,23 @@  standard_sse_constant_p (rtx x)
   
   if (x == const0_rtx || x == CONST0_RTX (mode))
     return 1;
-  if (vector_all_ones_operand (x, mode))
+
+  else if (CONST_INT_P (x))
+    {
+      if (INTVAL (X) == HOST_WIDE_INT_M1
+	  && TARGET_SSE2)
+	return 2;
+    }
+  else if (CONST_WIDE_INT_P (x))
+    {
+      if (.... something involving wi::minus-one ....
+	  && TARGET_AVX2)
+	return 2;
+      if (....
+	  && TARGET_AVX512F)
+	return 2;
+    }
+  else if (vector_all_ones_operand (x, mode))
     switch (mode)
       {
       case V16QImode:
@@ -10811,53 +10827,70 @@  standard_sse_constant_p (rtx x)
 const char *
 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
 {
+  machine_mode insn_mode = get_attr_mode (insn);
+
   switch (standard_sse_constant_p (x))
     {
     case 1:
-      switch (get_attr_mode (insn))
+      switch (insn_mode)
 	{
 	case MODE_XI:
 	  return "vpxord\t%g0, %g0, %g0";
-	case MODE_V16SF:
-	  return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
-				 : "vpxord\t%g0, %g0, %g0";
-	case MODE_V8DF:
-	  return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
-				 : "vpxorq\t%g0, %g0, %g0";
+	case MODE_OI:
+	  return (TARGET_AVX512VL
+		  ? "vpxord\t%x0, %x0, %x0"
+		  : "vpxor\t%x0, %x0, %x0");
 	case MODE_TI:
-	  return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
-				 : "%vpxor\t%0, %d0";
-	case MODE_V2DF:
-	  return "%vxorpd\t%0, %d0";
-	case MODE_V4SF:
-	  return "%vxorps\t%0, %d0";
+	  return (TARGET_AVX512VL
+		  ? "vpxord\t%t0, %t0, %t0"
+		  : "%vpxor\t%0, %d0");
 
-	case MODE_OI:
-	  return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
-				 : "vpxor\t%x0, %x0, %x0";
+	case MODE_V8DF:
+	  return (TARGET_AVX512DQ
+		  ? "vxorpd\t%g0, %g0, %g0"
+		  : "vpxorq\t%g0, %g0, %g0");
 	case MODE_V4DF:
 	  return "vxorpd\t%x0, %x0, %x0";
+	case MODE_V2DF:
+	  return "%vxorpd\t%0, %d0";
+
+	case MODE_V16SF:
+	  return (TARGET_AVX512DQ
+		  ? "vxorps\t%g0, %g0, %g0"
+		  : "vpxord\t%g0, %g0, %g0");
 	case MODE_V8SF:
 	  return "vxorps\t%x0, %x0, %x0";
+	case MODE_V4SF:
+	  return "%vxorps\t%0, %d0";
 
 	default:
 	  break;
 	}
 
     case 2:
-      if (TARGET_AVX512VL
-	  || get_attr_mode (insn) == MODE_XI
-	  || get_attr_mode (insn) == MODE_V8DF
-	  || get_attr_mode (insn) == MODE_V16SF)
-	return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
-      if (TARGET_AVX)
-	return "vpcmpeqd\t%0, %0, %0";
-      else
-	return "pcmpeqd\t%0, %0";
+      switch (GET_MODE_SIZE (insn_mode))
+	{
+	case 64:
+	  gcc_assert (TARGET_AVX512F);
+	  return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
+	case 32:
+	  gcc_assert (TARGET_AVX2);
+	  return (TARGET_AVX512VL
+		  ? "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
+		  : "vpcmpeqd\t%0, %0, %0");
+	case 16:
+	  gcc_assert (TARGET_SSE2);
+	  return (TARGET_AVX512VL
+		  ? "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
+		  : "pcmpeqd\t%0, %0");
+	default:
+	  break;
+	}
 
     default:
       break;
     }
+
   gcc_unreachable ();
 }
 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 38eb98c..3337968 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1970,9 +1970,11 @@ 
    (set_attr "length_immediate" "1")])
 
 (define_insn "*movxi_internal_avx512f"
-  [(set (match_operand:XI 0 "nonimmediate_operand" "=v,v ,m")
-	(match_operand:XI 1 "vector_move_operand"  "C ,vm,v"))]
-  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  [(set (match_operand:XI 0 "nonimmediate_operand"		"=v,v ,m")
+	(match_operand:XI 1 "nonimmediate_or_sse_const_operand" "BC,vm,v"))]
+  "TARGET_AVX512F
+   && (register_operand (operands[0], XImode)
+       || register_operand (operands[1], XImode))"
 {
   switch (which_alternative)
     {
@@ -1994,9 +1996,11 @@ 
    (set_attr "mode" "XI")])
 
 (define_insn "*movoi_internal_avx"
-  [(set (match_operand:OI 0 "nonimmediate_operand" "=v,v ,m")
-	(match_operand:OI 1 "vector_move_operand"  "C ,vm,v"))]
-  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  [(set (match_operand:OI 0 "nonimmediate_operand"		"=v,v,v ,m")
+	(match_operand:OI 1 "nonimmediate_or_sse_const_operand" "BC,C,vm,v"))]
+  "TARGET_AVX
+   && (register_operand (operands[0], OImode)
+       || register_operand (operands[1], OImode))"
 {
   switch (get_attr_type (insn))
     {
@@ -2028,7 +2032,8 @@ 
       gcc_unreachable ();
     }
 }
-  [(set_attr "type" "sselog1,ssemov,ssemov")
+  [(set_attr "isa" "avx2,*,*,*")
+   (set_attr "type" "sselog1,sselog1,ssemov,ssemov")
    (set_attr "prefix" "vex")
    (set (attr "mode")
 	(cond [(ior (match_operand 0 "ext_sse_reg_operand")
@@ -2036,17 +2041,21 @@ 
 		 (const_string "XI")
 	       (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
 		 (const_string "V8SF")
-	       (and (eq_attr "alternative" "2")
+	       (and (eq_attr "alternative" "3")
 		    (match_test "TARGET_SSE_TYPELESS_STORES"))
 		 (const_string "V8SF")
 	      ]
 	      (const_string "OI")))])
 
 (define_insn "*movti_internal"
-  [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v,v ,m")
-	(match_operand:TI 1 "general_operand"      "riFo,re,C,vm,v"))]
-  "(TARGET_64BIT || TARGET_SSE)
-   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v ,v,v ,m")
+	(match_operand:TI 1 "general_operand"	   "riFo,re,BC,C,vm,v"))]
+  "(TARGET_64BIT
+    && !(MEM_P (operands[0]) && MEM_P (operands[1])))
+   || (TARGET_SSE
+       && nonimmediate_or_sse_const_operand (operands[1], TImode)
+       && (register_operand (operands[0], TImode)
+	   || register_operand (operands[1], TImode)))"
 {
   switch (get_attr_type (insn))
     {
@@ -2083,8 +2092,8 @@ 
       gcc_unreachable ();
     }
 }
-  [(set_attr "isa" "x64,x64,*,*,*")
-   (set_attr "type" "multi,multi,sselog1,ssemov,ssemov")
+  [(set_attr "isa" "x64,x64,sse2,*,*,*")
+   (set_attr "type" "multi,multi,sselog1,sselog1,ssemov,ssemov")
    (set (attr "prefix")
      (if_then_else (eq_attr "type" "sselog1,ssemov")
        (const_string "maybe_vex")
@@ -2098,7 +2107,7 @@ 
 	       (ior (not (match_test "TARGET_SSE2"))
 		    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
 		 (const_string "V4SF")
-	       (and (eq_attr "alternative" "4")
+	       (and (eq_attr "alternative" "5")
 		    (match_test "TARGET_SSE_TYPELESS_STORES"))
 		 (const_string "V4SF")
 	       (match_test "TARGET_AVX")