diff mbox

Allow all-1s integer constants as standard SSE constants

Message ID 20160420195359.GA3113@intel.com
State New
Headers show

Commit Message

H.J. Lu April 20, 2016, 7:53 p.m. UTC
Since all 1s in TImode is a standard SSE2 constant, all 1s in OImode is
a standard AVX2 constant, and all 1s in XImode is a standard AVX512F
constant, pass the mode to standard_sse_constant_p and
standard_sse_constant_opcode to check whether the all-1s constant is
available on the target.

Tested on Linux/x86-64.  OK for master?

BTW, it will be used to fix

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70155


H.J.
---
	* config/i386/i386-protos.h (standard_sse_constant_p): Take
	machine_mode with VOIDmode as default.
	* config/i386/i386.c (standard_sse_constant_p): Get the mode if
	it is VOIDmode.  Return 2 for an all-1s integer constant in
	supported modes.
	(ix86_expand_vector_move): Pass mode to standard_sse_constant_p.
	* config/i386/i386.md (*movxi_internal_avx512f): Replace
	vector_move_operand with nonimmediate_or_sse_const_operand and
	use BC instead of C in constraint.  Check register_operand
	instead of MEM_P.  Pass mode to standard_sse_constant_opcode.
	(*movoi_internal_avx): Disable for TARGET_AVX2.  Check
	register_operand instead of MEM_P.
	(*movoi_internal_avx2): New pattern.
	(*movti_internal_sse): Likewise.
	(*movti_internal): Renamed to ...
	(*movti_internal_sse2): This.  Require SSE2.  Use BC instead of
	C in constraint. Check register_operand instead of MEM_P in
	32-bit mode.
---
 gcc/config/i386/i386-protos.h |   2 +-
 gcc/config/i386/i386.c        |  27 ++++++++---
 gcc/config/i386/i386.md       | 104 ++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 121 insertions(+), 12 deletions(-)

Comments

Uros Bizjak April 21, 2016, 7:37 a.m. UTC | #1
On Wed, Apr 20, 2016 at 9:53 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
> Since all 1s in TImode is standard SSE2 constants, all 1s in OImode is
> standard AVX2 constants and all 1s in XImode is standard AVX512F constants,
> pass mode to standard_sse_constant_p and standard_sse_constant_opcode
> to check if all 1s is available for target.
>
> Tested on Linux/x86-64.  OK for master?

No.

This patch should use "isa" attribute instead of adding even more
similar patterns. Also, please leave MEM_P checks, the rare C->m move
can be easily resolved by IRA.

Also, the mode checks should be in the predicate,
standard_sse_constant_p just validates if the constant is allowed by
ISA.

Uros.

> BTW, it will be used to fix
>
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70155
>
>
> H.J.
> ---
>         * config/i386/i386-protos.h (standard_sse_constant_p): Take
>         machine_mode with VOIDmode as default.
>         * config/i386/i386.c (standard_sse_constant_p): Get mode if
>         it is VOIDmode.  Return 2 for all 1s of integer in supported
>         modes.
>         (ix86_expand_vector_move): Pass mode to standard_sse_constant_p.
>         * config/i386/i386.md (*movxi_internal_avx512f): Replace
>         vector_move_operand with nonimmediate_or_sse_const_operand and
>         use BC instead of C in constraint.  Check register_operand
>         instead of MEM_P.  Pass mode to standard_sse_constant_opcode.
>         (*movoi_internal_avx): Disabled for TARGET_AVX2.  Check
>         register_operand instead of MEM_P.
>         (*movoi_internal_avx2): New pattern.
>         (*movti_internal_sse): Likewise.
>         (*movti_internal): Renamed to ...
>         (*movti_internal_sse2): This.  Require SSE2.  Use BC instead of
>         C in constraint. Check register_operand instead of MEM_P in
>         32-bit mode.
> ---
>  gcc/config/i386/i386-protos.h |   2 +-
>  gcc/config/i386/i386.c        |  27 ++++++++---
>  gcc/config/i386/i386.md       | 104 ++++++++++++++++++++++++++++++++++++++++--
>  3 files changed, 121 insertions(+), 12 deletions(-)
>
> diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
> index ff47bc1..cf54189 100644
> --- a/gcc/config/i386/i386-protos.h
> +++ b/gcc/config/i386/i386-protos.h
> @@ -50,7 +50,7 @@ extern bool ix86_using_red_zone (void);
>  extern int standard_80387_constant_p (rtx);
>  extern const char *standard_80387_constant_opcode (rtx);
>  extern rtx standard_80387_constant_rtx (int);
> -extern int standard_sse_constant_p (rtx);
> +extern int standard_sse_constant_p (rtx, machine_mode = VOIDmode);
>  extern const char *standard_sse_constant_opcode (rtx_insn *, rtx);
>  extern bool symbolic_reference_mentioned_p (rtx);
>  extern bool extended_reg_mentioned_p (rtx);
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 6379313..dd951c2 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -10766,18 +10766,31 @@ standard_80387_constant_rtx (int idx)
>     in supported SSE/AVX vector mode.  */
>
>  int
> -standard_sse_constant_p (rtx x)
> +standard_sse_constant_p (rtx x, machine_mode mode)
>  {
> -  machine_mode mode;
> -
>    if (!TARGET_SSE)
>      return 0;
>
> -  mode = GET_MODE (x);
> -
> +  if (mode == VOIDmode)
> +    mode = GET_MODE (x);
> +
>    if (x == const0_rtx || x == CONST0_RTX (mode))
>      return 1;
> -  if (vector_all_ones_operand (x, mode))
> +  if (CONST_INT_P (x))
> +    {
> +      /* If mode != VOIDmode, standard_sse_constant_p must be called:
> +        1. On TImode with SSE2.
> +        2. On OImode with AVX2.
> +        3. On XImode with AVX512F.
> +       */
> +      if ((HOST_WIDE_INT) INTVAL (x) == HOST_WIDE_INT_M1
> +         && (mode == VOIDmode
> +             || (mode == TImode && TARGET_SSE2)
> +             || (mode == OImode && TARGET_AVX2)
> +             || (mode == XImode && TARGET_AVX512F)))
> +       return 2;
> +    }
> +  else if (vector_all_ones_operand (x, mode))
>      switch (mode)
>        {
>        case V16QImode:
> @@ -18758,7 +18771,7 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])
>        && (CONSTANT_P (op1)
>           || (SUBREG_P (op1)
>               && CONSTANT_P (SUBREG_REG (op1))))
> -      && !standard_sse_constant_p (op1))
> +      && !standard_sse_constant_p (op1, mode))
>      op1 = validize_mem (force_const_mem (mode, op1));
>
>    /* We need to check memory alignment for SSE mode since attribute
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index babd0a4..75227aa 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -1971,8 +1971,10 @@
>
>  (define_insn "*movxi_internal_avx512f"
>    [(set (match_operand:XI 0 "nonimmediate_operand" "=v,v ,m")
> -       (match_operand:XI 1 "vector_move_operand"  "C ,vm,v"))]
> -  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
> +       (match_operand:XI 1 "nonimmediate_or_sse_const_operand" "BC,vm,v"))]
> +  "TARGET_AVX512F
> +   && (register_operand (operands[0], XImode)
> +       || register_operand (operands[1], XImode))"
>  {
>    switch (which_alternative)
>      {
> @@ -1996,7 +1998,10 @@
>  (define_insn "*movoi_internal_avx"
>    [(set (match_operand:OI 0 "nonimmediate_operand" "=v,v ,m")
>         (match_operand:OI 1 "vector_move_operand"  "C ,vm,v"))]
> -  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
> +  "TARGET_AVX
> +   && !TARGET_AVX2
> +   && (register_operand (operands[0], OImode)
> +       || register_operand (operands[1], OImode))"
>  {
>    switch (get_attr_type (insn))
>      {
> @@ -2042,10 +2047,62 @@
>               ]
>               (const_string "OI")))])
>
> -(define_insn "*movti_internal"
> +(define_insn "*movoi_internal_avx2"
> +  [(set (match_operand:OI 0 "nonimmediate_operand"             "=v,v ,m")
> +       (match_operand:OI 1 "nonimmediate_or_sse_const_operand" "BC,vm,v"))]
> +  "TARGET_AVX2
> +   && (register_operand (operands[0], OImode)
> +       || register_operand (operands[1], OImode))"
> +{
> +  switch (get_attr_type (insn))
> +    {
> +    case TYPE_SSELOG1:
> +      return standard_sse_constant_opcode (insn, operands[1]);
> +
> +    case TYPE_SSEMOV:
> +      if (misaligned_operand (operands[0], OImode)
> +         || misaligned_operand (operands[1], OImode))
> +       {
> +         if (get_attr_mode (insn) == MODE_V8SF)
> +           return "vmovups\t{%1, %0|%0, %1}";
> +         else if (get_attr_mode (insn) == MODE_XI)
> +           return "vmovdqu32\t{%1, %0|%0, %1}";
> +         else
> +           return "vmovdqu\t{%1, %0|%0, %1}";
> +       }
> +      else
> +       {
> +         if (get_attr_mode (insn) == MODE_V8SF)
> +           return "vmovaps\t{%1, %0|%0, %1}";
> +         else if (get_attr_mode (insn) == MODE_XI)
> +           return "vmovdqa32\t{%1, %0|%0, %1}";
> +         else
> +           return "vmovdqa\t{%1, %0|%0, %1}";
> +       }
> +
> +    default:
> +      gcc_unreachable ();
> +    }
> +}
> +  [(set_attr "type" "sselog1,ssemov,ssemov")
> +   (set_attr "prefix" "vex")
> +   (set (attr "mode")
> +       (cond [(ior (match_operand 0 "ext_sse_reg_operand")
> +                   (match_operand 1 "ext_sse_reg_operand"))
> +                (const_string "XI")
> +              (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
> +                (const_string "V8SF")
> +              (and (eq_attr "alternative" "2")
> +                   (match_test "TARGET_SSE_TYPELESS_STORES"))
> +                (const_string "V8SF")
> +             ]
> +             (const_string "OI")))])
> +
> +(define_insn "*movti_internal_sse"
>    [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v,v ,m")
>         (match_operand:TI 1 "general_operand"      "riFo,re,C,vm,v"))]
>    "(TARGET_64BIT || TARGET_SSE)
> +   && !TARGET_SSE2
>     && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
>  {
>    switch (get_attr_type (insn))
> @@ -2061,6 +2118,45 @@
>          to stack may result in unaligned memory access.  */
>        if (misaligned_operand (operands[0], TImode)
>           || misaligned_operand (operands[1], TImode))
> +       return "%vmovups\t{%1, %0|%0, %1}";
> +      else
> +       return "%vmovaps\t{%1, %0|%0, %1}";
> +
> +    default:
> +      gcc_unreachable ();
> +    }
> +}
> +  [(set_attr "isa" "x64,x64,*,*,*")
> +   (set_attr "type" "multi,multi,sselog1,ssemov,ssemov")
> +   (set_attr "prefix" "orig")
> +   (set (attr "mode")
> +       (cond [(eq_attr "alternative" "0,1")
> +                (const_string "DI")]
> +              (const_string "TI")))])
> +
> +(define_insn "*movti_internal_sse2"
> +  [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v, v ,m")
> +       (match_operand:TI 1 "general_operand"      "riFo,re,BC,vm,v"))]
> +  "TARGET_SSE2
> +   && ((TARGET_64BIT
> +       && !(MEM_P (operands[0]) && MEM_P (operands[1])))
> +       || (!TARGET_64BIT
> +          && (register_operand (operands[0], TImode)
> +              || register_operand (operands[1], TImode))))"
> +{
> +  switch (get_attr_type (insn))
> +    {
> +    case TYPE_MULTI:
> +      return "#";
> +
> +    case TYPE_SSELOG1:
> +      return standard_sse_constant_opcode (insn, operands[1]);
> +
> +    case TYPE_SSEMOV:
> +      /* TDmode values are passed as TImode on the stack.  Moving them
> +        to stack may result in unaligned memory access.  */
> +      if (misaligned_operand (operands[0], TImode)
> +         || misaligned_operand (operands[1], TImode))
>         {
>           if (get_attr_mode (insn) == MODE_V4SF)
>             return "%vmovups\t{%1, %0|%0, %1}";
> --
> 2.5.5
>
Uros Bizjak April 21, 2016, 7:42 a.m. UTC | #2
On Thu, Apr 21, 2016 at 9:37 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Wed, Apr 20, 2016 at 9:53 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
>> Since all 1s in TImode is standard SSE2 constants, all 1s in OImode is
>> standard AVX2 constants and all 1s in XImode is standard AVX512F constants,
>> pass mode to standard_sse_constant_p and standard_sse_constant_opcode
>> to check if all 1s is available for target.
>>
>> Tested on Linux/x86-64.  OK for master?
>
> No.
>
> This patch should use "isa" attribute instead of adding even more
> similar patterns. Also, please leave MEM_P checks, the rare C->m move
> can be easily resolved by IRA.

Actually, register_operand checks are indeed better, please disregard
MEM_P recommendation.

Uros.
diff mbox

Patch

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index ff47bc1..cf54189 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -50,7 +50,7 @@  extern bool ix86_using_red_zone (void);
 extern int standard_80387_constant_p (rtx);
 extern const char *standard_80387_constant_opcode (rtx);
 extern rtx standard_80387_constant_rtx (int);
-extern int standard_sse_constant_p (rtx);
+extern int standard_sse_constant_p (rtx, machine_mode = VOIDmode);
 extern const char *standard_sse_constant_opcode (rtx_insn *, rtx);
 extern bool symbolic_reference_mentioned_p (rtx);
 extern bool extended_reg_mentioned_p (rtx);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 6379313..dd951c2 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -10766,18 +10766,31 @@  standard_80387_constant_rtx (int idx)
    in supported SSE/AVX vector mode.  */
 
 int
-standard_sse_constant_p (rtx x)
+standard_sse_constant_p (rtx x, machine_mode mode)
 {
-  machine_mode mode;
-
   if (!TARGET_SSE)
     return 0;
 
-  mode = GET_MODE (x);
-  
+  if (mode == VOIDmode)
+    mode = GET_MODE (x);
+
   if (x == const0_rtx || x == CONST0_RTX (mode))
     return 1;
-  if (vector_all_ones_operand (x, mode))
+  if (CONST_INT_P (x))
+    {
+      /* If mode != VOIDmode, standard_sse_constant_p must be called:
+	 1. On TImode with SSE2.
+	 2. On OImode with AVX2.
+	 3. On XImode with AVX512F.
+       */
+      if ((HOST_WIDE_INT) INTVAL (x) == HOST_WIDE_INT_M1
+	  && (mode == VOIDmode
+	      || (mode == TImode && TARGET_SSE2)
+	      || (mode == OImode && TARGET_AVX2)
+	      || (mode == XImode && TARGET_AVX512F)))
+	return 2;
+    }
+  else if (vector_all_ones_operand (x, mode))
     switch (mode)
       {
       case V16QImode:
@@ -18758,7 +18771,7 @@  ix86_expand_vector_move (machine_mode mode, rtx operands[])
       && (CONSTANT_P (op1)
 	  || (SUBREG_P (op1)
 	      && CONSTANT_P (SUBREG_REG (op1))))
-      && !standard_sse_constant_p (op1))
+      && !standard_sse_constant_p (op1, mode))
     op1 = validize_mem (force_const_mem (mode, op1));
 
   /* We need to check memory alignment for SSE mode since attribute
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index babd0a4..75227aa 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1971,8 +1971,10 @@ 
 
 (define_insn "*movxi_internal_avx512f"
   [(set (match_operand:XI 0 "nonimmediate_operand" "=v,v ,m")
-	(match_operand:XI 1 "vector_move_operand"  "C ,vm,v"))]
-  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+	(match_operand:XI 1 "nonimmediate_or_sse_const_operand" "BC,vm,v"))]
+  "TARGET_AVX512F
+   && (register_operand (operands[0], XImode)
+       || register_operand (operands[1], XImode))"
 {
   switch (which_alternative)
     {
@@ -1996,7 +1998,10 @@ 
 (define_insn "*movoi_internal_avx"
   [(set (match_operand:OI 0 "nonimmediate_operand" "=v,v ,m")
 	(match_operand:OI 1 "vector_move_operand"  "C ,vm,v"))]
-  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "TARGET_AVX
+   && !TARGET_AVX2
+   && (register_operand (operands[0], OImode)
+       || register_operand (operands[1], OImode))"
 {
   switch (get_attr_type (insn))
     {
@@ -2042,10 +2047,62 @@ 
 	      ]
 	      (const_string "OI")))])
 
-(define_insn "*movti_internal"
+(define_insn "*movoi_internal_avx2"
+  [(set (match_operand:OI 0 "nonimmediate_operand"		"=v,v ,m")
+	(match_operand:OI 1 "nonimmediate_or_sse_const_operand" "BC,vm,v"))]
+  "TARGET_AVX2
+   && (register_operand (operands[0], OImode)
+       || register_operand (operands[1], OImode))"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_SSELOG1:
+      return standard_sse_constant_opcode (insn, operands[1]);
+
+    case TYPE_SSEMOV:
+      if (misaligned_operand (operands[0], OImode)
+	  || misaligned_operand (operands[1], OImode))
+	{
+	  if (get_attr_mode (insn) == MODE_V8SF)
+	    return "vmovups\t{%1, %0|%0, %1}";
+	  else if (get_attr_mode (insn) == MODE_XI)
+	    return "vmovdqu32\t{%1, %0|%0, %1}";
+	  else
+	    return "vmovdqu\t{%1, %0|%0, %1}";
+	}
+      else
+	{
+	  if (get_attr_mode (insn) == MODE_V8SF)
+	    return "vmovaps\t{%1, %0|%0, %1}";
+	  else if (get_attr_mode (insn) == MODE_XI)
+	    return "vmovdqa32\t{%1, %0|%0, %1}";
+	  else
+	    return "vmovdqa\t{%1, %0|%0, %1}";
+	}
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "sselog1,ssemov,ssemov")
+   (set_attr "prefix" "vex")
+   (set (attr "mode")
+	(cond [(ior (match_operand 0 "ext_sse_reg_operand")
+		    (match_operand 1 "ext_sse_reg_operand"))
+		 (const_string "XI")
+	       (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+		 (const_string "V8SF")
+	       (and (eq_attr "alternative" "2")
+		    (match_test "TARGET_SSE_TYPELESS_STORES"))
+		 (const_string "V8SF")
+	      ]
+	      (const_string "OI")))])
+
+(define_insn "*movti_internal_sse"
   [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v,v ,m")
 	(match_operand:TI 1 "general_operand"      "riFo,re,C,vm,v"))]
   "(TARGET_64BIT || TARGET_SSE)
+   && !TARGET_SSE2
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
   switch (get_attr_type (insn))
@@ -2061,6 +2118,45 @@ 
 	 to stack may result in unaligned memory access.  */
       if (misaligned_operand (operands[0], TImode)
 	  || misaligned_operand (operands[1], TImode))
+	return "%vmovups\t{%1, %0|%0, %1}";
+      else
+	return "%vmovaps\t{%1, %0|%0, %1}";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "isa" "x64,x64,*,*,*")
+   (set_attr "type" "multi,multi,sselog1,ssemov,ssemov")
+   (set_attr "prefix" "orig")
+   (set (attr "mode")
+	(cond [(eq_attr "alternative" "0,1")
+		 (const_string "DI")]
+	       (const_string "TI")))])
+
+(define_insn "*movti_internal_sse2"
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v, v ,m")
+	(match_operand:TI 1 "general_operand"      "riFo,re,BC,vm,v"))]
+  "TARGET_SSE2
+   && ((TARGET_64BIT
+	&& !(MEM_P (operands[0]) && MEM_P (operands[1])))
+       || (!TARGET_64BIT
+	   && (register_operand (operands[0], TImode)
+	       || register_operand (operands[1], TImode))))"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_MULTI:
+      return "#";
+
+    case TYPE_SSELOG1:
+      return standard_sse_constant_opcode (insn, operands[1]);
+
+    case TYPE_SSEMOV:
+      /* TDmode values are passed as TImode on the stack.  Moving them
+	 to stack may result in unaligned memory access.  */
+      if (misaligned_operand (operands[0], TImode)
+	  || misaligned_operand (operands[1], TImode))
 	{
 	  if (get_attr_mode (insn) == MODE_V4SF)
 	    return "%vmovups\t{%1, %0|%0, %1}";