diff mbox series

[v3] x86: Optimize load of const all 1s FP vectors

Message ID CAMe9rOqivmF+DezRNpox5g02+mO=TCkN-WuK05Bb0h3E5yy95A@mail.gmail.com
State New
Headers show
Series [v3] x86: Optimize load of const all 1s FP vectors | expand

Commit Message

H.J. Lu Aug. 9, 2021, 5:46 p.m. UTC
On Mon, Aug 9, 2021 at 8:27 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Mon, Aug 9, 2021 at 5:24 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Sun, Aug 8, 2021 at 1:23 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> > >
> > > On Sat, Aug 7, 2021 at 4:41 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > >
> > > > Update vector_all_ones_operand to return true for const all 1s float
> > > > vectors.
> > > >
> > > > gcc/
> > > >
> > > >         PR target/101804
> > > >         * config/i386/predicates.md (vector_all_ones_operand): Return
> > > >         true for const all 1s float vectors.
> > > >
> > > > gcc/testsuite/
> > > >
> > > >         PR target/101804
> > > >         * gcc.target/i386/avx2-gather-2.c: Pass -march=skylake instead
> > > >         of "-mavx2 -mtune=skylake".  Scan vpcmpeqd.
> > >
> > > No, vector_all_ones_operand is intended to be integer minus-one. Use
> > > float_vector_all_ones_operand in a specific place, where it is needed.
> > >
> >
> > Like this?
>
> Please also add a new constraint, BC is intended for integer values.
>
> Uros.

Here is the v3 patch with the new BF constraint.  OK for master?

Thanks.

Comments

Uros Bizjak Aug. 9, 2021, 6:53 p.m. UTC | #1
On Mon, Aug 9, 2021 at 7:47 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Mon, Aug 9, 2021 at 8:27 AM Uros Bizjak <ubizjak@gmail.com> wrote:
> >
> > On Mon, Aug 9, 2021 at 5:24 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > On Sun, Aug 8, 2021 at 1:23 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> > > >
> > > > On Sat, Aug 7, 2021 at 4:41 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > > >
> > > > > Update vector_all_ones_operand to return true for const all 1s float
> > > > > vectors.
> > > > >
> > > > > gcc/
> > > > >
> > > > >         PR target/101804
> > > > >         * config/i386/predicates.md (vector_all_ones_operand): Return
> > > > >         true for const all 1s float vectors.
> > > > >
> > > > > gcc/testsuite/
> > > > >
> > > > >         PR target/101804
> > > > >         * gcc.target/i386/avx2-gather-2.c: Pass -march=skylake instead
> > > > >         of "-mavx2 -mtune=skylake".  Scan vpcmpeqd.
> > > >
> > > > No, vector_all_ones_operand is intended to be integer minus-one. Use
> > > > float_vector_all_ones_operand in a specific place, where it is needed.
> > > >
> > >
> > > Like this?
> >
> > Please also add a new constraint, BC is intended for integer values.
> >
> > Uros.
>
> Here is the v3 patch with the new BF constraint.  OK for master?

OK with some comment fixes.

+;;  C  Integer SSE constant -1 operand.
+;;  F  Floating-point SSE constant -1 operand.

Maybe we should simply say "... SSE constant with all bits set" here.
"... SSE constant -1" is ambiguous; someone could interpret it as the
floating-point constant -1.0 rather than a value with all bits set.

-  "@internal SSE constant -1 operand."
+  "@internal integer SSE constant -1 operand."

Also here.

+(define_constraint "BF"
+  "@internal floating-point SSE constant -1 operand."

And here.

Thanks,
Uros.
diff mbox series

Patch

From 6d4f8d82ad2c6d284c2c7afc199af27749da6418 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Fri, 6 Aug 2021 12:32:01 -0700
Subject: [PATCH v3] x86: Optimize load of const all 1s FP vectors

Check float_vector_all_ones_operand for vector floating-point modes to
optimize the load of constant floating-point vectors with all bits set
(all 1s).

gcc/

	PR target/101804
	* config/i386/constraints.md (BC): Document for integer SSE
	constant -1 operand.
	(BF): New constraint for const all 1s floating-point vectors.
	* config/i386/i386.c (standard_sse_constant_p): Handle const all
	1s floating-point vectors.
	(standard_sse_constant_opcode): Likewise.
	* config/i386/sse.md (sseconstm1): New mode attribute.
	(mov<mode>_internal): Replace BC with <sseconstm1>.

gcc/testsuite/

	PR target/101804
	* gcc.target/i386/avx2-gather-2.c: Pass -march=skylake instead
	of "-mavx2 -mtune=skylake".  Scan vpcmpeqd.
---
 gcc/config/i386/constraints.md                | 10 ++++++++--
 gcc/config/i386/i386.c                        | 11 +++++++++--
 gcc/config/i386/sse.md                        | 11 ++++++++++-
 gcc/testsuite/gcc.target/i386/avx2-gather-2.c |  3 ++-
 4 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
index 4aa28a5621c..5a8c52b52e0 100644
--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -166,7 +166,8 @@  (define_register_constraint "YW"
 ;;  s  Sibcall memory operand, not valid for TARGET_X32
 ;;  w  Call memory operand, not valid for TARGET_X32
 ;;  z  Constant call address operand.
-;;  C  SSE constant operand.
+;;  C  Integer SSE constant -1 operand.
+;;  F  Floating-point SSE constant -1 operand.
 
 (define_constraint "Bf"
   "@internal Flags register operand."
@@ -216,11 +217,16 @@  (define_constraint "Bz"
   (match_operand 0 "constant_call_address_operand"))
 
 (define_constraint "BC"
-  "@internal SSE constant -1 operand."
+  "@internal integer SSE constant -1 operand."
   (and (match_test "TARGET_SSE")
        (ior (match_test "op == constm1_rtx")
 	    (match_operand 0 "vector_all_ones_operand"))))
 
+(define_constraint "BF"
+  "@internal floating-point SSE constant -1 operand."
+  (and (match_test "TARGET_SSE")
+       (match_operand 0 "float_vector_all_ones_operand")))
+
 ;; Integer constant constraints.
 (define_constraint "Wb"
   "Integer constant in the range 0 @dots{} 7, for 8-bit shifts."
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index aea224ab235..4d4ab6a03d6 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -5073,7 +5073,11 @@  standard_sse_constant_p (rtx x, machine_mode pred_mode)
   if (x == const0_rtx || const0_operand (x, mode))
     return 1;
 
-  if (x == constm1_rtx || vector_all_ones_operand (x, mode))
+  if (x == constm1_rtx
+      || vector_all_ones_operand (x, mode)
+      || ((GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
+	   || GET_MODE_CLASS (pred_mode) == MODE_VECTOR_FLOAT)
+	  && float_vector_all_ones_operand (x, mode)))
     {
       /* VOIDmode integer constant, get mode from the predicate.  */
       if (mode == VOIDmode)
@@ -5171,7 +5175,10 @@  standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
 	  gcc_unreachable ();
 	}
     }
-  else if (x == constm1_rtx || vector_all_ones_operand (x, mode))
+  else if (x == constm1_rtx
+	   || vector_all_ones_operand (x, mode)
+	   || (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
+	       && float_vector_all_ones_operand (x, mode)))
     {
       enum attr_mode insn_mode = get_attr_mode (insn);
       
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index a46a2373547..5255d42900e 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -777,6 +777,15 @@  (define_mode_attr sseinsnmode
    (V4SF "V4SF") (V2DF "V2DF")
    (TI "TI")])
 
+;; SSE constant -1 constraint
+(define_mode_attr sseconstm1
+  [(V64QI "BC") (V32HI "BC") (V16SI "BC") (V8DI "BC") (V4TI "BC")
+   (V32QI "BC") (V16HI "BC") (V8SI "BC") (V4DI "BC") (V2TI "BC")
+   (V16QI "BC") (V8HI "BC") (V4SI "BC") (V2DI "BC") (V1TI "BC")
+   (V16SF "BF") (V8DF "BF")
+   (V8SF "BF") (V4DF "BF")
+   (V4SF "BF") (V2DF "BF")])
+
 ;; Mapping of vector modes to corresponding mask size
 (define_mode_attr avx512fmaskmode
   [(V64QI "DI") (V32QI "SI") (V16QI "HI")
@@ -1056,7 +1065,7 @@  (define_insn "mov<mode>_internal"
   [(set (match_operand:VMOVE 0 "nonimmediate_operand"
 	 "=v,v ,v ,m")
 	(match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand"
-	 " C,BC,vm,v"))]
+	 " C,<sseconstm1>,vm,v"))]
   "TARGET_SSE
    && (register_operand (operands[0], <MODE>mode)
        || register_operand (operands[1], <MODE>mode))"
diff --git a/gcc/testsuite/gcc.target/i386/avx2-gather-2.c b/gcc/testsuite/gcc.target/i386/avx2-gather-2.c
index 1a704afd834..ad5ef73107c 100644
--- a/gcc/testsuite/gcc.target/i386/avx2-gather-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx2-gather-2.c
@@ -1,6 +1,7 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O3 -mavx2 -fdump-tree-vect-details -mtune=skylake" } */
+/* { dg-options "-O3 -fdump-tree-vect-details -march=skylake" } */
 
 #include "avx2-gather-1.c"
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 16 "vect" } } */
+/* { dg-final { scan-assembler "vpcmpeqd" } } */
-- 
2.31.1