diff mbox series

Improve QImode extraction without SSE4.1 (PR tree-optimization/91201)

Message ID 20190802075429.GI2726@tucnak
State New
Headers show
Series Improve QImode extraction without SSE4.1 (PR tree-optimization/91201) | expand

Commit Message

Jakub Jelinek Aug. 2, 2019, 7:54 a.m. UTC
Hi!

As mentioned in the PR, with SSE4.1 we use pextrb for vec_extractv16qiqi,
but without SSE4.1 we store the vector into memory and load the single
byte from there; at least for element 0 we can just use movd instead.

The following patch does that, but skips it for the case when we know we'll
go through memory anyway (i.e. when not optimizing for size and the tuning
disables inter-unit moves from vector registers).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

For the non -Os case, perhaps we could also handle other (elt & 3) == 0 cases,
which V4SImode ix86_expand_vector_extract also handles through shuffles
(pshufd or unpck*) before the movd; I'm just not sure if it is a win.

2019-08-01  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/91201
	* config/i386/i386-expand.c (ix86_expand_vector_extract): For elt == 0
	V16QImode extraction without sse4.1 try to use V4SImode lowpart
	extraction.

	* gcc.target/i386/sse2-pr91201-3.c: New test.
	* gcc.target/i386/sse2-pr91201-4.c: New test.
	* gcc.target/i386/sse2-pr91201-5.c: New test.
	* gcc.target/i386/sse2-pr91201-6.c: New test.


	Jakub

Comments

Uros Bizjak Aug. 2, 2019, 8:21 a.m. UTC | #1
On Fri, Aug 2, 2019 at 9:54 AM Jakub Jelinek <jakub@redhat.com> wrote:
>
> Hi!
>
> As mentioned in the PR, for SSE4.1 we use pextrb for vec_extractv16qiqi,
> but at least for element 0 we store the vector into memory and load the
> single byte from there and we can just use movd instead.
>
> The following patch does that, just skips it for the case when we know we'll
> go through memory anyway.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> For the non-Os case, perhaps we could also handle other (elt & 3) == 0 cases
> which V4SImode ix86_expand_vector_extract also handles through shuffles
> (pshufd or unpck*) before the movd, just not sure if it is a win.
>
> 2019-08-01  Jakub Jelinek  <jakub@redhat.com>
>
>         PR tree-optimization/91201
>         * config/i386/i386-expand.c (ix86_expand_vector_extract): For elt == 0
>         V16QImode extraction without sse4.1 try to use V4SImode lowpart
>         extraction.
>
>         * gcc.target/i386/sse2-pr91201-3.c: New test.
>         * gcc.target/i386/sse2-pr91201-4.c: New test.
>         * gcc.target/i386/sse2-pr91201-5.c: New test.
>         * gcc.target/i386/sse2-pr91201-6.c: New test.

OK.

Thanks,
Uros.

> --- gcc/config/i386/i386-expand.c.jj    2019-07-30 09:12:34.578959902 +0200
> +++ gcc/config/i386/i386-expand.c       2019-08-01 10:41:22.545887899 +0200
> @@ -14706,6 +14706,17 @@ ix86_expand_vector_extract (bool mmx_ok,
>
>      case E_V16QImode:
>        use_vec_extr = TARGET_SSE4_1;
> +      if (!use_vec_extr
> +         && TARGET_SSE2
> +         && elt == 0
> +         && (optimize_insn_for_size_p () || TARGET_INTER_UNIT_MOVES_FROM_VEC))
> +       {
> +         tmp = gen_reg_rtx (SImode);
> +         ix86_expand_vector_extract (false, tmp, gen_lowpart (V4SImode, vec),
> +                                     0);
> +         emit_insn (gen_rtx_SET (target, gen_lowpart (QImode, tmp)));
> +         return;
> +       }
>        break;
>
>      case E_V8SFmode:
> --- gcc/testsuite/gcc.target/i386/sse2-pr91201-3.c.jj   2019-08-01 10:59:07.508887273 +0200
> +++ gcc/testsuite/gcc.target/i386/sse2-pr91201-3.c      2019-08-01 11:07:42.925165205 +0200
> @@ -0,0 +1,13 @@
> +/* PR tree-optimization/91201 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msse2 -mno-sse3 -mtune=generic -masm=att" } */
> +/* { dg-final { scan-assembler "\tmovd\t%xmm0, %eax" } } */
> +/* { dg-final { scan-assembler-not "\\(%" } } */
> +
> +typedef unsigned char V __attribute__((vector_size (16)));
> +
> +unsigned char
> +foo (V x)
> +{
> +  return x[0];
> +}
> --- gcc/testsuite/gcc.target/i386/sse2-pr91201-4.c.jj   2019-08-01 10:59:33.539497274 +0200
> +++ gcc/testsuite/gcc.target/i386/sse2-pr91201-4.c      2019-08-01 11:07:35.304279381 +0200
> @@ -0,0 +1,13 @@
> +/* PR tree-optimization/91201 */
> +/* { dg-do compile } */
> +/* { dg-options "-Os -msse2 -mno-sse3 -mtune=generic -masm=att" } */
> +/* { dg-final { scan-assembler "\tmovd\t%xmm0, %eax" } } */
> +/* { dg-final { scan-assembler-not "\\(%" } } */
> +
> +typedef unsigned char V __attribute__((vector_size (16)));
> +
> +unsigned char
> +foo (V x)
> +{
> +  return x[0];
> +}
> --- gcc/testsuite/gcc.target/i386/sse2-pr91201-5.c.jj   2019-08-01 11:01:07.994082143 +0200
> +++ gcc/testsuite/gcc.target/i386/sse2-pr91201-5.c      2019-08-01 11:07:19.230520198 +0200
> @@ -0,0 +1,13 @@
> +/* PR tree-optimization/91201 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msse2 -mno-sse3 -mtune=k8 -masm=att" } */
> +/* { dg-final { scan-assembler-not "\tmovd\t%xmm0, %eax" } } */
> +/* { dg-final { scan-assembler "\tmov(zbl|b)\t\[^\n\r]*\\(%" } } */
> +
> +typedef unsigned char V __attribute__((vector_size (16)));
> +
> +unsigned char
> +foo (V x)
> +{
> +  return x[0];
> +}
> --- gcc/testsuite/gcc.target/i386/sse2-pr91201-6.c.jj   2019-08-01 11:03:34.896881216 +0200
> +++ gcc/testsuite/gcc.target/i386/sse2-pr91201-6.c      2019-08-01 11:07:27.564395340 +0200
> @@ -0,0 +1,13 @@
> +/* PR tree-optimization/91201 */
> +/* { dg-do compile } */
> +/* { dg-options "-Os -msse2 -mno-sse3 -mtune=k8 -masm=att" } */
> +/* { dg-final { scan-assembler "\tmovd\t%xmm0, %eax" } } */
> +/* { dg-final { scan-assembler-not "\\(%" } } */
> +
> +typedef unsigned char V __attribute__((vector_size (16)));
> +
> +unsigned char
> +foo (V x)
> +{
> +  return x[0];
> +}
>
>         Jakub
diff mbox series

Patch

--- gcc/config/i386/i386-expand.c.jj	2019-07-30 09:12:34.578959902 +0200
+++ gcc/config/i386/i386-expand.c	2019-08-01 10:41:22.545887899 +0200
@@ -14706,6 +14706,17 @@  ix86_expand_vector_extract (bool mmx_ok,
 
     case E_V16QImode:
       use_vec_extr = TARGET_SSE4_1;
+      if (!use_vec_extr
+	  && TARGET_SSE2
+	  && elt == 0
+	  && (optimize_insn_for_size_p () || TARGET_INTER_UNIT_MOVES_FROM_VEC))
+	{
+	  tmp = gen_reg_rtx (SImode);
+	  ix86_expand_vector_extract (false, tmp, gen_lowpart (V4SImode, vec),
+				      0);
+	  emit_insn (gen_rtx_SET (target, gen_lowpart (QImode, tmp)));
+	  return;
+	}
       break;
 
     case E_V8SFmode:
--- gcc/testsuite/gcc.target/i386/sse2-pr91201-3.c.jj	2019-08-01 10:59:07.508887273 +0200
+++ gcc/testsuite/gcc.target/i386/sse2-pr91201-3.c	2019-08-01 11:07:42.925165205 +0200
@@ -0,0 +1,13 @@ 
+/* PR tree-optimization/91201 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2 -mno-sse3 -mtune=generic -masm=att" } */
+/* { dg-final { scan-assembler "\tmovd\t%xmm0, %eax" } } */
+/* { dg-final { scan-assembler-not "\\(%" } } */
+
+typedef unsigned char V __attribute__((vector_size (16)));
+
+unsigned char
+foo (V x)
+{
+  return x[0];
+}
--- gcc/testsuite/gcc.target/i386/sse2-pr91201-4.c.jj	2019-08-01 10:59:33.539497274 +0200
+++ gcc/testsuite/gcc.target/i386/sse2-pr91201-4.c	2019-08-01 11:07:35.304279381 +0200
@@ -0,0 +1,13 @@ 
+/* PR tree-optimization/91201 */
+/* { dg-do compile } */
+/* { dg-options "-Os -msse2 -mno-sse3 -mtune=generic -masm=att" } */
+/* { dg-final { scan-assembler "\tmovd\t%xmm0, %eax" } } */
+/* { dg-final { scan-assembler-not "\\(%" } } */
+
+typedef unsigned char V __attribute__((vector_size (16)));
+
+unsigned char
+foo (V x)
+{
+  return x[0];
+}
--- gcc/testsuite/gcc.target/i386/sse2-pr91201-5.c.jj	2019-08-01 11:01:07.994082143 +0200
+++ gcc/testsuite/gcc.target/i386/sse2-pr91201-5.c	2019-08-01 11:07:19.230520198 +0200
@@ -0,0 +1,13 @@ 
+/* PR tree-optimization/91201 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2 -mno-sse3 -mtune=k8 -masm=att" } */
+/* { dg-final { scan-assembler-not "\tmovd\t%xmm0, %eax" } } */
+/* { dg-final { scan-assembler "\tmov(zbl|b)\t\[^\n\r]*\\(%" } } */
+
+typedef unsigned char V __attribute__((vector_size (16)));
+
+unsigned char
+foo (V x)
+{
+  return x[0];
+}
--- gcc/testsuite/gcc.target/i386/sse2-pr91201-6.c.jj	2019-08-01 11:03:34.896881216 +0200
+++ gcc/testsuite/gcc.target/i386/sse2-pr91201-6.c	2019-08-01 11:07:27.564395340 +0200
@@ -0,0 +1,13 @@ 
+/* PR tree-optimization/91201 */
+/* { dg-do compile } */
+/* { dg-options "-Os -msse2 -mno-sse3 -mtune=k8 -masm=att" } */
+/* { dg-final { scan-assembler "\tmovd\t%xmm0, %eax" } } */
+/* { dg-final { scan-assembler-not "\\(%" } } */
+
+typedef unsigned char V __attribute__((vector_size (16)));
+
+unsigned char
+foo (V x)
+{
+  return x[0];
+}