From cddd242a3465301a6488a30368cb0d5fef11a550 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Sat, 15 Sep 2018 20:54:42 -0700
Subject: [PATCH] x86: Add pmovzx/pmovsx patterns with SI/DI operands
Add pmovzx/pmovsx patterns with SI and DI operands for pmovzx/pmovsx
instructions which only read the low 4 or 8 bytes from the source.
The new patterns allow 8 byte register operands so that they work with
__m64 arguments passed in XMM registers. If we don't need to handle
__m64 arguments, we can only allow memory operands.
gcc/
PR target/87317
* config/i386/sse.md (*sse4_1_<code>v8qiv8hi2<mask_name>): New
pattern.
(*avx2_<code>v8qiv8si2<mask_name>): Likewise.
(*sse4_1_<code>v4qiv4si2<mask_name>): Likewise.
(*sse4_1_<code>v4hiv4si2<mask_name>): Likewise.
(*avx2_<code>v4hiv4di2<mask_name>): Likewise.
(*sse4_1_<code>v2hiv2di2<mask_name>): Likewise.
(*sse4_1_<code>v2siv2di2<mask_name>): Likewise.
gcc/testsuite/
PR target/87317
* gcc.target/i386/pr87317-1.c: New file.
* gcc.target/i386/pr87317-2.c: Likewise.
* gcc.target/i386/pr87317-3.c: Likewise.
* gcc.target/i386/pr87317-4.c: Likewise.
* gcc.target/i386/pr87317-5.c: Likewise.
* gcc.target/i386/pr87317-6.c: Likewise.
* gcc.target/i386/pr87317-7.c: Likewise.
* gcc.target/i386/pr87317-8.c: Likewise.
* gcc.target/i386/pr87317-9.c: Likewise.
* gcc.target/i386/pr87317-10.c: Likewise.
* gcc.target/i386/pr87317-11.c: Likewise.
* gcc.target/i386/pr87317-12.c: Likewise.
* gcc.target/i386/pr87317-13.c: Likewise.
* gcc.target/i386/pr87317-14.c: Likewise.
* gcc.target/i386/pr87317-15.c: Likewise.
xx
---
gcc/config/i386/sse.md | 134 +++++++++++++++++++++
gcc/testsuite/gcc.target/i386/pr87317-1.c | 13 ++
gcc/testsuite/gcc.target/i386/pr87317-10.c | 13 ++
gcc/testsuite/gcc.target/i386/pr87317-11.c | 13 ++
gcc/testsuite/gcc.target/i386/pr87317-12.c | 13 ++
gcc/testsuite/gcc.target/i386/pr87317-13.c | 13 ++
gcc/testsuite/gcc.target/i386/pr87317-14.c | 13 ++
gcc/testsuite/gcc.target/i386/pr87317-15.c | 22 ++++
gcc/testsuite/gcc.target/i386/pr87317-2.c | 13 ++
gcc/testsuite/gcc.target/i386/pr87317-3.c | 13 ++
gcc/testsuite/gcc.target/i386/pr87317-4.c | 13 ++
gcc/testsuite/gcc.target/i386/pr87317-5.c | 13 ++
gcc/testsuite/gcc.target/i386/pr87317-6.c | 13 ++
gcc/testsuite/gcc.target/i386/pr87317-7.c | 13 ++
gcc/testsuite/gcc.target/i386/pr87317-8.c | 13 ++
gcc/testsuite/gcc.target/i386/pr87317-9.c | 13 ++
16 files changed, 338 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/i386/pr87317-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr87317-10.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr87317-11.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr87317-12.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr87317-13.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr87317-14.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr87317-15.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr87317-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr87317-3.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr87317-4.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr87317-5.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr87317-6.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr87317-7.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr87317-8.c
create mode 100644 gcc/testsuite/gcc.target/i386/pr87317-9.c
@@ -15891,6 +15891,26 @@
(set_attr "prefix" "orig,orig,maybe_evex")
(set_attr "mode" "TI")])
+(define_insn "*sse4_1_<code>v8qiv8hi2<mask_name>"
+ [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
+ (any_extend:V8HI
+ (vec_select:V8QI
+ (subreg:V16QI
+ (vec_concat:V2DI
+ (match_operand:DI 1 "nonimmediate_operand" "Yrm,*xm,vm")
+ (const_int 0)) 0)
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))))]
+ "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
+ "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
+ [(set_attr "isa" "noavx,noavx,avx")
+ (set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "orig,orig,maybe_evex")
+ (set_attr "mode" "TI")])
+
(define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
[(set (match_operand:V16SI 0 "register_operand" "=v")
(any_extend:V16SI
@@ -15917,6 +15937,25 @@
(set_attr "prefix" "maybe_evex")
(set_attr "mode" "OI")])
+(define_insn "*avx2_<code>v8qiv8si2<mask_name>"
+ [(set (match_operand:V8SI 0 "register_operand" "=v")
+ (any_extend:V8SI
+ (vec_select:V8QI
+ (subreg:V16QI
+ (vec_concat:V2DI
+ (match_operand:DI 1 "nonimmediate_operand" "vm")
+ (const_int 0)) 0)
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))))]
+ "TARGET_AVX2 && <mask_avx512vl_condition>"
+ "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_evex")
+ (set_attr "mode" "OI")])
+
(define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
[(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
(any_extend:V4SI
@@ -15932,6 +15971,28 @@
(set_attr "prefix" "orig,orig,maybe_evex")
(set_attr "mode" "TI")])
+(define_insn "*sse4_1_<code>v4qiv4si2<mask_name>"
+ [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
+ (any_extend:V4SI
+ (vec_select:V4QI
+ (subreg:V16QI
+ (vec_merge:V4SI
+ (vec_duplicate:V4SI
+ (match_operand:SI 1 "memory_operand" "m,*m,m"))
+ (const_vector:V4SI
+ [(const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)])
+ (const_int 1)) 0)
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))))]
+ "TARGET_SSE4_1 && <mask_avx512vl_condition>"
+ "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
+ [(set_attr "isa" "noavx,noavx,avx")
+ (set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "orig,orig,maybe_evex")
+ (set_attr "mode" "TI")])
+
(define_insn "avx512f_<code>v16hiv16si2<mask_name>"
[(set (match_operand:V16SI 0 "register_operand" "=v")
(any_extend:V16SI
@@ -15968,6 +16029,24 @@
(set_attr "prefix" "orig,orig,maybe_evex")
(set_attr "mode" "TI")])
+(define_insn "*sse4_1_<code>v4hiv4si2<mask_name>"
+ [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
+ (any_extend:V4SI
+ (vec_select:V4HI
+ (subreg:V8HI
+ (vec_concat:V2DI
+ (match_operand:DI 1 "nonimmediate_operand" "Yrm,*xm,vm")
+ (const_int 0)) 0)
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))))]
+ "TARGET_SSE4_1 && <mask_avx512vl_condition>"
+ "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
+ [(set_attr "isa" "noavx,noavx,avx")
+ (set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "orig,orig,maybe_evex")
+ (set_attr "mode" "TI")])
+
(define_insn "avx512f_<code>v8qiv8di2<mask_name>"
[(set (match_operand:V8DI 0 "register_operand" "=v")
(any_extend:V8DI
@@ -16035,6 +16114,23 @@
(set_attr "prefix" "maybe_evex")
(set_attr "mode" "OI")])
+(define_insn "*avx2_<code>v4hiv4di2<mask_name>"
+ [(set (match_operand:V4DI 0 "register_operand" "=v")
+ (any_extend:V4DI
+ (vec_select:V4HI
+ (subreg:V8HI
+ (vec_concat:V2DI
+ (match_operand:DI 1 "nonimmediate_operand" "vm")
+ (const_int 0)) 0)
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))))]
+ "TARGET_AVX2 && <mask_avx512vl_condition>"
+ "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_evex")
+ (set_attr "mode" "OI")])
+
(define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
[(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
(any_extend:V2DI
@@ -16049,6 +16145,27 @@
(set_attr "prefix" "orig,orig,maybe_evex")
(set_attr "mode" "TI")])
+(define_insn "*sse4_1_<code>v2hiv2di2<mask_name>"
+ [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
+ (any_extend:V2DI
+ (vec_select:V2HI
+ (subreg:V8HI
+ (vec_merge:V4SI
+ (vec_duplicate:V4SI
+ (match_operand:SI 1 "memory_operand" "m,*m,m"))
+ (const_vector:V4SI
+ [(const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)])
+ (const_int 1)) 0)
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_SSE4_1 && <mask_avx512vl_condition>"
+ "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
+ [(set_attr "isa" "noavx,noavx,avx")
+ (set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "orig,orig,maybe_evex")
+ (set_attr "mode" "TI")])
+
(define_insn "avx512f_<code>v8siv8di2<mask_name>"
[(set (match_operand:V8DI 0 "register_operand" "=v")
(any_extend:V8DI
@@ -16084,6 +16201,23 @@
(set_attr "prefix" "orig,orig,maybe_evex")
(set_attr "mode" "TI")])
+(define_insn "*sse4_1_<code>v2siv2di2<mask_name>"
+ [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
+ (any_extend:V2DI
+ (vec_select:V2SI
+ (subreg:V4SI
+ (vec_concat:V2DI
+ (match_operand:DI 1 "nonimmediate_operand" "Yrm,*xm,vm")
+ (const_int 0)) 0)
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_SSE4_1 && <mask_avx512vl_condition>"
+ "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
+ [(set_attr "isa" "noavx,noavx,avx")
+ (set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "orig,orig,maybe_evex")
+ (set_attr "mode" "TI")])
+
;; ptestps/ptestpd are very similar to comiss and ucomiss when
;; setting FLAGS_REG. But it is not a really compare instruction.
(define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=haswell" } */
+/* { dg-final { scan-assembler-not "vmov(d|q)" } } */
+
+#include <immintrin.h>
+
+void
+f (void *dst, void *ptr)
+{
+ __m128i data = _mm_loadl_epi64((__m128i *)ptr);
+ data = _mm_cvtepu8_epi16(data);
+ _mm_storeu_si128((__m128i*)dst, data);
+}
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=haswell" } */
+/* { dg-final { scan-assembler-not "vmov(d|q)" } } */
+
+#include <immintrin.h>
+
+void
+f (void *dst, void *ptr)
+{
+ __m128i data = _mm_cvtsi32_si128(*(int*)ptr);
+ data = _mm_cvtepu8_epi32(data);
+ _mm_storeu_si128((__m128i*)dst, data);
+}
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=haswell" } */
+/* { dg-final { scan-assembler-not "vmovq" } } */
+
+#include <immintrin.h>
+
+void
+f (void *dst, void *ptr)
+{
+ __m128i data = _mm_cvtsi64_si128(*(long long int*)ptr);
+ __m256i x = _mm256_cvtepu16_epi64(data);
+ _mm256_storeu_si256((__m256i*)dst, x);
+}
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=haswell" } */
+/* { dg-final { scan-assembler-not "vmovq" } } */
+
+#include <immintrin.h>
+
+void
+f (void *dst, void *ptr)
+{
+ __m128i data = _mm_cvtsi64_si128(*(long long int*)ptr);
+ __m256i x = _mm256_cvtepu8_epi32(data);
+ _mm256_storeu_si256((__m256i*)dst, x);
+}
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=haswell" } */
+/* { dg-final { scan-assembler-not "vmovq" } } */
+
+#include <immintrin.h>
+
+void
+f (void *dst, __m64 x)
+{
+ __m128i y = _mm_movpi64_epi64(x);
+ __m256i z = _mm256_cvtepu8_epi32 (y);
+ _mm256_storeu_si256((__m256i*)dst, z);
+}
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=haswell" } */
+/* { dg-final { scan-assembler-not "vmovq" } } */
+
+#include <immintrin.h>
+
+void
+f (void *dst, __m64 x)
+{
+ __m128i y = _mm_movpi64_epi64(x);
+ __m256i z = _mm256_cvtepu16_epi64 (y);
+ _mm256_storeu_si256((__m256i*)dst, z);
+}
new file mode 100644
@@ -0,0 +1,22 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O3 -march=haswell" } */
+/* { dg-final { scan-assembler-times "vpmovsxwq" 1 } } */
+
+#include <immintrin.h>
+
+#define MAX 4
+
+long long int dst[MAX];
+short src[MAX];
+
+void
+foo (void)
+{
+ int i;
+ for (i = 0; i < MAX; i += 4)
+ {
+ __m128i data = _mm_cvtsi64_si128(*(long long int*)(src + i));
+ __m256i x = _mm256_cvtepi16_epi64(data);
+ _mm256_storeu_si256((__m256i*)(dst + i), x);
+ }
+}
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=haswell" } */
+/* { dg-final { scan-assembler-not "vmov(d|q)" } } */
+
+#include <immintrin.h>
+
+void
+f (void *dst, void *ptr)
+{
+ __m128i data = _mm_loadl_epi64((__m128i *)ptr);
+ data = _mm_cvtepi16_epi32(data);
+ _mm_storeu_si128((__m128i*)dst, data);
+}
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=haswell" } */
+/* { dg-final { scan-assembler-not "vmov(d|q)" } } */
+
+#include <immintrin.h>
+
+void
+f (void *dst, void *ptr)
+{
+ __m128i data = _mm_loadl_epi64((__m128i *)ptr);
+ data = _mm_cvtepi32_epi64(data);
+ _mm_storeu_si128((__m128i*)dst, data);
+}
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=haswell" } */
+/* { dg-final { scan-assembler-not "vmov(d|q)" } } */
+
+#include <immintrin.h>
+
+void
+f (void *dst, __m64 x)
+{
+ __m128i y = _mm_movpi64_epi64(x);
+ __m128i z = _mm_cvtepu8_epi16(y);
+ _mm_storeu_si128((__m128i*)dst, z);
+}
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=haswell" } */
+/* { dg-final { scan-assembler-not "vmov(d|q)" } } */
+
+#include <immintrin.h>
+
+void
+f (void *dst, __m64 x)
+{
+ __m128i y = _mm_movpi64_epi64(x);
+ __m128i z = _mm_cvtepi16_epi32(y);
+ _mm_storeu_si128((__m128i*)dst, z);
+}
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=haswell" } */
+/* { dg-final { scan-assembler-not "vmov(d|q)" } } */
+
+#include <immintrin.h>
+
+void
+f (void *dst, __m64 x)
+{
+ __m128i y = _mm_movpi64_epi64(x);
+ __m128i z = _mm_cvtepi32_epi64 (y);
+ _mm_storeu_si128((__m128i*)dst, z);
+}
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=haswell" } */
+/* { dg-final { scan-assembler-not "vmov(d|q)" } } */
+
+#include <immintrin.h>
+
+void
+f (void *dst, void *ptr)
+{
+ __m128i data = _mm_cvtsi32_si128(*(int*)ptr);
+ data = _mm_cvtepu8_epi32(data);
+ _mm_storeu_si128((__m128i*)dst, data);
+}
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=haswell" } */
+/* { dg-final { scan-assembler-not "vmov(d|q)" } } */
+
+#include <immintrin.h>
+
+void
+f (void *dst, void *ptr)
+{
+ __m128i data = _mm_cvtsi32_si128(*(int*)ptr);
+ data = _mm_cvtepu16_epi64(data);
+ _mm_storeu_si128((__m128i*)dst, data);
+}
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=haswell" } */
+/* { dg-final { scan-assembler-not "vmovq" } } */
+
+#include <immintrin.h>
+
+int
+f (void *ptr)
+{
+ __m128i data = _mm_loadl_epi64((__m128i *)ptr);
+ data = _mm_cvtepu8_epi16(data);
+ return _mm_cvtsi128_si32(data);
+}
--
2.17.2