diff mbox series

[2/3] vect: Support v4hi -> v4qi.

Message ID 20240523063742.2333446-3-lin1.hu@intel.com
State New
Headers show
Series Optimize __builtin_convertvector for x86-64-v4 and | expand

Commit Message

Hu, Lin1 May 23, 2024, 6:37 a.m. UTC
gcc/ChangeLog:

    PR target/107432
    * config/i386/mmx.md (truncv4hiv4qi2): New define_insn.

gcc/testsuite/ChangeLog:

    PR target/107432
    * gcc.target/i386/pr107432-1.c: Add test.
    * gcc.target/i386/pr107432-6.c: Ditto.
---
 gcc/config/i386/mmx.md                     | 10 ++++++++++
 gcc/testsuite/gcc.target/i386/pr107432-1.c | 12 +++++++++++-
 gcc/testsuite/gcc.target/i386/pr107432-6.c | 19 ++++++++++++++++---
 3 files changed, 37 insertions(+), 4 deletions(-)

Comments

Hongtao Liu May 27, 2024, 2:11 a.m. UTC | #1
On Thu, May 23, 2024 at 2:38 PM Hu, Lin1 <lin1.hu@intel.com> wrote:
>
> gcc/ChangeLog:
>
>     PR target/107432
>     * config/i386/mmx.md (truncv4hiv4qi2): New define_insn.
>
> gcc/testsuite/ChangeLog:
>
>     PR target/107432
>     * gcc.target/i386/pr107432-6.c: Add test.
> ---
>  gcc/config/i386/mmx.md                     | 10 ++++++++++
>  gcc/testsuite/gcc.target/i386/pr107432-1.c | 12 +++++++++++-
>  gcc/testsuite/gcc.target/i386/pr107432-6.c | 19 ++++++++++++++++---
>  3 files changed, 37 insertions(+), 4 deletions(-)
>
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index 5f342497885..30f0d88af9f 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -4883,6 +4883,16 @@ (define_insn "truncv2hiv2qi2"
>     (set_attr "prefix" "evex")
>     (set_attr "mode" "TI")])
>
> +(define_insn "truncv4hiv4qi2"
> +  [(set (match_operand:V4QI 0 "register_operand" "=v")
> +       (truncate:V4QI
> +         (match_operand:V4HI 1 "register_operand" "v")))]
> +  "TARGET_AVX512VL && TARGET_AVX512BW"
Please also add TARGET_MMX_WITH_SSE to the insn condition, since V4HI is a 64-bit vector mode.
Otherwise LGTM.
> +  "vpmovwb\t{%1, %0|%0, %1}"
> +  [(set_attr "type" "ssemov")
> +   (set_attr "prefix" "evex")
> +   (set_attr "mode" "TI")])
> +
>  (define_mode_iterator V2QI_V2HI [V2QI V2HI])
>  (define_insn "truncv2si<mode>2"
>    [(set (match_operand:V2QI_V2HI 0 "register_operand" "=v")
> diff --git a/gcc/testsuite/gcc.target/i386/pr107432-1.c b/gcc/testsuite/gcc.target/i386/pr107432-1.c
> index a4f37447eb4..e0c7ffc8e5b 100644
> --- a/gcc/testsuite/gcc.target/i386/pr107432-1.c
> +++ b/gcc/testsuite/gcc.target/i386/pr107432-1.c
> @@ -7,7 +7,7 @@
>  /* { dg-final { scan-assembler-times "vpmovdw" 8 { target { ! ia32 } } } } */
>  /* { dg-final { scan-assembler-times "vpmovdb" 6 { target { ia32 } } } } */
>  /* { dg-final { scan-assembler-times "vpmovdb" 8 { target { ! ia32 } } } } */
> -/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */
> +/* { dg-final { scan-assembler-times "vpmovwb" 10 } } */
>
>  #include <x86intrin.h>
>
> @@ -113,6 +113,11 @@ __v2qi     mm32_cvtepi16_epi8_builtin_convertvector(__v2hi a)
>    return __builtin_convertvector((__v2hi)a, __v2qi);
>  }
>
> +__v4qi mm64_cvtepi16_epi8_builtin_convertvector(__v4hi a)
> +{
> +  return __builtin_convertvector((__v4hi)a, __v4qi);
> +}
> +
>  __v8qi mm_cvtepi16_epi8_builtin_convertvector(__m128i a)
>  {
>    return __builtin_convertvector((__v8hi)a, __v8qi);
> @@ -218,6 +223,11 @@ __v2qu     mm32_cvtepu16_epu8_builtin_convertvector(__v2hu a)
>    return __builtin_convertvector((__v2hu)a, __v2qu);
>  }
>
> +__v4qu mm64_cvtepu16_epu8_builtin_convertvector(__v4hu a)
> +{
> +  return __builtin_convertvector((__v4hu)a, __v4qu);
> +}
> +
>  __v8qu mm_cvtepu16_epu8_builtin_convertvector(__m128i a)
>  {
>    return __builtin_convertvector((__v8hu)a, __v8qu);
> diff --git a/gcc/testsuite/gcc.target/i386/pr107432-6.c b/gcc/testsuite/gcc.target/i386/pr107432-6.c
> index 4a68a10b089..7d3717d45bc 100644
> --- a/gcc/testsuite/gcc.target/i386/pr107432-6.c
> +++ b/gcc/testsuite/gcc.target/i386/pr107432-6.c
> @@ -8,11 +8,14 @@
>  /* { dg-final { scan-assembler-times "vcvttps2dq" 4 { target { ! ia32 } } } } */
>  /* { dg-final { scan-assembler-times "vcvttps2udq" 3 { target { ia32 } } } } */
>  /* { dg-final { scan-assembler-times "vcvttps2udq" 4 { target { ! ia32 } } } } */
> -/* { dg-final { scan-assembler-times "vcvttph2w" 4 } } */
> -/* { dg-final { scan-assembler-times "vcvttph2uw" 4 } } */
> +/* { dg-final { scan-assembler-times "vcvttph2w" 4 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvttph2w" 5 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvttph2uw" 4 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vcvttph2uw" 5 { target { ! ia32 } } } } */
>  /* { dg-final { scan-assembler-times "vpmovdb" 10 { target { ia32 } } } } */
>  /* { dg-final { scan-assembler-times "vpmovdb" 14 { target { ! ia32 } } } } */
> -/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */
> +/* { dg-final { scan-assembler-times "vpmovwb" 8 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpmovwb" 10 { target { ! ia32 } } } } */
>
>  #include <x86intrin.h>
>
> @@ -103,6 +106,11 @@ __v2qi     mm32_cvtph_epi8_builtin_convertvector(__v2hf a)
>    return __builtin_convertvector((__v2hf)a, __v2qi);
>  }
>
> +__v4qi mm64_cvtph_epi8_builtin_convertvector(__v4hf a)
> +{
> +  return __builtin_convertvector((__v4hf)a, __v4qi);
> +}
> +
>  __v8qi mm128_cvtph_epi8_builtin_convertvector(__v8hf a)
>  {
>    return __builtin_convertvector((__v8hf)a, __v8qi);
> @@ -123,6 +131,11 @@ __v2qu     mm32_cvtph_epu8_builtin_convertvector(__v2hf a)
>    return __builtin_convertvector((__v2hf)a, __v2qu);
>  }
>
> +__v4qu mm64_cvtph_epu8_builtin_convertvector(__v4hf a)
> +{
> +  return __builtin_convertvector((__v4hf)a, __v4qu);
> +}
> +
>  __v8qu mm128_cvtph_epu8_builtin_convertvector(__v8hf a)
>  {
>    return __builtin_convertvector((__v8hf)a, __v8qu);
> --
> 2.31.1
>
diff mbox series

Patch

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 5f342497885..30f0d88af9f 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -4883,6 +4883,16 @@  (define_insn "truncv2hiv2qi2"
    (set_attr "prefix" "evex")
    (set_attr "mode" "TI")])
 
+(define_insn "truncv4hiv4qi2"
+  [(set (match_operand:V4QI 0 "register_operand" "=v")
+	(truncate:V4QI
+	  (match_operand:V4HI 1 "register_operand" "v")))]
+  "TARGET_AVX512VL && TARGET_AVX512BW"
+  "vpmovwb\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "TI")])
+
 (define_mode_iterator V2QI_V2HI [V2QI V2HI])
 (define_insn "truncv2si<mode>2"
   [(set (match_operand:V2QI_V2HI 0 "register_operand" "=v")
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-1.c b/gcc/testsuite/gcc.target/i386/pr107432-1.c
index a4f37447eb4..e0c7ffc8e5b 100644
--- a/gcc/testsuite/gcc.target/i386/pr107432-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr107432-1.c
@@ -7,7 +7,7 @@ 
 /* { dg-final { scan-assembler-times "vpmovdw" 8 { target { ! ia32 } } } } */
 /* { dg-final { scan-assembler-times "vpmovdb" 6 { target { ia32 } } } } */
 /* { dg-final { scan-assembler-times "vpmovdb" 8 { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */
+/* { dg-final { scan-assembler-times "vpmovwb" 10 } } */
 
 #include <x86intrin.h>
 
@@ -113,6 +113,11 @@  __v2qi	mm32_cvtepi16_epi8_builtin_convertvector(__v2hi a)
   return __builtin_convertvector((__v2hi)a, __v2qi);
 }
 
+__v4qi	mm64_cvtepi16_epi8_builtin_convertvector(__v4hi a)
+{
+  return __builtin_convertvector((__v4hi)a, __v4qi);
+}
+
 __v8qi	mm_cvtepi16_epi8_builtin_convertvector(__m128i a)
 {
   return __builtin_convertvector((__v8hi)a, __v8qi);
@@ -218,6 +223,11 @@  __v2qu	mm32_cvtepu16_epu8_builtin_convertvector(__v2hu a)
   return __builtin_convertvector((__v2hu)a, __v2qu);
 }
 
+__v4qu	mm64_cvtepu16_epu8_builtin_convertvector(__v4hu a)
+{
+  return __builtin_convertvector((__v4hu)a, __v4qu);
+}
+
 __v8qu	mm_cvtepu16_epu8_builtin_convertvector(__m128i a)
 {
   return __builtin_convertvector((__v8hu)a, __v8qu);
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-6.c b/gcc/testsuite/gcc.target/i386/pr107432-6.c
index 4a68a10b089..7d3717d45bc 100644
--- a/gcc/testsuite/gcc.target/i386/pr107432-6.c
+++ b/gcc/testsuite/gcc.target/i386/pr107432-6.c
@@ -8,11 +8,14 @@ 
 /* { dg-final { scan-assembler-times "vcvttps2dq" 4 { target { ! ia32 } } } } */
 /* { dg-final { scan-assembler-times "vcvttps2udq" 3 { target { ia32 } } } } */
 /* { dg-final { scan-assembler-times "vcvttps2udq" 4 { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler-times "vcvttph2w" 4 } } */
-/* { dg-final { scan-assembler-times "vcvttph2uw" 4 } } */
+/* { dg-final { scan-assembler-times "vcvttph2w" 4 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttph2w" 5 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttph2uw" 4 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttph2uw" 5 { target { ! ia32 } } } } */
 /* { dg-final { scan-assembler-times "vpmovdb" 10 { target { ia32 } } } } */
 /* { dg-final { scan-assembler-times "vpmovdb" 14 { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */
+/* { dg-final { scan-assembler-times "vpmovwb" 8 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovwb" 10 { target { ! ia32 } } } } */
 
 #include <x86intrin.h>
 
@@ -103,6 +106,11 @@  __v2qi	mm32_cvtph_epi8_builtin_convertvector(__v2hf a)
   return __builtin_convertvector((__v2hf)a, __v2qi);
 }
 
+__v4qi	mm64_cvtph_epi8_builtin_convertvector(__v4hf a)
+{
+  return __builtin_convertvector((__v4hf)a, __v4qi);
+}
+
 __v8qi	mm128_cvtph_epi8_builtin_convertvector(__v8hf a)
 {
   return __builtin_convertvector((__v8hf)a, __v8qi);
@@ -123,6 +131,11 @@  __v2qu	mm32_cvtph_epu8_builtin_convertvector(__v2hf a)
   return __builtin_convertvector((__v2hf)a, __v2qu);
 }
 
+__v4qu	mm64_cvtph_epu8_builtin_convertvector(__v4hf a)
+{
+  return __builtin_convertvector((__v4hf)a, __v4qu);
+}
+
 __v8qu	mm128_cvtph_epu8_builtin_convertvector(__v8hf a)
 {
   return __builtin_convertvector((__v8hf)a, __v8qu);